The maximum number of variables is 10; to increase program limits change the respective constant MAXV at the beginning of the program.
Program written, modified, or edited at StatSoft, Inc.}
randomaccess;
NoDataFileVariableNames;
{ max number of variables=10 }
maxc:=ncases;
maxv:=10; {max is 10 variables; change if necessary}
{ allocate memory for arrays}
redim var(maxv);
{ tolerance check against 0}
delta:=1.e-10;
{intro; }
iret:=DisplayMessageBox (MB_YESNOCANCEL,
'Box-Tidwell Transformation',
'This program will compute the parameters for the Box-Tidwell power
transformations: y=b0+b1*x1^l1+...+bk*xk^lk; see Gunst, Mason, Hess (1989, p. 561). Do
you want to include the intercept in the model?');
if iret=IDCANCEL then stop;
if iret=IDYES then inter:=1;
if iret=IDNO then inter:=0;
{ select variables}
if SelectVariables2 ('Select Variables',
1, 1, dv, i,'Dependent variable:',
1, maxv-1-1, var, n, 'Independent variables:')=0 then stop;
{check for missing data }
iflag:=0;
for i:=1 to n do begin
j:=ValCount (data(1,var(i)), 1, ncases);
if j<ncases then iflag:=1;
end;
j:=ValCount (data(1,dv), 1, ncases);
if j<ncases then iflag:=1;
if iflag=1 then begin
DisplayMessageBox(MB_ICONSTOP, 'Missing Data',
'Some cases have missing data; this program requires that all cases are
complete; use the missing data replacement facilities to remove missing data values.');
stop;
end;
for i:=1 to n do begin
ValMin (data(1,var(i)), 1, ncases, xmin);
if xmin<=0 then begin
DisplayMessageBox(MB_ICONSTOP, 'Invalid X Values',
'Some x<=0; all independent (predictor) variable values must be greater than
0.');
stop;
end;
end;
{allocate memory for necessary arrays}
redim b0(n);
redim x(ncases,n+inter+1);
redim xx(n+inter+1,n+inter+1);
{for residual plots}
redim yhat0(ncases);
redim error0(ncases);
{estimate parameters for standard untransformed model}
{set up matrices; fill first column with 1 if intercept is requested}
if inter=1 then MatrixFill (1, x, 1, 1, ncases, 1);
{copy data for predictors}
for i:=1 to n do MatrixCopy (data, 1, var(i), ncases, 1, x, 1, i+inter);
{copy data for predictors}
MatrixCopy (data, 1, dv, ncases, 1, x, 1, n+inter+1);
{compute parameters}
MatrixCrossProductOfDev (x, 0, xx);
MatrixSweep (xx, 1, n+inter, 1);
for i:=1 to n do begin
b0(i):=xx(n+inter+1,i+inter);
if abs(b0(i))<delta then begin
DisplayMessageBox(MB_ICONSTOP, 'Invalid Coefficients',
'At least one coefficient for the standard regression model is (almost) equal
to zero; cannot compute lambda values.');
stop;
end;
end;
{compute yats and errors}
for i:=1 to ncases do begin
for j:=1 to n+inter do yhat0(i):=yhat0(i)+xx(n+inter+1,j)*x(i,j);
error0(i):=x(i,n+inter+1)-yhat0(i);
end;
{compute parameters for expanded model; for each term bi*xi add a term: bi2*ln(xi) }
redim bl(n);
redim x(ncases,n*2+inter+1);
redim xx(n*2+inter+1,n*2+inter+1);
{set up matrices; fill first column with 1 if intercept is requested}
if inter=1 then MatrixFill (1, x, 1, 1, ncases, 1);
{copy data for predictors}
for i:=1 to n do MatrixCopy (data, 1, var(i), ncases, 1, x, 1, i+inter);
{copy data for dependent}
MatrixCopy (data, 1, dv, ncases, 1, x, 1, n*2+inter+1);
{add transformed terms}
for i:=1 to n do begin
for j:=1 to ncases do
x(j,inter+n+i):=x(j,inter+i)*log(x(j,inter+i));
end;
{compute parameters}
MatrixCrossProductOfDev (x, 0, xx);
MatrixSweep (xx, 1, n*2+inter, 1);
bl(1):=1;
for i:=1 to n do bl(i):=xx(n*2+inter+1,inter+n+i);
{ display lambdas=1+bl(i)/b0(i), accept userdefined lambdas }
redim lambda(n);
kname$:='';
for i:=1 to n do begin
kname$:=kname$+varname(var(i))+'|';
lambda(i):=(1+bl(i)/b0(i));
end;
if DisplayNumericInputBox ('Estimates for Lambda',
kname$, lambda)=0 then stop;
{compute parameters for final model; transform x(i)'=x(i)^lambda(i) }
redim x(ncases,n+inter+1);
redim xx(n+inter+1,n+inter+1);
redim yhatl(ncases);
redim errorl(ncases);
{set up matrices; fill first column with 1 if intercept is requested}
if inter=1 then MatrixFill (1, x, 1, 1, ncases, 1);
{copy and transform data for predictors}
for i:=1 to n do begin
for j:=1 to ncases do begin
if abs(lambda(i))>delta then
x(j,inter+i):=data(j,var(i))^lambda(i)
else
x(j,inter+i):=log(data(j,var(i)));
end;
end;
{copy data for dependent variable}
MatrixCopy (data, 1, dv, ncases, 1, x, 1, n+inter+1);
{compute parameters}
MatrixCrossProductOfDev (x, 0, xx);
MatrixSweep (xx, 1, n+inter, 1);
{compute yats and errors}
for i:=1 to ncases do begin
for j:=1 to n+inter do yhatl(i):=yhatl(i)+xx(n+inter+1,j)*x(i,j);
errorl(i):=x(i,n+inter+1)-yhatl(i);
end;
{final results}
redim table(n+inter,9);
{make headers, titles, etc., and prepare to make Scrollsheet of sweep matrix}
kname$:='';
if inter=1 then kname$:='Interc|';
for i:=1 to n do kname$:=kname$+VarName(var(i))+'|'; {make column/row names}
kname$:=kname$+VarName(dv);
header$:='DV: '+VarName(dv)+'|';
line01$:='Final Sweep Matrix|'+header$; {make header}
line03$:='Indep.: ';
for i:=1 to min(n,5) do line03$:=line03$+VarName(var(i))+' ';
if n>5 then line03$:=line03$+'...';
{call Scrollsheet }
NewScrollsheet (n+inter+1, n+inter+1, xx, line01$+line03$,
kname$, kname$);
{regression summary}
dfe:=ncases-inter-n;
t:=VStudent (.975, dfe);
line01$:='Regression Results;'+header$;
kname2$:='Coeff | Std.Err. | t(';
kname2$:=kname2$+Str(dfe,6,0);
kname2$:=kname2$+') | p |-95% Cnf|+95% Cnf|lambda|b(lin.)|b(added)';
msresid:=xx(n+inter+1,n+inter+1)/dfe;
ValVariance(data(1,dv), 1, ncases, mstotal);
rsq:=1-(msresid*(dfe))/(mstotal*(ncases-1));
line01$:=line01$+'R-sqr:'+Str (rsq, 8, 6);
line01$:=line01$+' R:'+Str(sqrt(rsq),8,6);
rsq:=1-msresid/mstotal;
line01$:=line01$+' adj. R-sqr:'+Str (rsq, 8, 6);
kname$:='';
for i:=1 to n+inter do begin;
table(i,1):=xx(n+inter+1,i); {regression coefficients }
table(i,2):=sqrt(-msresid*xx(i,i)); {standard error of coeff.}
table(i,3):=table(i,1)/table(i,2); {t-value }
table(i,4):=(1-IStudent (abs(table(i,3)), dfe))*2; {p-value}
table(i,5):=table(i,1)-t*table(i,2);
table(i,6):=table(i,1)+t*table(i,2);
if (i=1) and (inter=1) then
kname$:='Interc.|'
else
kname$:=kname$+VarName(var(i-inter))+' |'; {row label}
if i>inter then begin
table(i,7):=lambda(i-inter);
table(i,8):=b0(i-inter);
table(i,9):=bl(i-inter);
end;
end;
shandl:=NewScrollsheet (n+inter, 9, table, line01$, kname$, kname2$);
{highlight significant factors (p<.05)}
for i:=1 to n+inter do begin
if table(i,4)<.05 then begin
for j:=1 to 9 do ScrollsheetSetHilite (shandl, i, j, 1);
end;
end;
{set blanks for intercept}
if inter=1 then begin
for i:=7 to 9 do
ScrollsheetSetTextValue (shandl, 1, i, ' ');
end;
{residual plots}
{first make blank graph }
line02$:='Dependent variable: '+varname(dv);
line02$:=line02$+'|Indep.:';
for i:=1 to min(n,5) do
line02$:=line02$+' '+varname(var(i));
if n>5 then line02$:=line02$+'...';
graph:=NewGraph (IGNOREDPLOT, 'Observed vs. Residual Values|'+line02$,
?Left_Title$, ?Bottom_Title$,
0, yhatl, yhatl);
graph1:=NewGraph (SCATTERPLOT, 'Lambda=1 (No Transformation)', 'Residuals',
'Observed Values', ncases, data(1,dv), error0);
graph2:=NewGraph (SCATTERPLOT, 'Transformed Predictors',
'Residuals', 'Observed Values', ncases,
data(1,dv), errorl);
GraphSetPlotFitting (Graph1, 1, fit_linear);
GraphSetPlotFitting (Graph2, 1, fit_linear);
GraphSetDefaultFont (Graph1, ?FontName$, 14, ?Color);
GraphSetDefaultFont (Graph2, ?FontName$, 14, ?Color);
GraphEmbedGraph (Graph, graph1, false, false, ?MappingMode,
0, 0, 50, 90, false);
GraphEmbedGraph (Graph, graph2, false, false, ?MappingMode,
50, 0, 100, 90, false);
DisplayGraph (Graph);
{Normal probability plot of residuals}
{first make blank graph}
redim work1(ncases);
graph:=NewGraph (IGNOREDPLOT,
'Normal Probability Plots of Residuals|'+line02$,
?Left_Title$, ?Bottom_Title$,
0, yhatl, yhatl);
VectorSort (error0, SORT_ASCENDING);
MatrixDuplicate (error0, work1);
VectorRank (work1, SORT_ASCENDING, RANK_MEAN);
for i:=1 to ncases do begin
work1(i):=(3*work1(i)-1)/(3*ncases+1);
work1(i):=VNormal(work1(i), 0, 1);
end;
line01$:='Lambda=1 (No Transformation)';
graph1:=NewGraph (SCATTERPLOT, line01$,
'z-Value','Residual', ncases, error0, work1);
GraphSetPlotFitting (graph1, 1, FIT_LINEAR);
VectorSort (errorl, SORT_ASCENDING);
MatrixDuplicate (errorl, work1);
VectorRank (work1, SORT_ASCENDING, RANK_MEAN);
for i:=1 to ncases do begin
work1(i):=(3*work1(i)-1)/(3*ncases+1);
work1(i):=VNormal (work1(i), 0, 1);
end;
line01$:='Transformed Predictors';
graph2:=NewGraph (SCATTERPLOT, line01$,
'z-Value','Residual', ncases, errorl, work1);
GraphSetPlotFitting (graph2, 1, FIT_LINEAR);
GraphSetDefaultFont (graph1, ?FontName$, 14, ?Color);
GraphSetDefaultFont (graph2, ?FontName$, 14, ?Color);
GraphEmbedGraph (Graph, graph1, false, false, ?MappingMode,
0, 0, 50, 90, false);
GraphEmbedGraph (Graph, graph2, false, false, ?MappingMode,
50, 0, 100, 90, false);
DisplayGraph (Graph);
| Back to List of Programs |
![[StatSoft]](../../../images/sssmall.gif)
2300 East 14th Street, Tulsa, OK 74104
Phone: (918) 749-1119; Fax: (918) 749-2217
e-mail: info@statsoft.com
©Copyright StatSoft, Inc., 1984-2004.
StatSoft, StatSoft logo, STATISTICA, SEWSS, SEDAS, Data Miner, SEPATH and GTrees are trademarks of StatSoft, Inc.