STATISTICA







STATISTICA BASIC Program BoxTid.stb

{ This program will compute the Box-Tidwell transformation for the predictors in a linear regression.
[Results Screenshot] The maximum number of variables is 10; to increase program limits change the respective constant MAXV at the beginning of the program.

For additional information, see Mason, R., L., Gunst, R., F., & Hess. J., L. (1989). Statistical design and analysis of experiments with applications to engineering and science. New York: Wiley (page 561).

The program will compute the l and b parameters for the equation:

   y=b0+b1*x1^l1+...+bk*xk^lk

(note that the intercept b0 is optional) The program will estimate the power parameters by li=ci/ai, where ai denotes the estimated regression coefficients for the standard linear regression model, and bi denotes the estimated coefficients for added terms xi*ln(xi).

If li is equal to 0, then the transformation is xi'=log(xi)

Program written, modified, or edited at StatSoft, Inc.}


randomaccess;
NoDataFileVariableNames;
{ max number of variables=10 }


	maxc:=ncases;
	maxv:=10;	    {max is 10 variables; change if necessary}
{ allocate memory for arrays}
	redim var(maxv);

{ tolerance check against 0}
	delta:=1.e-10;
{intro; }
	iret:=DisplayMessageBox (MB_YESNOCANCEL,
	  'Box-Tidwell Transformation',
	  'This program will compute the parameters for the Box-Tidwell power
transformations: y=b0+b1*x1^l1+...+bk*xk^lk; see Gunst, Mason, Hess (1989, p. 561).  Do
you want to include the intercept in the model?');
	 if iret=IDCANCEL then stop;
	 if iret=IDYES    then inter:=1;
	 if iret=IDNO     then inter:=0;

{ select variables}
	if SelectVariables2 ('Select Variables',
	 1, 1, dv, i,'Dependent variable:',
	 1, maxv-1-1, var, n, 'Independent variables:')=0 then stop;

{check for missing data }
	iflag:=0;
	for i:=1 to n do begin
	  j:=ValCount (data(1,var(i)), 1, ncases);
	  if j<ncases then iflag:=1;
	end;
	j:=ValCount (data(1,dv), 1, ncases);
	if j<ncases then iflag:=1;
	if iflag=1 then begin
	    DisplayMessageBox(MB_ICONSTOP, 'Missing Data',
	     'Some cases have missing data; this program requires that all cases are
complete; use the missing data replacement facilities to remove missing data values.');
	   stop;
	end;
	for i:=1 to n do begin
	 ValMin (data(1,var(i)), 1, ncases, xmin);
	 if xmin<=0 then begin
	    DisplayMessageBox(MB_ICONSTOP, 'Invalid X Values',
	     'Some x<=0; all independent (predictor) variable values must be greater than
0.');
	   stop;
	 end;
	end;

{allocate memory for necessary arrays}
	redim b0(n);
	redim x(ncases,n+inter+1);
	redim xx(n+inter+1,n+inter+1);
{for residual plots}
	redim yhat0(ncases);
	redim error0(ncases);

{estimate parameters for standard untransformed model}
{set up matrices; fill first column with 1 if intercept is requested}
	if inter=1 then MatrixFill (1, x, 1, 1, ncases, 1);
{copy	data for predictors}
	for i:=1 to n do MatrixCopy (data, 1, var(i), ncases, 1, x, 1, i+inter);
{copy data for predictors}
	MatrixCopy (data, 1, dv, ncases, 1, x, 1, n+inter+1);
{compute parameters}
	MatrixCrossProductOfDev (x, 0, xx);
	MatrixSweep (xx, 1, n+inter, 1);
	for i:=1 to n do begin
	 b0(i):=xx(n+inter+1,i+inter);
	 if abs(b0(i))<delta then begin
	  DisplayMessageBox(MB_ICONSTOP, 'Invalid Coefficients',
	     'At least one coefficient for the standard regression model is (almost) equal
to zero; cannot compute lambda values.');
	   stop;
	 end;
	end;
{compute yats and errors}
	for i:=1 to ncases do begin
	 for j:=1 to n+inter do yhat0(i):=yhat0(i)+xx(n+inter+1,j)*x(i,j);
	 error0(i):=x(i,n+inter+1)-yhat0(i);
	end;

{compute parameters for expanded model; for each term bi*xi add a term: bi2*ln(xi) }
	redim bl(n);
	redim x(ncases,n*2+inter+1);
	redim xx(n*2+inter+1,n*2+inter+1);
{set up matrices; fill first column with 1 if intercept is requested}
	if inter=1 then MatrixFill (1, x, 1, 1, ncases, 1);
{copy	data for predictors}
	for i:=1 to n do MatrixCopy (data, 1, var(i), ncases, 1, x, 1, i+inter);
{copy data for dependent}
	MatrixCopy (data, 1, dv, ncases, 1, x, 1, n*2+inter+1);
{add transformed terms}
	for i:=1 to n do begin
	 for j:=1 to ncases do
	  x(j,inter+n+i):=x(j,inter+i)*log(x(j,inter+i));
	end;
{compute parameters}
	MatrixCrossProductOfDev (x, 0, xx);
	MatrixSweep (xx, 1, n*2+inter, 1);

	bl(1):=1;
	for i:=1 to n do bl(i):=xx(n*2+inter+1,inter+n+i);

{ display lambdas=1+bl(i)/b0(i), accept userdefined lambdas }
	redim lambda(n);
	kname$:='';
	for i:=1 to n do begin
	 kname$:=kname$+varname(var(i))+'|';
	 lambda(i):=(1+bl(i)/b0(i));
	end;
	if DisplayNumericInputBox ('Estimates for Lambda',
		kname$, lambda)=0 then stop;

{compute parameters for final model; transform x(i)'=x(i)^lambda(i) }
	redim x(ncases,n+inter+1);
	redim xx(n+inter+1,n+inter+1);
	redim yhatl(ncases);
	redim errorl(ncases);

{set up matrices; fill first column with 1 if intercept is requested}
	if inter=1 then MatrixFill (1, x, 1, 1, ncases, 1);
{copy	and transform data for predictors}
	for i:=1 to n do begin
	 for j:=1 to ncases do begin
	  if abs(lambda(i))>delta then
	    x(j,inter+i):=data(j,var(i))^lambda(i)
	  else
	    x(j,inter+i):=log(data(j,var(i)));
	 end;
	end;
{copy data for dependent variable}
	MatrixCopy (data, 1, dv, ncases, 1, x, 1, n+inter+1);
{compute parameters}
	MatrixCrossProductOfDev (x, 0, xx);
	MatrixSweep (xx, 1, n+inter, 1);
{compute yats and errors}
	for i:=1 to ncases do begin
	 for j:=1 to n+inter do yhatl(i):=yhatl(i)+xx(n+inter+1,j)*x(i,j);
	 errorl(i):=x(i,n+inter+1)-yhatl(i);
	end;


{final results}
	redim table(n+inter,9);
{make headers, titles, etc., and prepare to make Scrollsheet of sweep matrix}
	kname$:='';
	if inter=1 then kname$:='Interc|';
	for i:=1 to n do kname$:=kname$+VarName(var(i))+'|'; {make column/row names}
	kname$:=kname$+VarName(dv);
	header$:='DV: '+VarName(dv)+'|';
	line01$:='Final Sweep Matrix|'+header$;				    {make header}
	line03$:='Indep.: ';
	for i:=1 to min(n,5) do line03$:=line03$+VarName(var(i))+' ';
	if n>5 then line03$:=line03$+'...';
{call Scrollsheet     }
	NewScrollsheet (n+inter+1, n+inter+1, xx, line01$+line03$,
	  kname$, kname$);
{regression summary}
	dfe:=ncases-inter-n;
	t:=VStudent (.975, dfe);
	line01$:='Regression Results;'+header$;
      kname2$:='Coeff | Std.Err. | t(';
	kname2$:=kname2$+Str(dfe,6,0);
	kname2$:=kname2$+') | p |-95% Cnf|+95% Cnf|lambda|b(lin.)|b(added)';
	msresid:=xx(n+inter+1,n+inter+1)/dfe;
	ValVariance(data(1,dv), 1, ncases, mstotal);
      rsq:=1-(msresid*(dfe))/(mstotal*(ncases-1));
	line01$:=line01$+'R-sqr:'+Str (rsq, 8, 6);
	line01$:=line01$+' R:'+Str(sqrt(rsq),8,6);
	rsq:=1-msresid/mstotal;
	line01$:=line01$+' adj. R-sqr:'+Str (rsq, 8, 6);

	kname$:='';
	for i:=1 to n+inter do begin;
	  table(i,1):=xx(n+inter+1,i);           {regression coefficients }
	  table(i,2):=sqrt(-msresid*xx(i,i));       {standard error of coeff.}
	  table(i,3):=table(i,1)/table(i,2);        {t-value                 }
	  table(i,4):=(1-IStudent (abs(table(i,3)), dfe))*2;  {p-value}
	  table(i,5):=table(i,1)-t*table(i,2);
	  table(i,6):=table(i,1)+t*table(i,2);
	  if (i=1) and (inter=1) then
 	    kname$:='Interc.|'
	  else
	    kname$:=kname$+VarName(var(i-inter))+' |';   {row label}
	  if i>inter then begin
	   table(i,7):=lambda(i-inter);
	   table(i,8):=b0(i-inter);
	   table(i,9):=bl(i-inter);
	  end;
	end;
	shandl:=NewScrollsheet (n+inter, 9, table, line01$, kname$, kname2$);
{highlight significant factors (p<.05)}
		for i:=1 to n+inter do begin
	      if table(i,4)<.05 then begin
	       for j:=1 to 9 do ScrollsheetSetHilite (shandl, i, j, 1);
	      end;
		end;
{set blanks for intercept}
	     if inter=1 then begin
	       for i:=7 to 9 do
			ScrollsheetSetTextValue (shandl, 1, i, ' ');
	     end;

{residual plots}
{first make blank graph }
	line02$:='Dependent variable: '+varname(dv);
	line02$:=line02$+'|Indep.:';
	for i:=1 to min(n,5) do
	 line02$:=line02$+' '+varname(var(i));
	if n>5 then line02$:=line02$+'...';
	graph:=NewGraph (IGNOREDPLOT, 'Observed vs. Residual Values|'+line02$,
	  ?Left_Title$, ?Bottom_Title$,
  	   0, yhatl, yhatl);
	graph1:=NewGraph (SCATTERPLOT, 'Lambda=1 (No Transformation)', 'Residuals',
         'Observed Values', ncases, data(1,dv), error0);
	graph2:=NewGraph (SCATTERPLOT, 'Transformed Predictors',
	  'Residuals', 'Observed Values', ncases,
	   data(1,dv), errorl);
	GraphSetPlotFitting (Graph1, 1, fit_linear);
	GraphSetPlotFitting (Graph2, 1, fit_linear);
	GraphSetDefaultFont (Graph1, ?FontName$, 14, ?Color);
	GraphSetDefaultFont (Graph2, ?FontName$, 14, ?Color);
	GraphEmbedGraph (Graph, graph1, false, false, ?MappingMode,
	  0, 0, 50, 90, false);
	GraphEmbedGraph (Graph, graph2, false, false, ?MappingMode,
	  50, 0, 100, 90, false);
	DisplayGraph (Graph);

{Normal probability plot of residuals}
{first make blank graph}
	redim work1(ncases);
	graph:=NewGraph (IGNOREDPLOT,
	  'Normal Probability Plots of Residuals|'+line02$,
	  ?Left_Title$, ?Bottom_Title$,
	    0, yhatl, yhatl);
	VectorSort (error0, SORT_ASCENDING);
	MatrixDuplicate (error0, work1);
	VectorRank (work1, SORT_ASCENDING, RANK_MEAN);
	for i:=1 to ncases do begin
	 work1(i):=(3*work1(i)-1)/(3*ncases+1);
	 work1(i):=VNormal(work1(i), 0, 1);
	end;

      line01$:='Lambda=1 (No Transformation)';
	graph1:=NewGraph (SCATTERPLOT, line01$,
          'z-Value','Residual', ncases, error0, work1);
	GraphSetPlotFitting (graph1, 1, FIT_LINEAR);
	VectorSort (errorl, SORT_ASCENDING);
	MatrixDuplicate (errorl, work1);
	VectorRank (work1, SORT_ASCENDING, RANK_MEAN);
	for i:=1 to ncases do begin
	 work1(i):=(3*work1(i)-1)/(3*ncases+1);
	 work1(i):=VNormal (work1(i), 0, 1);
	end;
	line01$:='Transformed Predictors';
	graph2:=NewGraph (SCATTERPLOT, line01$,
          'z-Value','Residual', ncases, errorl, work1);
	GraphSetPlotFitting (graph2, 1, FIT_LINEAR);
	GraphSetDefaultFont (graph1, ?FontName$, 14, ?Color);
	GraphSetDefaultFont (graph2, ?FontName$, 14, ?Color);
	GraphEmbedGraph (Graph, graph1, false, false, ?MappingMode,
	  0, 0, 50, 90, false);
	GraphEmbedGraph (Graph, graph2, false, false, ?MappingMode,
	  50, 0, 100, 90, false);

	DisplayGraph (Graph);
Back to List of Programs



[StatSoft]
2300 East 14th Street, Tulsa, OK 74104
Phone: (918) 749-1119; Fax: (918) 749-2217

[StatSoft]e-mail: info@statsoft.com

©Copyright StatSoft, Inc., 1984-2004.
StatSoft, StatSoft logo, STATISTICA, SEWSS, SEDAS, Data Miner, SEPATH and GTrees are trademarks of StatSoft, Inc.