STATISTICA







STATISTICA BASIC Program Regressn.stb

{ This program will perform a complete multiple regression analysis for selected variables, and produce various diagnostic plots of the residuals.

If there are missing data, then the program will perform mean substitution or casewise deletion of missing data.

Program written, modified, or edited at StatSoft, Inc.}


randomaccess;
NoDataFileVariableNames;
	iret:=DisplayMessageBox(MB_YESNOCANCEL, 'Multiple Regression',
	     'This program will perform multiple regression for selected variables,
and produce residual plots. Do you want to include the intercept (constant)
 in the multiple regression model?');
	if iret=IDCANCEL then stop;
	if iret=IDYES then inter:=1;
	if iret=IDNO then inter:=0;

{redimension to current file size}
	redim table(ncases,6);
	redim var(nvars);
{Select variables for regression}
	if(SelectVariables2 (
	  'Select Variables',
	  1, 1, idv, i, 'Dependent:',
	  1, nvars-1, var, nidv, 'Independent:')=0) then stop;
{check for overlapping lists}
	for i:=1 to nidv do begin
	 if idv=var(i) then begin;
	    DisplayMessageBox(MB_ICONSTOP, 'Overlapping Variable Lists',
	     'The dependent variable is part of the independent variable list; cannot
perform multiple regression for these data.');
	  stop;
	 end;
	end;
{allocate memory for arrays}
	redim covars(nidv+1+inter,nidv+1+inter);
	redim ddata(ncases,nidv+1+inter);
	redim work1(ncases);
	redim work2(ncases);
	redim casenos(ncases);
{check for missing data, and replace if necessary}
	iflag:=0;
	for i:=1 to ncases do casenos(i):=i;
	for i:=1 to ncases do begin
	 for j:=1 to nidv do begin
	  if valid(data(i,var(j)))=0 then iflag:=1;
	 end;
	 if valid(data(i,idv))=0 then iflag:=1;
	 if iflag=1 then i:=ncases;
	end;
	imiss:=0;
	if iflag=1 then begin
	 iret:=DisplayButtonBox ('How to treat missing data?',
	  'Casewise deletion of MD|Substitute MD with means');
	 if iret=0 then stop;
	 imiss:=iret;
	end;
{set 1 in the first column of ddata (for intercept)}
	if inter=1 then MatrixFill (1, ddata, 1, 1, ncases, 1);
{copy indep. variables data to ddata}
	for i:=1 to nidv do
	  MatrixCopy (data, 1, var(i), ncases, 1, ddata, 1, i+inter);
{copy dep. var. data to ddata, last column}
	  MatrixCopy (data, 1, idv, ncases, 1, ddata, 1, nidv+inter+1);
{deal with missing data if necessary}
	nvalid:=ncases;
	if imiss>0 then begin
{mean substitution of values}
	 if imiss=2 then begin
	  redim means(nidv+inter+1);
	  for i:=1 to nidv+1 do begin
	   ex:=0;di:=0;nn:=0;
	   for j:=1 to ncases do begin
	    if valid(ddata(j,i+inter))=1 then begin
	     nn:=nn+1;
	     di:=ddata(j,i+inter)-ex;
	     ex:=ex+di/nn;
	    end;
	   end;
	   for j:=1 to ncases do begin
	    if valid(ddata(j,i+inter))=0 then ddata(j,i+inter):=ex;
	   end;
	  end;
	 end;
{casewise md deletion}
	 if imiss=1 then begin
	  nvalid:=0;
{compute number of (casewise) valid observations}
	  for i:=1 to ncases do begin
	   iflag:=0;
	   for j:=1 to nidv do begin
	    if valid(data(i,var(j)))=0 then iflag:=1;
	   end;
	   if valid(data(i,idv))=0 then iflag:=1;
	   if iflag=0 then nvalid:=nvalid+1;
	  end;
	  if nvalid<1 then begin
	   DisplayMessageBox(MB_ICONSTOP, 'No (Casewise) Valid Cases in File',
	    'After casewise deletion of MD cases, there are no valid cases left in the
file.');
	   stop;
	  end;
{resize arrays}
	  redim ddata(nvalid,nidv+1+inter);
	  redim table(nvalid,6);
	  redim work1(nvalid);
	  redim work2(nvalid);
	  redim casenos(nvalid);
{recopy data}
	  nvalid:=0;
	  for i:=1 to ncases do begin
	   iflag:=0;
	   for j:=1 to nidv do begin
	    if valid(data(i,var(j)))=0 then iflag:=1;
	   end;
	   if valid(data(i,idv))=0 then iflag:=1;
	   if iflag=0 then begin
	     nvalid:=nvalid+1;
	     if inter=1 then ddata(nvalid,1):=1;
	     for j:=1 to nidv do ddata(nvalid,inter+j):=data(i,var(j));
	     ddata(nvalid,nidv+inter+1):=data(i,idv);
	     casenos(nvalid):=i;
	   end;
	  end;
	 end;
	end;
	if nvalid<nidv+inter then begin
	  DisplayMessageBox(MB_ICONSTOP, 'Not Enough Valid Cases',
	 'There are not enough valid cases in the data file to perform the multiple
regression analysis.');
	 stop;
	end;

{compute sum of raw crossproducts}
	MatrixCrossProductOfDev (ddata, 0, covars);
{sweep on independent variables and intercept (if selected)}
	MatrixSweep (covars, 1, nidv+inter, 1);
{make headers, titles, etc., and prepare to make Scrollsheet of sweep matrix}
	header$:='DV: '+VarName(idv)+'|';
	line01$:='Sweep Matrix|'+header$;
	line03$:='Indep.: ';
	for i:=1 to min(nidv,5) do line03$:=line03$+VarName(var(i))+' ';
	if nidv>5 then line03$:=line03$+'...';
{call Scrollsheet     }
	shandl:=NewScrollsheet (nidv+inter+1, nidv+inter+1, covars, line01$+line03$,
	  '', '');
{set Scrollsheet rownames, column names}
	ScrollsheetSetRowNameWidth (shandl, 8);
	ScrollsheetSetColumnWidth (shandl, 8, 1);
	if inter=1 then begin
	 ScrollsheetSetRowName (shandl, 1, 'Interc.');
	 ScrollsheetSetColumnName (shandl, 1, ?Name1$, 'Interc.');
	end;
	for i:=1 to nidv do begin
	 ScrollsheetSetRowName (shandl, inter+i, VarName(var(i)));
	 ScrollsheetSetColumnName (shandl, inter+i, ?Name1$, VarName(var(i)));
	end;
	ScrollsheetSetRowName (shandl, inter+nidv+1, VarName(idv));
	ScrollsheetSetColumnName (shandl, inter+nidv+1, ?Name1$, VarName(idv));
{regression summary}
	dfe:=nvalid-inter-nidv;
	t:=VStudent (.975, dfe);
	line01$:='Regression Coefficients '+header$;
      kname2$:='Coeff | Std.Err. | t(';
	kname2$:=kname2$+Str(dfe,6,0);
	kname2$:=kname2$+') | p |-95% Cnf|+95% Cnf';
	msresid:=covars(nidv+inter+1,nidv+inter+1)/(dfe);
	MatrixExtract (ddata, 1, nidv+inter+1, nvalid, 1, work1);
	ValVariance(work1, 1, nvalid, mstotal);
      rsq:=1-(msresid*(dfe))/(mstotal*(nvalid-1));
	line01$:=line01$+'R-sqr:'+Str (rsq, 8, 6);
	line01$:=line01$+' R:'+Str(sqrt(rsq),8,6);
	rsq:=1-msresid/mstotal;
	line01$:=line01$+' adj. R-sqr:'+Str (rsq, 8, 6);
	if imiss=1 then line01$:=line01$+'|(Casewise MD deletion)';
	if imiss=2 then line01$:=line01$+'|(Mean substitution of MD)';
	if imiss=0 then line01$:=line01$+'|'+line03$;
	kname$:='';
	for i:=1 to nidv+inter do begin
	  table(i,1):=covars(nidv+inter+1,i);             {regression coefficients }
	  table(i,2):=sqrt(-msresid*covars(i,i));   {standard error of coeff.}
	  table(i,3):=table(i,1)/table(i,2);        {t-value                 }
	  table(i,4):=(1-IStudent (abs(table(i,3)), dfe))*2;  {p-value}
	  table(i,5):=table(i,1)-t*table(i,2);
	  table(i,6):=table(i,1)+t*table(i,2);
	end;
	shandl:=NewScrollsheet (nidv+inter, 6, table, line01$, kname$, kname2$);
{highlight significant factors (p<.05), set rownames}
	     ScrollsheetSetRowNameWidth (shandl, 8);
		for i:=1 to nidv+inter do begin
	      if table(i,4)<.05 then begin
	       for j:=1 to 6 do ScrollsheetSetHilite (shandl, i, j, 1);
	      end;
	 if (i=1) and (inter=1) then
	  ScrollsheetSetRowName (shandl, i, 'Interc.')
	 else
	  ScrollsheetSetRowName (shandl, i, VarName(var(i-inter)));
		end;
{Predicted and residual values}
	kname$:='Pred.|Resid.';
     	for i:=1 to nvalid do begin
	 table(i,1):=0;
	 for j:=1 to nidv+inter do
	  table(i,1):=table(i,1)+ddata(i,j)*covars(nidv+inter+1,j);
	  table(i,2):=ddata(i,nidv+inter+1)-table(i,1);
  	end;
	line01$:='Predicted and Residual Values|'+header$+line03$;
	shandl:=NewScrollsheet (nvalid, 2, table, line01$, '', kname$);
{put casenumbers/names in first column}
	ScrollsheetSetRowNameWidth (shandl, 8);
	for i:=1 to nvalid do
	  ScrollsheetSetRowName (shandl, i, CaseName(casenos(i)));

{Plot of observed versus residuals}
	line01$:='Observed vs. Residual Values|'+header$+line03$;
	kname$:='Observed Values: '+VarName(idv);
	MatrixCopy (ddata, 1,nidv+inter+1, nvalid, 1,table, 1, 1);
	MatrixCopy (table, 1, 1, nvalid, 1, work1, 1, 1);
	MatrixCopy (table, 1, 2, nvalid, 1, work2, 1, 1);
	graph:=NewGraph (SCATTERPLOT, line01$, 'Residuals',kname$, nvalid, work1, work2);
	GraphSetPlotFitting (graph, 1, FIT_LINEAR);

{Plot of case number versus residuals}
	line01$:='Case No. vs. Residual Values|'+header$+line03$;
	kname$:='Case Number';
	for i:=1 to nvalid do work1(i):=i;
	graph:=NewGraph (SCATTERPLOT, line01$, 'Residuals',kname$, nvalid, work1, work2);
	GraphSetPlotFitting (graph, 1, FIT_POLYNOMIAL);

{Normal probability plot of residuals}
	MatrixCopy(work2,1,1,nvalid,1,work1,1,1);
	VectorSort (work1, SORT_ASCENDING);
	MatrixDuplicate (work1, work2);
	VectorRank (work1, SORT_ASCENDING, RANK_MEAN);
	for i:=1 to nvalid do begin
	 work1(i):=(3*work1(i)-1)/(3*nvalid+1);
	 work1(i):=VNormal (work1(i), 0, 1);
	end;
	line01$:='Normal Probability plot of Residuals|'+header$+line03$;
	graph:=NewGraph (SCATTERPLOT, line01$,
          'z-Value','Residual', nvalid, work2, work1);
	GraphSetPlotFitting (graph, 1, FIT_LINEAR);
Back to List of Programs



[StatSoft]
2300 East 14th Street, Tulsa, OK 74104
Phone: (918) 749-1119; Fax: (918) 749-2217

[StatSoft]e-mail: info@statsoft.com

©Copyright StatSoft, Inc., 1984-2004.
StatSoft, StatSoft logo, STATISTICA, SEWSS, SEDAS, Data Miner, SEPATH and GTrees are trademarks of StatSoft, Inc.