STATISTICA







STATISTICA BASIC Program CorrCI(Casewise).stb

{ This STATISTICA BASIC program will provide the approximate confidence interval for the correlation coefficients of a list of variables. This is done using a normal approximation. To demonstrate the manner in which the confidence intervals are produced, a graph can be drawn. This will show the full range of possible correlation coefficients and the confidence intervals for those values at the given percentage and number of observations. Missing data can be handled by casewise deletion in this program. For pairwise deletion, see the alternative BASIC program CorrCI(Pairwise).stb.

Program written, modified, or edited at StatSoft, Inc.}


RandomAccess;
NoDataFileVariableNames;

ReDim Selection (NVars);
ReDim TempColumn1 (NCases);

{make sure that the data can be analyzed}

if NCases > 32767 then begin
	DisplayMessageBox (MB_OK, 'Data Too Large',
	'This BASIC program can analyze up to 32767 cases.  The data has too many cases.');
	stop;
end;

{obtain variable list}

If 0 =
SelectVariables1 ('Select Variables for Correlation',
	2, NVars, Selection, NSel, 'Which Variables?')
then STOP;

{obtain the percentage for the confidence interval}

PctCI := 95;

If 0 =
DisplayNumericInputBox ('Confidence Interval', 'Select Percent', PctCI)
then STOP;

{find the total number of combinations that will be calculated}

NCalcs := NSel * (NSel - 1) / 2;

ReDim ResultVector(NCalcs,6);
ReDim DisplayVector(NCalcs,6);
ReDim NameVector(NCalcs,2);
ReDim NewData (NCases, NSel);
ReDim VarFlag (NSel);
ReDim MissFlag (NSel);
ReDim TempRow (NSel);
ReDim CorrResult (NSel,NSel);

{get new data matrix from selected variables}

For i := 1 to NSel do begin
	MatrixGetColumn (Data, Selection(i), TempColumn1);
	MatrixSetColumn (NewData, i, TempColumn1);
end;

{check for missing data}

For i:= 1 to NSel do begin
	MatrixGetColumn (NewData, i, TempColumn1);
	if NCases - ValCount (TempColumn1, 1, 0) > 0 then MissFlag(i) := 1 else MissFlag(i) := 0;
end;

{deal with missing data}

missingcode := 0;

If MatrixAnyNonZero (MissFlag) > 0 then begin
	missingcode := 1;
	if IDCANCEL = DisplayMessageBox (MB_OKCANCEL, 'Missing Data Found',
		'Chose OK for Casewise Deletion or Cancel to End') then STOP
	else begin
		ReDim NewData2 (NCases, NSel);
		casecounter := 1;
		for i := 1 to NCases do begin
			MatrixGetRow (NewData, i, TempRow);
			If NSel - ValCount (TempRow, 1, 0) = 0 then begin
				MatrixSetRow (NewData2, casecounter, TempRow);
				casecounter := casecounter + 1;
			end;
		end;
		ReDim NewData ((casecounter-1), NSel);
		MatrixCopy (NewData2, 1, 1, 0, 0, NewData, 1, 1);
	end;
end;

{if there are missing data, change the number of cases to be dealt with}

if missingcode = 1 then NewNCases := casecounter - 1 else NewNCases := NCases;

if NewNCases < 3 then begin
	DisplayMessageBox (MB_OK, 'Error', 'Less Than Three Valid Cases Remain.');
	STOP;
end;

{check for variables with no variance}

ReDim TempColumn1 (NewNCases);
ReDim VarFlag (NSel);

For i:= 1 to NSel do begin
	MatrixGetColumn (NewData, i, TempColumn1);
	ValVariance (TempColumn1, 1, 0, varresult);
	if varresult = 0 then VarFlag(i) := 1 else VarFlag(i) := 0;
end;

{if variables have no variance, remove them from calculations}

If MatrixAnyNonZero (VarFlag) > 0 then begin
	ReDim NewData2 (NewNCases, NSel);
	varcounter := 1;
	for i := 1 to NSel do
		if VarFlag(i) = 0 then begin
			MatrixGetColumn (NewData, i, TempColumn1);
			MatrixSetColumn (NewData2, varcounter, TempColumn1);
			varcounter := varcounter + 1;
		end;

	ReDim CorrResult ((varcounter-1), (varcounter-1));
	ReDim NewData (NewNCases, (varcounter-1));
	MatrixCopy (NewData2, 1, 1, 0, 0, NewData, 1, 1);

end;

{get number of valid variables remaining}

ValSum (VarFlag, 1, 0, NNoVar);
NewNSel := NSel - NNoVar;

{get number of calculations for valid variables}

NewNCalcs := NewNSel * (NewNSel - 1) / 2;

{if only one variable has variance then no correlations can be computed}

if NewNSel < 2 then begin
	DisplayMessageBox (MB_OK, 'Error', 'Less Than Two Valid Variables Remain.');
	STOP;
end;

{compute correlations}

MatrixCorrelations (NewData, 1, CorrResult);

{now extract results and compute confidence interval / p-value}

ReDim TempRow (NewNSel);

counter := 1;
for i := 1 to (NewNSel - 1) do begin
	for j := (i + 1) to NewNSel do begin

	ResultVector(counter,1) := NewNCases;
	ResultVector(counter,2) := CorrResult(i,j);

	zscore := Log ((1 + CorrResult(i,j)) / (1 - CorrResult(i,j))) / 2;
	minus := CorrResult(i,j) / (2 * (ResultVector(counter,1) - 1));
	range := (VNormal ((1 - ((100 - PctCI)/200)), 0, 1)) / (Sqrt (ResultVector(counter,1) - 3));

	transLCL := zscore - minus - range;
	transUCL := zscore - minus + range;

	if CorrResult (i,j) > -.999999999999999 then begin
		ResultVector(counter,3) := (Exp (2 * transLCL) - 1) / (Exp (2 * transLCL) + 1);
		ResultVector(counter,4) := (Exp (2 * transUCL) - 1) / (Exp (2 * transUCL) + 1);
	end else begin
		ResultVector(counter,3) := 1/0;
		ResultVector(counter,4) := 1/0;
	end;

	ResultVector(counter,5) :=
		CorrResult(i,j) * Sqrt (NewNCases - 2) / (Sqrt (1 - (CorrResult(i,j)*CorrResult(i,j))));
	ResultVector(counter,6) := 2 * (1 - IStudent (abs(ResultVector(counter,5)), (NewNCases - 2)));

	counter := counter + 1;

	end;
end;

displaycounter := 1;
resultcounter := 1;

{tidy results for display}

for i := 1 to (NSel-1) do
	for j := (i+1) to NSel do begin
		NameVector (displaycounter, 1) := i;
		NameVector (displaycounter, 2) := j;
		if (VarFlag(i) = 1) or (VarFlag(j) = 1) then begin
			DisplayVector (displaycounter, 1) := 1/0;
			DisplayVector (displaycounter, 2) := 1/0;
			DisplayVector (displaycounter, 3) := 1/0;
			DisplayVector (displaycounter, 4) := 1/0;
			DisplayVector (displaycounter, 5) := 1/0;
			DisplayVector (displaycounter, 6) := 1/0;
		end
		else begin
			DisplayVector (displaycounter, 1) := ResultVector (resultcounter, 1);
			DisplayVector (displaycounter, 2) := ResultVector (resultcounter, 2);
			DisplayVector (displaycounter, 3) := ResultVector (resultcounter, 3);
			DisplayVector (displaycounter, 4) := ResultVector (resultcounter, 4);
			DisplayVector (displaycounter, 5) := ResultVector (resultcounter, 5);
			DisplayVector (displaycounter, 6) := ResultVector (resultcounter, 6);
			resultcounter := resultcounter + 1;
		end;
	displaycounter := displaycounter + 1;
	end;

{make the Scrollsheet}

if missingcode = 0 then TopTitle$ := 'Correlations and CI|'
	else TopTitle$ := 'Correlations and CI (Casewise Deletion)|';

TopTitle$ := TopTitle$ + 'Highlighted Correlations do not include|Zero in the '
	+ Str (PctCI, 2, 0) + '% Confidence Interval';

display := NewScrollsheet (NCalcs, 6, DisplayVector, TopTitle$, ?RowNames$,
	 'N|Corr.|-' + Str (PctCI, 2, 0) + '% CI|+' + Str (PctCI, 2, 0) + '% CI|t-score| ');

{format the Scrollsheet}

ScrollsheetSetRowNameWidth (display, 18);
ScrollsheetSetColumnFormat (display, 1, SCF_INTEGER, 5);
ScrollsheetSetColumnName (display, 6, '2-sided', 'p-value');

{make row labels and highlighting where appropriate}

for i := 1 to NCalcs do begin
	NoLetters := Len (VarName (Selection(NameVector(i,1))));
	if NoLetters = 8 then ThisCaseName$ := VarName (Selection(NameVector(i,1))) + '  '
		+ VarName (Selection(NameVector(i,2)))
	else begin
		ThisCaseName$ := '';
		for j := NoLetters to 7 do ThisCaseName$ := ThisCaseName$ + ' ';
		ThisCaseName$ := ThisCaseName$ + VarName (Selection(NameVector(i,1))) + '  '
		+ VarName (Selection(NameVector(i,2)));
	end;
	ScrollsheetSetRowName (display, i, ThisCaseName$);
	ScrollsheetGetValue (display, i, 2, val1);
	if (val1 = 1) or (val1 < -.9999999999999) then ScrollsheetSetHilite (display, i, 2, 1);
	ScrollsheetGetValue (display, i, 3, val2);
	if val2 > 0 then ScrollsheetSetHilite (display, i, 2, 1);
	ScrollsheetGetValue (display, i, 4, val3);
	if val3 < 0 then ScrollsheetSetHilite (display, i, 2, 1);
	ScrollsheetGetValue (display, i, 6, val4);
	if val4 < (1 - (PctCI / 100)) then ScrollsheetSetHilite (display, i, 6, 1);
end;


{create a graph to show the bulge in the confidence limits}

ReDim RValue (251);
ReDim NewLRValue (251);
ReDim NewRValue (251);
ReDim NewURValue (251);


for i := 0 to 250 do begin

	r := i/125 - 1;

	z := Log((1+r)/(1-r))/2;
	minus := r/(2*(NewNCases-1));
	u := VNormal ((1-((100-PctCI)/200)), 0, 1);
	range := u/(Sqrt (NewNCases-3));


{Z transformed range}
	UCL := z-minus+range;
	LCL := z-minus-range;

{transform back}
	rUCL := (Exp(2*UCL)-1)/(Exp(2*UCL)+1);
	rLCL := (Exp(2*LCL)-1)/(Exp(2*LCL)+1);

	RValue (i+1) := r;
	NewLRValue (i+1) := rLCL;
	NewRValue (i+1) := r;
	NewURValue (i+1) := rUCL;

end;

CIgraph := NewGraph (LINEPLOT, 'Display of Confidence Limit Ranges|'
	+ Str (NewNCases, 6, 0) + ' Cases at ' + Str (PctCI, 4, 1) + '% Confidence Interval',
	?Title$, ?Title$, 251, RValue, NewRValue);

GraphAddPlot (CIgraph, LINEPLOT, ?Name$, 251, RValue, NewLRValue);
GraphAddPlot (CIgraph, LINEPLOT, ?Name$, 251, RValue, NewURValue);

GraphSetScaling (CIgraph, AX_X, SCALING_MANUAL_0, -1, 1, .2);
GraphSetScaling (CIgraph, AX_Y, SCALING_MANUAL_0, -1, 1, .2);

GraphSetPlotLineStyle (CIgraph, 1, ON, L_SOLID, ?Size, RED);
GraphSetPlotLineStyle (CIgraph, 2, ON, L_SHORT_DASH, ?Size, BLACK);
GraphSetPlotLineStyle (CIgraph, 3, ON, L_SHORT_DASH, ?Size, BLACK);

Back to List of Programs



[StatSoft]
2300 East 14th Street, Tulsa, OK 74104
Phone: (918) 749-1119; Fax: (918) 749-2217

[StatSoft]e-mail: info@statsoft.com

©Copyright StatSoft, Inc., 1984-2004.
StatSoft, StatSoft logo, STATISTICA, SEWSS, SEDAS, Data Miner, SEPATH and GTrees are trademarks of StatSoft, Inc.