libname sqlbook 'C:\DM-SQL Book\data\final\saslib' ;
%macro buildcolumns;
%let counter=0;
proc sql;
select count(*)
into :numvars
from columns
where nextlevel = '';
%do %WHILE(&numvars > 1);
%let counter=%eval(&counter+1);
/* these values are more useful in a macro variable */
%LET vars=;
data _null_;
set columns;
where nextlevel = '';
call symput('vars' , symget('vars') || ' ' || trim(colname));
run;
%put &counter &numvars &vars;
/* create the correlation matrix */
proc corr data=datacopy outp=corr; var &vars;
run;
/* pivot the matrix to be a fact table */
proc transpose data=corr out=transcorr name=var2 prefix=corr;
by NOTSORTED _NAME_ ;
VAR &vars;
WHERE _type_ = 'CORR' ;
run;
/* rename correlation column names */
data transcorr (rename= _NAME_=var1 rename=corr1=corr);
set transcorr ;
where _NAME_ < var2;
run;
%LET maxvar1=;
%LET maxvar2=;
%LET maxcorr=0;
data maxcorr;
set transcorr;
if (abs(corr) > symget('maxcorr')) then do;
call symput('maxvar1', var1);
call symput('maxvar2', var2);
call symput('maxcorr', abs(corr));
end;
run;
%put maximums are &maxvar1 &maxvar2 &maxcorr;
/* now go back to the original data and calculate the principal component for these two variables */
proc princomp data=datacopy out=datacopy n=1 prefix=temp outstat=pcout;
var &maxvar1 &maxvar2;
run;
proc sql;
select coalesce(&maxvar1, &maxvar2)
into :eigenvalue
from pcout
where strip(_type_) = 'EIGENVAL' ;
%let pcname=cluster&counter;
data datacopy (rename=temp1=&pcname);
set datacopy;
run;
data columns (keep=colname nextlevel child1 child2 correlation eigenvalue counter parentcounter);
set columns;
retain rownum 0;
format counter 10. parentcounter 10. nextlevel $40.;
if (strip(colname) = strip(symget('maxvar1'))) OR (strip(colname) = strip(symget('maxvar2'))) then do;
nextlevel = trim(symget('pcname' ));
parentcounter = trim(symget('counter'));
end;
output;
parentcounter = .;
if (rownum = 0) then do;
colname = trim(symget('pcname' ));
nextlevel = '' ;
child1 = trim(symget('maxvar1' ));
child2 = trim(symget('maxvar2' ));
correlation = trim(symget('maxcorr' ));
eigenvalue = trim(symget('eigenvalue' ));
counter = trim(symget('counter'));
output;
end;
rownum+1;
run;
proc sql;
select count(*)
into :numvars
from columns
where nextlevel is null;
%put &numvars variables left;
%end; /* do while */
%mend buildcolumns;
/* copy the original data to add the additional variables */
data datacopy (keep=zipcode nohhdiploma coldegree hhmedincome hhnopubassist landareamiles);
set sqlbook.zipcensus;
nohhdiploma = popedunone + popedusomehs;
coldegree = popedubach + popedumast + popeduprofdoct;
run;
proc contents data=datacopy out=contents;
run;
proc sql;
create table columns as
select name as colname, 0 as counter
from contents
where type=1;
data columns;
set columns;
format nextlevel $40.;
nextlevel = '';
run;
%buildcolumns;
run;
/*
proc sql;
select *
from columns
where counter <= 3 < parentcounter
;
*/