libname sqlbook 'C:\DM-SQL Book\data\final\saslib' ; %macro buildcolumns; %let counter=0; proc sql; select count(*) into :numvars from columns where nextlevel = ''; %do %WHILE(&numvars > 1); %let counter=%eval(&counter+1); /* these values are more useful in a macro variable */ %LET vars=; data _null_; set columns; where nextlevel = ''; call symput('vars' , symget('vars') || ' ' || trim(colname)); run; %put &counter &numvars &vars; /* create the correlation matrix */ proc corr data=datacopy outp=corr; var &vars; run; /* pivot the matrix to be a fact table */ proc transpose data=corr out=transcorr name=var2 prefix=corr; by NOTSORTED _NAME_ ; VAR &vars; WHERE _type_ = 'CORR' ; run; /* rename correlation column names */ data transcorr (rename= _NAME_=var1 rename=corr1=corr); set transcorr ; where _NAME_ < var2; run; %LET maxvar1=; %LET maxvar2=; %LET maxcorr=0; data maxcorr; set transcorr; if (abs(corr) > symget('maxcorr')) then do; call symput('maxvar1', var1); call symput('maxvar2', var2); call symput('maxcorr', abs(corr)); end; run; %put maximums are &maxvar1 &maxvar2 &maxcorr; /* now go back to the original data and calculate the principal component for these two variables */ proc princomp data=datacopy out=datacopy n=1 prefix=temp outstat=pcout; var &maxvar1 &maxvar2; run; proc sql; select coalesce(&maxvar1, &maxvar2) into :eigenvalue from pcout where strip(_type_) = 'EIGENVAL' ; %let pcname=cluster&counter; data datacopy (rename=temp1=&pcname); set datacopy; run; data columns (keep=colname nextlevel child1 child2 correlation eigenvalue counter parentcounter); set columns; retain rownum 0; format counter 10. parentcounter 10. nextlevel $40.; if (strip(colname) = strip(symget('maxvar1'))) OR (strip(colname) = strip(symget('maxvar2'))) then do; nextlevel = trim(symget('pcname' )); parentcounter = trim(symget('counter')); end; output; parentcounter = .; if (rownum = 0) then do; colname = trim(symget('pcname' )); nextlevel = '' ; child1 = trim(symget('maxvar1' )); child2 = trim(symget('maxvar2' )); correlation = trim(symget('maxcorr' )); eigenvalue = trim(symget('eigenvalue' )); counter = trim(symget('counter')); output; end; rownum+1; run; proc sql; select count(*) into :numvars from columns where nextlevel is null; %put &numvars variables left; %end; /* do while */ %mend buildcolumns; /* copy the original data to add the additional variables */ data datacopy (keep=zipcode nohhdiploma coldegree hhmedincome hhnopubassist landareamiles); set sqlbook.zipcensus; nohhdiploma = popedunone + popedusomehs; coldegree = popedubach + popedumast + popeduprofdoct; run; proc contents data=datacopy out=contents; run; proc sql; create table columns as select name as colname, 0 as counter from contents where type=1; data columns; set columns; format nextlevel $40.; nextlevel = ''; run; %buildcolumns; run; /* proc sql; select * from columns where counter <= 3 < parentcounter ; */