*;
/*
Subject: Multicolinearity Summary
From: kam@hc1.health.sa.gov.au (Kieran McCaul)

A few people have contacted me asking for a summary of the posts related
to testing for multicollinearity.  I've included the responses from
Paul Allison, Jim Handsfield and Matthew Zack below.  The macro that
Matthew sent me works well.

----------------------------------------------------------------------
Just run the equivalent model with PROC REG and request the
collinearity diagnostics (TOL VIF COLLIN).  The degree of collinearity
has nothing to do with the left-hand side of the equation.

Paul D. Allison             allison@ssdc.sas.upenn.edu
Sociology Dept.             Voice: 215-898-6717
276 McNeil Bldg.            Fax: 215-573-2081
3718 Locust Walk            Home: 610-642-8876
University of Pennsylvania
Philadelphia, PA 19104-6299

----------------------------------------------------------------------
Use CATMOD and an over parameterized model.  If any of the interaction
terms has significant contribution to the model, then there is a
collinearity problem.  The CATMOD documentation has an extensive
discussion of this problem.

Jim Handsfield              | Voice: (404)488-7665
Centers for Disease Control | FAX: (404)488-7663
and Prevention (CDC)        | Mail Stop G25
                            | 4770 Buford Highway, NE
                            | Atlanta, GA 30341
                            | jhh0@phpdls1.em.cdc.gov

The opinions expressed are my own and do not necessarily reflect those
of CDC.  Use of product names should not be construed as endorsement by
CDC.

----------------------------------------------------------------------
The following SAS macro, %COLLIN, detects collinearity among the
independent variables in logistic regression or Cox proportional hazards
modelling.  To use the macro, you have to specify first the OUTEST and
the COVOUT options on the PROC LOGISTIC/PROC PHREG statement to generate
the variance-covariance matrix, which is then read into PROC IML within
the macro %COLLIN.

For PROC LOGISTIC, condition indexes >= 10 suggest significant
collinearity; variables that have large variance decomposition
proportions (VDP's) under these large condition indexes may be
collinear.  For PROC PHREG, the threshold for a large condition index is
smaller, probably >=3.

Matthew Zack
============================================================
*/

option pageno=1 pagesize=54;

* Collinearity diagnostics using the information matrix;

/*--------------------------------------------------------------------
  COLLIN: collinearity diagnostics from a COVOUT covariance matrix.

  Parameter
    COVDSN=  Name of the OUTEST= data set written by PROC LOGISTIC or
             PROC PHREG with the COVOUT option.  When it is left blank
             the macro only writes usage instructions to the log.

  What it does
    - Reads _NAME_ and every numeric column of COVDSN into PROC IML.
    - Drops the first row (and the last column when that column is
      named _LNLIKE_), inverts what remains, rescales the inverse to
      a unit diagonal and takes its eigen decomposition.
    - Writes WORK.FINAL2: rows Eigenval, CondIndx, a blank separator
      row, then one row of variance decomposition proportions per
      remaining _NAME_ value.  Columns are named VDP1, VDP2, ... and
      hold the eigen components in the reverse of the order returned
      by CALL EIGEN.
    - Prints WORK.FINAL2 under TITLE8 to TITLE10.

  Behaviour on unusable input
    If COVDSN has fewer than two rows a message asking for COVOUT is
    printed.  The listing is produced only when PROC IML reached the
    point of writing WORK.FINAL2, so a FINAL2 left over from an
    earlier run is never printed by mistake.

  Side effects
    WORK.FINAL2 is replaced.  TITLE8 and all higher numbered titles
    are cleared after the listing so they do not leak into later
    output.
--------------------------------------------------------------------*/
%macro collin(covdsn=);
  %* Macro to calculate collinearity diagnostics from ;
  %* variance-covariance matrix in nonlinear regression;
  %* REF: DAVIS CE, HYDE JE, BANGDIWALA SI, NELSON JJ.;
  %* AN EXAMPLE OF DEPENDENCIES AMONG VARIABLES IN A;
  %* CONDITIONAL LOGISTIC REGRESSION. IN: MOOLGAVKAR SH,;
  %* PRENTICE RL, EDS. MODERN STATISTICAL METHODS IN;
  %* CHRONIC DISEASE EPIDEMIOLOGY. NEW YORK:;
  %* JOHN WILEY & SONS, INC., 1986:140-7.;
  %* In your nonlinear regression program (PROC LOGISTIC or ;
  %* PROC PHREG), specify the COVOUT and the OUTEST=SASdsn ;
  %* options in the PROC statement. Then, specify the SAS data set;
  %* (SASdsn) in the macro variable, COVDSN, when you invoke this macro.;
  %* This macro does not work with PROC GENMOD.;

  %* Keep the status flag private to this macro;
  %local __stop;

  %if (&covdsn ne ) %then %do;
    %* For debugging, submit OPTION MPRINT before calling this macro;

    %* Assume failure until PROC IML confirms that FINAL2 was written;
    %let __stop=1;

    proc iml;
      use &covdsn;
      read all var {_name_} into _varname;
      _nrvname=nrow(_varname);

      if (_nrvname>1) then do;
        /* Row 1 is skipped; the remaining names label the VDP rows. */
        _varnam2=_varname(|2:_nrvname, |);
        /* Column of missing values: becomes the blank separator row. */
        nmissing=j(nrow(_varnam2),1,.);
        labels={"Eigenval","CondIndx"," "};
        _varnam2=labels//_varnam2;
        free _varname labels;

        read all var _num_ into varcov(|colname=_nvname|);
        _nrcvc=ncol(varcov);
        lastvnam=_nvname(|1,_nrcvc|);
        /* A trailing _LNLIKE_ column is not part of the matrix. */
        if (lastvnam="_LNLIKE_") then
          varcov2=varcov(|2:_nrvname,1:_nrcvc-1|);
        if (lastvnam^="_LNLIKE_") then
          varcov2=varcov(|2:_nrvname,|);
        free varcov _nrcvc lastvnam;

        /* Invert the covariance matrix and scale to unit diagonal. */
        covbinv=inv(varcov2);
        scale=inv(sqrt(diag(covbinv)));
        r=scale*covbinv*scale;
        free covbinv scale;

        call eigen(musqr,v,r);
        free r;

        /* Condition index: largest root over each root. */
        srootmus=sqrt(musqr);
        ci=1/(srootmus/max(srootmus));

        /* Variance decomposition proportions: each row of PI sums to 1. */
        phi=(v##2)*diag(musqr##(-1));
        sumphi=phi(|,+|);
        pi=phi#(sumphi##(-1));
        free phi sumphi srootmus v;

        final=(musqr||ci||nmissing||pi`)`;
        free pi musqr ci nmissing;

        /* Reverse the column order and build the names VDP1..VDPn. */
        _ncfinal=ncol(final);
        _nrfinal=nrow(final);
        final2=j(_nrfinal,_ncfinal,0);
        _ncfp1=_ncfinal+1;
        __vdp="VDP";
        do i=1 to _ncfinal;
          final2(|,_ncfp1-i|)=final(|,i|);
          x=char(i,3);
          y=compress(concat(__vdp,x));
          if i=1 then _vdpname=y;
          else _vdpname=_vdpname||y;
        end;
        free final _nrfinal _ncfinal i x y;

        create final2 from final2(|rowname=_varnam2 colname=_vdpname|);
        append from final2(|rowname=_varnam2 colname=_vdpname|);
        free _varnam2 _vdpname final2;

        /* Only now is it safe to print WORK.FINAL2. */
        call symput("__stop","0");
      end;
      else do;
        x="1";
        call symput("__stop",left(x));
        print " ";
        print "**********************************************************";
        print "You need to specify the COVOUT option";
        print " in either PROC LOGISTIC or PROC PHREG.";
        print " This program will not calculate collinearity diagnostics.";
        print "**********************************************************";
        print " ";
      end;
    quit;
    run;

    %if (&__stop eq 0) %then %do;
      proc print data=final2 label noobs;
        id _varnam2;
        title8 "Collinearity diagnostics for nonlinear models using";
        title9 "the information matrix: Eigenvalues, condition indexes,";
        title10 "and variance decomposition proportions (VDP's)";
        label _varnam2="Variable";
      run;
      %* Cancel TITLE8 and above so later output is not mislabelled;
      title8;
    %end;
  %end;
  %else %do;
    %put;
    %put "*******************************************************";
    %put "When you invoke this macro, you have to specify the name";
    %put " of a SAS data set that contains the variance-covariance";
    %put " matrix from either PROC LOGISTIC or PROC PHREG.";
    %put;
    %put "You can create this matrix by including the following options";
    %put " on the PROC statement: COVOUT and OUTEST=SASdsn,";
    %put " where SASdsn is the name of the SAS data set containing";
    %put " the variance-covariance matrix.";
    %put "*******************************************************";
    %put;
  %end;
%mend collin;

/* Template call: put the OUTEST= data set name after COVDSN=.
   As written, with COVDSN blank, it only writes the usage notes
   to the log. */
* ;
%collin(covdsn=);
*;