*
;

/*Subject: Multicollinearity Summary
From: kam@hc1.health.sa.gov.au (Kieran McCaul)

A few people have contacted me asking for a summary of the posts related to testing for multicollinearity. I've 
included the responses from Paul Allison, Jim Handsfield and Matthew Zack below.

The macro that Matthew sent me works well.

----------------------------------------------------------------------------------------------------------


Just run the equivalent model with PROC REG and request the
collinearity diagnostics (TOL VIF COLLIN). The degree of collinearity has
nothing to do with the left-hand side of the equation.

Paul D. Allison
allison@ssdc.sas.upenn.edu
Sociology Dept.                                           Voice: 215-898-6717
276 McNeil Bldg.                                        Fax: 215-573-2081
3718 Locust Walk                                        Home: 610-642-8876
University of Pennsylvania
Philadelphia, PA  19104-6299

----------------------------------------------------------------------------------------------


Use CATMOD and an over parameterized model.  If any of the interaction terms 
has significant contribution to the model, then there is a collinearity 
problem.  The CATMOD documentation has an extensive discussion of this 
problem.

Jim Handsfield                          |       Voice:  (404)488-7665
Centers for Disease Control             |       FAX:    (404)488-7663
  and Prevention (CDC)                  |
Mail Stop G25                           |
4770 Buford Highway, NE                 |
Atlanta, GA 30341                       |     jhh0@phpdls1.em.cdc.gov
The opinions expressed are my own and do not necessarily reflect those of
CDC.  Use of product names should not be construed as endorsement by CDC.


----------------------------------------------------------------------------------------------



The following SAS macro, %COLLIN, detects collinearity among the independent 
variables in logistic regression or Cox proportional hazards modelling.

To use the macro, you have to specify first the OUTEST and the COVOUT 
options on the PROC LOGISTIC/PROC PHREG statement to generate the 
variance-covariance matrix, which is then read into PROC IML within the 
macro %COLLIN.  For PROC LOGISTIC, condition indexes >= 10 suggest 
significant collinearity; variables that have large variance decomposition 
proportions (VDP's) under these large condition indexes may be collinear. 
 For PROC PHREG, the threshold for a large condition index is smaller, 
probably >=3.

Matthew Zack
============================================================
*/

* Restart page numbering at 1 and use 54 lines per output page;
options pagesize=54 pageno=1;

* Collinearity diagnostics using the information matrix;

%macro collin(covdsn=);

%* Macro to calculate collinearity diagnostics from the ;
%*  variance-covariance matrix in nonlinear regression.;
%*  The inverse of that matrix is rescaled to unit diagonal and;
%*  its eigenvalues, condition indexes, and variance decomposition;
%*  proportions (VDPs) are stored in data set FINAL2 and printed.;

%* REF: DAVIS CE, HYDE JE, BANGDIWALA SI, NELSON JJ.;
%*       AN EXAMPLE OF DEPENDENCIES AMONG VARIABLES IN A;
%*       CONDITIONAL LOGISTIC REGRESSION.  IN: MOOLGAVKAR SH,;
%*       PRENTICE RL, EDS.  MODERN STATISTICAL METHODS IN;
%*       CHRONIC DISEASE EPIDEMIOLOGY.  NEW YORK:;
%*       JOHN WILEY & SONS, INC., 1986:140-7.;

%* Parameter:;
%*   COVDSN = SAS data set written by the OUTEST= option of;
%*            PROC LOGISTIC or PROC PHREG with COVOUT also specified.;
%*            If it is left blank, only usage notes go to the log.;

%* Side effects:;
%*   Creates (or replaces) data set FINAL2.;
%*   Uses TITLE8 through TITLE10 for the printed report and cancels;
%*   them afterwards so they do not appear on later output.;

%* This macro does not work with PROC GENMOD.;

%* Keep the status flag local so that a macro variable with the same;
%*  name in a calling or global scope is never overwritten;
%local __stop;

%if (&covdsn ne ) %then %do;

%* For debugging, submit OPTIONS MPRINT before invoking this macro;

%let __stop=0;

proc iml;
  use &covdsn;
  read all var {_name_} into _varname;
  _nrvname=nrow(_varname);
  if (_nrvname>1) then do;
     /* Drop the first row (read below as rows 2 onward) and put   */
     /* three header rows in front of the remaining row names.     */
     _varnam2=_varname(|2:_nrvname, |);
     nmissing=j(nrow(_varnam2),1,.);
     labels={"Eigenval","CondIndx","        "};
     _varnam2=labels//_varnam2;
     free _varname labels;
     read all var _num_ into varcov(|colname=_nvname|);
     _nrcvc=ncol(varcov);
     lastvnam=_nvname(|1,_nrcvc|);
     /* A trailing _LNLIKE_ column is not part of the matrix. */
     if (lastvnam="_LNLIKE_")
        then varcov2=varcov(|2:_nrvname,1:_nrcvc-1|);
     if (lastvnam^="_LNLIKE_") then varcov2=varcov(|2:_nrvname,|);
     free varcov _nrcvc lastvnam;
     /* Invert the covariance matrix and scale it to unit diagonal. */
     covbinv=inv(varcov2);
     scale=inv(sqrt(diag(covbinv)));
     r=scale*covbinv*scale;
     free covbinv scale;
     call eigen(musqr,v,r);
     free r;
     /* Condition index = largest root eigenvalue / each root eigenvalue. */
     srootmus=sqrt(musqr);
     ci=1/(srootmus/max(srootmus));
     /* Squared eigenvector elements over eigenvalues, then each   */
     /* row divided by its row sum, give the VDPs.                 */
     phi=(v##2)*diag(musqr##(-1));
     sumphi=phi(|,+|);
     pi=phi#(sumphi##(-1));
     free phi sumphi srootmus v;
     final=(musqr||ci||nmissing||pi`)`;
     free pi musqr ci nmissing;
     _ncfinal=ncol(final);
     _nrfinal=nrow(final);
     final2=j(_nrfinal,_ncfinal,0);
     _ncfp1=_ncfinal+1;
     __vdp="VDP";
     /* Reverse the column order and build names VDP1, VDP2, ... */
     do i=1 to _ncfinal;
        final2(|,_ncfp1-i|)=final(|,i|);
        x=char(i,3);
        y=compress(concat(__vdp,x));
        if i=1 then _vdpname=y;
           else _vdpname=_vdpname||y;
     end;
     free final _nrfinal _ncfinal i x y;
     create final2 from final2(|rowname=_varnam2 colname=_vdpname|);
     append from final2(|rowname=_varnam2 colname=_vdpname|);
     free _varnam2 _vdpname final2;
  end;
  else do;
     /* One row or none: there is no covariance matrix to analyse. */
     /* Raise the flag so the report step is skipped instead of    */
     /* failing or printing a FINAL2 left over from an earlier run.*/
     call symput("__stop","1");
     print " ";
     print "**********************************************************";
     print "You need to specify the  COVOUT  option";
     print " in either PROC LOGISTIC or PROC PHREG.";
     print " This program will not calculate collinearity diagnostics.";
     print "**********************************************************";
     print " ";
  end;
  quit;
run;

%if (&__stop eq 0) %then %do;
   proc print data=final2 label noobs;
     id _varnam2;
     title8 "Collinearity diagnostics for nonlinear models using";
     title9 "the information matrix:  Eigenvalues, condition indexes,";
     title10 "and variance decomposition proportions (VDP's)";
     label _varnam2="Variable";
   run;
   %* Cancel TITLE8 and all higher-numbered titles set above;
   title8;
%end;

%end;
%else %do;
   %put;
   %put "*******************************************************";
   %put "When you invoke this macro, you have to specify the name";
   %put " of a SAS data set that contains the variance-covariance";
   %put " matrix from either PROC LOGISTIC or PROC PHREG.";
   %put;
   %put "You can create this matrix by including the following options";
   %put " on the PROC statement:  COVOUT  and  OUTEST=SASdsn,";
   %put " where SASdsn is the name of the SAS data set containing";
   %put " the variance-covariance matrix.";
   %put "*******************************************************";
   %put;
%end;

%mend collin;
* Calling the macro without a data set only writes its usage notes to the log;
%collin();

*
;