*
;
%macro decile(y=,w=1,dsin=_last_,dout=,gout=,suffix=,print=1);                  
%* -------------------------------------------------------------------          
   Calculates decile shares etc for variable y with weight w.                   
   (gini coefficients are also calculated).                                     
   Special Note:  cases with weights which place them on the decile             
                  boundaries are split in order to provide exact decile         
                  shares.  However this does not deal with tied cases.          
                  If you want to allocate these you should add a small          
                  random variable to the y variable.                            
   Input:   y     variable whose distribution is to be described.               
            w     weight variable (default = 1).                                
            dsin  data set containing these variables (program is most          
                  efficient if no other variables are present)                  
                  (default = _last_).                                           
            dout  name of output dataset for deciles                            
            gout  name of output dataset for gini                               
            suffix Specify two letters (or numbers) to be appended to           
                  the output variable names.  This is useful if you             
                  want to combine output from separate runs of this             
                  program.                                                      
            print Specify 0 if you do not want printed output (def = 1)         
            Note: The program uses the library format decile.                   
    Output:       Decile boundaries, mean incomes, shares, cumulative           
                  shares and cumulative shares*mean.                            
                  Also gini coefficient.                                        
                  The dataset dsout will contain 11 cases representing          
                  the 10 deciles plus a total (placed last) with the            
                  following variables (where zz represents the suffix           
                  if specified),                                                
                    decile - decile 1...10 and 99 for the total.                
                    wcaseszz-number of weighted cases in decile.                
                    lbzz  -  lower bound of decile,                             
                    ubzz  -  upper bound                                        
                    meanzz - mean income in decile,                             
                    sharezz- income share of decile (in %)                      
                    csharezz-cumulative share (in %)                            
                    cmzz -  cumulative mean.                                    
                  In addition a data set with name gout will be                 
                  created with one case with the following variables,           
                    ginizz - gini coefficient                                   
                    meanzz, wcaseszz - as defined above,                        
                    nzz    - unweighted cases.                                  
    Author:       BWB Sept 90.                                                  
    Modifications BWB May 91, CM rather than CSM output.                        
  --------------------------------------------------------------------;         
                                                                                
* sort file                                                                     
  ---------;                                                                    
proc sort data=&dsin out=sorttmp;by &y;                                         
                                                                                
* Get the grand totals of the weight variable                                   
  -------------------------------------------;                                  
proc means noprint;                                                             
  var &w;                                                                       
  output out=gtotals sum=gtwgts;                                                
                                                                                
* Calculate decile rankings                                                     
  -------------------------;                                                    
data frank;                                                                     
  set sorttmp;                                                                  
  if _n_=1 then set gtotals;                                                    
                                                                                
  * check for missing cases;                                                    
  if &y=. | &w=. then error "missing values found";                             
                                                                                
  * initialise some variables;                                                  
  retain cs 0;                                                                  
  length  decile 2;                                                             
  retain cy 0;                                                                  
                                                                                
  cs = cs+&w;   /* cumulative sum of case weights */                            
  frank = cs/gtwgts;  /* fractional rank */                                     
  * determine the decile of the present case;                                   
  select;                                                                       
      when (frank<=0.10) decile = 1;  /* bottom decile */                       
      when (frank<=0.20) decile = 2;  /* next decile */                         
      when (frank<=0.30) decile = 3;                                            
      when (frank<=0.40) decile = 4;                                            
      when (frank<=0.50) decile = 5;                                            
      when (frank<=0.60) decile = 6;                                            
      when (frank<=0.70) decile = 7;                                            
      when (frank<=0.80) decile = 8;                                            
      when (frank<=0.90) decile = 9;                                            
      otherwise          decile = 10;  /* top decile    */                      
  end;                                                                          
                                                                                
  * Gini calculations;                                                          
  wy = &y * &w;    /* total income represented by present case */               
  cy = cy + wy;   /* cumulative income */                                       
  area = (cy - wy/2) * &w;    /* un-normalised lorenz area */                   
                                                                                
  original = 1;   /* flag for original cases */                                 
  length original 2;                                                            
                                                                                
  keep &y &w decile area wy original;                                           
                                                                                
  * adjustments for cases which straddle the quantile boundary;                 
  ldecile = lag(decile);                                                        
  if ldecile^=. & ldecile^=decile then do;                                      
    * i.e. just for the first case over the boundary;                           
    * find the fractional rank of the boundary just passed;                     
    select;                                                                     
        when (decile = 2) bound = .1         ;                                  
        when (decile = 3) bound = .2         ;                                  
        when (decile = 4) bound = .3;                                           
        when (decile = 5) bound = .4;                                           
        when (decile = 6) bound = .5;                                           
        when (decile = 7) bound = .6;                                           
        when (decile = 8) bound = .7;                                           
        when (decile = 9) bound = .8;                                           
        when (decile = 10) bound = .9;                                          
    end;                                                                        
    * create temporary variables;                                               
    tempw = &w;                                                                 
    tdecile = decile;                                                           
                                                                                
    * output a case below boundary;                                             
    decile = ldecile;  /* decile is value below boundary */                     
    &w = &w-(frank-bound)*gtwgts;                                               
    * e.g. if the boundary is 0.1, and the present cases fractional             
      rank is 0.1004, and there are a total of 1,000,000 weighted cases         
      and the weight of the present case is 1000, then this will split          
      into 1000-(0.1004-0.1)*1,000,000 = 400 cases below the boundary           
      and 600 above.;                                                           
    output;                                                                     
                                                                                
    * output case above quantile boundary;                                      
    decile = tdecile;  /* i.e. the original decile variable */                  
    &w = (frank-bound)*gtwgts;                                                  
    area = 0;  /* not needed a second time for ginis */                         
    wy = 0;                                                                     
    original = 0;  /* used in counting no of unweighted cases */                
    output;                                                                     
  end;  /* of cases straddling the boundary */                                  
                                                                                
  else output;   /* output other cases */                                       
                                                                                
run;                                                                            
                                                                                
* stats for deciles                                                             
  -----------------;                                                            
proc summary data=frank ;    /* this outputs one case per decile + */           
  weight &w;                 /* a case for the whole file          */           
  class decile;                                                                 
  var &y ;                                                                      
  output sumwgt(&y)=wcases&suffix mean(&y)=mean&suffix sum(&y)=totaly           
         min(&y)=lb&suffix max(&y)=ub&suffix                                    
        ;                                                                       
                                                                                
data ;                                                                          
  set _last_;                                                                   
  if _type_=0 then do;   /* ie grand total */                                   
    gtw=wcases&suffix;  /* grand total weighted cases */                        
    gty=totaly;  /* grand total weighted y */                                   
    gtm=mean&suffix;   /* overall mean y */                                     
    decile=99;                                                                  
  end;                                                                          
  retain gtw gty gtm;      /* grand totals to be used */                        
  share&suffix= 100*totaly/gty; /* income share of decile in %*/                
  retain cshare&suffix 0;                                                       
  cshare&suffix = share&suffix + cshare&suffix;                                 
                                       /* cumulative income share */            
  if _type_=0 then cshare&suffix = 0;/* reinitialise for totals case */         
  cm&suffix = (cshare&suffix*gtm/100)/(decile/10);                              
                                                                                
  keep _type_ decile share&suffix mean&suffix wcases&suffix                     
     cshare&suffix cm&suffix lb&suffix ub&suffix;                               
  * these are the variables to be found in the dataset dout;                    
                                                                                
* Put the totals last;                                                          
proc sort;                                                                      
  by decile;                                                                    
                                                                                
data &dout;                                                                     
  set _last_;                                                                   
  if decile=99 then do;                                                         
     cshare&suffix=.;                                                           
     cm&suffix=.;                                                               
     end;                                                                       
run;                                                                            
                                                                                
* print decile shares;                                                          
%if &print %then %do;                                                           
  proc print split='|';                                                         
    Title3 "Decile Shares for &y";                                              
    title4 "from data set &dsin with weight variable &w";                       
    format decile decile. /* this format is in the sasutil library */           
           lb&suffix ub&suffix comma9.0                                         
           mean&suffix comma8.0 share&suffix cshare&suffix 6.2                  
           cm&suffix comma8.0 wcases&suffix comma10.0;                          
    id decile;                                                                  
    var lb&suffix ub&suffix mean&suffix share&suffix cshare&suffix              
        cm&suffix wcases&suffix ;                                               
    label lb&suffix = 'Lower|Bound';                                            
    label ub&suffix = 'Upper|Bound';                                            
    label mean&suffix = 'Mean';                                                 
    label share&suffix = 'Share';                                               
    label cshare&suffix = 'Cumulative|Share';                                   
    label cm&suffix = 'Cumulative|Mean';                                        
    label wcases&suffix = 'Weighted|Cases';                                     
%end;                                                                           
                                                                                
                                                                                
* do gini calculation                                                           
  -------------------;                                                          
proc summary data=frank;   /* produces just one case */                         
  var area wy &w original;                                                      
  output sum(area)=tarea sum(wy)=ty                                             
         sum(&w)=sumwgt sum(original)=n&suffix;                                 
                                                                                
data &gout;                                                                     
  set _last_;                                                                   
  wcases&suffix = sumwgt;  /* grand total weighted cases */                     
  mean&suffix = ty/sumwgt;  /* mean  y */                                       
  gini&suffix = 1 - 2*(tarea/(ty*sumwgt));                                      
  keep gini&suffix mean&suffix wcases&suffix n&suffix ;                         
run;                                                                            
* print gini statistics;                                                        
%if &print %then %do;                                                           
  proc print;                                                                   
    title3 "Gini Coefficient for variable &y weighted by &w";                   
    title4 "from data set &dsin";                                               
    format gini&suffix 5.3 ;                                                    
    format mean&suffix comma10.0  ;                                             
    format wcases&suffix comma20.0;                                             
    format n&suffix comma8.0;                                                   
  run;                                                                          
%end;                                                                           
* reset the titles;                                                             
title3;                                                                         
                                                                                
%mend decile;
                                                                   
*
;