*
; %macro decile(y=,w=1,dsin=_last_,dout=,gout=,suffix=,print=1); %* ------------------------------------------------------------------- Calculates decile shares etc for variable y with weight w. (gini coefficients are also calculated). Special Note: cases with weights which place them on the decile boundaries are split in order to provide exact decile shares. However this does not deal with tied cases. If you want to allocate these you should add a small random variable to the y variable. Input: y variable whose distribution is to be described. w weight variable (default = 1). dsin data set containing these variables (program is most efficient if no other variables are present) (default = _last_). dout name of output dataset for deciles gout name of output dataset for gini suffix Specify two letters (or numbers) to be appended to the output variable names. This is useful if you want to combine output from separate runs of this program. print Specify 0 if you do not want printed output (def = 1) Note: The program uses the library format decile. Output: Decile boundaries, mean incomes, shares, cumulative shares and cumulative shares*mean. Also gini coefficient. The dataset dsout will contain 11 cases representing the 10 deciles plus a total (placed last) with the following variables (where zz represents the suffix if specified), decile - decile 1...10 and 99 for the total. wcaseszz-number of weighted cases in decile. lbzz - lower bound of decile, ubzz - upper bound meanzz - mean income in decile, sharezz- income share of decile (in %) csharezz-cumulative share (in %) cmzz - cumulative mean. In addition a data set with name gout will be created with one case with the following variables, ginizz - gini coefficient meanzz, wcaseszz - as defined above, nzz - unweighted cases. Author: BWB Sept 90. Modifications BWB May 91, CM rather than CSM output. --------------------------------------------------------------------; * sort file ---------; proc sort data=&dsin out=sorttmp;by &y; * Get the grand totals of the weight variable -------------------------------------------; proc means noprint; var &w; output out=gtotals sum=gtwgts; * Calculate decile rankings -------------------------; data frank; set sorttmp; if _n_=1 then set gtotals; * check for missing cases; if &y=. | &w=. then error "missing values found"; * initialise some variables; retain cs 0; length decile 2; retain cy 0; cs = cs+&w; /* cumulative sum of case weights */ frank = cs/gtwgts; /* fractional rank */ * determine the decile of the present case; select; when (frank<=0.10) decile = 1; /* bottom decile */ when (frank<=0.20) decile = 2; /* next decile */ when (frank<=0.30) decile = 3; when (frank<=0.40) decile = 4; when (frank<=0.50) decile = 5; when (frank<=0.60) decile = 6; when (frank<=0.70) decile = 7; when (frank<=0.80) decile = 8; when (frank<=0.90) decile = 9; otherwise decile = 10; /* top decile */ end; * Gini calculations; wy = &y * &w; /* total income represented by present case */ cy = cy + wy; /* cumulative income */ area = (cy - wy/2) * &w; /* un-normalised lorenz area */ original = 1; /* flag for original cases */ length original 2; keep &y &w decile area wy original; * adjustments for cases which straddle the quantile boundary; ldecile = lag(decile); if ldecile^=. & ldecile^=decile then do; * i.e. just for the first case over the boundary; * find the fractional rank of the boundary just passed; select; when (decile = 2) bound = .1 ; when (decile = 3) bound = .2 ; when (decile = 4) bound = .3; when (decile = 5) bound = .4; when (decile = 6) bound = .5; when (decile = 7) bound = .6; when (decile = 8) bound = .7; when (decile = 9) bound = .8; when (decile = 10) bound = .9; end; * create temporary variables; tempw = &w; tdecile = decile; * output a case below boundary; decile = ldecile; /* decile is value below boundary */ &w = &w-(frank-bound)*gtwgts; * e.g. if the boundary is 0.1, and the present cases fractional rank is 0.1004, and there are a total of 1,000,000 weighted cases and the weight of the present case is 1000, then this will split into 1000-(0.1004-0.1)*1,000,000 = 400 cases below the boundary and 600 above.; output; * output case above quantile boundary; decile = tdecile; /* i.e. the original decile variable */ &w = (frank-bound)*gtwgts; area = 0; /* not needed a second time for ginis */ wy = 0; original = 0; /* used in counting no of unweighted cases */ output; end; /* of cases straddling the boundary */ else output; /* output other cases */ run; * stats for deciles -----------------; proc summary data=frank ; /* this outputs one case per decile + */ weight &w; /* a case for the whole file */ class decile; var &y ; output sumwgt(&y)=wcases&suffix mean(&y)=mean&suffix sum(&y)=totaly min(&y)=lb&suffix max(&y)=ub&suffix ; data ; set _last_; if _type_=0 then do; /* ie grand total */ gtw=wcases&suffix; /* grand total weighted cases */ gty=totaly; /* grand total weighted y */ gtm=mean&suffix; /* overall mean y */ decile=99; end; retain gtw gty gtm; /* grand totals to be used */ share&suffix= 100*totaly/gty; /* income share of decile in %*/ retain cshare&suffix 0; cshare&suffix = share&suffix + cshare&suffix; /* cumulative income share */ if _type_=0 then cshare&suffix = 0;/* reinitialise for totals case */ cm&suffix = (cshare&suffix*gtm/100)/(decile/10); keep _type_ decile share&suffix mean&suffix wcases&suffix cshare&suffix cm&suffix lb&suffix ub&suffix; * these are the variables to be found in the dataset dout; * Put the totals last; proc sort; by decile; data &dout; set _last_; if decile=99 then do; cshare&suffix=.; cm&suffix=.; end; run; * print decile shares; %if &print %then %do; proc print split='|'; Title3 "Decile Shares for &y"; title4 "from data set &dsin with weight variable &w"; format decile decile. /* this format is in the sasutil library */ lb&suffix ub&suffix comma9.0 mean&suffix comma8.0 share&suffix cshare&suffix 6.2 cm&suffix comma8.0 wcases&suffix comma10.0; id decile; var lb&suffix ub&suffix mean&suffix share&suffix cshare&suffix cm&suffix wcases&suffix ; label lb&suffix = 'Lower|Bound'; label ub&suffix = 'Upper|Bound'; label mean&suffix = 'Mean'; label share&suffix = 'Share'; label cshare&suffix = 'Cumulative|Share'; label cm&suffix = 'Cumulative|Mean'; label wcases&suffix = 'Weighted|Cases'; %end; * do gini calculation -------------------; proc summary data=frank; /* produces just one case */ var area wy &w original; output sum(area)=tarea sum(wy)=ty sum(&w)=sumwgt sum(original)=n&suffix; data &gout; set _last_; wcases&suffix = sumwgt; /* grand total weighted cases */ mean&suffix = ty/sumwgt; /* mean y */ gini&suffix = 1 - 2*(tarea/(ty*sumwgt)); keep gini&suffix mean&suffix wcases&suffix n&suffix ; run; * print gini statistics; %if &print %then %do; proc print; title3 "Gini Coefficient for variable &y weighted by &w"; title4 "from data set &dsin"; format gini&suffix 5.3 ; format mean&suffix comma10.0 ; format wcases&suffix comma20.0; format n&suffix comma8.0; run; %end; * reset the titles; title3; %mend decile; *;