*
;
/*
Subject:      %spltsas macro: partition SAS datasets
From:         John Blodgett 
Date:         1995/04/10
Message-Id:   
Sender:       "SAS(r) Discussion" 
Comments:     Gated by NETNEWS@AUVM.AMERICAN.EDU
Reply-To:     John Blodgett 
Newsgroups:   comp.soft-Sys.sas

Trina Hosner asked the question about how to output to multiple SAS sets
based on the value of a read variable.  This does not really address that
question directly although it could be used if efficiency were not of
primary importance.  We have used the %spltsas macro (below) to rework a
number of our larger SAS data sets moved from MVS to unix, turning 600-
meg datasets into collections of smaller sets, partitioned and named
using a geographic identifier within the sets.  We present a "sample
invocation file" with 3 typical uses, followed by the source of the macro.
  The macro is "self-documenting" in the comments at the top.

(mautosource assumed so we do not show macro code being included.)


   John Blodgett
   Urban Information Center / Office of Computing
   University of Missouri - St. Louis      8001 Natural Bridge Rd.
   St. Louis, Mo 63121-4499     Phone: (314) 516-6014/6000 FAX: 516-6007
       *<===new tel. exchange Jan. '95:      ***                ***

For example invocations, see the bottom of this file.
*/

*************macro source code begins***************;
 /* Date of the last revision of this source; the spltsas start-up banner
    prints it in the log. */
 %global revdate;
 %let revdate=10APR95;

 /***********************************************************************/
 /* NAME: spltsas     SYSTEM: MSCDC   PGMR: Blodgett                    */
 /* FUNCTION: Splits a SAS data set into a series of smaller sets       */
 /*   with names based on the value of a character value stored in      */
 /*   the data set. Can be used, for example, to split a u.s. file      */
 /*   with a state var into 51 sets named per that variable's values.   */
 /*   In most cases you will need to preprocess the set to create the   */
 /*   variable used to name the subsets.                                */
 /*                                                                     */
 /* STATUS: Working under AIX, SAS6.09                                  */
 /*                                                                     */
 /* NOTES: Has option to create index on input set to make it work      */
 /*  more efficiently.  Index can be deleted when macro completes.      */
 /*  If index is created and run bombs or you need to rerun you MUST    */
 /*  turn off the genindex option on subsequent runs to avoid error.    */
 /*                                                                     */
 /* If input set is very large and/or has many different values for     */
 /* the partitioning variable then this macro will generate 1 DATA step*/
 /* per value of that variable and will read the entire input set       */
 /* once for each of those values.  Use of an index will reduce the     */
 /* amount of i/o required but it may still be quite significant.       */
 /* Designed more for expediency and ease of use rather than efficiency.*/
 /***********************************************************************/

 /********************************************************************/
 /* BEGIN MACRO DEFINITION                                           */
 /********************************************************************/
 %macro spltsas(
 setin=, /* input sas data set. user should issue libname if necessary */
 pathout=, /* required parm specifying directory for output data lib.
    CAUTION! Program will write over any files in this dir that have the
    same name as an existing file.  If you want to use the work libref get
    its pathname from the libname window. */
 engine=, /* optional.  Engine spec for output set, e.g. xport */
 setvar=, /* required. Names a character var in setin that will be used to
           partition and name the output sets.  Values of this variable
           must be valid sas dataset names, i.e. 1-8 chars, 1st char a
           letter or '_' */
 dropsvar=1,/* the &setvar variable will be dropped from the output data
               sets unless you specify dropsvar=0 */
 genindex=0, /* specify genindex=1 if you want the macro to generate a step
      to create an index on the setin data set.  Note that if an
      index for that variable already exists specifying this option will
      cause an error and program will not execute.  */
 debug=0);  /* accepted but not referenced anywhere in the macro body */

  /* Splits &setin into one output data set per distinct value of &setvar.
     Steps generated, in order:
       1. (genindex=1 only) PROC DATASETS creating an index on &setvar
       2. PROC SORT NODUPKEYS collecting the distinct values into SETNAMES
       3. DATA _NULL_ storing each value in macro variables set1..setN,
          its lower-case form in lset1..lsetN, and the count in nsets
       4. one LIBNAME (one per subset for the xport engine) and one
          DATA step per value, selecting rows with a WHERE= option      */

  /* Working macro variables are declared local so that a call cannot
     overwrite same-named macro variables belonging to the caller. */
  %local inlib inset i set lset libdsn nsets;

  /* Default for the subset count.  The DATA _NULL_ step below assigns
     nsets only on its last observation, so an input set without any
     observations would otherwise leave it undefined for the loop. */
  %let nsets=0;

  %PUT %STR( );
  %PUT ***************************************************************;
  %PUT *       SPLTSAS MACRO REV &REVDATE BEGIN EXECUTION             *;
  %PUT *           MISSOURI STATE CENSUS DATA CENTER                 *;
  %PUT ***************************************************************;
  %PUT %STR( );

  /* Stop before generating any step when a required parameter is blank;
     the steps below cannot be built without all three. */
  %if %length(&setin)=0 or %length(&setvar)=0 or %length(&pathout)=0
  %then %do;
    %put ERROR: spltsas requires the setin=, setvar= and pathout= parameters.;
    %goto finish;
    %end;

 *--This program splits a sas data set into sub data sets
    based on the value of a character variable within the input set.;
  %if &genindex %then %do;
   /* Split a one- or two-level name into libref and member.  The
      delimiter is written without quotes: in macro code quote marks are
      ordinary characters and would be taken as delimiters too. */
   %let inlib=%scan(&setin,1,.);   %let inset=%scan(&setin,2,.);
   %if &inset eq %then %do;
     /* one-level name: the member is in WORK */
     %let inset=&setin;
     %let inlib=work;
     %**put inlib= &inlib  inset= &inset;
     %end;
   proc datasets ddname=&inlib;
     modify &inset;
     index create &setvar;
     run;
     quit;   /* close PROC DATASETS explicitly; it is a run-group proc */
    %end;

 /* One observation per distinct value of the partitioning variable. */
 proc sort data=&setin(keep=&setvar) out=setnames nodupkeys;
   by &setvar;  run;

 data _null_; set setnames end=last;
   _nsets+1;
   length setparm $8;
   setparm='set'||left(put(_nsets,4.));
   /* assigning into a $1 variable keeps only the first character */
   length frstchar $1; frstchar=lowcase(&setvar);
   /* Report values that cannot be data set names.  This only writes a
      message to the log; the value is still passed on below. */
   if length(&setvar) gt 8 or verify(frstchar,
      '_abcdefghijklmnopqrstuvwxyz') then do;
      put // '***invalid value for sas data set: ' &setvar=;
      end;
   call symput(setparm,&setvar);
   setparm='l'||setparm;
   lsetvar=lowcase(&setvar);
   /* lower case value of parm used for unix file names */
   call symput(setparm,lsetvar);
   if last then call symput('nsets',put(_nsets,4.));
   run;

  %if &nsets = 0 %then
    %put ***** spltsas - no values of &setvar found in &setin, nothing to split *****;

  %do i=1 %to &nsets;
    %let set=&&set&i;  %let lset=&&lset&i;
    /* xport: each subset gets its own file, named after the value,
       inside &pathout; otherwise &pathout itself is the library. */
    %if &engine=xport %then %let libdsn=&pathout/&lset ;
     %else %let libdsn=&pathout;
    %if &i=1 or &engine=xport %then %do;
     libname out &engine "&libdsn";
      %end;
    %put **Begin copying to &pathout , set name:&lset;
    data out.&lset;
     set &setin(where=(&setvar="&set"));
     %if &dropsvar %then %str( drop &setvar; );
     run;
    %end;  %*<--do i loop-;
   %put ; %put ***** spltsas - generated steps completed ***** ;
 %finish:
%MEND spltsas;

/*--sample invocations of %spltsas macro stored in sasmacro-

   This whole section is one block comment so that including the file
   only defines the macro.  Block comments do not nest in SAS, so the
   notes on each parameter are written as "*- ... ;" comment statements
   ahead of every call rather than as inline block comments in the call.

data test;
   input name $ testno score;
cards;
john  1 89
john  2 78
john  3 94
jim   1 66
jim   2 74
jim   3 99
mary  2 85
jane  1 87
jane  2 88
run;
*--1: partition so that there is one set per name. --;
*-  setin=test  : input data set to be split;
*-  genindex=1  : generate step to index input set. silly for this test
                  but should be specified for large input sets with lots
                  of subsets to avoid rereading complete set;
*-  pathout=    : got this name from libname window - it is the path for
                  the work libref.  Use of the str quoting function is
                  not usually required;
*-  setvar=name : output sets will be john, jim, mary and jane - the
                  values of this variable;
*-  dropsvar=0  : do NOT drop the setvar variable (name) from output sets;
%spltsas(setin=test,
  genindex=1,
  pathout=%str(/tmp/gaussian/SAS_worka51),
  setvar=name,
  dropsvar=0)
  run;
*--2: and now partition so there is one set per test --;
*-we must preprocess to create the variable specifying the set name;
data test2; set test;
  length test $3;
  test='t'||put(testno,z2.);   *--values are t01, t02 and t03-;
  run;
*-  pathout=    : my home directory - must already exist;
*-  setvar=test : output sets will be named t01, t02 and t03;
*-  dropsvar=1  : this is the default - the test var was created and used
                  to provide the set names but is not kept on the sets;
%spltsas(setin=test2,
  pathout=%str(/u/john),
  setvar=test,
  dropsvar=1)
  run;
*--3: we redo the first test except we do NOT specify genindex (it would
   cause an error - the index already exists and will be used) and we
   specify the xport engine. --;
x mkdir /tmp/gaussian/test_xport;  *<---create the path to be used-;
*-  pathout=     : no quoting function this time;
*-  engine=xport : xport engine for output data sets. if you list the
                   pathout subdir after this you will see files with
                   names john, jim, mary and jane - no ".ssd01" extension;
%spltsas(setin=test,
         pathout=/tmp/gaussian/test_xport,
         setvar=name,
         engine=xport)
  run;
*/

*
;