*
;
/*
Subject: macro for basic EASYTRIEVE ops
Newsgroups: comp.soft-sys.sas
Organization: Texas Metronet, Inc  (login info (214/705-2901 - 817/571-0400))
Summary:
Keywords:

To resurrect a thread from last month on SAS vs. EASYTRIEVE, following
is a macro I started developing to do a basic match merge between large
flat files.  It is not as complete as originally intended, but I am
moving on to another assignment where I don't anticipate having the
need to pursue this further, so I offer it up to SAS-L for those who
hate to use EASYTRIEVE, but find SAS too much of a resource hog for
simple operations on large flat files.

The essence of this macro is to skip reformatting the data to SAS
datasets and simply deal with sorted flat files for these basic operations.
The only statistic I have are for one use of this macro to extract
a subset from a file of 10 million card-image records, based on a key
file of 21 million records.  This took 4.5 minutes of CPU on a large
MVS system.  Based on experience, I feel this is at least an order
of magnitude less CPU time than reformatting these 31 million records
to SAS datasets to do the merge, plus avoiding limitations on DASD space
for the WORK library.  I think this would be very competitive with
EASYTRIEVE, but I never bothered to do a comparison.

Dwight Eggers
email: deggers@metronet.com

------ code follows ------  */

%MACRO MMERGE (LNAME, LKEYS, RNAME, RKEYS, OUTNAME,
               KEEP=MATCH);
%*;
%* DO A MATCHED MERGE OF TWO FLAT FILES.  THE FIRST (LEFT) FILE      ;
%* CONTAINS THE FULL RECORD TO BE RETAINED.  THE SECOND (RIGHT) FILE ;
%* CONTAINS THE VALUES OF THE KEYS ON RECORDS WHICH ARE TO BE        ;
%* RETAINED FROM THE LEFT FILE.  THE OUTPUT IS A FLAT FILE IN THE    ;
%* SAME FORMAT AS THE LEFT FILE.                                     ;
%*                                                                   ;
%* THE KEY(S) SPECIFICATION IS A SEQUENCE OF POSITION/FORMAT PAIRS   ;
%* IN SAS SYNTAX.  VARIABLE NAMES ARE OMITTED.  FOR EXAMPLE,         ;
%*       @1 PD6. @33 $4.                                             ;
%* TRANSLATES AS THE PRIMARY KEY BEING A REAL VALUE IN FORMAT PD6.   ;
%* STARTING IN COLUMN 1, FOLLOWED BY THE SECONDARY KEY AS A CHARACTER;
%* VALUE OF LENGTH 4 STARTING IN COLUMN 33.  THE DESCRIPTION FOR     ;
%* EACH VARIABLE SHOULD START WITH A '@' AND END WITH A '.' .        ;
%*                                                                   ;
%* THE CORRESPONDING KEY VARIABLES BETWEEN LEFT AND RIGHT FILES MUST ;
%* BE OF THE SAME TYPE (NUMERIC OR CHARACTER) AND LENGTH, BUT THEIR  ;
%* FORMATS AND POSITIONS DO NOT NEED TO MATCH.  THE NUMBER OF KEYS   ;
%* MUST AGREE BETWEEN BOTH FILES.  THE FILES MUST BE SORTED ON THESE ;
%* KEYS PRIOR TO INVOKING THIS MACRO.                                ;
%*                                                                   ;
%* FILE 'NAMES' -- LNAME, RNAME, AND OUTNAME -- CAN EITHER BE        ;
%* PHYSICAL FILE NAMES OR LOGICAL NAMES (E.G., DD NAMES ON MVS).     ;
%* PHYSICAL FILE NAMES ARE ENCLOSED IN SINGLE QUOTES.  A LOGICAL     ;
%* NAME IS SPECIFIED BY A SHORT NAME (<=8 CHARACTERS) WITHOUT QUOTES.;
%*                                                                   ;
%* OPTIONALLY, THE RIGHT FILE CAN ALSO BE A SAS DATASET.  TO USE THIS;
%* FEATURE THE RIGHT NAME SHOULD BE THE UNQUOTED FULL DATASET NAME,  ;
%* COMPLETE WITH LIBRARY NAME (E.G., WORK.RIGHT).  THE MISSING QUOTES;
%* AND '.' IS WHAT DIFFERENTIATE A DATASET NAME FROM A LOGICAL NAME. ;
%* IN THIS CASE, THE RIGHT KEY(S) ARE SPECIFIED AS VARIABLE NAMES    ;
%* INSTEAD OF POSITION/FORMAT PAIRS.  THE VARIABLE TYPES AND SIZES   ;
%* MUST AGAIN CORRESPOND TO THE LEFT KEYS.                           ;
%*                                                                   ;
%* THE CONVERSE OPERATION OF DISCARDING RECORDS WHICH SATISFY THE    ;
%* MATCH CAN BE PERFORMED BY SETTING THE OPTIONAL KEYWORD PARAMETER  ;
%* KEEP=NOMATCH.  OTHERWISE ONLY MATCHED RECORDS ARE RETAINED.       ;
%*                                                                   ;
%* THE MULTIPLE KEY FEATURE HAS NOT YET BEEN FULLY IMPLEMENTED.      ;
%*                                                                   ;
%* AUTHOR: DWIGHT EGGERS, 9/26/95                                    ;
%*;
%* DEFINE PARAMETERS FOR LEFT AND RIGHT INPUT FILES ;
%LOCAL LINPUT LLENGTH LLEVELS LFORMAT;
%LOCAL RINPUT RLEVELS;

* DEFINE INPUT STATEMENT FOR LEFT KEYS ;
DATA _NULL_;
INFILE &LNAME LENGTH=RECLENG;
LENGTH SEGMENT $20 LINPUT $80;
INPUT SEGMENT;  * DETERMINE RECORD LENGTH ;
CALL SYMPUT ('LLENGTH', LEFT(PUT(RECLENG,3.)));
CALL SYMPUT ('LFORMAT', '$CHAR'||TRIM(LEFT(PUT(RECLENG,3.)))||'.');
LINPUT=' ';
FULLSTR="&LKEYS";
DO I=1 TO 4;
  * PROCESS SEGMENTS WHICH BEGIN WITH  '@' AND END WITH '.' ;
  I1=INDEX(FULLSTR,'@');
  I2=INDEX(FULLSTR,'.');
  IF I1=0 OR I2=0 THEN GOTO ENDLOOP1;
  SEGMENT=SUBSTR(FULLSTR,I1,I2);
  LINPUT=TRIM(LINPUT)||' '||SCAN(SEGMENT,1,' ')||' _LKEY'||PUT(I,1.)
                     ||' '||SCAN(SEGMENT,2,' ');
  IF I2=LENGTH(TRIM(FULLSTR)) THEN GOTO ENDLOOP1;
  FULLSTR=LEFT(SUBSTR(FULLSTR,I2+1));
  END;
ENDLOOP1:
LEVELS=I;
CALL SYMPUT ('LINPUT' ,LINPUT);
CALL SYMPUT ('LLEVELS',PUT(LEVELS,1.));
STOP;
RUN;

* DEFINE STATEMENTS NEEDED FOR RIGHT FILE/DATASET;
DATA _NULL_;
LENGTH SEGMENT $20 RINPUT $80;
*;
* RIGHT FILE;
IF INDEX("&RNAME","'") OR NOT INDEX("&RNAME",'.') THEN DO;
  * CASE 1: RIGHT IS FLAT FILE (NAME IS EITHER ENCOSED IN QUOTES ;
  *         OR DOES NOT CONTAIN A PERIOD) ;
  CALL SYMPUT ('RSAS',' ');
  RINPUT=' ';
  FULLSTR="&RKEYS";
  DO I=1 TO 4;
    * PROCESS SEGMENTS WHICH BEGIN WITH  '@' AND END WITH '.' ;
    I1=INDEX(FULLSTR,'@');
    I2=INDEX(FULLSTR,'.');
    IF I1=0 OR I2=0 THEN GOTO ENDLOOP1;
    SEGMENT=SUBSTR(FULLSTR,I1,I2);
    RINPUT=TRIM(RINPUT)||' '||SCAN(SEGMENT,1,' ')||' _RKEY'||PUT(I,1.)
                       ||' '||SCAN(SEGMENT,2,' ');
    IF I2=LENGTH(TRIM(FULLSTR)) THEN GOTO ENDLOOP1;
    FULLSTR=LEFT(SUBSTR(FULLSTR,I2+1));
    END;
  ENDLOOP1:
  LEVELS=I-1;
  CALL SYMPUT ('RINPUT' ,RINPUT);
  CALL SYMPUT ('RLEVELS',PUT(LEVELS,1.));
  END;
ELSE DO;
  * CASE 2: RIGHT IS SAS DATASET;
  CALL SYMPUT ('RSAS',"&RNAME");
  LENGTH REN_STMT $80;
  REN_STMT='(';
  FULLSTR="&RKEYS";
  DO I=1 TO 4;
    * DEFINE RENAME PAIRS;
    SEGMENT=SCAN(FULLSTR,I,' ');
    IF SEGMENT^=' ' THEN
      REN_STMT=TRIM(REN_STMT)||' '||SEGMENT||'='||'_RKEY'||PUT(I,1.);
    IF I=1 THEN REN_STMT=COMPRESS(REN_STMT);
    END;
  REN_STMT=TRIM(REN_STMT)||')';
  CALL SYMPUT ('RENAME',TRIM(REN_STMT));
  END;
RUN;

DATA _NULL_;
%IF &RSAS^= %THEN %DO;
  SET &RSAS (RENAME=&RENAME) END=__RDONE ;
  INFILE &LNAME END=__LDONE ;
  __RCOUNT+1;
  %END;
FILE &OUTNAME ;

* COUNTS OF RECORDS READ AND AND OUTPUT;
RETAIN __LCOUNT __RCOUNT __OCOUNT 0;
* FLAGS TO READ KEY FROM LEFT/RIGHT SIDE ON NEXT PASS;
RETAIN LFLAG RFLAG 1;
* FULL I/O RECORD;
LENGTH RECORD $&LLENGTH ;
* KEYS;
%DO I=1 %TO &LLEVELS ;
  RETAIN _LKEY&I _RKEY&I ;
  DROP   _LKEY&I _RKEY&I ;
  %END;

NEXTLEFT:
IF LFLAG THEN DO;
  INFILE &LNAME END=__LDONE %STR(;) ;
  INPUT &LINPUT @@ ;
  __LCOUNT+1;
  LFLAG=0;
  END;

%* INPUT RIGHT KEYS IF RIGHT SIDE IS FLATFILE ;
%IF &RSAS= %THEN %DO;
  NEXTRIGH:
  IF RFLAG THEN DO;
    INFILE &RNAME END=__RDONE;
    INPUT &RINPUT ;
    __RCOUNT+1;
    RFLAG=0;
    END;
  %END;

* CHECK RELATIONSHIP BETWEEN LEFT:RIGHT KEYS ;
%IF &LLEVELS=1 %THEN IF _LKEY1<_RKEY1 THEN DO %STR(;) ;
%ELSE %DO;
  %DO I=1 %TO %EVAL(&LLEVELS-1);
    %IF &I=1 %THEN IF ;
             %ELSE AND ;
    %LET J=%EVAL(&LLEVELS-&I+1);
    _LKEY&J=_RKEY&J
    %END;
  %END;

  * LEFT SIDE BEHIND RIGHT SIDE ;
  %IF %UPCASE(&KEEP)=NOMATCH %THEN LINK IOLEFT   %STR(;) ;
                             %ELSE LINK SKIPLEFT %STR(;) ;
  LFLAG=1;
  IF NOT __LDONE THEN GOTO NEXTLEFT;
                 ELSE GOTO FINISHED;
  END;

%* NEXT CHECK FOR EQUALITY OF KEYS ;
%DO I=1 %TO &LLEVELS;
  %IF &I=1 %THEN ELSE IF _LKEY1=_RKEY1 ;
  %ELSE AND _LKEY&I=_RKEY&I ;
  %END;
THEN DO;
  * LEFT AND RIGHT SIDES MATCH ;
  %IF %UPCASE(&KEEP)^=NOMATCH %THEN LINK IOLEFT %STR(;) ;
                              %ELSE LINK SKIPLEFT %STR(;);
  LFLAG=1;
  IF NOT __LDONE THEN GOTO NEXTLEFT;
                 ELSE GOTO FINISHED;
  END;

* RIGHT SIDE BEHIND LEFT SIDE;
ELSE DO;
  %IF %UPCASE(&KEEP)^=NOMATCH %THEN %DO;
  %* WHEN OUTPUTING MATCHES;
    RFLAG=1;
    IF __RDONE THEN GOTO FINISHED;
    %END;
  %ELSE %DO;
  %* WHEN OUTPUTING MISMATCHES;
    IF __RDONE THEN DO;
      * I/O EVERYTHING REMAINING ON LEFT;
      __LCOUNT=__LCOUNT-1;
      __LDONE=0;
      DO WHILE (NOT __LDONE);
        LINK IOLEFT;
        __LCOUNT+1;
        END;
      GOTO FINISHED;
      END;
    * OTHERWISE RECYCLE FOR NEXT RIGHT;
    ELSE RFLAG=1;
    %END;
  END;
RETURN;

IOLEFT:
  INFILE &LNAME END=__LDONE EOF=FINISH0;
  INPUT @1 RECORD &LFORMAT ;
  PUT   RECORD &LFORMAT ;
  __OCOUNT+1;
  RETURN;

SKIPLEFT:
  INFILE &LNAME END=__LDONE ;
  INPUT ;
  RETURN;

FINISH0:
* FORCE LEFT SIDE TO BE DONE;
__LDONE=1;
FINISHED:
  FILE LOG;
  PUT '*************************************************';
  PUT 'COUNT FROM LEFT  FILE= ' __LCOUNT          ;
  IF NOT __LDONE THEN
  PUT '*** MERGE FINISHED BEFORE LEFT FILE EXHAUSTED ***' ;
  PUT 'COUNT FROM RIGHT FILE= ' __RCOUNT          ;
  IF NOT __RDONE THEN
  PUT '*** MERGE FINISHED BEFORE RIGHT FILE EXHAUSTED **' ;
  PUT 'COUNT TO  OUTPUT FILE= ' __OCOUNT          ;
  PUT '*************************************************';
  STOP;
RUN;

%MEND MMERGE;

*
;