- HDISVM05 ;BPFO/JRP - PARSE DELIMITTED TEXT;6/26/2007
- ;;1.0;HEALTH DATA & INFORMATICS;**7**;Feb 22, 2005;Build 33
- ;
- ;**
- ;** The following code was copied from SCMSVUT5, which I
- ;** originally wrote in July of 2002. It was copied to
- ;** prevent the dependancy on the Scheduling package, which
- ;** may not exist on all systems.
- ;**
- ;** It has also been modified to handle fields that contain
- ;** the separator character. These fields are surrounded by
- ;** quotation marks (ex: ...^"Field ^ With ^ Separator"^...).
- ;** This was done because the original parser was written for
- ;** parsing HL7 messages and this parser is meant for parsing
- ;** delimitted text (ie spreadsheets).
- ;**
- ;
- PARSE(INARR,OUTARR,SEP,SUB,MAX) ;Parse array into individual fields
- ;Input : INARR - Array containing data to parse (full global ref)
- ; INARR = First 245 characters of data
- ; INARR(1..n) = Continuation nodes
- ; OR
- ; INARR(x) = First 245 characters of data
- ; INARR(x,1..n) = Continuation nodes
- ; OUTARR - Array to put parsed data into (full global ref)
- ; SEP - Field separator (defaults to ^) (1 character)
- ; SUB - Starting subscript of OUTARR (defaults to 0)
- ; MAX - Maximum length of output node (defaults to 245)
- ;Output : None
- ; OUTARR(SUB) = First piece (MAX characters)
- ; OUTARR(SUB,1..n) = Continuation nodes
- ; OUTARR(SUB+X) = Xth piece (MAX characters)
- ; OUTARR(SUB+X,1..n) = Continuation nodes
- ;Notes : OUTARR is initialized (KILLed) on entry
- ; : Assumes that INARR and OUTARR are defined and valid
- ;
- ;Declare variables
- NEW NODE,STOP,DATA,INFO,FLD,SEPCNT,CN,OUT,TMP,ROOT,OUTNODE
- NEW QUOTE,HASQ,ADDSEP
- KILL @OUTARR
- SET SEP=$GET(SEP) SET SEP=$EXTRACT(SEP,1) SET:SEP="" SEP="^"
- SET SUB=+$GET(SUB)
- SET MAX=+$GET(MAX) SET:'MAX MAX=245
- SET NODE=INARR
- SET INFO=$GET(@NODE)
- SET ROOT=$$OREF^DILF(INARR)
- SET FLD=1
- SET SEPCNT=$LENGTH(INFO,SEP)
- SET STOP=0
- SET OUTNODE=$NAME(@OUTARR@(SUB))
- SET QUOTE=$CHAR(34)
- SET HASQ=0
- SET ADDSEP=0
- SET CN=0
- ;Loop through all columns in all nodes
- FOR SET DATA=$PIECE(INFO,SEP,FLD) DO QUIT:STOP
- .;Check for data in double quotes
- .IF (DATA[QUOTE) IF (($LENGTH(DATA,QUOTE)-1)#2) DO
- ..;Check for leading double quote
- ..IF ('HASQ) DO
- ...;Separator on next node (don't append now in case it isn't)
- ...IF (FLD=SEPCNT) SET ADDSEP=1 QUIT
- ...;Append separator
- ...SET DATA=DATA_SEP
- ...SET ADDSEP=0
- ...QUIT
- ..SET HASQ='HASQ
- ..QUIT
- .;Need to append separator when the leading quotation mark was
- .;on the previous node AND the next separator is encounterd
- .IF (ADDSEP) IF (HASQ) IF (DATA'=QUOTE) IF (INFO[SEP) IF (FLD'=SEPCNT) DO
- ..SET DATA=DATA_SEP
- ..SET ADDSEP=0
- ..QUIT
- .;End of line - store in output global
- .IF FLD=SEPCNT DO QUIT
- ..DO ADDNODE
- ..;Get next line of data from input global
- ..SET NODE=$QUERY(@NODE)
- ..;No more data - stop outer loop
- ..IF (NODE="")!(NODE'[ROOT) SET STOP=1 QUIT
- ..;Set text and column variables
- ..SET INFO=$GET(@NODE)
- ..SET SEPCNT=$LENGTH(INFO,SEP)
- ..SET FLD=1
- ..QUIT
- .;Add column to output global
- .DO ADDNODE
- .;Increment subscript if not in middle of double quotes
- .IF ('HASQ) SET SUB=SUB+1
- .;Set next storage node to use
- .SET OUTNODE=$NAME(@OUTARR@(SUB))
- .;Increment column number
- .SET FLD=FLD+1
- .;Reset continuation node number
- .SET CN=0
- .QUIT
- QUIT
- ADDNODE ;Used by PARSE to add data to output node
- ;Get currently stored column value
- SET TMP=$GET(@OUTNODE)
- ;Length of node won't go over max value - append
- IF ($LENGTH(TMP)+$LENGTH(DATA))<(MAX+1) SET @OUTNODE=TMP_DATA QUIT
- ;Append as much as stored column value can take
- SET @OUTNODE=TMP_$EXTRACT(DATA,1,(MAX-$LENGTH(TMP)))
- ;Increment continuation node number
- SET CN=CN+1
- ;Recursively call self to store remaining text
- SET DATA=$EXTRACT(DATA,(MAX-$LENGTH(TMP)+1),$LENGTH(DATA))
- SET OUTNODE=$NAME(@OUTARR@(SUB,CN))
- IF DATA'="" DO ADDNODE
- QUIT
- --- Routine Detail --- with STRUCTURED ROUTINE LISTING ---[H[J[2J[HHDISVM05 4124 printed Feb 18, 2025@23:23:21 Page 2
- HDISVM05 ;BPFO/JRP - PARSE DELIMITTED TEXT;6/26/2007
- +1 ;;1.0;HEALTH DATA & INFORMATICS;**7**;Feb 22, 2005;Build 33
- +2 ;
- +3 ;**
- +4 ;** The following code was copied from SCMSVUT5, which I
- +5 ;** originally wrote in July of 2002. It was copied to
- +6 ;** prevent the dependancy on the Scheduling package, which
- +7 ;** may not exist on all systems.
- +8 ;**
- +9 ;** It has also been modified to handle fields that contain
- +10 ;** the separator character. These fields are surrounded by
- +11 ;** quotation marks (ex: ...^"Field ^ With ^ Separator"^...).
- +12 ;** This was done because the original parser was written for
- +13 ;** parsing HL7 messages and this parser is meant for parsing
- +14 ;** delimitted text (ie spreadsheets).
- +15 ;**
- +16 ;
- PARSE(INARR,OUTARR,SEP,SUB,MAX) ;Parse array into individual fields
- +1 ;Input : INARR - Array containing data to parse (full global ref)
- +2 ; INARR = First 245 characters of data
- +3 ; INARR(1..n) = Continuation nodes
- +4 ; OR
- +5 ; INARR(x) = First 245 characters of data
- +6 ; INARR(x,1..n) = Continuation nodes
- +7 ; OUTARR - Array to put parsed data into (full global ref)
- +8 ; SEP - Field separator (defaults to ^) (1 character)
- +9 ; SUB - Starting subscript of OUTARR (defaults to 0)
- +10 ; MAX - Maximum length of output node (defaults to 245)
- +11 ;Output : None
- +12 ; OUTARR(SUB) = First piece (MAX characters)
- +13 ; OUTARR(SUB,1..n) = Continuation nodes
- +14 ; OUTARR(SUB+X) = Xth piece (MAX characters)
- +15 ; OUTARR(SUB+X,1..n) = Continuation nodes
- +16 ;Notes : OUTARR is initialized (KILLed) on entry
- +17 ; : Assumes that INARR and OUTARR are defined and valid
- +18 ;
- +19 ;Declare variables
- +20 NEW NODE,STOP,DATA,INFO,FLD,SEPCNT,CN,OUT,TMP,ROOT,OUTNODE
- +21 NEW QUOTE,HASQ,ADDSEP
- +22 KILL @OUTARR
- +23 SET SEP=$GET(SEP)
- SET SEP=$EXTRACT(SEP,1)
- if SEP=""
- SET SEP="^"
- +24 SET SUB=+$GET(SUB)
- +25 SET MAX=+$GET(MAX)
- if 'MAX
- SET MAX=245
- +26 SET NODE=INARR
- +27 SET INFO=$GET(@NODE)
- +28 SET ROOT=$$OREF^DILF(INARR)
- +29 SET FLD=1
- +30 SET SEPCNT=$LENGTH(INFO,SEP)
- +31 SET STOP=0
- +32 SET OUTNODE=$NAME(@OUTARR@(SUB))
- +33 SET QUOTE=$CHAR(34)
- +34 SET HASQ=0
- +35 SET ADDSEP=0
- +36 SET CN=0
- +37 ;Loop through all columns in all nodes
- +38 FOR
- SET DATA=$PIECE(INFO,SEP,FLD)
- Begin DoDot:1
- +39 ;Check for data in double quotes
- +40 IF (DATA[QUOTE)
- IF (($LENGTH(DATA,QUOTE)-1)#2)
- Begin DoDot:2
- +41 ;Check for leading double quote
- +42 IF ('HASQ)
- Begin DoDot:3
- +43 ;Separator on next node (don't append now in case it isn't)
- +44 IF (FLD=SEPCNT)
- SET ADDSEP=1
- QUIT
- +45 ;Append separator
- +46 SET DATA=DATA_SEP
- +47 SET ADDSEP=0
- +48 QUIT
- End DoDot:3
- +49 SET HASQ='HASQ
- +50 QUIT
- End DoDot:2
- +51 ;Need to append separator when the leading quotation mark was
- +52 ;on the previous node AND the next separator is encounterd
- +53 IF (ADDSEP)
- IF (HASQ)
- IF (DATA'=QUOTE)
- IF (INFO[SEP)
- IF (FLD'=SEPCNT)
- Begin DoDot:2
- +54 SET DATA=DATA_SEP
- +55 SET ADDSEP=0
- +56 QUIT
- End DoDot:2
- +57 ;End of line - store in output global
- +58 IF FLD=SEPCNT
- Begin DoDot:2
- +59 DO ADDNODE
- +60 ;Get next line of data from input global
- +61 SET NODE=$QUERY(@NODE)
- +62 ;No more data - stop outer loop
- +63 IF (NODE="")!(NODE'[ROOT)
- SET STOP=1
- QUIT
- +64 ;Set text and column variables
- +65 SET INFO=$GET(@NODE)
- +66 SET SEPCNT=$LENGTH(INFO,SEP)
- +67 SET FLD=1
- +68 QUIT
- End DoDot:2
- QUIT
- +69 ;Add column to output global
- +70 DO ADDNODE
- +71 ;Increment subscript if not in middle of double quotes
- +72 IF ('HASQ)
- SET SUB=SUB+1
- +73 ;Set next storage node to use
- +74 SET OUTNODE=$NAME(@OUTARR@(SUB))
- +75 ;Increment column number
- +76 SET FLD=FLD+1
- +77 ;Reset continuation node number
- +78 SET CN=0
- +79 QUIT
- End DoDot:1
- if STOP
- QUIT
- +80 QUIT
- ADDNODE ;Used by PARSE to add data to output node
- +1 ;Get currently stored column value
- +2 SET TMP=$GET(@OUTNODE)
- +3 ;Length of node won't go over max value - append
- +4 IF ($LENGTH(TMP)+$LENGTH(DATA))<(MAX+1)
- SET @OUTNODE=TMP_DATA
- QUIT
- +5 ;Append as much as stored column value can take
- +6 SET @OUTNODE=TMP_$EXTRACT(DATA,1,(MAX-$LENGTH(TMP)))
- +7 ;Increment continuation node number
- +8 SET CN=CN+1
- +9 ;Recursively call self to store remaining text
- +10 SET DATA=$EXTRACT(DATA,(MAX-$LENGTH(TMP)+1),$LENGTH(DATA))
- +11 SET OUTNODE=$NAME(@OUTARR@(SUB,CN))
- +12 IF DATA'=""
- DO ADDNODE
- +13 QUIT