- HDISVM01 ;BPFO/JRP - PARSE XML DOCUMENT USING SAX;12/20/2004
- ;;1.0;HEALTH DATA & INFORMATICS;;Feb 22, 2005
- ;
- SAX(XMLARR,PRSARR) ;Parse XML document using SAX interface
- ; Input: XMLARR - Global array containing XML document (closed root)
- ; PRSARR - Array to output parsed XML document (closed root)
- ;Output: None
- ; @PRSARR@("ESUBS",ESN) = Element name
- ; @PRSARR@("EINDX",ElementName) = Subscript number (ESN)
- ; @PRSARR@("ASUBS",ESN,ASN) = Attribute name
- ; @PRSARR@("AINDX",ESN,AttributeName) = Subscript number (ASN)
- ; @PRSARR@("DATA",ESN,Repetition,"V") = Value of element
- ; @PRSARR@("DATA",ESN,Repetition,"A",ASN) = Value of attribute
- ; @PRSARR@("DATA",ESN1,Rep1,ESN2,Rep2,"V") = Value of child
- ; element
- ; @PRSARR@("DATA",ESN1,Rep1,ESN2,Rep2,"A",ASN) = Value of child
- ; attribute
- ; Notes : XMLARR must be a global array (i.e. no local arrays)
- ; : PRSARR is initialized (i.e KILLed) on input
- Q:$G(XMLARR)=""
- Q:'$D(@XMLARR)
- Q:$G(PRSARR)=""
- N HDICBK,SUBNUM,TAGNAME
- N ESUBS,EINDX,ASUBS,AINDX,DATA
- S ESUBS=$NA(@PRSARR@("ESUBS"))
- S EINDX=$NA(@PRSARR@("EINDX"))
- S ASUBS=$NA(@PRSARR@("ASUBS"))
- S AINDX=$NA(@PRSARR@("AINDX"))
- S DATA=$NA(@PRSARR@("DATA"))
- ;Set callbacks
- S HDICBK("STARTDOCUMENT")="STRTDOC^HDISVM01"
- S HDICBK("ENDDOCUMENT")="ENDDOC^HDISVM01"
- S HDICBK("DOCTYPE")="DOCTYPE^HDISVM01"
- S HDICBK("STARTELEMENT")="STRTLMNT^HDISVM01"
- S HDICBK("ENDELEMENT")="ENDLMNT^HDISVM01"
- S HDICBK("CHARACTERS")="CHARS^HDISVM01"
- S HDICBK("PI")="PI^HDISVM01"
- S HDICBK("EXTERNAL")="EXTERN^HDISVM01"
- S HDICBK("NOTATION")="NOTATION^HDISVM01"
- S HDICBK("COMMENT")="COMMENT^HDISVM01"
- S HDICBK("ERROR")="ERROR^HDISVM01"
- ;Parse XML document using SAX
- K @PRSARR
- D EN^MXMLPRSE(XMLARR,.HDICBK,"")
- Q
- ;
- STRTDOC ;Start document
- Q
- ;
- ENDDOC ;End document
- Q
- ;
- DOCTYPE(ROOT,PUBID,SYSID) ;DOCTYPE declaration
- Q
- ;
- STRTLMNT(NAME,ATTRLIST) ;Start element
- N ESN,REP,ATTR,ASN,TMPREF
- ;Determine element subscript number
- S ESN=+$G(@EINDX@(NAME))
- I 'ESN D
- .S ESN=1+$O(@ESUBS@(""),-1)
- .S @ESUBS@(ESN)=NAME
- .S @EINDX@(NAME)=ESN
- ;Determine repetition number
- S REP=1+$O(@DATA@(ESN,""),-1)
- ;Add element subscript number and repetition number to output array
- S TMPREF=$$OREF^DILF(DATA)
- S TMPREF=TMPREF_ESN_","_REP_","
- S DATA=$$CREF^DILF(TMPREF)
- ;Store attributes
- S ATTR=""
- F S ATTR=$O(ATTRLIST(ATTR)) Q:ATTR="" D
- .;Get attribute subscript number
- .S ASN=+$G(@AINDX@(ESN,ATTR))
- .I 'ASN D
- ..S ASN=1+$O(@ASUBS@(""),-1)
- ..S @ASUBS@(ESN,ASN)=ATTR
- ..S @AINDX@(ESN,ATTR)=ASN
- .;Store value
- .S @DATA@("A",ASN)=ATTRLIST(ATTR)
- Q
- ;
- ENDLMNT(NAME) ;End element
- N TMPREF,SUBCNT,SUBCHK
- ;Remove element subscript number and repition number from output array
- S TMPREF=$$OREF^DILF(DATA)
- S SUBCNT=$L(TMPREF,",")
- S SUBCHK=SUBCNT-3
- I SUBCHK>0 S TMPREF=$P(TMPREF,",",1,SUBCHK)_","
- I SUBCHK<1 S TMPREF=$P(TMPREF,"(",1)_"("
- S DATA=$$CREF^DILF(TMPREF)
- Q
- ;
- CHARS(TEXT) ;Non-markup content
- ;Store element value
- S @DATA@("V")=TEXT
- Q
- ;
- PI(TARGET,TEXT) ;Processing instruction
- Q
- ;
- EXTERN(SYSID,PUBID,GLOBAL) ;External entity reference
- Q
- ;
- NOTATION(NAME,SYSID,PUBID) ;Notation declaration
- Q
- ;
- Q
- ;
- ERROR(ERR) ;Error
- Q
- ;
- UNESC(TEXT) ;Convert escaped characters
- ;Assumes TEXT is not corrupt
- N ESCBEG,ESCEND,ESCTXT,ESCCHAR,OUTPUT
- S TEXT=$G(TEXT)
- I TEXT="" Q TEXT
- I TEXT'["&" Q TEXT
- ;Do conversion
- S OUTPUT=""
- F Q:TEXT'["&" D
- .;Find escaped character
- .S ESCBEG=$F(TEXT,"&")
- .S ESCEND=$F(TEXT,";",ESCBEG)
- .S ESCTXT=$E(TEXT,ESCBEG,ESCEND-2)
- .;Convert escaped character
- .S ESCCHAR=""
- .I ESCTXT="amp" S ESCCHAR="&"
- .I ESCTXT="lt" S ESCCHAR="<"
- .I ESCTXT="gt" S ESCCHAR=">"
- .I ESCTXT="apos" S ESCCHAR="'"
- .I ESCTXT="quot" S ESCCHAR=$C(34)
- .;Replace escaped character with actual character
- .S OUTPUT=OUTPUT_$E(TEXT,1,ESCBEG-2)_ESCCHAR
- .;Continue processing rest of string
- .S TEXT=$E(TEXT,ESCEND,$L(TEXT))
- ;Add on remainder of text
- S OUTPUT=OUTPUT_TEXT
- Q OUTPUT
- --- Routine Detail --- with STRUCTURED ROUTINE LISTING ---[H[J[2J[HHDISVM01 4236 printed Feb 18, 2025@23:23:17 Page 2
- HDISVM01 ;BPFO/JRP - PARSE XML DOCUMENT USING SAX;12/20/2004
- +1 ;;1.0;HEALTH DATA & INFORMATICS;;Feb 22, 2005
- +2 ;
- SAX(XMLARR,PRSARR) ;Parse XML document using SAX interface
- +1 ; Input: XMLARR - Global array containing XML document (closed root)
- +2 ; PRSARR - Array to output parsed XML document (closed root)
- +3 ;Output: None
- +4 ; @PRSARR@("ESUBS",ESN) = Element name
- +5 ; @PRSARR@("EINDX",ElementName) = Subscript number (ESN)
- +6 ; @PRSARR@("ASUBS",ESN,ASN) = Attribute name
- +7 ; @PRSARR@("AINDX",ESN,AttributeName) = Subscript number (ASN)
- +8 ; @PRSARR@("DATA",ESN,Repetition,"V") = Value of element
- +9 ; @PRSARR@("DATA",ESN,Repetition,"A",ASN) = Value of attribute
- +10 ; @PRSARR@("DATA",ESN1,Rep1,ESN2,Rep2,"V") = Value of child
- +11 ; element
- +12 ; @PRSARR@("DATA",ESN1,Rep1,ESN2,Rep2,"A",ASN) = Value of child
- +13 ; attribute
- +14 ; Notes : XMLARR must be a global array (i.e. no local arrays)
- +15 ; : PRSARR is initialized (i.e KILLed) on input
- +16 if $GET(XMLARR)=""
- QUIT
- +17 if '$DATA(@XMLARR)
- QUIT
- +18 if $GET(PRSARR)=""
- QUIT
- +19 NEW HDICBK,SUBNUM,TAGNAME
- +20 NEW ESUBS,EINDX,ASUBS,AINDX,DATA
- +21 SET ESUBS=$NAME(@PRSARR@("ESUBS"))
- +22 SET EINDX=$NAME(@PRSARR@("EINDX"))
- +23 SET ASUBS=$NAME(@PRSARR@("ASUBS"))
- +24 SET AINDX=$NAME(@PRSARR@("AINDX"))
- +25 SET DATA=$NAME(@PRSARR@("DATA"))
- +26 ;Set callbacks
- +27 SET HDICBK("STARTDOCUMENT")="STRTDOC^HDISVM01"
- +28 SET HDICBK("ENDDOCUMENT")="ENDDOC^HDISVM01"
- +29 SET HDICBK("DOCTYPE")="DOCTYPE^HDISVM01"
- +30 SET HDICBK("STARTELEMENT")="STRTLMNT^HDISVM01"
- +31 SET HDICBK("ENDELEMENT")="ENDLMNT^HDISVM01"
- +32 SET HDICBK("CHARACTERS")="CHARS^HDISVM01"
- +33 SET HDICBK("PI")="PI^HDISVM01"
- +34 SET HDICBK("EXTERNAL")="EXTERN^HDISVM01"
- +35 SET HDICBK("NOTATION")="NOTATION^HDISVM01"
- +36 SET HDICBK("COMMENT")="COMMENT^HDISVM01"
- +37 SET HDICBK("ERROR")="ERROR^HDISVM01"
- +38 ;Parse XML document using SAX
- +39 KILL @PRSARR
- +40 DO EN^MXMLPRSE(XMLARR,.HDICBK,"")
- +41 QUIT
- +42 ;
- STRTDOC ;Start document
- +1 QUIT
- +2 ;
- ENDDOC ;End document
- +1 QUIT
- +2 ;
- DOCTYPE(ROOT,PUBID,SYSID) ;DOCTYPE declaration
- +1 QUIT
- +2 ;
- STRTLMNT(NAME,ATTRLIST) ;Start element
- +1 NEW ESN,REP,ATTR,ASN,TMPREF
- +2 ;Determine element subscript number
- +3 SET ESN=+$GET(@EINDX@(NAME))
- +4 IF 'ESN
- Begin DoDot:1
- +5 SET ESN=1+$ORDER(@ESUBS@(""),-1)
- +6 SET @ESUBS@(ESN)=NAME
- +7 SET @EINDX@(NAME)=ESN
- End DoDot:1
- +8 ;Determine repetition number
- +9 SET REP=1+$ORDER(@DATA@(ESN,""),-1)
- +10 ;Add element subscript number and repetition number to output array
- +11 SET TMPREF=$$OREF^DILF(DATA)
- +12 SET TMPREF=TMPREF_ESN_","_REP_","
- +13 SET DATA=$$CREF^DILF(TMPREF)
- +14 ;Store attributes
- +15 SET ATTR=""
- +16 FOR
- SET ATTR=$ORDER(ATTRLIST(ATTR))
- if ATTR=""
- QUIT
- Begin DoDot:1
- +17 ;Get attribute subscript number
- +18 SET ASN=+$GET(@AINDX@(ESN,ATTR))
- +19 IF 'ASN
- Begin DoDot:2
- +20 SET ASN=1+$ORDER(@ASUBS@(""),-1)
- +21 SET @ASUBS@(ESN,ASN)=ATTR
- +22 SET @AINDX@(ESN,ATTR)=ASN
- End DoDot:2
- +23 ;Store value
- +24 SET @DATA@("A",ASN)=ATTRLIST(ATTR)
- End DoDot:1
- +25 QUIT
- +26 ;
- ENDLMNT(NAME) ;End element
- +1 NEW TMPREF,SUBCNT,SUBCHK
- +2 ;Remove element subscript number and repition number from output array
- +3 SET TMPREF=$$OREF^DILF(DATA)
- +4 SET SUBCNT=$LENGTH(TMPREF,",")
- +5 SET SUBCHK=SUBCNT-3
- +6 IF SUBCHK>0
- SET TMPREF=$PIECE(TMPREF,",",1,SUBCHK)_","
- +7 IF SUBCHK<1
- SET TMPREF=$PIECE(TMPREF,"(",1)_"("
- +8 SET DATA=$$CREF^DILF(TMPREF)
- +9 QUIT
- +10 ;
- CHARS(TEXT) ;Non-markup content
- +1 ;Store element value
- +2 SET @DATA@("V")=TEXT
- +3 QUIT
- +4 ;
- PI(TARGET,TEXT) ;Processing instruction
- +1 QUIT
- +2 ;
- EXTERN(SYSID,PUBID,GLOBAL) ;External entity reference
- +1 QUIT
- +2 ;
- NOTATION(NAME,SYSID,PUBID) ;Notation declaration
- +1 QUIT
- +2 ;
- +1 QUIT
- +2 ;
- ERROR(ERR) ;Error
- +1 QUIT
- +2 ;
- UNESC(TEXT) ;Convert escaped characters
- +1 ;Assumes TEXT is not corrupt
- +2 NEW ESCBEG,ESCEND,ESCTXT,ESCCHAR,OUTPUT
- +3 SET TEXT=$GET(TEXT)
- +4 IF TEXT=""
- QUIT TEXT
- +5 IF TEXT'["&"
- QUIT TEXT
- +6 ;Do conversion
- +7 SET OUTPUT=""
- +8 FOR
- if TEXT'["&"
- QUIT
- Begin DoDot:1
- +9 ;Find escaped character
- +10 SET ESCBEG=$FIND(TEXT,"&")
- +11 SET ESCEND=$FIND(TEXT,";",ESCBEG)
- +12 SET ESCTXT=$EXTRACT(TEXT,ESCBEG,ESCEND-2)
- +13 ;Convert escaped character
- +14 SET ESCCHAR=""
- +15 IF ESCTXT="amp"
- SET ESCCHAR="&"
- +16 IF ESCTXT="lt"
- SET ESCCHAR="<"
- +17 IF ESCTXT="gt"
- SET ESCCHAR=">"
- +18 IF ESCTXT="apos"
- SET ESCCHAR="'"
- +19 IF ESCTXT="quot"
- SET ESCCHAR=$CHAR(34)
- +20 ;Replace escaped character with actual character
- +21 SET OUTPUT=OUTPUT_$EXTRACT(TEXT,1,ESCBEG-2)_ESCCHAR
- +22 ;Continue processing rest of string
- +23 SET TEXT=$EXTRACT(TEXT,ESCEND,$LENGTH(TEXT))
- End DoDot:1
- +24 ;Add on remainder of text
- +25 SET OUTPUT=OUTPUT_TEXT
- +26 QUIT OUTPUT