Coverage Report - ca.uhn.hl7v2.preparser.XML
 
Classes in this File Line Coverage Branch Coverage Complexity
XML
37%
13/35
33%
2/6
5.733
XML$HL7MessageHandler
84%
116/138
68%
71/104
5.733
XML$StopParsingException
0%
0/2
N/A
5.733
 
 1  
 package ca.uhn.hl7v2.preparser;
 2  
 
 3  
 import java.io.IOException;
 4  
 import java.util.ArrayList;
 5  
 import java.util.Collection;
 6  
 import java.util.Iterator;
 7  
 import java.util.List;
 8  
 import java.util.Map;
 9  
 import java.util.Properties;
 10  
 import java.util.SortedMap;
 11  
 import java.util.TreeMap;
 12  
 
 13  
 import javax.xml.parsers.ParserConfigurationException;
 14  
 import javax.xml.parsers.SAXParser;
 15  
 import javax.xml.parsers.SAXParserFactory;
 16  
 
 17  
 import org.xml.sax.Attributes;
 18  
 import org.xml.sax.InputSource;
 19  
 import org.xml.sax.SAXException;
 20  
 import org.xml.sax.SAXParseException;
 21  
 import org.xml.sax.helpers.DefaultHandler;
 22  
 
 23  
 import ca.uhn.hl7v2.HL7Exception;
 24  
 
 25  0
 public class XML
 26  
 {
 27  
         @SuppressWarnings("serial")
 28  
         protected static class StopParsingException extends SAXException
 29  
         {
 30  
                 public StopParsingException() 
 31  
                 {
 32  0
                         super("ca.uhn.hl7.....StopParsingException");
 33  0
                 }
 34  
         }
 35  
 
 36  
         /** the SAXParser reports parsing events to an object of this class.
 37  
         We keep track of some parsing state, and the Properties object that 
 38  
         we're supposed to write our data to.
 39  
         */
 40  0
         static protected class HL7MessageHandler extends DefaultHandler 
 41  
         {
 42  
                 /* m_props & m_msgMask should be set by the user of this handler before
 43  
                 they pass this handler to SAXParser.parse() or whatever */
 44  
 
 45  
                 /** The data that is found while parsing, and which passes m_msgMask, 
 46  
                 will be dumped to m_props, as (DatumPath.toString() / text) key/value
 47  
                 pairs */
 48  7
                 public Properties m_props = null;
 49  
 
 50  
                 /** Specifies what parts of a message should be dumped to m_props. 
 51  
                 */
 52  7
                 public Collection<DatumPath> m_msgMask = null;
 53  
 
 54  
                 /* All other fields are parser state. */
 55  
 
 56  7
                 protected boolean m_startedDocument = false;
 57  
 
 58  
                 /* m_msgID / m_curPath together keep track of where we are in the document.
 59  
 
 60  
                 If m_msgID.length() != 0, then we're within the message element.  (We're only
 61  
                 expecting one message per document.)  Then m_msgID will be the name of the 
 62  
                 message.  ("ACK" or whatever).  
 63  
 
 64  
                 m_curPath keeps track of where within the message we are.  See notes at 
 65  
                 DatumPath class definition.  If m_curPath.size() != 0, then we must be 
 66  
                 within a message.
 67  
 
 68  
                 At any point in the code below: 
 69  
 
 70  
                 if m_msgID.length() == 0, 
 71  
                         then m_curPath.size() == 0
 72  
 
 73  
                 if m_curPath.size() != 0
 74  
                         then m_msgID.length() != 0
 75  
                 
 76  
                 Note that our DatumPaths count indices starting from 0 (not 1) -- they're 
 77  
                 only converted to 1-based in the string representations that wind up 
 78  
                 as m_props keys.
 79  
                 */
 80  7
                 StringBuffer m_msgID = new StringBuffer();
 81  7
                 DatumPath m_curPath = new DatumPath();
 82  
 
 83  
                 /* the location in the document of the last datum we dumped to m_props. */
 84  7
                 DatumPath m_lastDumpedPath = new DatumPath();
 85  
 
 86  
                 /** For handling repeat segments.   segmentID (String) -> next repeat idx
 87  
                 (Integer).  So when we hit a segment ZYX, we'll know how many times we've
 88  
                 hit a ZYX before, and set the segmentRepIdx part of m_curPath
 89  
                 appropriately. */
 90  7
                 SortedMap<String, Integer> m_segmentId2nextRepIdx = new TreeMap<String, Integer>();
 91  
 
 92  
                 /* m_depthWithinUselessElement and m_depthWithinUsefulElement 
 93  
                 reflect what m_msgMask thinks about our location in the document at any
 94  
                 given time.  
 95  
 
 96  
                 Both should always be >= -1.  Note that both can be >= 0 at the same time
 97  
                 -- explained in a minute....
 98  
 
 99  
                 If m_depthWithinUsefulElement >= 0, this means that we are however deep
 100  
                 (in terms of nested elements: 0 => just within) within an area of the
 101  
                 message that passes m_msgMask.  We should dump whatever we find
 102  
                 there to m_props.  As we move around within such an element, we will still
 103  
                 update m_curPath appropriately.
 104  
 
 105  
                 If m_depthWithinUselessElement >= 0, we are however deep within an element
 106  
                 which either made no sense (eg. <ZZZ.1> where we were expecting a <ZYX.1>
 107  
                 -- a few other things maybe), or more importantly that we're within an
 108  
                 element that otherwise has no hope of having any useful elements within it
 109  
                 according to m_msgMask.  (eg. m_msgMask says it wants only ZYX segment
 110  
                 contents, we're in an <MSH>).  So we can safely ignore all content within,
 111  
                 and just keep track of how deep we are within this useless element (with
 112  
                 m_depthWithinUselessElement, of course.)  We don't update m_curPath when
 113  
                 m_depthWithinUselessElement >= 0, there's no point and how would we
 114  
                 extract information for the DatumPath out of nonsensical element names
 115  
                 anyway.
 116  
 
 117  
                 If they are both >= 0, this means that we've found some useless
 118  
                 stuff (nonsensical element names?) within a known-useful element.
 119  
                 */
 120  7
                 int m_depthWithinUsefulElement = -1, m_depthWithinUselessElement = -1;
 121  
 
 122  
                 /* With this we keep the text that we've found within a certain element.
 123  
                 It's cleared whenever we enter a (sub) element or leave an element. */
 124  7
                 StringBuffer m_chars = new StringBuffer(10);
 125  
 
 126  
                 public HL7MessageHandler()
 127  7
                 {
 128  7
                         this.clear();
 129  7
                 }
 130  
 
 131  
                 void clear()
 132  
                 {
 133  
                         // reset the state (m_props & m_msgMask are not state)
 134  14
                         m_startedDocument = false;
 135  14
                         m_msgID.delete(0, m_msgID.length());
 136  14
                         m_curPath.clear();
 137  
                         // will always be "less than" (according to DatumPath.numbersLessThan)
 138  
                         // any sensible DatumPath: 
 139  14
                         m_lastDumpedPath.clear().add(new String()).add(-42).add(-42).add(-42).add(-42).add(-42);
 140  14
                         m_segmentId2nextRepIdx.clear();
 141  14
                         m_depthWithinUsefulElement = -1;
 142  14
                         m_depthWithinUselessElement = -1;
 143  14
                         m_chars.delete(0, m_chars.length());
 144  14
                 }
 145  
 
 146  
                 public void startDocument() throws SAXException
 147  
                 {
 148  7
                         boolean ok = false;
 149  7
                         if(!m_startedDocument && (m_props != null)) {
 150  7
                                 m_startedDocument = true;
 151  7
                                 ok = true;
 152  
                         }
 153  
 
 154  7
                         if(!ok) {
 155  0
                                 clear();
 156  0
                                 throw new StopParsingException();
 157  
                         }
 158  7
                 }
 159  
 
 160  
                 public void endDocument() throws SAXException
 161  
                 {
 162  7
                         boolean ok = false;
 163  7
                         if(m_startedDocument) {
 164  7
                                 this.clear();
 165  7
                                 ok = true;
 166  
                         }
 167  
 
 168  7
                         if(!ok) {
 169  0
                                 clear();
 170  0
                                 throw new StopParsingException();
 171  
                         }
 172  7
                 }
 173  
 
 174  
                 public void startElement(String uri, String localName, String qName, 
 175  
                                 Attributes attributes) throws SAXException 
 176  
                 {
 177  
                         //System.err.println("startelem: " + qName + " curpathsize; " +
 178  
                         //m_curPath.size());
 179  391
                         boolean ok = false;
 180  391
                         if(m_startedDocument) {
 181  
                                 // A single unit of text data will be within a single element, 
 182  
                                 // -- none of it will be in sub-elements and there will be no 
 183  
                                 // sub-elements fragmenting the data text.
 184  
                                 // Right now we're entering a new element: this means that anything
 185  
                                 // in m_chars will be whitespace (likely), or text left over from, 
 186  
                                 // say, the last field, or text that was somewhere it shouldn't have been.
 187  
                                 // (ex. "<ZYX.9> shouldn't be here <PT.1> P </PT.1> </ZYX.9>"
 188  391
                                 m_chars.delete(0, m_chars.length());
 189  
 
 190  391
                                 if(m_depthWithinUselessElement >= 0) {
 191  42
                                         ++m_depthWithinUselessElement;
 192  
                                 }
 193  
                                 else {
 194  349
                                         int oldCurPathSize = m_curPath.size();
 195  349
                                         if(tryToGrowDocLocationFromElementName(m_msgID, m_curPath, 
 196  
                                                 m_segmentId2nextRepIdx, m_lastDumpedPath, qName)) 
 197  
                                         {
 198  328
                                                 if(m_curPath.size() > oldCurPathSize) {
 199  
                                                         // assert (m_depthWithinUselessElement == -1) // m_curPath
 200  
                                                         // should not have grown if we're within a useless element.
 201  313
                                                         if(m_depthWithinUsefulElement == -1) {
 202  
                                                                 // this new element could match one of the DatumPaths in
 203  
                                                                 // m_msgMask -- if that's the case, we've just entered a
 204  
                                                                 // useful element.
 205  
                                                                 // TODO: functional stylee (a la C++'s std::accumulate) ? 
 206  248
                                                                 boolean curPathStartsWithAMaskElem = false;
 207  248
                                                                 for(Iterator<DatumPath> maskIt = m_msgMask.iterator(); 
 208  496
                                                                         !curPathStartsWithAMaskElem && maskIt.hasNext(); )
 209  
                                                                 {
 210  248
                                                                         curPathStartsWithAMaskElem 
 211  
                                                                                 = m_curPath.startsWith(maskIt.next());
 212  
                                                                 }
 213  
 
 214  248
                                                                 if(curPathStartsWithAMaskElem) 
 215  248
                                                                         m_depthWithinUsefulElement = 0;
 216  
                                                                 else {
 217  
                                                                         // so this element we're entering is not specified by m_msgMask
 218  
                                                                         // to be useful -- but might it contains elements that
 219  
                                                                         // are?
 220  0
                                                                         boolean aMaskElemStartsWithCurPath = false;
 221  0
                                                                         for(Iterator<DatumPath> maskIt = m_msgMask.iterator(); 
 222  0
                                                                                 !aMaskElemStartsWithCurPath && maskIt.hasNext(); )
 223  
                                                                         {
 224  0
                                                                                 aMaskElemStartsWithCurPath 
 225  
                                                                                         = maskIt.next().startsWith(m_curPath);
 226  
                                                                         }
 227  
 
 228  0
                                                                         if(!aMaskElemStartsWithCurPath) {
 229  
                                                                                 // ... nope!  useless.
 230  0
                                                                                 m_depthWithinUselessElement = 0;
 231  0
                                                                                 m_curPath.setSize(oldCurPathSize);
 232  
                                                                         } // else => ok, carry on, m_depthWithinUse{less,ful}Element
 233  
                                                                         // still both -1.
 234  
                                                                 }
 235  248
                                                         }
 236  
                                                         // else => already within a useful element, don't need to compare 
 237  
                                                         // against m_msgMask.
 238  
                                                 }
 239  
                                         }
 240  
                                         else
 241  21
                                                 m_depthWithinUselessElement = 0;
 242  
                                 }
 243  391
                                 ok = true;
 244  
                         }
 245  
 
 246  391
                         if(!ok) {
 247  0
                                 clear();
 248  0
                                 throw new StopParsingException();
 249  
                         }
 250  391
                 }
 251  
 
 252  
                 /* doc location == msgID & curPath together.  
 253  
                 If we've encountered an element called "elementName", then this tries
 254  
                 to determine what it is, based on what we already know about the document.
 255  
                 returns true if we can make sense of this new element name given the
 256  
                 position we're at (represented by msgID / curPath), 
 257  
                 false if we can't (which probably means this should be a useless element). 
 258  
                 returning true doesn't mean that we actually changed msgID or curPath, it
 259  
                 might mean that we just passed through a segment group element OK.
 260  
                 */
 261  
                 protected static boolean tryToGrowDocLocationFromElementName(
 262  
                         StringBuffer msgID /*in/out*/, DatumPath curPath /*in/out*/, 
 263  
                         Map<String, Integer> segmentId2nextRepIdx /*in/out*/, DatumPath lastDumpedPath /*in*/, 
 264  
                         String elementName /*in*/)
 265  
                 {
 266  349
                         boolean ok = false; // ok == can we make sense of this new element?
 267  
                         // hmm ... where are we in the document: 
 268  349
                         if((msgID.length() == 0) && (curPath.size() == 0)) {
 269  
                                 // we're entering a message
 270  7
                                 msgID.replace(0, msgID.length(), elementName);
 271  7
                                 segmentId2nextRepIdx.clear();
 272  7
                                 ok = true;
 273  
                         }
 274  342
                         else if((msgID.length() > 0) && (curPath.size() == 0)) {
 275  
                                 // we're entering either a segment-group element (eg. <ADT_A01.PROCEDURE>)
 276  
                                 // or an actual segment element.
 277  133
                                 if(!(elementName.startsWith("" + msgID + '.'))) {
 278  
                                         // must be an actual segment.
 279  125
                                         curPath.add(elementName);
 280  
 
 281  125
                                         if(segmentId2nextRepIdx.containsKey(elementName)) 
 282  16
                                                 curPath.add(segmentId2nextRepIdx.get(elementName));
 283  
                                         else
 284  109
                                                 curPath.add(new Integer(0));
 285  
 
 286  125
                                         segmentId2nextRepIdx.put(elementName, ((Integer)curPath.get(curPath.size()-1)).intValue() + 1);
 287  
                                 }
 288  133
                                 ok = true;
 289  
                         }
 290  209
                         else if((msgID.length() > 0) && (curPath.size() > 0)) {
 291  
                                 // we're entering a field or a component or a subcomponent.
 292  209
                                 if(curPath.size() == 2) { // we're entering a field element
 293  
                                         // all fields should start with segment-ID + '.' 
 294  105
                                         if(elementName.startsWith("" + curPath.get(0) + '.')) {
 295  
                                                 try {
 296  105
                                                         int fieldIdxFromElementName 
 297  
                                                                 = Integer.parseInt(elementName.substring(elementName.indexOf('.') + 1));
 298  
 
 299  105
                                                         curPath.add(new Integer(fieldIdxFromElementName));
 300  
 
 301  
                                                         // now add the repetition idx to curPath: 
 302  105
                                                         if((lastDumpedPath.size() >= 4) 
 303  
                                                                 && (((Integer)lastDumpedPath.get(2)).intValue() 
 304  
                                                                         == fieldIdxFromElementName))
 305  
                                                         {
 306  
                                                                 // lastDumpedPath has a fieldIdx and a fieldRepIdx.
 307  0
                                                                 curPath.add(new Integer(((Integer)lastDumpedPath.get(3)).intValue() + 1));
 308  
                                                         }
 309  
                                                         else
 310  105
                                                                 curPath.add(new Integer(0));
 311  
 
 312  105
                                                         ok = true;
 313  105
                                                 } catch(NumberFormatException e) {}
 314  
                                         } // else => this isn't a field -- must be useless.
 315  
                                 }
 316  104
                                 else if((curPath.size() == 4) || (curPath.size() == 5)) {
 317  
                                         // we're entering a component or subcomponent element
 318  
                                         try {
 319  104
                                                 int idxFromElementName 
 320  
                                                         = Integer.parseInt(elementName.substring(elementName.indexOf('.') + 1));
 321  83
                                                 curPath.add(new Integer(idxFromElementName));
 322  83
                                                 ok = true;
 323  83
                                         } catch(NumberFormatException e) {}
 324  
                                 }
 325  
                         }
 326  349
                         return ok;
 327  
                 }
 328  
 
 329  
                 public void endElement(String uri, String localName, String qName) 
 330  
                         throws SAXException 
 331  
                 {
 332  
                         //System.err.println("endElement: " + qName);
 333  391
                         boolean ok = false;
 334  391
                         if(m_startedDocument) {
 335  391
                                 if(m_depthWithinUselessElement >= 0) {
 336  63
                                         --m_depthWithinUselessElement;
 337  63
                                         ok = true;
 338  
                                 }
 339  
                                 else {
 340  328
                                         if((m_msgID.length() > 0) && (m_curPath.size() == 0)) {
 341  
                                                 // we're exiting either a message element or a 
 342  
                                                 // segment group element.
 343  15
                                                 if((""+qName).compareTo(""+m_msgID) == 0)
 344  7
                                                         m_msgID.delete(0, m_msgID.length()); // => exiting message element
 345  
                                                 // else => segment group element -- do nothing.
 346  
 
 347  15
                                                 ok = true;
 348  
                                         }
 349  313
                                         else if((m_msgID.length() > 0) && (m_curPath.size() > 0)) {
 350  313
                                                 tryToDumpDataToProps();
 351  
 
 352  313
                                                 if(m_curPath.size() == 2) {
 353  
                                                         // exiting a segment element
 354  125
                                                         m_curPath.setSize(0);
 355  125
                                                         ok = true;
 356  
                                                 }
 357  188
                                                 else if(m_curPath.size() == 4) {
 358  
                                                         // exiting a field element 
 359  105
                                                         m_curPath.setSize(2);
 360  105
                                                         ok = true;
 361  
                                                 }
 362  83
                                                 else if((m_curPath.size() == 5) || (m_curPath.size() == 6)) {
 363  
                                                         // exiting a component or a subcomponent
 364  83
                                                         m_curPath.setSize(m_curPath.size() - 1);
 365  83
                                                         ok = true;
 366  
                                                 }
 367  
                                         }
 368  
 
 369  328
                                         if(m_depthWithinUsefulElement >= 0) 
 370  248
                                                 --m_depthWithinUsefulElement;
 371  
                                 }
 372  
                         }
 373  
 
 374  391
                         if(!ok) {
 375  0
                                 clear();
 376  0
                                 throw new StopParsingException();
 377  
                         }
 378  391
                 }
 379  
 
 380  
                 /** try to dump whatever we've got in m_chars to m_props, 
 381  
                 with a key of m_curPath.toString(). 
 382  
                 */
 383  
                 protected void tryToDumpDataToProps()
 384  
                 {
 385  313
                         if((m_curPath.size() >= 2) && (m_depthWithinUselessElement == -1)) {
 386  
                                 /* m_curPath.toString() will be the property key whose value will be
 387  
                                 m_chars.
 388  
 
 389  
                                 This is (part of) what m_lastDumpedPath is for: With, for example "<ZYX.9>
 390  
                                 <PT.1>P</PT.1> </ZYX.9>" we might have had a m_curPath containing something
 391  
                                 like [ZYX, 0, 9, 0, 0] when we exited the PT.1 element.  (note: internal
 392  
                                 DatumPath elements are 0-indexed, string representations of DatumPaths and
 393  
                                 the XML text is 1-indexed.)  So in m_props the key for "P" would have been
 394  
                                 "ZYX[0]-9[0]-1-1".  (the last "-1" is a default that got added by
 395  
                                 toString()).
 396  
                                 
 397  
                                 Then we would have exited the PT.1 element, changed m_curPath to [ZYX, 0,
 398  
                                 9, 0], picked up the whitespace between </PT.1> and </ZYX.9>, and when
 399  
                                 exiting the ZYX.9 element, we might have written that whitespace to m_props
 400  
                                 with a key of the toString() of [ZYX, 0, 9, 0]; that is, "ZYX[0]-9[0]-1-1":
 401  
                                 the same as the key for the "P" ... clobbering "P" in m_props with
 402  
                                 whitespace.
 403  
 
 404  
                                 But since we know that HL7 fields / components / etc are always in order
 405  
                                 (numerically), we can count on m_lastDumpedPath and use
 406  
                                 DatumPath.numbersLessThan to avoid the clobbering.
 407  
                                 */
 408  313
                                 if((m_lastDumpedPath.get(0).equals(m_curPath.get(0))) 
 409  
                                                 ? (m_lastDumpedPath.numbersLessThan(m_curPath)) 
 410  
                                                 : true)
 411  
                                 {
 412  248
                                         if(m_depthWithinUsefulElement >= 0) {
 413  
                                                 // TODO: remove!  or assert 
 414  248
                                                 if(m_props.containsKey("" + m_curPath)) 
 415  0
                                                         System.err.println("ALAAAARM: CLOBBERING PROPERTY in " + getClass());
 416  
 
 417  248
                                                 m_props.setProperty("" + m_curPath, "" + m_chars);
 418  248
                                                 m_lastDumpedPath.copy(m_curPath);
 419  248
                                                 m_chars.delete(0, m_chars.length());
 420  
                                         }
 421  
                                 }
 422  
                         }
 423  313
                 }
 424  
 
 425  
                 public void characters(char[] chars, int start, int length)
 426  
                 {
 427  
                         // note that a contiguous run of characters in the document 
 428  
                         // might get reported to us in several chunks. 
 429  
                         // (In the order that the text appears in the document, 
 430  
                         // non-overlapping and with no gaps between chunks.) 
 431  
                         // An entity like &amp; will reach us as an actual & character.
 432  
                         
 433  560
                         if((m_msgID.length() > 0) && (m_curPath.size() >= 4)) {
 434  302
                                 m_chars.append(chars, start, length);
 435  
                         }
 436  560
                 }
 437  
 
 438  
                 public void ignoreableWhitespace(char []chars, int start, int length)
 439  
                 {
 440  
                         // it's unclear which whitespace is considered ignorable for us.  
 441  
                         // what the heck, add it to m_chars. 
 442  0
                         characters(chars, start, length);
 443  0
                 }
 444  
 
 445  
                 public void error(SAXParseException e)
 446  
                 {
 447  
                         // TODO: remove.
 448  0
                         System.err.println("Error in " + getClass() + ": " + e);
 449  0
                 }
 450  
 
 451  
                 public void fatalError(SAXParseException e) throws SAXException 
 452  
                 {
 453  0
                         throw e;
 454  
                 }
 455  
         }
 456  
 
 457  
         /** parse message according to our HL7 XML handler, and dump the data found
 458  
         to props.  
 459  
         
 460  
         returns true if we parsed ok, which means well-formed XML, and
 461  
         that's about it.  We just barely check against HL7 structure, and ignore any
 462  
         elements / text that is unexpected (that is, impossible in any HL7 message:
 463  
         independent of any message / segment definitions).
 464  
 
 465  
         "message" should be an XML document with one top-level element -- that being
 466  
         the message.  (<ACK> or whatever).  We're only expecting one message to be in
 467  
         "message".
 468  
 
 469  
         props can be null if you don't want the data (we still parse).  The message
 470  
         data found in message (that passes msgMask) will be added to props as key /
 471  
         value pairs with the key a toString() of the appropriate DatumPath for the
 472  
         location where the data is found (i.e. in the ZYX[a]-b[c]-d-e style), and
 473  
         the value the corresponding text.  So, after calling parseMessage
 474  
         successfully, if you wanted to retrieve the message data from props you
 475  
         might call something like 
 476  
         props.getProperty((new DatumPath()).add("MSH").add(1).toString())
 477  
         and that would return a String with "|", probably.
 478  
 
 479  
         Note that this package facilitates the extraction of message data in a way
 480  
         independent of message version (i.e. components and whatever getting added):
 481  
 
 482  
         With a message of "<FOO><ZYX><ZYX.42>fieldy-field-field</ZYX.42></ZYX></FOO>",
 483  
         "ZYX[0]-1[0]-1-1" will be the key that ends up in props (see notes at
 484  
         DatumPath.toString())
 485  
 
 486  
         So if you, coding for a future version of the FOO message but
 487  
         recieving old-version message data, tried
 488  
         props.getProperty((new DatumPath()).add("ZYX").add(0).add(42).add(0).add(1).toString()) 
 489  
         with the message above (that is, trying to extract a repetition and
 490  
         component that aren't there), you would get "ZYX[0]-42[0]-1-1" mapping to 
 491  
         "fieldy-field-field" in the resulting props.  
 492  
 
 493  
         If the message was
 494  
         "<FOO><ZYX><ZYX.42><ARG.1>component data</ARG.1></ZYX.42></ZYX></FOO>"
 495  
         and you, coding for an old version of this FOO message but recieving
 496  
         new-version FOO message data, tried 
 497  
         props.getProperty((new DatumPath()).add("ZYX").add(0).add(42).toString())
 498  
         you would get "ZYX[0]-42[0]-1-1" mapping to "component data" in the resulting 
 499  
         props.
 500  
 
 501  
         msgMask lets you specify which parts of the message you want dumped to props.
 502  
         Passing in null gets you everything.  Otherwise, msgMask's elements should
 503  
         all be DatumPaths (! => ClassCastException), and a particular part of the
 504  
         message will be dumped to props only if it's location, as represented by a
 505  
         DatumPath, startsWith (as in DatumPath.startsWith()) at least one element of
 506  
         msgMask.  So if one element of msgMask was a (new DatumPath()).add(new
 507  
         String("ZYX")), then everything in all ZYX segment would get dumped to props.
 508  
         A (new DatumPath()).add(new String("ZYX")).add(1) would get only the first
 509  
         repetitions of same (if there is one) dumped to props.  etc. etc.  Note that
 510  
         a DatumPath of size() == 0 in msgMask will get you everything, no matter what
 511  
         the other elements of msgMask are, because all DatumPaths startsWith the
 512  
         zero-length DatumPath.
 513  
 
 514  
         Segment group elements (eg. ADT_A01.PROCEDURE) are handled fine, but they
 515  
         aren't addressed in msgMask or in the output in props -- basically any
 516  
         element tags at the level immediately inside the message element, and having
 517  
         a name that starts with the message element name + '.', is ignored (meaning
 518  
         it's contents are dealt with the same as if the start and end tags' just 
 519  
         wasn't there.)
 520  
         */
 521  
         public static boolean parseMessage(Properties props, String message, 
 522  
                         Collection<DatumPath> msgMask) throws HL7Exception
 523  
         {
 524  7
                 boolean ret = false;
 525  
                 try {
 526  7
                         SAXParserFactory factory = SAXParserFactory.newInstance();
 527  7
                         SAXParser parser = factory.newSAXParser();
 528  
 
 529  7
                         InputSource inSrc = new InputSource(new java.io.StringReader(message));
 530  
 
 531  7
                         HL7MessageHandler handler = new HL7MessageHandler();
 532  7
                         handler.m_props = (props != null 
 533  
                                 ? props : new Properties()); // it's expecting a props.
 534  
 
 535  7
                         if(msgMask != null)
 536  0
                                 handler.m_msgMask = msgMask;
 537  
                         else {
 538  7
                                 handler.m_msgMask = new ArrayList<DatumPath>();
 539  7
                                 handler.m_msgMask.add(new DatumPath());
 540  
                         }
 541  
 
 542  7
                         parser.parse(inSrc, handler);
 543  7
                         ret = true;
 544  0
         } catch (ParserConfigurationException e) {
 545  0
             throw new HL7Exception(e);
 546  0
         } catch (IOException e) {
 547  0
             throw new HL7Exception(e);
 548  0
         } catch (StopParsingException e) {
 549  0
             throw new HL7Exception(e);
 550  0
         } catch (SAXException e) {
 551  0
             throw new HL7Exception(e);
 552  7
         }
 553  
 
 554  7
                 return ret;
 555  
         }
 556  
 
 557  
         public static void main(String args[]) 
 558  
         {
 559  0
                 if(args.length >= 1) {
 560  0
                         Properties props = new Properties();
 561  0
                         List<DatumPath> msgMask = new ArrayList<DatumPath>();
 562  0
                         msgMask.add(new DatumPath().add("MSH").add(0).add(9));
 563  
                         //msgMask.add(new DatumPath());
 564  
                         boolean parseret;
 565  
             try {
 566  0
                 parseret = XML.parseMessage(props, args[0], msgMask);
 567  0
                 System.err.println("parseMessage returned " + parseret);
 568  0
             } catch (HL7Exception e) {
 569  0
                 e.printStackTrace();
 570  0
             }
 571  0
                         props.list(System.err);
 572  
                 }
 573  0
         }
 574  
 }
 575