View Javadoc

1   /**
2   The contents of this file are subject to the Mozilla Public License Version 1.1
3   (the "License"); you may not use this file except in compliance with the License.
4   You may obtain a copy of the License at http://www.mozilla.org/MPL/
5   Software distributed under the License is distributed on an "AS IS" basis,
6   WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
7   specific language governing rights and limitations under the License.
8   
9   The Initial Developer of the Original Code is University Health Network. Copyright (C)
10  2001.  All Rights Reserved.
11  
12  Contributor(s): ______________________________________.
13  
14  Alternatively, the contents of this file may be used under the terms of the
15  GNU General Public License (the "GPL"), in which case the provisions of the GPL are
16  applicable instead of those above.  If you wish to allow use of your version of this
17  file only under the terms of the GPL and not to allow others to use your version
18  of this file under the MPL, indicate your decision by deleting  the provisions above
19  and replace  them with the notice and other provisions required by the GPL License.
20  If you do not delete the provisions above, a recipient may use your version of
21  this file under either the MPL or the GPL.
22  
23  */
24  package ca.uhn.hl7v2.parser;
25  
26  import java.io.File;
27  import java.io.FileReader;
28  import java.util.ArrayList;
29  import java.util.HashSet;
30  import java.util.List;
31  import java.util.Set;
32  
33  import org.slf4j.Logger;
34  import org.slf4j.LoggerFactory;
35  import org.w3c.dom.DOMException;
36  import org.w3c.dom.Document;
37  import org.w3c.dom.Element;
38  import org.w3c.dom.Node;
39  import org.w3c.dom.NodeList;
40  
41  import ca.uhn.hl7v2.HL7Exception;
42  import ca.uhn.hl7v2.HapiContext;
43  import ca.uhn.hl7v2.model.Group;
44  import ca.uhn.hl7v2.model.Message;
45  import ca.uhn.hl7v2.model.Segment;
46  import ca.uhn.hl7v2.model.Structure;
47  import ca.uhn.hl7v2.util.XMLUtils;
48  import ca.uhn.hl7v2.validation.impl.NoValidation;
49  import ca.uhn.hl7v2.validation.impl.ValidationContextFactory;
50  
51  /**
52   * <p>A default XMLParser.  This class assigns segment elements (in an XML-encoded message) 
53   * to Segment objects (in a Message object) using the name of a segment and the names 
54   * of any groups in which the segment is nested.  The names of group classes must correspond
55   * to the names of group elements (they must be identical except that a dot in the element 
56   * name, following the message name, is replaced with an underscore, in order to consitute a 
57   * valid class name). </p>
58   * <p>At the time of writing, the group names in the XML spec are changing.  Many of the group 
59   * names have been automatically generated based on the group contents.  However, these automatic 
60   * names are gradually being replaced with manually assigned names.  This process is expected to 
61   * be complete by November 2002.  As a result, mismatches are likely.  Messages could be  
62   * transformed prior to parsing (using XSLT) as a work-around.  Alternatively the group class names 
63   * could be changed to reflect updates in the XML spec.  Ultimately, HAPI group classes will be 
64   * changed to correspond with the official group names, once these are all assigned.  </p>
65   * 
66   * @see ParserConfiguration for configuration options which may affect parser encoding and decoding behaviour
67   * @author Bryan Tripp
68   */
69  public class DefaultXMLParser extends XMLParser {
70  
71      private static final Logger log = LoggerFactory.getLogger(DefaultXMLParser.class);
72  
73      private static final Set<String> ourForceGroupNames;
74      
75      static {
76      	ourForceGroupNames = new HashSet<String>();
77      	ourForceGroupNames.add("DIET");
78      }
79      
80      public DefaultXMLParser() {
81      	super();
82      }
83      
84      public DefaultXMLParser(HapiContext context) {
85  		super(context);
86  	}
87  
88  	/** 
89       * Creates a new instance of DefaultXMLParser 
90       *  
91       * @param theFactory custom factory to use for model class lookup 
92       */
93      public DefaultXMLParser(ModelClassFactory theFactory) {
94      	super(theFactory);
95      }
96      
97      /**
98       * <p>Creates an XML Document that corresponds to the given Message object. </p>
99       * <p>If you are implementing this method, you should create an XML Document, and insert XML Elements
100      * into it that correspond to the groups and segments that belong to the message type that your subclass
101      * of XMLParser supports.  Then, for each segment in the message, call the method
102      * <code>encode(Segment segmentObject, Element segmentElement)</code> using the Element for
103      * that segment and the corresponding Segment object from the given Message.</p>
104      */
105     public Document encodeDocument(Message source) throws HL7Exception {
106         String messageClassName = source.getClass().getName();
107         String messageName = messageClassName.substring(messageClassName.lastIndexOf('.') + 1);
108         try {
109             Document doc = XMLUtils.emptyDocument(messageName);
110             //Element root = doc.createElement(messageName);
111             //doc.appendChild(root);
112             encode(source, doc.getDocumentElement());
113             return doc;
114         } catch (Exception e) {
115             throw new HL7Exception(
116                 "Can't create XML document - " + e.getClass().getName(), e);
117         }
118     }
119 
120     /**
121      * Copies data from a group object into the corresponding group element, creating any 
122      * necessary child nodes.  
123      */
124     private void encode(Group groupObject, Element groupElement) throws HL7Exception {
125         String[] childNames = groupObject.getNames();
126         String messageName = groupObject.getMessage().getName();
127         
128         try {
129         	for (String name : childNames) {
130                 Structure[] reps = groupObject.getAll(name);
131                 for (Structure rep : reps) {
132                     String elementName = makeGroupElementName(messageName, name);
133 					Element childElement;
134 					try {
135 						childElement = groupElement.getOwnerDocument().createElement(elementName);
136 			        } catch (DOMException e) {
137 			            throw new HL7Exception(
138 			                "Can't encode element " + elementName + " in group " + groupObject.getClass().getName(), e);
139 			        }
140                     groupElement.appendChild(childElement);
141                     if (rep instanceof Group) {
142                         encode((Group) rep, childElement);
143                     }
144                     else if (rep instanceof Segment) {
145                         encode((Segment) rep, childElement);
146                     }
147 				}
148             }
149         } catch (DOMException e) {
150             throw new HL7Exception(
151                 "Can't encode group " + groupObject.getClass().getName(), e);
152         }
153     }
154 
155     /**
156      * <p>Creates and populates a Message object from an XML Document that contains an XML-encoded HL7 message.</p>
157      * <p>The easiest way to implement this method for a particular message structure is as follows:
158      * <ol><li>Create an instance of the Message type you are going to handle with your subclass
159      * of XMLParser</li>
160      * <li>Go through the given Document and find the Elements that represent the top level of
161      * each message segment. </li>
162      * <li>For each of these segments, call <code>parse(Segment segmentObject, Element segmentElement)</code>,
163      * providing the appropriate Segment from your Message object, and the corresponding Element.</li></ol>
164      * At the end of this process, your Message object should be populated with data from the XML
165      * Document.</p>
166      * @throws HL7Exception if the message is not correctly formatted.
167      * @throws EncodingNotSupportedException if the message encoded
168      *     is not supported by this parser.
169      */
170     public Message parseDocument(Document XMLMessage, String version) throws HL7Exception {
171         String messageName = XMLMessage.getDocumentElement().getTagName();
172         Message message = instantiateMessage(messageName, version, true);
173     	// Note: this will change in future to reuse the Parser's/HapiContext's
174     	// ValidationContext.
175         message.setValidationContext(getValidationContext());
176         parse(message, XMLMessage.getDocumentElement());
177         return message;
178     }
179 
180     /**
181      * Populates the given group object with data from the given group element, ignoring 
182      * any unrecognized nodes.  
183      */
184     private void parse(Group groupObject, Element groupElement) throws HL7Exception {
185         String[] childNames = groupObject.getNames();
186         String messageName = groupObject.getMessage().getName();
187         
188         NodeList allChildNodes = groupElement.getChildNodes();
189         List<String> unparsedElementList = new ArrayList<String>();
190         for (int i = 0; i < allChildNodes.getLength(); i++) {
191             Node node = allChildNodes.item(i);
192             String name = node.getNodeName();
193             if (node.getNodeType() == Node.ELEMENT_NODE && !unparsedElementList.contains(name)) {
194                 unparsedElementList.add(name);                
195             }
196         }
197         
198         //we're not too fussy about order here (all occurrences get parsed as repetitions) ... 
199         for (String nextChildName : childNames) {
200             String childName = nextChildName;
201             if(groupObject.isGroup(nextChildName)) {
202             	childName = makeGroupElementName(groupObject.getMessage().getName(), nextChildName);
203             }
204 			unparsedElementList.remove(childName);
205             
206             // 4 char segment names are second occurrences of a segment within a single message
207             // structure. e.g. the second PID segment in an A17 patient swap message is known
208             // to hapi's code represenation as PID2
209             if (nextChildName.length() == 4 && Character.isDigit(nextChildName.charAt(3))) {
210             	log.trace("Skipping rep segment: {}", nextChildName);
211             } else {   
212             	parseReps(groupElement, groupObject, messageName, nextChildName, nextChildName);
213             }
214         }
215         
216         for (String segName : unparsedElementList) {
217             String segIndexName = groupObject.addNonstandardSegment(segName);
218             parseReps(groupElement, groupObject, messageName, segName, segIndexName);
219         }
220     }
221     
222     //param childIndexName may have an integer on the end if >1 sibling with same name (e.g. NTE2) 
223     private void parseReps(Element groupElement, Group groupObject, 
224             String messageName, String childName, String childIndexName) throws HL7Exception {
225         
226     	String groupName = makeGroupElementName(messageName, childName);
227         List<Element> reps = getChildElementsByTagName(groupElement, groupName);
228         log.trace("# of elements matching {}: {}", groupName, reps.size());
229 
230 		if (groupObject.isRepeating(childIndexName)) {
231 			for (int i = 0; i < reps.size(); i++) {
232 				parseRep(reps.get(i), groupObject.get(childIndexName, i));
233 			}        			        
234 		} else {
235 			if (reps.size() > 0) {
236 				parseRep(reps.get(0), groupObject.get(childIndexName, 0));				
237 			}
238 
239 //			if (reps.size() > 1) {			
240 //				String newIndexName = groupObject.addNonstandardSegment(childName);			
241 //				for (int i = 1; i < reps.size(); i++) {
242 //					parseRep((Element) reps.get(i), groupObject.get(newIndexName, i-1));
243 //				}        			        			
244 //			}
245 			if (reps.size() > 1) {
246 				String newIndexName;
247 				int i=1;
248 				try	{
249 					for (i = 1; i < reps.size(); i++) {
250 						newIndexName = childName+(i+1);
251 						Structure st = groupObject.get(newIndexName);
252 						parseRep(reps.get(i), st);
253 					}
254 				} catch(Throwable t) {
255 					log.info("Issue Parsing: " + t);
256 					newIndexName = groupObject.addNonstandardSegment(childName);
257 					for (int j = i; j < reps.size(); j++) {
258 						parseRep(reps.get(j), groupObject.get(newIndexName, j-i));
259 					}
260 				}
261 			}
262 			
263 		}
264     }
265     
266     private void parseRep(Element theElem, Structure theObj) throws HL7Exception {
267 		if (theObj instanceof Group) {
268 			parse((Group) theObj, theElem);
269 		}
270 		else if (theObj instanceof Segment) {
271 			parse((Segment) theObj, theElem);
272 		}                
273 		log.trace("Parsed element: {}", theElem.getNodeName());    	
274     }
275     
276     //includes direct children only
277     private List<Element> getChildElementsByTagName(Element theElement, String theName) {
278     	List<Element> result = new ArrayList<Element>(10);
279     	NodeList children = theElement.getChildNodes();
280     	
281     	for (int i = 0; i < children.getLength(); i++) {
282     		Node child = children.item(i);
283     		if (child.getNodeType() == Node.ELEMENT_NODE && child.getNodeName().equals(theName)) {
284     			result.add((Element)child);
285     		}
286     	}
287     	
288     	return result; 
289     }
290     
291     /** 
292      * Given the name of a group element in an XML message, returns the corresponding 
293      * group class name.  This name is identical except in order to be a valid class 
294      * name, the dot character immediately following the message name is replaced with 
295      * an underscore.  For example, there is a group element called ADT_A01.INSURANCE and the 
296      * corresponding group Class is called ADT_A01_INSURANCE. 
297      */
298 //    protected static String makeGroupClassName(String elementName) {
299 //        return elementName.replace('.', '_');
300 //    }
301 
302     /** 
303      * Given the name of a message and a Group class, returns the corresponding group element name in an 
304      * XML-encoded message.  This is the message name and group name separated by a dot. For example, 
305      * ADT_A01.INSURANCE.
306      * 
307      * If it looks like a segment name (i.e. has 3 characters), no change is made. 
308      */
309     protected static String makeGroupElementName(String messageName, String className) {
310         String ret;
311         
312         if (className.length() > 4 || ourForceGroupNames.contains(className)) {
313             StringBuilder elementName = new StringBuilder();
314             elementName.append(messageName);
315             elementName.append('.');
316             elementName.append(className);
317             ret = elementName.toString();
318         } else if (className.length() == 4) {
319             // It is not clear why this case is needed.. We should figure out
320         	// why it was added, since removing it or optimizing its use would
321         	// prevent the need for "ourForGroupNames" above
322         	ret = className.substring(0,3);
323         } else {
324             ret = className;
325         }
326         
327         return ret;
328     }
329 
330     /** Test harness */
331     public static void main(String args[]) {
332         if (args.length != 1) {
333             System.out.println("Usage: DefaultXMLParser pipe_encoded_file");
334             System.exit(1);
335         }
336 
337         //read and parse message from file 
338         try {
339             File messageFile = new File(args[0]);
340             long fileLength = messageFile.length();
341             FileReader r = new FileReader(messageFile);
342             char[] cbuf = new char[(int) fileLength];
343             System.out.println("Reading message file ... " + r.read(cbuf) + " of " + fileLength + " chars");
344             r.close();
345             String messString = String.valueOf(cbuf);
346 
347             Parser inParser = null;
348             Parser outParser = null;
349             PipeParser pp = new PipeParser();
350             ca.uhn.hl7v2.parser.XMLParser xp = new DefaultXMLParser();
351             System.out.println("Encoding: " + pp.getEncoding(messString));
352             if (pp.getEncoding(messString) != null) {
353                 inParser = pp;
354                 outParser = xp;
355             }
356             else if (xp.getEncoding(messString) != null) {
357                 inParser = xp;
358                 outParser = pp;
359             }
360 
361             Message mess = inParser.parse(messString);
362             System.out.println("Got message of type " + mess.getClass().getName());
363 
364             String otherEncoding = outParser.encode(mess);
365             System.out.println(otherEncoding);
366         }
367         catch (Exception e) {
368             e.printStackTrace();
369         }
370     }
371 
372     /**
373      * {@inheritDoc}
374      */
375 	@Override
376 	public void parse(Message theMessage, String theString) throws HL7Exception {
377 		Document doc = parseStringIntoDocument(theString);
378         parse(theMessage, doc.getDocumentElement());
379 
380         applySuperStructureName(theMessage);
381 	}
382 
383     /**
384      * Convenience factory method which returns an instance that has a 
385      * {@link NoValidation NoValidation validation context}. 
386      */
387     public static DefaultXMLParser getInstanceWithNoValidation() {
388         DefaultXMLParser retVal = new DefaultXMLParser();
389         retVal.setValidationContext(ValidationContextFactory.noValidation());
390         return retVal;
391     }
392 
393 }