1 /**
2 The contents of this file are subject to the Mozilla Public License Version 1.1
3 (the "License"); you may not use this file except in compliance with the License.
4 You may obtain a copy of the License at http://www.mozilla.org/MPL/
5 Software distributed under the License is distributed on an "AS IS" basis,
6 WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
7 specific language governing rights and limitations under the License.
8
9 The Initial Developer of the Original Code is University Health Network. Copyright (C)
10 2001. All Rights Reserved.
11
12 Contributor(s): ______________________________________.
13
14 Alternatively, the contents of this file may be used under the terms of the
GNU General Public License (the "GPL"), in which case the provisions of the GPL are
16 applicable instead of those above. If you wish to allow use of your version of this
17 file only under the terms of the GPL and not to allow others to use your version
18 of this file under the MPL, indicate your decision by deleting the provisions above
19 and replace them with the notice and other provisions required by the GPL License.
20 If you do not delete the provisions above, a recipient may use your version of
21 this file under either the MPL or the GPL.
22
23 */
24 package ca.uhn.hl7v2.parser;
25
26 import java.io.File;
27 import java.io.FileReader;
28 import java.util.ArrayList;
29 import java.util.HashSet;
30 import java.util.List;
31 import java.util.Set;
32
33 import org.slf4j.Logger;
34 import org.slf4j.LoggerFactory;
35 import org.w3c.dom.DOMException;
36 import org.w3c.dom.Document;
37 import org.w3c.dom.Element;
38 import org.w3c.dom.Node;
39 import org.w3c.dom.NodeList;
40
41 import ca.uhn.hl7v2.HL7Exception;
42 import ca.uhn.hl7v2.HapiContext;
43 import ca.uhn.hl7v2.model.Group;
44 import ca.uhn.hl7v2.model.Message;
45 import ca.uhn.hl7v2.model.Segment;
46 import ca.uhn.hl7v2.model.Structure;
47 import ca.uhn.hl7v2.util.XMLUtils;
48 import ca.uhn.hl7v2.validation.impl.NoValidation;
49 import ca.uhn.hl7v2.validation.impl.ValidationContextFactory;
50
51 /**
52 * <p>A default XMLParser. This class assigns segment elements (in an XML-encoded message)
53 * to Segment objects (in a Message object) using the name of a segment and the names
54 * of any groups in which the segment is nested. The names of group classes must correspond
55 * to the names of group elements (they must be identical except that a dot in the element
56 * name, following the message name, is replaced with an underscore, in order to consitute a
57 * valid class name). </p>
58 * <p>At the time of writing, the group names in the XML spec are changing. Many of the group
59 * names have been automatically generated based on the group contents. However, these automatic
60 * names are gradually being replaced with manually assigned names. This process is expected to
61 * be complete by November 2002. As a result, mismatches are likely. Messages could be
62 * transformed prior to parsing (using XSLT) as a work-around. Alternatively the group class names
63 * could be changed to reflect updates in the XML spec. Ultimately, HAPI group classes will be
64 * changed to correspond with the official group names, once these are all assigned. </p>
65 *
66 * @see ParserConfiguration for configuration options which may affect parser encoding and decoding behaviour
67 * @author Bryan Tripp
68 */
69 public class DefaultXMLParser extends XMLParser {
70
71 private static final Logger log = LoggerFactory.getLogger(DefaultXMLParser.class);
72
73 private static final Set<String> ourForceGroupNames;
74
75 static {
76 ourForceGroupNames = new HashSet<String>();
77 ourForceGroupNames.add("DIET");
78 }
79
80 public DefaultXMLParser() {
81 super();
82 }
83
84 public DefaultXMLParser(HapiContext context) {
85 super(context);
86 }
87
88 /**
89 * Creates a new instance of DefaultXMLParser
90 *
91 * @param theFactory custom factory to use for model class lookup
92 */
93 public DefaultXMLParser(ModelClassFactory theFactory) {
94 super(theFactory);
95 }
96
97 /**
98 * <p>Creates an XML Document that corresponds to the given Message object. </p>
99 * <p>If you are implementing this method, you should create an XML Document, and insert XML Elements
100 * into it that correspond to the groups and segments that belong to the message type that your subclass
101 * of XMLParser supports. Then, for each segment in the message, call the method
102 * <code>encode(Segment segmentObject, Element segmentElement)</code> using the Element for
103 * that segment and the corresponding Segment object from the given Message.</p>
104 */
105 public Document encodeDocument(Message source) throws HL7Exception {
106 String messageClassName = source.getClass().getName();
107 String messageName = messageClassName.substring(messageClassName.lastIndexOf('.') + 1);
108 try {
109 Document doc = XMLUtils.emptyDocument(messageName);
110 //Element root = doc.createElement(messageName);
111 //doc.appendChild(root);
112 encode(source, doc.getDocumentElement());
113 return doc;
114 } catch (Exception e) {
115 throw new HL7Exception(
116 "Can't create XML document - " + e.getClass().getName(), e);
117 }
118 }
119
120 /**
121 * Copies data from a group object into the corresponding group element, creating any
122 * necessary child nodes.
123 */
124 private void encode(Group groupObject, Element groupElement) throws HL7Exception {
125 String[] childNames = groupObject.getNames();
126 String messageName = groupObject.getMessage().getName();
127
128 try {
129 for (String name : childNames) {
130 Structure[] reps = groupObject.getAll(name);
131 for (Structure rep : reps) {
132 String elementName = makeGroupElementName(messageName, name);
133 Element childElement;
134 try {
135 childElement = groupElement.getOwnerDocument().createElement(elementName);
136 } catch (DOMException e) {
137 throw new HL7Exception(
138 "Can't encode element " + elementName + " in group " + groupObject.getClass().getName(), e);
139 }
140 groupElement.appendChild(childElement);
141 if (rep instanceof Group) {
142 encode((Group) rep, childElement);
143 }
144 else if (rep instanceof Segment) {
145 encode((Segment) rep, childElement);
146 }
147 }
148 }
149 } catch (DOMException e) {
150 throw new HL7Exception(
151 "Can't encode group " + groupObject.getClass().getName(), e);
152 }
153 }
154
155 /**
156 * <p>Creates and populates a Message object from an XML Document that contains an XML-encoded HL7 message.</p>
157 * <p>The easiest way to implement this method for a particular message structure is as follows:
158 * <ol><li>Create an instance of the Message type you are going to handle with your subclass
159 * of XMLParser</li>
160 * <li>Go through the given Document and find the Elements that represent the top level of
161 * each message segment. </li>
162 * <li>For each of these segments, call <code>parse(Segment segmentObject, Element segmentElement)</code>,
163 * providing the appropriate Segment from your Message object, and the corresponding Element.</li></ol>
164 * At the end of this process, your Message object should be populated with data from the XML
165 * Document.</p>
166 * @throws HL7Exception if the message is not correctly formatted.
167 * @throws EncodingNotSupportedException if the message encoded
168 * is not supported by this parser.
169 */
170 public Message parseDocument(Document XMLMessage, String version) throws HL7Exception {
171 String messageName = XMLMessage.getDocumentElement().getTagName();
172 Message message = instantiateMessage(messageName, version, true);
173 // Note: this will change in future to reuse the Parser's/HapiContext's
174 // ValidationContext.
175 message.setValidationContext(getValidationContext());
176 parse(message, XMLMessage.getDocumentElement());
177 return message;
178 }
179
180 /**
181 * Populates the given group object with data from the given group element, ignoring
182 * any unrecognized nodes.
183 */
184 private void parse(Group groupObject, Element groupElement) throws HL7Exception {
185 String[] childNames = groupObject.getNames();
186 String messageName = groupObject.getMessage().getName();
187
188 NodeList allChildNodes = groupElement.getChildNodes();
189 List<String> unparsedElementList = new ArrayList<String>();
190 for (int i = 0; i < allChildNodes.getLength(); i++) {
191 Node node = allChildNodes.item(i);
192 String name = node.getNodeName();
193 if (node.getNodeType() == Node.ELEMENT_NODE && !unparsedElementList.contains(name)) {
194 unparsedElementList.add(name);
195 }
196 }
197
198 //we're not too fussy about order here (all occurrences get parsed as repetitions) ...
199 for (String nextChildName : childNames) {
200 String childName = nextChildName;
201 if(groupObject.isGroup(nextChildName)) {
202 childName = makeGroupElementName(groupObject.getMessage().getName(), nextChildName);
203 }
204 unparsedElementList.remove(childName);
205
206 // 4 char segment names are second occurrences of a segment within a single message
207 // structure. e.g. the second PID segment in an A17 patient swap message is known
208 // to hapi's code represenation as PID2
209 if (nextChildName.length() == 4 && Character.isDigit(nextChildName.charAt(3))) {
210 log.trace("Skipping rep segment: {}", nextChildName);
211 } else {
212 parseReps(groupElement, groupObject, messageName, nextChildName, nextChildName);
213 }
214 }
215
216 for (String segName : unparsedElementList) {
217 String segIndexName = groupObject.addNonstandardSegment(segName);
218 parseReps(groupElement, groupObject, messageName, segName, segIndexName);
219 }
220 }
221
222 //param childIndexName may have an integer on the end if >1 sibling with same name (e.g. NTE2)
223 private void parseReps(Element groupElement, Group groupObject,
224 String messageName, String childName, String childIndexName) throws HL7Exception {
225
226 String groupName = makeGroupElementName(messageName, childName);
227 List<Element> reps = getChildElementsByTagName(groupElement, groupName);
228 log.trace("# of elements matching {}: {}", groupName, reps.size());
229
230 if (groupObject.isRepeating(childIndexName)) {
231 for (int i = 0; i < reps.size(); i++) {
232 parseRep(reps.get(i), groupObject.get(childIndexName, i));
233 }
234 } else {
235 if (reps.size() > 0) {
236 parseRep(reps.get(0), groupObject.get(childIndexName, 0));
237 }
238
239 // if (reps.size() > 1) {
240 // String newIndexName = groupObject.addNonstandardSegment(childName);
241 // for (int i = 1; i < reps.size(); i++) {
242 // parseRep((Element) reps.get(i), groupObject.get(newIndexName, i-1));
243 // }
244 // }
245 if (reps.size() > 1) {
246 String newIndexName;
247 int i=1;
248 try {
249 for (i = 1; i < reps.size(); i++) {
250 newIndexName = childName+(i+1);
251 Structure st = groupObject.get(newIndexName);
252 parseRep(reps.get(i), st);
253 }
254 } catch(Throwable t) {
255 log.info("Issue Parsing: " + t);
256 newIndexName = groupObject.addNonstandardSegment(childName);
257 for (int j = i; j < reps.size(); j++) {
258 parseRep(reps.get(j), groupObject.get(newIndexName, j-i));
259 }
260 }
261 }
262
263 }
264 }
265
266 private void parseRep(Element theElem, Structure theObj) throws HL7Exception {
267 if (theObj instanceof Group) {
268 parse((Group) theObj, theElem);
269 }
270 else if (theObj instanceof Segment) {
271 parse((Segment) theObj, theElem);
272 }
273 log.trace("Parsed element: {}", theElem.getNodeName());
274 }
275
276 //includes direct children only
277 private List<Element> getChildElementsByTagName(Element theElement, String theName) {
278 List<Element> result = new ArrayList<Element>(10);
279 NodeList children = theElement.getChildNodes();
280
281 for (int i = 0; i < children.getLength(); i++) {
282 Node child = children.item(i);
283 if (child.getNodeType() == Node.ELEMENT_NODE && child.getNodeName().equals(theName)) {
284 result.add((Element)child);
285 }
286 }
287
288 return result;
289 }
290
291 /**
292 * Given the name of a group element in an XML message, returns the corresponding
293 * group class name. This name is identical except in order to be a valid class
294 * name, the dot character immediately following the message name is replaced with
295 * an underscore. For example, there is a group element called ADT_A01.INSURANCE and the
296 * corresponding group Class is called ADT_A01_INSURANCE.
297 */
298 // protected static String makeGroupClassName(String elementName) {
299 // return elementName.replace('.', '_');
300 // }
301
302 /**
303 * Given the name of a message and a Group class, returns the corresponding group element name in an
304 * XML-encoded message. This is the message name and group name separated by a dot. For example,
305 * ADT_A01.INSURANCE.
306 *
307 * If it looks like a segment name (i.e. has 3 characters), no change is made.
308 */
309 protected static String makeGroupElementName(String messageName, String className) {
310 String ret;
311
312 if (className.length() > 4 || ourForceGroupNames.contains(className)) {
313 StringBuilder elementName = new StringBuilder();
314 elementName.append(messageName);
315 elementName.append('.');
316 elementName.append(className);
317 ret = elementName.toString();
318 } else if (className.length() == 4) {
319 // It is not clear why this case is needed.. We should figure out
320 // why it was added, since removing it or optimizing its use would
321 // prevent the need for "ourForGroupNames" above
322 ret = className.substring(0,3);
323 } else {
324 ret = className;
325 }
326
327 return ret;
328 }
329
330 /** Test harness */
331 public static void main(String args[]) {
332 if (args.length != 1) {
333 System.out.println("Usage: DefaultXMLParser pipe_encoded_file");
334 System.exit(1);
335 }
336
337 //read and parse message from file
338 try {
339 File messageFile = new File(args[0]);
340 long fileLength = messageFile.length();
341 FileReader r = new FileReader(messageFile);
342 char[] cbuf = new char[(int) fileLength];
343 System.out.println("Reading message file ... " + r.read(cbuf) + " of " + fileLength + " chars");
344 r.close();
345 String messString = String.valueOf(cbuf);
346
347 Parser inParser = null;
348 Parser outParser = null;
349 PipeParser pp = new PipeParser();
350 ca.uhn.hl7v2.parser.XMLParser xp = new DefaultXMLParser();
351 System.out.println("Encoding: " + pp.getEncoding(messString));
352 if (pp.getEncoding(messString) != null) {
353 inParser = pp;
354 outParser = xp;
355 }
356 else if (xp.getEncoding(messString) != null) {
357 inParser = xp;
358 outParser = pp;
359 }
360
361 Message mess = inParser.parse(messString);
362 System.out.println("Got message of type " + mess.getClass().getName());
363
364 String otherEncoding = outParser.encode(mess);
365 System.out.println(otherEncoding);
366 }
367 catch (Exception e) {
368 e.printStackTrace();
369 }
370 }
371
372 /**
373 * {@inheritDoc}
374 */
375 @Override
376 public void parse(Message theMessage, String theString) throws HL7Exception {
377 Document doc = parseStringIntoDocument(theString);
378 parse(theMessage, doc.getDocumentElement());
379
380 applySuperStructureName(theMessage);
381 }
382
383 /**
384 * Convenience factory method which returns an instance that has a
385 * {@link NoValidation NoValidation validation context}.
386 */
387 public static DefaultXMLParser getInstanceWithNoValidation() {
388 DefaultXMLParser retVal = new DefaultXMLParser();
389 retVal.setValidationContext(ValidationContextFactory.noValidation());
390 return retVal;
391 }
392
393 }