View Javadoc

1   package ca.uhn.hl7v2.parser;
2   
3   import java.util.Collections;
4   import java.util.HashSet;
5   import java.util.Set;
6   
7   import ca.uhn.hl7v2.HapiContext;
8   import ca.uhn.hl7v2.model.GenericMessage;
9   import ca.uhn.hl7v2.model.Varies;
10  import ca.uhn.hl7v2.util.Terser;
11  import ca.uhn.hl7v2.util.idgenerator.FileBasedHiLoGenerator;
12  import ca.uhn.hl7v2.util.idgenerator.IDGenerator;
13  import ca.uhn.hl7v2.validation.ValidationContext;
14  
15  /**
16   * Contains
17   * 
18   * @see HapiContext#getParserConfiguration()
19   * 
20   */
21  public class ParserConfiguration {
22  
23  	/**
24  	 * @link {@link UnexpectedSegmentBehaviourEnum#ADD_INLINE}
25  	 */
26  	// NB if you change the default, edit the javadoc for the enum itself
27  	public static final UnexpectedSegmentBehaviourEnum DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR = UnexpectedSegmentBehaviourEnum.ADD_INLINE;
28  
29  	private boolean allowUnknownVersions;
30  	private boolean escapeSubcomponentDelimiterInPrimitive = false;
31  	private IDGenerator idGenerator = new FileBasedHiLoGenerator();
32  	private String myDefaultObx2Type;
33  	private boolean myEncodeEmptyMandatorySegments = true;
34  	private Set<String> myForcedEncode = new HashSet<String>();
35  	private String myInvalidObx2Type;
36  	private UnexpectedSegmentBehaviourEnum myUnexpectedSegmentBehaviour;
37  	private boolean nonGreedyMode = false;
38  	private boolean prettyPrintWhenEncodingXml = true;
39  	private boolean validating = true;
40  
41  	/**
42  	 * <p>
43  	 * Forces the parser to encode certain segments/fields, even if they contain
44  	 * no content. This method may be called multiple times with multiple path
45  	 * definitions, and each path definition contains the path to the segment or
46  	 * field which needs to be forced.
47  	 * </p>
48  	 * <p>
49  	 * Path definitions are similar in format to {@link Terser Terser} paths.
50  	 * They contain a slash-separated lookup path to reach a given segment, and
51  	 * optionally a field number. The following are examples of paths which
52  	 * could be added here, as well as the sample output for an otherwise empty
53  	 * ORU^R01 message:
54  	 * </p>
55  	 * <table cellpadding="2" cellspacing="2" border="0">
56  	 * <thead>
57  	 * <tr>
58  	 * <th style="background: #FFA0FF;">Forced Encode Path</th>
59  	 * <th style="background: #FFA0FF;">Encode Output</th>
60  	 * </tr>
61  	 * </thead>
62  	 * <tr>
63  	 * <td>None (for illustration purposes)</td>
64  	 * <td style=" font-family: monospace;">
65  	 * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4</td>
66  	 * </tr>
67  	 * <tr>
68  	 * <td style="background: #E0E0E0;">PATIENT_RESULT/ORDER_OBSERVATION/ORC</td>
69  	 * <td style="background: #E0E0E0; font-family: monospace;">
70  	 * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
71  	 * ORC|</td>
72  	 * </tr>
73  	 * <tr>
74  	 * <td>PATIENT_RESULT/ORDER_OBSERVATION/ORC-4</td>
75  	 * <td style=" font-family: monospace;">
76  	 * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
77  	 * ORC||||</td>
78  	 * </tr>
79  	 * <tr>
80  	 * <td style="background: #E0E0E0;">PATIENT_RESULT/ORDER_OBSERVATION/ORC-4-2
81  	 * </td>
82  	 * <td style="background: #E0E0E0; font-family: monospace;">
83  	 * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
84  	 * ORC||||^</td>
85  	 * </tr>
86  	 * </table>
87  	 * <p>
88  	 * While empty segments do not generally have any meaning according to HL7,
89  	 * this may be useful when transmitting to systems which rely on segments
90  	 * being received even if they have no content.
91  	 * </p>
92  	 * <p>
93  	 * Note that this configuration item currently only applies to
94  	 * {@link PipeParser}
95  	 * </p>
96  	 *
97       * @param theForcedEncode path definition
98  	 * @since 2.0
99  	 */
100 	public void addForcedEncode(String theForcedEncode) {
101 		if (theForcedEncode == null) {
102 			throw new NullPointerException("forced encode may not be null");
103 		}
104 
105 		int lastSlashIndex = theForcedEncode.lastIndexOf('/');
106 		lastSlashIndex = Math.max(lastSlashIndex, 0);
107 
108 		if (lastSlashIndex == 0) {
109 			if (!theForcedEncode.matches("[A-Z0-9]{3}(-[0-9]+){0,2}$")) {
110 				throw new IllegalArgumentException("Definition must end with a segment name or field lookup, e.g. MSH or MSH-2");
111 			}
112 		} else {
113 			if (lastSlashIndex == theForcedEncode.length() || !theForcedEncode.substring(lastSlashIndex + 1).matches("[A-Z0-9]{3}(-[0-9]+){0,2}$")) {
114 				throw new IllegalArgumentException("Definition must end with a segment name or field lookup, e.g. MSH or MSH-2");
115 			}
116 		}
117 		myForcedEncode.add(theForcedEncode);
118 	}
119 
120 	boolean determineForcedEncodeIncludesTerserPath(String theTerserPath) {
121 		for (String next : getForcedEncode()) {
122 			if (next.startsWith(theTerserPath)) {
123 				return true;
124 			}
125 		}
126 		return false;
127 	}
128 
129 	int determineForcedFieldNumForTerserPath(String theCurrentTerserPath) {
130 		int forceUpToFieldNum = 0;
131 		for (String nextPath : getForcedEncode()) {
132 			if (nextPath.startsWith(theCurrentTerserPath) && nextPath.length() > theCurrentTerserPath.length()) {
133 				int endOfFieldDef = nextPath.indexOf('-', theCurrentTerserPath.length() + 1);
134 				if (endOfFieldDef == -1) {
135 					endOfFieldDef = nextPath.length();
136 				}
137 				String fieldNumString = nextPath.substring(theCurrentTerserPath.length() + 1, endOfFieldDef);
138 				forceUpToFieldNum = Math.max(forceUpToFieldNum, Integer.parseInt(fieldNumString));
139 			}
140 		}
141 		return forceUpToFieldNum;
142 	}
143 
144 	/**
145 	 * Returns the default datatype ("ST", "NM", etc) for an OBX segment with a
146 	 * missing OBX-2 value
147 	 * 
148 	 * @return Returns the default datatype ("ST", "NM", etc) for an OBX segment
149 	 *         with a missing OBX-2 value
150 	 * @see #setDefaultObx2Type(String)
151 	 */
152 	public String getDefaultObx2Type() {
153 		return myDefaultObx2Type;
154 	}
155 
156 	/**
157 	 * @return Returns the forced encode strings added by
158 	 *         {@link #addForcedEncode(String)}
159 	 * 
160 	 * @see #addForcedEncode(String)
161 	 * @since 1.3
162 	 */
163 	public Set<String> getForcedEncode() {
164 		return Collections.unmodifiableSet(myForcedEncode);
165 	}
166 
167 	/**
168 	 * @return the ID Generator to be used for generating IDs for new messages
169 	 */
170 	public IDGenerator getIdGenerator() {
171 		return idGenerator;
172 	}
173 
174 	/**
175 	 * Returns the value provides a default datatype ("ST", "NM", etc) for an
176 	 * OBX segment with an invalid OBX-2 value.
177 	 * 
178 	 * @return Returns the value provides a default datatype ("ST", "NM", etc)
179 	 *         for an OBX segment with an invalid OBX-2 value.
180 	 * @see #setInvalidObx2Type(String)
181 	 */
182 	public String getInvalidObx2Type() {
183 		return myInvalidObx2Type;
184 	}
185 
186 	/**
187 	 * Returns the behaviour to use when parsing a message and a nonstandard
188 	 * segment is found. Default is
189 	 * {@link #DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR}
190      *
191      * @return the behaviour to use when a nonstandard egment is found
192 	 */
193 	public UnexpectedSegmentBehaviourEnum getUnexpectedSegmentBehaviour() {
194 		if (myUnexpectedSegmentBehaviour == null) {
195 			myUnexpectedSegmentBehaviour = DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR;
196 		}
197 		return myUnexpectedSegmentBehaviour;
198 	}
199 
200 	/**
201 	 * If set to <code>true</code> (default is <code>false</code>) the parser
202 	 * will allow messages to parse, even if they contain a version which is not
203 	 * known to the parser. When operating in this mode, if a message arrives
204 	 * with an unknown version string, the parser will attempt to parse it using
205 	 * a {@link GenericMessage Generic Message} class instead of a specific HAPI
206 	 * structure class. Default is <code>false</code>.
207      *
208      * @return true if parsing messages with unknown versions is allowed
209 	 */
210 	public boolean isAllowUnknownVersions() {
211 		return this.allowUnknownVersions;
212 	}
213 
214 	/**
215      * Returns <code>true</code> if empty segments should still be encoded
216      * if they are mandatory within their message structure.  Default is <code>false</code>.
217 	 * @return <code>true</code> if empty segments should still be encoded
218      *
219 	 * @see #setEncodeEmptyMandatoryFirstSegments(boolean)
220 	 */
221 	public boolean isEncodeEmptyMandatorySegments() {
222 		return myEncodeEmptyMandatorySegments;
223 	}
224 
225 	/**
226      * Returns code>true</code> if subcomponent delimiters in OBX-5 shall be
227      *         ignored. Default is <code>false</code>.
228 	 * @return <code>true</code> if subcomponent delimiters in OBX-5 shall be
229 	 *         ignored
230 	 */
231 	public boolean isEscapeSubcomponentDelimiterInPrimitive() {
232 		return escapeSubcomponentDelimiterInPrimitive;
233 	}
234 
235 	/**
236 	 * Returns <code>true</code> if the parser should parse in non-greedy mode. Default
237 	 * is <code>false</code>
238 	 * 
239 	 * @see #setNonGreedyMode(boolean) for an explanation of non-greedy mode
240 	 */
241 	public boolean isNonGreedyMode() {
242 		return nonGreedyMode;
243 	}
244 
245 	/**
246 	 * If set to <code>true</code> (which is the default), {@link XMLParser XML Parsers}
247 	 * will attempt to pretty-print the XML they generate. This means the messages will look
248 	 * nicer to humans, but may take up slightly more space/bandwidth.
249 	 */
250 	public boolean isPrettyPrintWhenEncodingXml() {
251 		return prettyPrintWhenEncodingXml;
252 	}
253 
254 	/**
255      * Returns <code>true</code> if the parser validates using a configured
256      *         {@link ValidationContext}. Default is <code>true</code>.
257 	 * @return <code>true</code> if the parser validates using a configured
258 	 *         {@link ValidationContext}
259 	 */
260 	public boolean isValidating() {
261 		return validating;
262 	}
263 
264 	/**
265 	 * Removes a forced encode entry
266 	 *
267      * @param theForcedEncode path definition to be removed
268 	 * @see #addForcedEncode(String)
269 	 * @since 1.3
270 	 */
271 	public void removeForcedEncode(String theForcedEncode) {
272 		if (theForcedEncode == null) {
273 			throw new NullPointerException("forced encode may not be null");
274 		}
275 
276 		myForcedEncode.remove(theForcedEncode);
277 	}
278 
279 	/**
280 	 * If set to <code>true</code> (default is <code>false</code>) the parser
281 	 * will allow messages to parse, even if they contain a version which is not
282 	 * known to the parser. When operating in this mode, if a message arrives
283 	 * with an unknown version string, the parser will attempt to parse it using
284 	 * a {@link GenericMessage Generic Message} class instead of a specific HAPI
285 	 * structure class.
286      *
287      * @param theAllowUnknownVersions true if parsing unknown versions shall be allowed
288 	 */
289 	public void setAllowUnknownVersions(boolean theAllowUnknownVersions) {
290 		allowUnknownVersions = theAllowUnknownVersions;
291 	}
292 
293 	/**
294 	 * <p>
295 	 * If this property is set, the value provides a default datatype ("ST",
296 	 * "NM", etc) for an OBX segment with a missing OBX-2 value. This is useful
297 	 * when parsing messages from systems which do not correctly populate OBX-2.
298 	 * </p>
299 	 * <p>
300 	 * For example, if this property is set to "ST", and the following OBX
301 	 * segment is encountered:
302 	 * 
303 	 * <pre>
304 	 * OBX|||||This is a value
305 	 * </pre>
306 	 * 
307 	 * It will be parsed as though it had read:
308 	 * 
309 	 * <pre>
310 	 * OBX||ST|||This is a value
311 	 * </pre>
312 	 * 
313 	 * </p>
314 	 * <p>
315 	 * Note that this configuration can also be set globally using the system
316 	 * property {@link Varies#DEFAULT_OBX2_TYPE_PROP}, but any value provided to
317 	 * {@link ParserConfiguration} takes priority over the system property.
318 	 * </p>
319 	 * 
320 	 * @param theDefaultObx2Type
321 	 *            If this property is set, the value provides a default datatype
322 	 *            ("ST", "NM", etc) for an OBX segment with a missing OBX-2
323 	 *            value
324 	 * @see #setInvalidObx2Type(String)
325 	 * @see Varies#INVALID_OBX2_TYPE_PROP
326 	 */
327 	public void setDefaultObx2Type(String theDefaultObx2Type) {
328 		myDefaultObx2Type = theDefaultObx2Type;
329 	}
330 
331 	/**
332 	 * <p>
333 	 * If set to <code>true</code> (default is <code>true</code>), when encoding
334 	 * a group using the PipeParser where the first segment is required, but no
335 	 * data has been populated in that segment, the empty segment is now still
336 	 * encoded if needed as a blank segment in order to give parsers a hint
337 	 * about which group subsequent segments are in. This helps to ensure that
338 	 * messages can be "round tripped", meaning that a message which is parsed,
339 	 * encoded, and then re-parsed should contain exactly the same structure
340 	 * from beginning to end.
341 	 * </p>
342 	 * <p>
343 	 * </p>
344 	 * For example, in an ORU^R01 message with a populated OBX segment, but no
345 	 * data in the mandatory OBR segment which begins the ORDER_OBSERVATION
346 	 * group the message would still contain an empty OBR segment when encoded:
347 	 * 
348 	 * <pre>
349 	 * 	MSH|^~\&|REG|W|||201103230042||ORU^R01|32153168|P|2.5
350 	 * 	OBR|
351 	 * 	OBX||ST|||Value Data
352 	 * </pre>
353 	 * 
354 	 * Previously, the following encoding would have occurred, which would have
355 	 * incorrectly been parsed as having a custom OBX segment instead of having
356 	 * a normal ORDER_OBSERVATION group:
357 	 * 
358 	 * <pre>
359 	 * 	MSH|^~\&|REG|W|||201103230042||ORU^R01|32153168|P|2.5
360 	 * 	OBX||ST|||Value Data
361 	 * </pre>
362 	 * 
363 	 * @param theEncodeEmptyMandatorySegments
364 	 *            If set to <code>true</code> (default is <code>true</code>),
365 	 *            when encoding a group using the PipeParser where the first
366 	 *            segment is required, but no data has been populated in that
367 	 *            segment, the empty segment is now still encoded if needed as a
368 	 *            blank segment in order to give parsers a hint about which
369 	 *            group subsequent segments are in
370 	 */
371 	public void setEncodeEmptyMandatoryFirstSegments(boolean theEncodeEmptyMandatorySegments) {
372 		myEncodeEmptyMandatorySegments = theEncodeEmptyMandatorySegments;
373 	}
374 
375 	/**
376 	 * Set to <code>true</code> if subcomponent delimiters in OBX-5 shall be
377 	 * ignored
378      * @param escapeSubcomponentDelimiterInPrimitive boolean flag to enable or disable this behavior
379 	 */
380 	public void setEscapeSubcomponentDelimiterInPrimitive(boolean escapeSubcomponentDelimiterInPrimitive) {
381 		this.escapeSubcomponentDelimiterInPrimitive = escapeSubcomponentDelimiterInPrimitive;
382 	}
383 
384 	/**
385 	 * @param idGenerator
386 	 *            the {@link IDGenerator} to be used for generating IDs for new
387 	 *            messages, preferable initialized using the methods described
388 	 *            in IDGeneratorFactory.
389 	 * 
390 	 * @see IDGenerator
391 	 */
392 	public void setIdGenerator(IDGenerator idGenerator) {
393 		this.idGenerator = idGenerator;
394 	}
395 
396 	/**
397 	 * <p>
398 	 * If this property is set, the value provides a default datatype ("ST",
399 	 * "NM", etc) for an OBX segment with an invalid OBX-2 value. This is useful
400 	 * when parsing messages from systems which do not correctly populate OBX-2.
401 	 * </p>
402 	 * <p>
403 	 * For example, if this property is set to "ST", and the following OBX
404 	 * segment is encountered:
405 	 * 
406 	 * <pre>
407 	 * OBX||INVALID|||This is a value
408 	 * </pre>
409 	 * 
410 	 * It will be parsed as though it had read:
411 	 * 
412 	 * <pre>
413 	 * OBX||ST|||This is a value
414 	 * </pre>
415 	 * 
416 	 * </p>
417 	 * <p>
418 	 * Note that this configuration can also be set globally using the system
419 	 * property {@link Varies#INVALID_OBX2_TYPE_PROP}, but any value provided to
420 	 * {@link ParserConfiguration} takes priority over the system property.
421 	 * </p>
422 	 * 
423 	 * @param theInvalidObx2Type
424 	 *            If this property is set, the value provides a default datatype
425 	 *            ("ST", "NM", etc) for an OBX segment with an invalid OBX-2
426 	 *            value. This is useful when parsing messages from systems which
427 	 *            do not correctly populate OBX-2.
428 	 * @see ParserConfiguration#setDefaultObx2Type(String)
429 	 * @see Varies#DEFAULT_OBX2_TYPE_PROP
430 	 */
431 	public void setInvalidObx2Type(String theInvalidObx2Type) {
432 		myInvalidObx2Type = theInvalidObx2Type;
433 	}
434 
435 	/**
436 	 * If set to <code>true</code> (default is <code>false</code>), pipe parser will be
437 	 * put in non-greedy mode. This setting applies only to {@link PipeParser Pipe Parsers} and
438 	 * will have no effect on {@link XMLParser XML Parsers}.
439 	 * 
440 	 * <p>
441 	 * In non-greedy mode, if the message structure being parsed has an ambiguous
442 	 * choice of where to put a segment because there is a segment matching the
443 	 * current segment name in both a later position in the message, and
444 	 * in an earlier position as a part of a repeating group, the earlier
445 	 * position will be chosen.
446 	 * </p>
447 	 * <p>
448 	 * This is perhaps best explained with an example. Consider the following structure:
449 	 * </p>
450 	 * <pre>
451 	 * MSH
452 	 * GROUP_1 (start)
453 	 * {
454 	 *    AAA
455 	 *    BBB
456 	 *    GROUP_2 (start)
457 	 *    {
458 	 *       AAA
459 	 *    }
460 	 *    GROUP_2 (end)
461 	 * }
462 	 * GROUP_1 (end)
463 	 * </pre>
464 	 * <p>
465 	 * </p>
466 	 * For the above example, consider a message containing the following segments:<br/>
467 	 * <code>MSH<br/>
468 	 * AAA<br/>
469 	 * BBB<br/>
470 	 * AAA</code>
471 	 * </p>
472 	 * <p>
473 	 * In this example, when the second AAA segment is encountered, there are two
474 	 * possible choices. It would be placed in GROUP_2, or it could be placed in 
475 	 * a second repetition of GROUP_1. By default it will be placed in GROUP_2, but
476 	 * in non-greedy mode it will be put in a new repetition of GROUP_1.
477 	 * </p>
478 	 * <p>
479 	 * This mode is useful for example when parsing OML^O21 messages containing
480 	 * multiple orders.
481 	 * </p>
482 	 */
483 	public void setNonGreedyMode(boolean theNonGreedyMode) {
484 		nonGreedyMode = theNonGreedyMode;
485 	}
486 
487 	/**
488 	 * If set to <code>true</code> (which is the default), {@link XMLParser XML Parsers}
489 	 * will attempt to pretty-print the XML they generate. This means the messages will look
490 	 * nicer to humans, but may take up slightly more space/bandwidth.
491 	 */
492 	public void setPrettyPrintWhenEncodingXml(boolean thePrettyPrintWhenEncodingXml) {
493 		prettyPrintWhenEncodingXml = thePrettyPrintWhenEncodingXml;
494 	}
495 
496 	/**
497 	 * Sets the behaviour to use when parsing a message and a nonstandard
498 	 * segment is found
499      *
500      * @param theUnexpectedSegmentBehaviour behaviour to use when a nonstandard segment is found
501      */
502 	public void setUnexpectedSegmentBehaviour(UnexpectedSegmentBehaviourEnum theUnexpectedSegmentBehaviour) {
503 		if (theUnexpectedSegmentBehaviour == null) {
504 			throw new NullPointerException("UnexpectedSegmentBehaviour can not be null");
505 		}
506 		myUnexpectedSegmentBehaviour = theUnexpectedSegmentBehaviour;
507 	}
508 
509 	/**
510 	 * Determines whether the parser validates using a configured
511 	 * {@link ValidationContext} or not. This allows to disable message
512 	 * validation although a validation context is defined.
513 	 * 
514 	 * @param validating
515 	 *            <code>true</code> if parser shall validate, <code>false</code>
516 	 *            if not
517 	 */
518 	public void setValidating(boolean validating) {
519 		this.validating = validating;
520 	}
521 
522 }