Coverage Report - ca.uhn.hl7v2.parser.ParserConfiguration
 
Classes in this File Line Coverage Branch Coverage Complexity
ParserConfiguration
74%
55/74
82%
23/28
1.8
 
 1  
 package ca.uhn.hl7v2.parser;
 2  
 
 3  
 import java.util.Collections;
 4  
 import java.util.HashSet;
 5  
 import java.util.Set;
 6  
 
 7  
 import ca.uhn.hl7v2.HapiContext;
 8  
 import ca.uhn.hl7v2.model.GenericMessage;
 9  
 import ca.uhn.hl7v2.model.Varies;
 10  
 import ca.uhn.hl7v2.util.Terser;
 11  
 import ca.uhn.hl7v2.util.idgenerator.FileBasedHiLoGenerator;
 12  
 import ca.uhn.hl7v2.util.idgenerator.IDGenerator;
 13  
 import ca.uhn.hl7v2.validation.ValidationContext;
 14  
 
 15  
 /**
 16  
  * Contains the configuration settings that parsers apply when parsing and encoding messages.
 17  
  * 
 18  
  * @see HapiContext#getParserConfiguration()
 19  
  * 
 20  
  */
 21  458
 public class ParserConfiguration {
 22  
 
 23  
         /**
 24  
          * Default behaviour for unexpected segments: {@link UnexpectedSegmentBehaviourEnum#ADD_INLINE}
 25  
          */
 26  
         // NB if you change the default, edit the javadoc for the enum itself
 27  1
         public static final UnexpectedSegmentBehaviourEnum DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR = UnexpectedSegmentBehaviourEnum.ADD_INLINE;
 28  
 
 29  
         // No initializer, so this starts out false.
         private boolean allowUnknownVersions;
 30  458
         private boolean escapeSubcomponentDelimiterInPrimitive = false;
 31  458
         // A FileBasedHiLoGenerator is created for every configuration instance unless replaced via setIdGenerator.
         private IDGenerator idGenerator = new FileBasedHiLoGenerator();
 32  
         // Stays null until setDefaultObx2Type(String) is called.
         private String myDefaultObx2Type;
 33  458
         private boolean myEncodeEmptyMandatorySegments = true;
 34  458
         // Backing store for the forced-encode paths; getForcedEncode() only hands out an unmodifiable view of it.
         private Set<String> myForcedEncode = new HashSet<String>();
 35  
         // Stays null until setInvalidObx2Type(String) is called.
         private String myInvalidObx2Type;
 36  
         // Left null here; getUnexpectedSegmentBehaviour() substitutes DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR on first read.
         private UnexpectedSegmentBehaviourEnum myUnexpectedSegmentBehaviour;
 37  458
         private boolean nonGreedyMode = false;
 38  458
         private boolean prettyPrintWhenEncodingXml = true;
 39  458
         private boolean validating = true;
 40  
 
 41  
         /**
 42  
          * <p>
 43  
          * Forces the parser to encode certain segments/fields, even if they contain
 44  
          * no content. This method may be called multiple times with multiple path
 45  
          * definitions, and each path definition contains the path to the segment or
 46  
          * field which needs to be forced.
 47  
          * </p>
 48  
          * <p>
 49  
          * Path definitions are similar in format to {@link Terser Terser} paths.
 50  
          * They contain a slash-separated lookup path to reach a given segment, and
 51  
          * optionally a field number. The following are examples of paths which
 52  
          * could be added here, as well as the sample output for an otherwise empty
 53  
          * ORU^R01 message:
 54  
          * </p>
 55  
          * <table cellpadding="2" cellspacing="2" border="0">
 56  
          * <thead>
 57  
          * <tr>
 58  
          * <th style="background: #FFA0FF;">Forced Encode Path</th>
 59  
          * <th style="background: #FFA0FF;">Encode Output</th>
 60  
          * </tr>
 61  
          * </thead>
 62  
          * <tr>
 63  
          * <td>None (for illustration purposes)</td>
 64  
          * <td style=" font-family: monospace;">
 65  
          * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4</td>
 66  
          * </tr>
 67  
          * <tr>
 68  
          * <td style="background: #E0E0E0;">PATIENT_RESULT/ORDER_OBSERVATION/ORC</td>
 69  
          * <td style="background: #E0E0E0; font-family: monospace;">
 70  
          * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
 71  
          * ORC|</td>
 72  
          * </tr>
 73  
          * <tr>
 74  
          * <td>PATIENT_RESULT/ORDER_OBSERVATION/ORC-4</td>
 75  
          * <td style=" font-family: monospace;">
 76  
          * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
 77  
          * ORC||||</td>
 78  
          * </tr>
 79  
          * <tr>
 80  
          * <td style="background: #E0E0E0;">PATIENT_RESULT/ORDER_OBSERVATION/ORC-4-2
 81  
          * </td>
 82  
          * <td style="background: #E0E0E0; font-family: monospace;">
 83  
          * MSH|^~\&amp;|||||||ORU^R01^ORU_R01||T|2.4<br>
 84  
          * ORC||||^</td>
 85  
          * </tr>
 86  
          * </table>
 87  
          * <p>
 88  
          * While empty segments do not generally have any meaning according to HL7,
 89  
          * this may be useful when transmitting to systems which rely on segments
 90  
          * being received even if they have no content.
 91  
          * </p>
 92  
          * <p>
 93  
          * Note that this configuration item currently only applies to
 94  
          * {@link PipeParser}
 95  
          * </p>
 96  
          *
 97  
      * @param theForcedEncode path definition
 98  
          * @since 2.0
 99  
          */
 100  
         public void addForcedEncode(String theForcedEncode) {
 101  20
                 if (theForcedEncode == null) {
 102  0
                         throw new NullPointerException("forced encode may not be null");
 103  
                 }
 104  
 
 105  20
                 // lastIndexOf returns -1 when the definition contains no '/'.
                 int lastSlashIndex = theForcedEncode.lastIndexOf('/');
 106  20
                 // Folds "no slash" (-1) into 0, so a definition without a slash and one whose
                 // only slash is the first character both take the first branch below.
                 lastSlashIndex = Math.max(lastSlashIndex, 0);
 107  
 
 108  20
                 if (lastSlashIndex == 0) {
 109  12
                         // The whole definition must be a 3-character segment name optionally followed by
                         // up to two "-<digits>" parts. String.matches already anchors both ends, so the
                         // trailing '$' is redundant but harmless.
                         if (!theForcedEncode.matches("[A-Z0-9]{3}(-[0-9]+){0,2}$")) {
 110  4
                                 throw new IllegalArgumentException("Definition must end with a segment name or field lookup, e.g. MSH or MSH-2");
 111  
                         }
 112  
                 } else {
 113  8
                         // Only the text after the last '/' is validated; the group names before it are not checked.
                         // The first operand can never be true (lastIndexOf is at most length() - 1); a trailing
                         // slash is still rejected because the empty remainder fails the pattern.
                         if (lastSlashIndex == theForcedEncode.length() || !theForcedEncode.substring(lastSlashIndex + 1).matches("[A-Z0-9]{3}(-[0-9]+){0,2}$")) {
 114  1
                                 throw new IllegalArgumentException("Definition must end with a segment name or field lookup, e.g. MSH or MSH-2");
 115  
                         }
 116  
                 }
 117  15
                 // Set semantics: adding the same definition twice has no further effect.
                 myForcedEncode.add(theForcedEncode);
 118  15
         }
 119  
 
 120  
         /**
          * Checks whether any forced-encode definition begins with the given path.
          * <p>
          * NOTE(review): this is a plain character prefix test ({@link String#startsWith(String)}),
          * not a path-element comparison, so "A/B" also matches a definition such as "A/BC/XYZ".
          * Confirm that callers only pass complete path elements.
          * </p>
          *
          * @param theTerserPath path prefix to look for; must not be null
          * @return <code>true</code> if at least one forced-encode definition starts with
          *         <code>theTerserPath</code>, <code>false</code> otherwise
          */
         boolean determineForcedEncodeIncludesTerserPath(String theTerserPath) {
 121  5467
                 for (String next : getForcedEncode()) {
 122  229
                         if (next.startsWith(theTerserPath)) {
 123  14
                                 return true;
 124  
                         }
 125  215
                 }
 126  5453
                 return false;
 127  
         }
 128  
 
 129  
         /**
          * Works out the highest field number that any forced-encode definition requests
          * for the given path.
          * <p>
          * Only definitions that start with <code>theCurrentTerserPath</code> and are strictly
          * longer than it are considered. For each of those, the text between the character
          * following the prefix and the next '-' (or the end of the definition) is parsed as
          * an integer.
          * </p>
          * <p>
          * NOTE(review): {@link Integer#parseInt(String)} throws NumberFormatException if that
          * text is not numeric, e.g. when the argument is a group-level prefix of a definition
          * ("A" against "A/ORC-4"). This presumably expects a full segment path - confirm
          * against the callers.
          * </p>
          *
          * @param theCurrentTerserPath path to look up; must not be null
          * @return the largest field number found, or 0 if no definition applies
          */
         int determineForcedFieldNumForTerserPath(String theCurrentTerserPath) {
 130  5467
                 int forceUpToFieldNum = 0;
 131  5467
                 for (String nextPath : getForcedEncode()) {
 132  229
                         // An exact match (same length) names the segment only and carries no field number.
                         if (nextPath.startsWith(theCurrentTerserPath) && nextPath.length() > theCurrentTerserPath.length()) {
 133  12
                                 // The "+ 1" skips the single character after the prefix (presumably the '-' separator),
                                 // so this finds the '-' that starts a component number, if there is one.
                                 int endOfFieldDef = nextPath.indexOf('-', theCurrentTerserPath.length() + 1);
 134  12
                                 if (endOfFieldDef == -1) {
 135  5
                                         endOfFieldDef = nextPath.length();
 136  
                                 }
 137  12
                                 String fieldNumString = nextPath.substring(theCurrentTerserPath.length() + 1, endOfFieldDef);
 138  12
                                 // Keep the maximum across all matching definitions.
                                 forceUpToFieldNum = Math.max(forceUpToFieldNum, Integer.parseInt(fieldNumString));
 139  
                         }
 140  229
                 }
 141  5467
                 return forceUpToFieldNum;
 142  
         }
 143  
 
 144  
         /**
 145  
          * Returns the default datatype ("ST", "NM", etc) for an OBX segment with a
 146  
          * missing OBX-2 value
 147  
          * 
 148  
          * @return Returns the default datatype ("ST", "NM", etc) for an OBX segment
 149  
          *         with a missing OBX-2 value
 150  
          * @see #setDefaultObx2Type(String)
 151  
          */
 152  
         public String getDefaultObx2Type() {
 153  0
                 // The field has no initializer, so this is null until setDefaultObx2Type(String) is called.
                 return myDefaultObx2Type;
 154  
         }
 155  
 
 156  
         /**
 157  
          * @return Returns the forced encode strings added by
 158  
          *         {@link #addForcedEncode(String)}
 159  
          * 
 160  
          * @see #addForcedEncode(String)
 161  
          * @since 1.3
 162  
          */
 163  
         public Set<String> getForcedEncode() {
 164  139776
                 // Read-only view over the live set: callers cannot modify it, but later
                 // addForcedEncode/removeForcedEncode calls are visible through it.
                 return Collections.unmodifiableSet(myForcedEncode);
 165  
         }
 166  
 
 167  
         /**
 168  
          * @return the ID Generator to be used for generating IDs for new messages
 169  
          */
 170  
         public IDGenerator getIdGenerator() {
 171  207
                 // Returns the FileBasedHiLoGenerator created with this object unless setIdGenerator replaced it.
                 return idGenerator;
 172  
         }
 173  
 
 174  
         /**
 175  
          * Returns the value that provides a default datatype ("ST", "NM", etc) for an
 176  
          * OBX segment with an invalid OBX-2 value.
 177  
          * 
 178  
          * @return Returns the value that provides a default datatype ("ST", "NM", etc)
 179  
          *         for an OBX segment with an invalid OBX-2 value.
 180  
          * @see #setInvalidObx2Type(String)
 181  
          */
 182  
         public String getInvalidObx2Type() {
 183  2
                 // The field has no initializer, so this is null until setInvalidObx2Type(String) is called.
                 return myInvalidObx2Type;
 184  
         }
 185  
 
 186  
         /**
 187  
          * Returns the behaviour to use when parsing a message and a nonstandard
 188  
          * segment is found. Default is
 189  
          * {@link #DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR}
 190  
      *
 191  
      * @return the behaviour to use when a nonstandard segment is found
 192  
          */
 193  
         public UnexpectedSegmentBehaviourEnum getUnexpectedSegmentBehaviour() {
 194  185
                 // Lazy default: the field is only null if the setter was never called (the setter rejects null).
                 // Note that this getter writes the field the first time it is read.
                 if (myUnexpectedSegmentBehaviour == null) {
 195  42
                         myUnexpectedSegmentBehaviour = DEFAULT_UNEXPECTED_SEGMENT_BEHAVIOUR;
 196  
                 }
 197  185
                 return myUnexpectedSegmentBehaviour;
 198  
         }
 199  
 
 200  
         /**
 201  
          * If set to <code>true</code> (default is <code>false</code>) the parser
 202  
          * will allow messages to parse, even if they contain a version which is not
 203  
          * known to the parser. When operating in this mode, if a message arrives
 204  
          * with an unknown version string, the parser will attempt to parse it using
 205  
          * a {@link GenericMessage Generic Message} class instead of a specific HAPI
 206  
          * structure class. Default is <code>false</code>.
 207  
      *
 208  
      * @return true if parsing messages with unknown versions is allowed
 209  
          */
 210  
         public boolean isAllowUnknownVersions() {
 211  532
                 // The field has no initializer, so this is false until setAllowUnknownVersions(true) is called.
                 return this.allowUnknownVersions;
 212  
         }
 213  
 
 214  
         /**
 215  
      * Returns <code>true</code> if empty segments should still be encoded
 216  
      * if they are mandatory within their message structure.  Default is <code>true</code>.
 217  
          * @return <code>true</code> if empty segments should still be encoded
 218  
      *
 219  
          * @see #setEncodeEmptyMandatoryFirstSegments(boolean)
 220  
          */
 221  
         public boolean isEncodeEmptyMandatorySegments() {
 222  17
                 // The backing field is initialised to true; the matching setter is
                 // setEncodeEmptyMandatoryFirstSegments(boolean) (note the different name).
                 return myEncodeEmptyMandatorySegments;
 223  
         }
 224  
 
 225  
         /**
 226  
      * Returns <code>true</code> if subcomponent delimiters in OBX-5 shall be
 227  
      *         ignored. Default is <code>false</code>.
 228  
          * @return <code>true</code> if subcomponent delimiters in OBX-5 shall be
 229  
          *         ignored
 230  
          */
 231  
         public boolean isEscapeSubcomponentDelimiterInPrimitive() {
 232  248
                 // The backing field is initialised to false.
                 return escapeSubcomponentDelimiterInPrimitive;
 233  
         }
 234  
 
 235  
         /**
 236  
          * Returns <code>true</code> if the parser should parse in non-greedy mode. Default
 237  
          * is <code>false</code>
 238  
          * 
 239  
          * @see #setNonGreedyMode(boolean) for an explanation of non-greedy mode
 240  
          */
 241  
         public boolean isNonGreedyMode() {
 242  6753
                 // The backing field is initialised to false; it is changed only through setNonGreedyMode(boolean).
                 return nonGreedyMode;
 243  
         }
 244  
 
 245  
         /**
 246  
          * If set to <code>true</code> (which is the default), {@link XMLParser XML Parsers}
 247  
          * will attempt to pretty-print the XML they generate. This means the messages will look
 248  
          * nicer to humans, but may take up slightly more space/bandwidth.
 249  
          */
 250  
         public boolean isPrettyPrintWhenEncodingXml() {
 251  18
                 // The backing field is initialised to true.
                 return prettyPrintWhenEncodingXml;
 252  
         }
 253  
 
 254  
         /**
 255  
      * Returns <code>true</code> if the parser validates using a configured
 256  
      *         {@link ValidationContext}. Default is <code>true</code>.
 257  
          * @return <code>true</code> if the parser validates using a configured
 258  
          *         {@link ValidationContext}
 259  
          */
 260  
         public boolean isValidating() {
 261  2532
                 // The backing field is initialised to true; it is changed only through setValidating(boolean).
                 return validating;
 262  
         }
 263  
 
 264  
         /**
 265  
          * Removes a forced encode entry
 266  
          *
 267  
      * @param theForcedEncode path definition to be removed
 268  
          * @see #addForcedEncode(String)
 269  
          * @since 1.3
 270  
          */
 271  
         public void removeForcedEncode(String theForcedEncode) {
 272  0
                 // Same null contract and message as addForcedEncode.
                 if (theForcedEncode == null) {
 273  0
                         throw new NullPointerException("forced encode may not be null");
 274  
                 }
 275  
 
 276  0
                 // No format validation here: removing a definition that was never added is a silent no-op.
                 myForcedEncode.remove(theForcedEncode);
 277  0
         }
 278  
 
 279  
         /**
 280  
          * If set to <code>true</code> (default is <code>false</code>) the parser
 281  
          * will allow messages to parse, even if they contain a version which is not
 282  
          * known to the parser. When operating in this mode, if a message arrives
 283  
          * with an unknown version string, the parser will attempt to parse it using
 284  
          * a {@link GenericMessage Generic Message} class instead of a specific HAPI
 285  
          * structure class.
 286  
      *
 287  
      * @param theAllowUnknownVersions true if parsing unknown versions shall be allowed
 288  
          */
 289  
         public void setAllowUnknownVersions(boolean theAllowUnknownVersions) {
 290  0
                 // Plain assignment; the new value is what isAllowUnknownVersions() returns from now on.
                 allowUnknownVersions = theAllowUnknownVersions;
 291  0
         }
 292  
 
 293  
         /**
 294  
          * <p>
 295  
          * If this property is set, the value provides a default datatype ("ST",
 296  
          * "NM", etc) for an OBX segment with a missing OBX-2 value. This is useful
 297  
          * when parsing messages from systems which do not correctly populate OBX-2.
 298  
          * </p>
 299  
          * <p>
 300  
          * For example, if this property is set to "ST", and the following OBX
 301  
          * segment is encountered:
 302  
          * 
 303  
          * <pre>
 304  
          * OBX|||||This is a value
 305  
          * </pre>
 306  
          * 
 307  
          * It will be parsed as though it had read:
 308  
          * 
 309  
          * <pre>
 310  
          * OBX||ST|||This is a value
 311  
          * </pre>
 312  
          * 
 313  
          * </p>
 314  
          * <p>
 315  
          * Note that this configuration can also be set globally using the system
 316  
          * property {@link Varies#DEFAULT_OBX2_TYPE_PROP}, but any value provided to
 317  
          * {@link ParserConfiguration} takes priority over the system property.
 318  
          * </p>
 319  
          * 
 320  
          * @param theDefaultObx2Type
 321  
          *            If this property is set, the value provides a default datatype
 322  
          *            ("ST", "NM", etc) for an OBX segment with a missing OBX-2
 323  
          *            value
 324  
          * @see #setInvalidObx2Type(String)
 325  
          * @see Varies#INVALID_OBX2_TYPE_PROP
 326  
          */
 327  
         public void setDefaultObx2Type(String theDefaultObx2Type) {
 328  0
                 // Stored as-is: the value is not validated here, and passing null resets the field to null.
                 myDefaultObx2Type = theDefaultObx2Type;
 329  0
         }
 330  
 
 331  
         /**
 332  
          * <p>
 333  
          * If set to <code>true</code> (default is <code>true</code>), when encoding
 334  
          * a group using the PipeParser where the first segment is required, but no
 335  
          * data has been populated in that segment, the empty segment is now still
 336  
          * encoded if needed as a blank segment in order to give parsers a hint
 337  
          * about which group subsequent segments are in. This helps to ensure that
 338  
          * messages can be "round tripped", meaning that a message which is parsed,
 339  
          * encoded, and then re-parsed should contain exactly the same structure
 340  
          * from beginning to end.
 341  
          * </p>
 342  
          * <p>
 343  
          * </p>
 344  
          * For example, in an ORU^R01 message with a populated OBX segment, but no
 345  
          * data in the mandatory OBR segment which begins the ORDER_OBSERVATION
 346  
          * group the message would still contain an empty OBR segment when encoded:
 347  
          * 
 348  
          * <pre>
 349  
          *         MSH|^~\&|REG|W|||201103230042||ORU^R01|32153168|P|2.5
 350  
          *         OBR|
 351  
          *         OBX||ST|||Value Data
 352  
          * </pre>
 353  
          * 
 354  
          * Previously, the following encoding would have occurred, which would have
 355  
          * incorrectly been parsed as having a custom OBX segment instead of having
 356  
          * a normal ORDER_OBSERVATION group:
 357  
          * 
 358  
          * <pre>
 359  
          *         MSH|^~\&|REG|W|||201103230042||ORU^R01|32153168|P|2.5
 360  
          *         OBX||ST|||Value Data
 361  
          * </pre>
 362  
          * 
 363  
          * @param theEncodeEmptyMandatorySegments
 364  
          *            If set to <code>true</code> (default is <code>true</code>),
 365  
          *            when encoding a group using the PipeParser where the first
 366  
          *            segment is required, but no data has been populated in that
 367  
          *            segment, the empty segment is now still encoded if needed as a
 368  
          *            blank segment in order to give parsers a hint about which
 369  
          *            group subsequent segments are in
 370  
          */
 371  
         public void setEncodeEmptyMandatoryFirstSegments(boolean theEncodeEmptyMandatorySegments) {
 372  1
                 // Writes the field that isEncodeEmptyMandatorySegments() reads (the getter name has no "First").
                 myEncodeEmptyMandatorySegments = theEncodeEmptyMandatorySegments;
 373  1
         }
 374  
 
 375  
         /**
 376  
          * Set to <code>true</code> if subcomponent delimiters in OBX-5 shall be
 377  
          * ignored
 378  
      * @param escapeSubcomponentDelimiterInPrimitive boolean flag to enable or disable this behavior
 379  
          */
 380  
         public void setEscapeSubcomponentDelimiterInPrimitive(boolean escapeSubcomponentDelimiterInPrimitive) {
 381  1
                 // "this." is required because the parameter shadows the field of the same name.
                 this.escapeSubcomponentDelimiterInPrimitive = escapeSubcomponentDelimiterInPrimitive;
 382  1
         }
 383  
 
 384  
         /**
 385  
          * @param idGenerator
 386  
          *            the {@link IDGenerator} to be used for generating IDs for new
 387  
          *            messages, preferably initialized using the methods described
 388  
          *            in IDGeneratorFactory.
 389  
          * 
 390  
          * @see IDGenerator
 391  
          */
 392  
         public void setIdGenerator(IDGenerator idGenerator) {
 393  0
                 // NOTE(review): no null check, so passing null makes getIdGenerator() return null - confirm this is intended.
                 this.idGenerator = idGenerator;
 394  0
         }
 395  
 
 396  
         /**
 397  
          * <p>
 398  
          * If this property is set, the value provides a default datatype ("ST",
 399  
          * "NM", etc) for an OBX segment with an invalid OBX-2 value. This is useful
 400  
          * when parsing messages from systems which do not correctly populate OBX-2.
 401  
          * </p>
 402  
          * <p>
 403  
          * For example, if this property is set to "ST", and the following OBX
 404  
          * segment is encountered:
 405  
          * 
 406  
          * <pre>
 407  
          * OBX||INVALID|||This is a value
 408  
          * </pre>
 409  
          * 
 410  
          * It will be parsed as though it had read:
 411  
          * 
 412  
          * <pre>
 413  
          * OBX||ST|||This is a value
 414  
          * </pre>
 415  
          * 
 416  
          * </p>
 417  
          * <p>
 418  
          * Note that this configuration can also be set globally using the system
 419  
          * property {@link Varies#INVALID_OBX2_TYPE_PROP}, but any value provided to
 420  
          * {@link ParserConfiguration} takes priority over the system property.
 421  
          * </p>
 422  
          * 
 423  
          * @param theInvalidObx2Type
 424  
          *            If this property is set, the value provides a default datatype
 425  
          *            ("ST", "NM", etc) for an OBX segment with an invalid OBX-2
 426  
          *            value. This is useful when parsing messages from systems which
 427  
          *            do not correctly populate OBX-2.
 428  
          * @see ParserConfiguration#setDefaultObx2Type(String)
 429  
          * @see Varies#DEFAULT_OBX2_TYPE_PROP
 430  
          */
 431  
         public void setInvalidObx2Type(String theInvalidObx2Type) {
 432  0
                 // Stored as-is: the value is not validated here, and passing null resets the field to null.
                 myInvalidObx2Type = theInvalidObx2Type;
 433  0
         }
 434  
 
 435  
         /**
 436  
          * If set to <code>true</code> (default is <code>false</code>), pipe parser will be
 437  
          * put in non-greedy mode. This setting applies only to {@link PipeParser Pipe Parsers} and
 438  
          * will have no effect on {@link XMLParser XML Parsers}.
 439  
          * 
 440  
          * <p>
 441  
          * In non-greedy mode, if the message structure being parsed has an ambiguous
 442  
          * choice of where to put a segment because there is a segment matching the
 443  
          * current segment name in both a later position in the message, and
 444  
          * in an earlier position as a part of a repeating group, the earlier
 445  
          * position will be chosen.
 446  
          * </p>
 447  
          * <p>
 448  
          * This is perhaps best explained with an example. Consider the following structure:
 449  
          * </p>
 450  
          * <pre>
 451  
          * MSH
 452  
          * GROUP_1 (start)
 453  
          * {
 454  
          *    AAA
 455  
          *    BBB
 456  
          *    GROUP_2 (start)
 457  
          *    {
 458  
          *       AAA
 459  
          *    }
 460  
          *    GROUP_2 (end)
 461  
          * }
 462  
          * GROUP_1 (end)
 463  
          * </pre>
 464  
          * 
 465  
          * <p>
 466  
          * For the above example, consider a message containing the following segments:<br/>
 467  
          * <code>MSH<br/>
 468  
          * AAA<br/>
 469  
          * BBB<br/>
 470  
          * AAA</code>
 471  
          * </p>
 472  
          * <p>
 473  
          * In this example, when the second AAA segment is encountered, there are two
 474  
          * possible choices. It would be placed in GROUP_2, or it could be placed in 
 475  
          * a second repetition of GROUP_1. By default it will be placed in GROUP_2, but
 476  
          * in non-greedy mode it will be put in a new repetition of GROUP_1.
 477  
          * </p>
 478  
          * <p>
 479  
          * This mode is useful for example when parsing OML^O21 messages containing
 480  
          * multiple orders.
 481  
          * </p>
 482  
          */
 483  
         public void setNonGreedyMode(boolean theNonGreedyMode) {
 484  3
                 // theNonGreedyMode: true switches non-greedy mode on, false (the initial value) switches it off.
                 nonGreedyMode = theNonGreedyMode;
 485  3
         }
 486  
 
 487  
         /**
 488  
          * If set to <code>true</code> (which is the default), {@link XMLParser XML Parsers}
 489  
          * will attempt to pretty-print the XML they generate. This means the messages will look
 490  
          * nicer to humans, but may take up slightly more space/bandwidth.
 491  
          */
 492  
         public void setPrettyPrintWhenEncodingXml(boolean thePrettyPrintWhenEncodingXml) {
 493  0
                 // thePrettyPrintWhenEncodingXml: the new flag value; the field starts out as true.
                 prettyPrintWhenEncodingXml = thePrettyPrintWhenEncodingXml;
 494  0
         }
 495  
 
 496  
         /**
 497  
          * Sets the behaviour to use when parsing a message and a nonstandard
 498  
          * segment is found
 499  
      *
 500  
      * @param theUnexpectedSegmentBehaviour behaviour to use when a nonstandard segment is found
 501  
      */
 502  
         public void setUnexpectedSegmentBehaviour(UnexpectedSegmentBehaviourEnum theUnexpectedSegmentBehaviour) {
 503  5
                 // Null is rejected, so once a value has been set the getter's lazy default no longer applies.
                 if (theUnexpectedSegmentBehaviour == null) {
 504  0
                         throw new NullPointerException("UnexpectedSegmentBehaviour can not be null");
 505  
                 }
 506  5
                 myUnexpectedSegmentBehaviour = theUnexpectedSegmentBehaviour;
 507  5
         }
 508  
 
 509  
         /**
 510  
          * Determines whether the parser validates using a configured
 511  
          * {@link ValidationContext} or not. This allows disabling message
 512  
          * validation although a validation context is defined.
 513  
          * 
 514  
          * @param validating
 515  
          *            <code>true</code> if parser shall validate, <code>false</code>
 516  
          *            if not
 517  
          */
 518  
         public void setValidating(boolean validating) {
 519  0
                 // "this." is required because the parameter shadows the field of the same name.
                 this.validating = validating;
 520  0
         }
 521  
 
 522  
 }