Coverage Report - ca.uhn.hl7v2.parser.Escape
 
Classes in this File Line Coverage Branch Coverage Complexity
Escape
92%
85/92
70%
44/62
7.5
Escape$1
100%
3/3
50%
1/2
7.5
Escape$EncLookup
100%
18/18
100%
2/2
7.5
 
 1  
 /**
 2  
 The contents of this file are subject to the Mozilla Public License Version 1.1 
 3  
 (the "License"); you may not use this file except in compliance with the License. 
 4  
 You may obtain a copy of the License at http://www.mozilla.org/MPL/ 
 5  
 Software distributed under the License is distributed on an "AS IS" basis, 
 6  
 WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the 
 7  
 specific language governing rights and limitations under the License. 
 8  
 
 9  
 The Original Code is "Escape.java".  Description: 
 10  
 "Handles "escaping" and "unescaping" of text according to the HL7 escape sequence rules
 11  
 defined in section 2.10 of the standard (version 2.4)" 
 12  
 
 13  
 The Initial Developer of the Original Code is University Health Network. Copyright (C) 
 14  
 2001.  All Rights Reserved. 
 15  
 
 16  
 Contributor(s): Mark Lee (Skeva Technologies); Elmar Hinz 
 17  
 
 18  
 Alternatively, the contents of this file may be used under the terms of the 
 19  
 GNU General Public License (the "GPL"), in which case the provisions of the GPL are 
 20  
 applicable instead of those above.  If you wish to allow use of your version of this 
 21  
 file only under the terms of the GPL and not to allow others to use your version 
 22  
 of this file under the MPL, indicate your decision by deleting  the provisions above 
 23  
 and replace  them with the notice and other provisions required by the GPL License.  
 24  
 If you do not delete the provisions above, a recipient may use your version of 
 25  
 this file under either the MPL or the GPL. 
 26  
  */
 27  
 package ca.uhn.hl7v2.parser;
 28  
 
 29  
 import java.util.Collections;
 30  
 import java.util.LinkedHashMap;
 31  
 import java.util.Map;
 32  
 
 33  
 /**
 34  
  * Handles "escaping" and "unescaping" of text according to the HL7 escape
 35  
  * sequence rules defined in section 2.10 of the standard (version 2.4).
 36  
  * Currently, escape sequences for multiple character sets are unsupported. The
 37  
  * highlighting, hexademical, and locally defined escape sequences are also
 38  
  * unsupported.
 39  
  * 
 40  
  * @author Bryan Tripp
 41  
  * @author Mark Lee (Skeva Technologies)
 42  
  * @author Elmar Hinz
 43  
  * @author Christian Ohr
 44  
  */
 45  
 public class Escape {
 46  
 
 47  
     /**
 48  
      * limits the size of variousEncChars to 1000, can be overridden by system property.
 49  
      */
 50  1
     private static Map<EncodingCharacters, EncLookup> variousEncChars = Collections.synchronizedMap(new LinkedHashMap<EncodingCharacters, EncLookup>(5, 0.75f, true) {
 51  
 
 52  
         private static final long serialVersionUID = 1L;
 53  1
         final int maxSize = new Integer(System.getProperty(Escape.class.getName() + ".maxSize", "1000"));
 54  
 
 55  
         @Override
 56  
         protected boolean removeEldestEntry(Map.Entry<EncodingCharacters, EncLookup> eldest) {
 57  2
             return this.size() > maxSize;
 58  
         }
 59  
     });
 60  
 
 61  
     /** Creates a new instance of Escape */
 62  0
     public Escape() {
 63  0
     }
 64  
 
 65  
     /**
 66  
      * @param text string to be escaped
 67  
      * @param encChars encoding characters to be used
 68  
      * @return the escaped string
 69  
      */
 70  
     public static String escape(String text, EncodingCharacters encChars) {
 71  9090
         EncLookup esc = getEscapeSequences(encChars);
 72  9090
         int textLength = text.length();
 73  
 
 74  9090
         StringBuilder result = new StringBuilder(textLength);
 75  70021
         for (int i = 0; i < textLength; i++) {
 76  60931
             boolean charReplaced = false;
 77  60931
             char c = text.charAt(i);
 78  
 
 79  
             FORENCCHARS:
 80  420185
                         for (int j = 0; j < 6; j++) {
 81  361062
                 if (text.charAt(i) == esc.characters[j]) {
 82  
 
 83  
                                         // Formatting escape sequences such as \.br\ should be left alone
 84  1808
                                         if (j == 4) {
 85  
                                                 
 86  509
                                                 if (i+1 < textLength) {
 87  
                                                         
 88  
                                                         // Check for \.br\
 89  509
                                                         char nextChar = text.charAt(i + 1);
 90  509
                                                         switch (nextChar) {
 91  
                                                         case '.':
 92  
                                                         case 'C':
 93  
                                                         case 'M':
 94  
                                                         case 'X':
 95  
                                                         case 'Z':
 96  
                                                         {
 97  63
                                                                 int nextEscapeIndex = text.indexOf(esc.characters[j], i + 1);
 98  63
                                                                 if (nextEscapeIndex > 0) {
 99  63
                                                                         result.append(text.substring(i, nextEscapeIndex + 1));
 100  63
                                                                         charReplaced = true;
 101  63
                                                                         i = nextEscapeIndex;
 102  63
                                                                         break FORENCCHARS;
 103  
                                                                 }
 104  
                                                                 break;
 105  
                                                         }
 106  
                                                         case 'H':
 107  
                                                         case 'N':
 108  
                                                         {
 109  30
                                                                 if (i+2 < textLength && text.charAt(i+2) == '\\') {
 110  30
                                                                         int nextEscapeIndex = i + 2;
 111  30
                                                                         if (nextEscapeIndex > 0) {
 112  30
                                                                                 result.append(text.substring(i, nextEscapeIndex + 1));
 113  30
                                                                                 charReplaced = true;
 114  30
                                                                                 i = nextEscapeIndex;
 115  30
                                                                                 break FORENCCHARS;
 116  
                                                                         }
 117  
                                                                 }
 118  
                                                                 break;
 119  
                                                         }
 120  
                                                         }
 121  
                                                         
 122  
                                                 }
 123  
                                                 
 124  
                                         }
 125  
 
 126  1715
                     result.append(esc.encodings[j]);
 127  1715
                     charReplaced = true;
 128  1715
                     break;
 129  
                 }
 130  
             }
 131  60931
             if (!charReplaced) {
 132  59123
                 result.append(c);
 133  
             }
 134  
         }
 135  9090
         return result.toString();
 136  
     }
 137  
 
 138  
     /**
 139  
      * @param text string to be unescaped
 140  
      * @param encChars encoding characters to be used
 141  
      * @return the unescaped string
 142  
      */
 143  
     public static String unescape(String text, EncodingCharacters encChars) {
 144  
 
 145  
         // If the escape char isn't found, we don't need to look for escape sequences
 146  17252
         char escapeChar = encChars.getEscapeCharacter();
 147  17252
         boolean foundEscapeChar = false;
 148  123777
         for (int i = 0; i < text.length(); i++) {
 149  106954
             if (text.charAt(i) == escapeChar) {
 150  429
                 foundEscapeChar = true;
 151  429
                 break;
 152  
             }
 153  
         }
 154  17252
         if (!foundEscapeChar) {
 155  16823
             return text;
 156  
         }
 157  
 
 158  429
         int textLength = text.length();
 159  429
         StringBuilder result = new StringBuilder(textLength + 20);
 160  429
         EncLookup esc = getEscapeSequences(encChars);
 161  429
         char escape = esc.characters[4];
 162  429
         int encodingsCount = esc.characters.length;
 163  429
         int i = 0;
 164  74850
         while (i < textLength) {
 165  74421
             char c = text.charAt(i);
 166  74421
             if (c != escape) {
 167  67274
                 result.append(c);
 168  67274
                 i++;
 169  
             } else {
 170  7147
                 boolean foundEncoding = false;
 171  
 
 172  
                                 // Test against the standard encodings
 173  32412
                                 for (int j = 0; j < encodingsCount; j++) {
 174  30083
                     String encoding = esc.encodings[j];
 175  30083
                                         int encodingLength = encoding.length();
 176  30083
                                         if ((i + encodingLength <= textLength) && text.substring(i, i + encodingLength)
 177  
                             .equals(encoding)) {
 178  4818
                         result.append(esc.characters[j]);
 179  4818
                         i += encodingLength;
 180  4818
                         foundEncoding = true;
 181  4818
                         break;
 182  
                     }
 183  
                 }
 184  
 
 185  7147
                 if (!foundEncoding) {
 186  
                                         
 187  
                                         // If we haven't found this, there is one more option. Escape sequences of /.XXXXX/ are
 188  
                                         // formatting codes. They should be left intact
 189  2329
                                         if (i + 1 < textLength) {
 190  2329
                                                 char nextChar = text.charAt(i + 1);
 191  2329
                                                 switch (nextChar) {
 192  
                                                         case '.':
 193  
                                                         case 'C':
 194  
                                                         case 'M':
 195  
                                                         case 'X':
 196  
                                                         case 'Z':
 197  
                                                         {
 198  2285
                                                                 int closingEscape = text.indexOf(escape, i + 1);
 199  2285
                                                                 if (closingEscape > 0) {
 200  2285
                                                                         String substring = text.substring(i, closingEscape + 1);
 201  2285
                                                                         result.append(substring);
 202  2285
                                                                         i += substring.length();
 203  2285
                                                                 } else {
 204  0
                                                                         i++;
 205  
                                                                 }
 206  0
                                                                 break;
 207  
                                                         }
 208  
                                                         case 'H':
 209  
                                                         case 'N':
 210  
                                                         {
 211  36
                                                                 int closingEscape = text.indexOf(escape, i + 1);
 212  36
                                                                 if (closingEscape == i + 2) {
 213  36
                                                                         String substring = text.substring(i, closingEscape + 1);
 214  36
                                                                         result.append(substring);
 215  36
                                                                         i += substring.length();
 216  36
                                                                 } else {
 217  0
                                                                         i++;
 218  
                                                                 }
 219  0
                                                                 break;
 220  
                                                         }
 221  
                                                         default:
 222  
                                                         {
 223  8
                                                                 i++;
 224  
                                                         }
 225  
                                                 }
 226  
                                                 
 227  2329
                                         } else {
 228  0
                                                 i++;
 229  
                                         }
 230  
                 }
 231  
 
 232  
 
 233  
             }
 234  74421
         }
 235  429
         return result.toString();
 236  
     }
 237  
 
 238  
     /**
 239  
      * Returns a HashTable with escape sequences as keys, and corresponding
 240  
      * Strings as values.
 241  
      */
 242  
     private static EncLookup getEscapeSequences(EncodingCharacters encChars) {
 243  9519
         EncLookup escapeSequences = variousEncChars.get(encChars);
 244  9519
         if (escapeSequences == null) {
 245  
             // this means we haven't got the sequences for these encoding
 246  
             // characters yet - let's make them
 247  2
             escapeSequences = new EncLookup(encChars);
 248  2
             variousEncChars.put(encChars, escapeSequences);
 249  
         }
 250  9519
         return escapeSequences;
 251  
     }
 252  
 
 253  
 
 254  
 
 255  
 
 256  
     /**
 257  
      * A performance-optimized replacement for using when
 258  
      * mapping from HL7 special characters to their respective
 259  
      * encodings
 260  
      *
 261  
      * @author Christian Ohr
 262  
      */
 263  
     private static class EncLookup {
 264  
 
 265  2
         char[] characters = new char[6];
 266  2
         String[] encodings = new String[6];
 267  
 
 268  2
         EncLookup(EncodingCharacters ec) {
 269  2
             characters[0] = ec.getFieldSeparator();
 270  2
             characters[1] = ec.getComponentSeparator();
 271  2
             characters[2] = ec.getSubcomponentSeparator();
 272  2
             characters[3] = ec.getRepetitionSeparator();
 273  2
             characters[4] = ec.getEscapeCharacter();
 274  2
             characters[5] = '\r';
 275  2
             char[] codes = {'F', 'S', 'T', 'R', 'E'};
 276  12
             for (int i = 0; i < codes.length; i++) {
 277  10
                 StringBuilder seq = new StringBuilder();
 278  10
                 seq.append(ec.getEscapeCharacter());
 279  10
                 seq.append(codes[i]);
 280  10
                 seq.append(ec.getEscapeCharacter());
 281  10
                 encodings[i] = seq.toString();
 282  
             }
 283  2
             encodings[5] = "\\X000d\\";
 284  2
         }
 285  
     }
 286  
 }