View Javadoc

1   /**
2   The contents of this file are subject to the Mozilla Public License Version 1.1 
3   (the "License"); you may not use this file except in compliance with the License. 
4   You may obtain a copy of the License at http://www.mozilla.org/MPL/ 
5   Software distributed under the License is distributed on an "AS IS" basis, 
6   WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the 
7   specific language governing rights and limitations under the License. 
8   
9   The Original Code is "Escape.java".  Description: 
10  "Handles "escaping" and "unescaping" of text according to the HL7 escape sequence rules
11  defined in section 2.10 of the standard (version 2.4)" 
12  
13  The Initial Developer of the Original Code is University Health Network. Copyright (C) 
14  2001.  All Rights Reserved. 
15  
16  Contributor(s): Mark Lee (Skeva Technologies); Elmar Hinz 
17  
18  Alternatively, the contents of this file may be used under the terms of the 
19  GNU General Public License (the "GPL"), in which case the provisions of the GPL are 
20  applicable instead of those above.  If you wish to allow use of your version of this 
21  file only under the terms of the GPL and not to allow others to use your version 
22  of this file under the MPL, indicate your decision by deleting  the provisions above 
23  and replace  them with the notice and other provisions required by the GPL License.  
24  If you do not delete the provisions above, a recipient may use your version of 
25  this file under either the MPL or the GPL. 
26   */
27  package ca.uhn.hl7v2.parser;
28  
29  import java.util.Collections;
30  import java.util.LinkedHashMap;
31  import java.util.Map;
32  
33  /**
34   * Handles "escaping" and "unescaping" of text according to the HL7 escape
35   * sequence rules defined in section 2.10 of the standard (version 2.4).
36   * Currently, escape sequences for multiple character sets are unsupported. The
37   * highlighting, hexadecimal, and locally defined escape sequences are also
38   * unsupported.
39   * 
40   * @author Bryan Tripp
41   * @author Mark Lee (Skeva Technologies)
42   * @author Elmar Hinz
43   * @author Christian Ohr
44   */
45  public class Escape {
46  
47      /**
48       * limits the size of variousEncChars to 1000, can be overridden by system property.
49       */
50      private static Map<EncodingCharacters, EncLookup> variousEncChars = Collections.synchronizedMap(new LinkedHashMap<EncodingCharacters, EncLookup>(5, 0.75f, true) {
51  
52          private static final long serialVersionUID = 1L;
53          final int maxSize = new Integer(System.getProperty(Escape.class.getName() + ".maxSize", "1000"));
54  
55          @Override
56          protected boolean removeEldestEntry(Map.Entry<EncodingCharacters, EncLookup> eldest) {
57              return this.size() > maxSize;
58          }
59      });
60  
61      /** Creates a new instance of Escape */
62      public Escape() {
63      }
64  
65      /**
66       * @param text string to be escaped
67       * @param encChars encoding characters to be used
68       * @return the escaped string
69       */
70      public static String escape(String text, EncodingCharacters encChars) {
71          EncLookup esc = getEscapeSequences(encChars);
72          int textLength = text.length();
73  
74          StringBuilder result = new StringBuilder(textLength);
75          for (int i = 0; i < textLength; i++) {
76              boolean charReplaced = false;
77              char c = text.charAt(i);
78  
79              FORENCCHARS:
80  			for (int j = 0; j < 6; j++) {
81                  if (text.charAt(i) == esc.characters[j]) {
82  
83  					// Formatting escape sequences such as \.br\ should be left alone
84  					if (j == 4) {
85  						
86  						if (i+1 < textLength) {
87  							
88  							// Check for \.br\
89  							char nextChar = text.charAt(i + 1);
90  							switch (nextChar) {
91  							case '.':
92  							case 'C':
93  							case 'M':
94  							case 'X':
95  							case 'Z':
96  							{
97  								int nextEscapeIndex = text.indexOf(esc.characters[j], i + 1);
98  								if (nextEscapeIndex > 0) {
99  									result.append(text.substring(i, nextEscapeIndex + 1));
100 									charReplaced = true;
101 									i = nextEscapeIndex;
102 									break FORENCCHARS;
103 								}
104 								break;
105 							}
106 							case 'H':
107 							case 'N':
108 							{
109 								if (i+2 < textLength && text.charAt(i+2) == '\\') {
110 									int nextEscapeIndex = i + 2;
111 									if (nextEscapeIndex > 0) {
112 										result.append(text.substring(i, nextEscapeIndex + 1));
113 										charReplaced = true;
114 										i = nextEscapeIndex;
115 										break FORENCCHARS;
116 									}
117 								}
118 								break;
119 							}
120 							}
121 							
122 						}
123 						
124 					}
125 
126                     result.append(esc.encodings[j]);
127                     charReplaced = true;
128                     break;
129                 }
130             }
131             if (!charReplaced) {
132                 result.append(c);
133             }
134         }
135         return result.toString();
136     }
137 
138     /**
139      * @param text string to be unescaped
140      * @param encChars encoding characters to be used
141      * @return the unescaped string
142      */
143     public static String unescape(String text, EncodingCharacters encChars) {
144 
145         // If the escape char isn't found, we don't need to look for escape sequences
146         char escapeChar = encChars.getEscapeCharacter();
147         boolean foundEscapeChar = false;
148         for (int i = 0; i < text.length(); i++) {
149             if (text.charAt(i) == escapeChar) {
150                 foundEscapeChar = true;
151                 break;
152             }
153         }
154         if (!foundEscapeChar) {
155             return text;
156         }
157 
158         int textLength = text.length();
159         StringBuilder result = new StringBuilder(textLength + 20);
160         EncLookup esc = getEscapeSequences(encChars);
161         char escape = esc.characters[4];
162         int encodingsCount = esc.characters.length;
163         int i = 0;
164         while (i < textLength) {
165             char c = text.charAt(i);
166             if (c != escape) {
167                 result.append(c);
168                 i++;
169             } else {
170                 boolean foundEncoding = false;
171 
172 				// Test against the standard encodings
173 				for (int j = 0; j < encodingsCount; j++) {
174                     String encoding = esc.encodings[j];
175 					int encodingLength = encoding.length();
176 					if ((i + encodingLength <= textLength) && text.substring(i, i + encodingLength)
177                             .equals(encoding)) {
178                         result.append(esc.characters[j]);
179                         i += encodingLength;
180                         foundEncoding = true;
181                         break;
182                     }
183                 }
184 
185                 if (!foundEncoding) {
186 					
187 					// If we haven't found this, there is one more option. Escape sequences of /.XXXXX/ are
188 					// formatting codes. They should be left intact
189 					if (i + 1 < textLength) {
190 						char nextChar = text.charAt(i + 1);
191 						switch (nextChar) {
192 							case '.':
193 							case 'C':
194 							case 'M':
195 							case 'X':
196 							case 'Z':
197 							{
198 								int closingEscape = text.indexOf(escape, i + 1);
199 								if (closingEscape > 0) {
200 									String substring = text.substring(i, closingEscape + 1);
201 									result.append(substring);
202 									i += substring.length();
203 								} else {
204 									i++;
205 								}
206 								break;
207 							}
208 							case 'H':
209 							case 'N':
210 							{
211 								int closingEscape = text.indexOf(escape, i + 1);
212 								if (closingEscape == i + 2) {
213 									String substring = text.substring(i, closingEscape + 1);
214 									result.append(substring);
215 									i += substring.length();
216 								} else {
217 									i++;
218 								}
219 								break;
220 							}
221 							default:
222 							{
223 								i++;
224 							}
225 						}
226 						
227 					} else {
228 						i++;
229 					}
230                 }
231 
232 
233             }
234         }
235         return result.toString();
236     }
237 
238     /**
239      * Returns a HashTable with escape sequences as keys, and corresponding
240      * Strings as values.
241      */
242     private static EncLookup getEscapeSequences(EncodingCharacters encChars) {
243         EncLookup escapeSequences = variousEncChars.get(encChars);
244         if (escapeSequences == null) {
245             // this means we haven't got the sequences for these encoding
246             // characters yet - let's make them
247             escapeSequences = new EncLookup(encChars);
248             variousEncChars.put(encChars, escapeSequences);
249         }
250         return escapeSequences;
251     }
252 
253 
254 
255 
256     /**
257      * A performance-optimized replacement for using when
258      * mapping from HL7 special characters to their respective
259      * encodings
260      *
261      * @author Christian Ohr
262      */
263     private static class EncLookup {
264 
265         char[] characters = new char[6];
266         String[] encodings = new String[6];
267 
268         EncLookup(EncodingCharacters ec) {
269             characters[0] = ec.getFieldSeparator();
270             characters[1] = ec.getComponentSeparator();
271             characters[2] = ec.getSubcomponentSeparator();
272             characters[3] = ec.getRepetitionSeparator();
273             characters[4] = ec.getEscapeCharacter();
274             characters[5] = '\r';
275             char[] codes = {'F', 'S', 'T', 'R', 'E'};
276             for (int i = 0; i < codes.length; i++) {
277                 StringBuilder seq = new StringBuilder();
278                 seq.append(ec.getEscapeCharacter());
279                 seq.append(codes[i]);
280                 seq.append(ec.getEscapeCharacter());
281                 encodings[i] = seq.toString();
282             }
283             encodings[5] = "\\X000d\\";
284         }
285     }
286 }