View Javadoc

1   package ca.uhn.hl7v2.preparser;
2   
3   import java.util.ArrayList;
4   import java.util.Iterator;
5   import java.util.List;
6   import java.util.Map;
7   import java.util.Properties;
8   import java.util.SortedMap;
9   import java.util.StringTokenizer;
10  import java.util.TreeMap;
11  
12  import ca.uhn.hl7v2.parser.EncodingCharacters;
13  
14  /*
15  The point of this class (all static members, not instantiatable) is to take a
16  traditionally-encoded HL7 message and add all its contents to a Properties
17  object, via the parseMessage() method.
18  
19  The key-value pairs added to the Properties argument have keys that represent a
20  datum's location in the message.  (in the ZYX-1-2[0] style.  TODO: define
21  exactly.)  See Datum, particularly the toString() of that class.
22  Anyway, the Properties keys are those and the values are the tokens found.
23  
24  Note: we accept useless field repetition separators at the end of a 
25  field repetition sequence.  i.e. |855-4545~555-3792~~~| , and interpret this
26  as defining repetitions 0 and 1.  This might not be allowed.  (HL7 2.3.1
27  section 2.10 explicitly allows this behaviour for fields / components /
28  subcomponents, but the allowance is notably absent for repetitions.  TODO:
29  nail down.)  We allow it anyway.
30  
31  Also, we accept things like |855-4545~~555-3792|, and interpret it as defining
32  repetitions 0 and 2.  The spec would seem to disallow this too, but there's no
33  harm.  :D  
34  */
35  public class ER7 {
36  	
37  	private ER7() {}
38  
39  	/** characters that delimit segments.  for use with StringTokenizer.
40  	We are forgiving: HL7 2.3.1 section 2.7 says that carriage return ('\r') is
41  	the only segment delimiter.  TODO: check other versions. */ 
42  	static final String segmentSeparators = "\r\n\f";
43  
44  	/** Parses message and dumps contents to props, with keys in the 
45  	ZYX[a]-b[c]-d-e style.
46  	*/
47  	public static boolean parseMessage(/*out*/ Properties props, 
48  		/*in*/ List<DatumPath> msgMask, /*in*/ String message)
49  	{
50  		boolean ok = false;
51  		if(message != null) {
52  			if(props == null)
53  				props = new Properties();
54  
55  			StringTokenizer messageTokenizer 
56  				= new StringTokenizer(message, segmentSeparators);
57  			if(messageTokenizer.hasMoreTokens()) {
58  				String firstSegment = messageTokenizer.nextToken();
59  				EncodingCharacters encodingChars = new EncodingCharacters('0', "0000");
60  				if(parseMSHSegmentWhole(props, msgMask, encodingChars, firstSegment)) {
61  					ok = true;
62  					SortedMap<String, Integer> segmentId2nextRepIdx = new TreeMap<String, Integer>();
63  					segmentId2nextRepIdx.put(new String("MSH"), 1); 
64  						// in case we find another MSH segment, heh.
65  					while(messageTokenizer.hasMoreTokens()) {
66  						parseSegmentWhole(props, segmentId2nextRepIdx, 
67  							msgMask, encodingChars, messageTokenizer.nextToken());
68  					}
69  				}
70  			}
71  		}
72  		return ok;
73  	}
74  	
75  	/** given segment, starting with "MSH", then encoding characters, etc...
76  	put MSH[0]-1[0]-1-1 (== MSH-1) and MSH[0]-2[0]-1-1 (== MSH-2) into props, if found,
77  	plus everything else found in 'segment' */
78  	protected static boolean parseMSHSegmentWhole(/*out*/ Properties props, 
79  		/*in*/ List<DatumPath> msgMask, /*in*/ EncodingCharacters encodingChars, 
80  		/*in*/ String segment) 
81  	{
82  		boolean ret = false;
83  		try {
84  			ER7SegmentHandler handler = new ER7SegmentHandler();
85  			handler.m_props = props;
86  			handler.m_encodingChars = encodingChars;
87  			handler.m_segmentId = "MSH";
88  			handler.m_segmentRepIdx = 0;
89  			if(msgMask != null)
90  				handler.m_msgMask = msgMask;
91  			else {
92  				handler.m_msgMask = new ArrayList<DatumPath>();
93  				handler.m_msgMask.add(new DatumPath()); // everything will pass this
94  					// (every DatumPath startsWith the zero-length DatumPath)
95  			}
96  
97  			encodingChars.setFieldSeparator(segment.charAt(3));
98  			List<Integer> nodeKey = new ArrayList<Integer>();
99  			nodeKey.add(new Integer(0));
100 			handler.putDatum(nodeKey, String.valueOf(encodingChars.getFieldSeparator()));
101 			encodingChars.setComponentSeparator(segment.charAt(4));
102 			encodingChars.setRepetitionSeparator(segment.charAt(5));
103 			encodingChars.setEscapeCharacter(segment.charAt(6));
104 			encodingChars.setSubcomponentSeparator(segment.charAt(7));
105 			nodeKey.set(0, new Integer(1));
106 			handler.putDatum(nodeKey, encodingChars.toString());
107 
108 			if(segment.charAt(8) == encodingChars.getFieldSeparator()) {	
109 				ret = true; 
110 				// now -- we recurse 
111 				// through fields / field-repetitions / components / subcomponents.
112 				nodeKey.clear();
113 				nodeKey.add(new Integer(2));
114 				parseSegmentGuts(handler, segment.substring(9), nodeKey);
115 			}
116 		}
117 		catch(IndexOutOfBoundsException e) {}
118 		catch(NullPointerException e) {}
119 
120 		return ret;
121 	}
122 
123 	/** pass in a whole segment (of type other than MSH), including message type
124 	at the start, according to encodingChars, and we'll parse the contents and
125 	put them in props. */
126 	protected static void parseSegmentWhole(/*out*/ Properties props, 
127 		/*in/out*/ Map<String, Integer> segmentId2nextRepIdx, 
128 		/*in*/ List<DatumPath> msgMask, /*in*/ EncodingCharacters encodingChars, 
129 		/*in*/ String segment)
130 	{
131 		try {
132 			String segmentId = segment.substring(0, 3);
133 
134 			int currentSegmentRepIdx = 0;
135 			if(segmentId2nextRepIdx.containsKey(segmentId))
136 				currentSegmentRepIdx = ((Integer)segmentId2nextRepIdx.get(segmentId)).intValue();
137 			else
138 				currentSegmentRepIdx = 0;
139 			segmentId2nextRepIdx.put(segmentId, new Integer(currentSegmentRepIdx+1));
140 
141 			// will only bother to parse this segment if any of it's contents will 
142 			// be dumped to props.
143 			boolean parseThisSegment = false;
144 			DatumPath segmentIdAsDatumPath = new DatumPath().add(segmentId);
145 			for(Iterator<DatumPath> maskIt = msgMask.iterator(); !parseThisSegment && maskIt.hasNext(); ) 
146 				parseThisSegment = segmentIdAsDatumPath.startsWith(maskIt.next());
147 			for(Iterator<DatumPath> maskIt = msgMask.iterator(); !parseThisSegment && maskIt.hasNext(); ) 
148 				parseThisSegment = maskIt.next().startsWith(segmentIdAsDatumPath);
149 
150 			if(parseThisSegment && (segment.charAt(3) == encodingChars.getFieldSeparator())) {
151 				ER7SegmentHandler handler = new ER7SegmentHandler();
152 				handler.m_props = props;
153 				handler.m_encodingChars = encodingChars;
154 				handler.m_segmentId = segmentId;
155 				handler.m_msgMask = msgMask;
156 				handler.m_segmentRepIdx = currentSegmentRepIdx;
157 
158 				List<Integer> nodeKey = new ArrayList<Integer>();
159 				nodeKey.add(new Integer(0));
160 				parseSegmentGuts(handler, segment.substring(4), nodeKey);
161 			}
162 		}
163 		catch(NullPointerException e) {}
164 		catch(IndexOutOfBoundsException e) {}
165 	}
166 
167 	static protected interface Handler
168 	{
169 		public int specDepth();
170 		public char delim(int level);
171 
172 		public void putDatum(List<Integer> nodeKey, String value);
173 	}
174 
175 	static protected class ER7SegmentHandler implements Handler
176 	{
177 		Properties m_props;
178 
179 		EncodingCharacters m_encodingChars;
180 
181 		String m_segmentId;
182 		int m_segmentRepIdx;
183 
184 		List<DatumPath> m_msgMask;
185 
186 		public int specDepth() {return 4;}
187 
188 		public char delim(int level)
189 		{
190 			if(level == 0)
191 				return m_encodingChars.getFieldSeparator();
192 			else if(level == 1)
193 				return m_encodingChars.getRepetitionSeparator();
194 			else if(level == 2)
195 				return m_encodingChars.getComponentSeparator();
196 			else if(level == 3)
197 				return m_encodingChars.getSubcomponentSeparator();
198 			else
199 				throw new java.lang.Error();
200 		}
201 
202 		public void putDatum(List<Integer> valNodeKey, String value)
203 		{
204 			// make a DatumPath from valNodeKey and info in this: 
205 			DatumPath valDatumPath = new DatumPath();
206 			valDatumPath.add(m_segmentId).add(m_segmentRepIdx);
207 			for(int i=0; i<valNodeKey.size(); ++i) {
208 				// valNodeKey: everything counts from 0 -- not so with DatumPath ... sigh. 
209 				int itval = ((Integer)valNodeKey.get(i)).intValue();
210 				valDatumPath.add(new Integer(i == 1 ? itval : itval+1));
211 			}
212 
213 			// see if valDatumPath passes m_msgMask: 
214 			boolean valDatumPathPassesMask = false;
215 			for(Iterator<DatumPath> maskIt = m_msgMask.iterator(); 
216 				!valDatumPathPassesMask && maskIt.hasNext(); )
217 			{
218 				valDatumPathPassesMask = valDatumPath.startsWith(maskIt.next());
219 			}
220 
221 			if(valDatumPathPassesMask)
222 				m_props.setProperty(valDatumPath.toString(), value);
223 		}
224 	}
225 
226 	/** recursively tokenize "guts" (a segment, or part of one) into tokens, 
227 	according to separators (aka delimiters) which are different at each level
228 	of recursion, and to a recursive depth which is discovered through "handler"
229 	via handler.delim(int) and handler.specDepth()  As tokens are found, they
230 	are reported to handler via handler.putDatum(), which presumably stashes them
231 	away somewhere.  We tell the handler about the location in the message via
232 	putDatum()'s key argument, which is a List of Integers representing the 
233 	position in the parse tree (size() == depth of recursion).
234 
235 	TODO: say more.
236 	*/
237 	protected static void parseSegmentGuts(/*in/out*/ Handler handler,  
238 		/*in*/ String guts, /*in*/List<Integer> nodeKey)
239 	{
240 		char thisDepthsDelim = handler.delim(nodeKey.size()-1);
241 		//nodeKey.add(new Integer(0)); // will change nodeKey back before function exits
242 
243 		StringTokenizer gutsTokenizer 
244 			= new StringTokenizer(guts, String.valueOf(thisDepthsDelim), true);
245 		while(gutsTokenizer.hasMoreTokens()) {
246 			String gutsToken = gutsTokenizer.nextToken();
247 
248 			if(gutsToken.charAt(0) == thisDepthsDelim) {
249 				// gutsToken is all delims -- skipping over as many fields or
250 				// components or whatevers as there are characters in the token: 
251 				int oldvalue = ((Integer)nodeKey.get(nodeKey.size()-1)).intValue();
252 				nodeKey.set(nodeKey.size()-1, new Integer(oldvalue + gutsToken.length()));
253 			}
254 			else {
255 				if(nodeKey.size() < handler.specDepth()) {
256 					nodeKey.add(new Integer(0));
257 					parseSegmentGuts(handler, gutsToken, nodeKey);
258 					nodeKey.remove(nodeKey.size()-1);
259 				}
260 				else 
261 					handler.putDatum(nodeKey, gutsToken);
262 			}
263 		}
264 		//nodeKey.setSize(nodeKey.size()-1); // undoing add done at top of this func
265 	}
266 
267 	public static void main(String args[])
268 	{
269 		if(args.length >= 1) {
270 			//String message = "MSH|^~\\&||||foo|foo|foo";
271 			System.out.println(args[0]);
272 
273 			Properties props = new Properties();
274 
275 			List<DatumPath> msgMask = new ArrayList<DatumPath>();
276 			msgMask.add(new DatumPath());
277 
278 			System.err.println("ER7.parseMessage returned " + parseMessage(props, msgMask, args[0]));
279 			props.list(System.out);
280 		}
281 	}
282 	
283 }
284