1 /**
2 * The contents of this file are subject to the Mozilla Public License Version 1.1
3 * (the "License"); you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
5 * Software distributed under the License is distributed on an "AS IS" basis,
6 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
7 * specific language governing rights and limitations under the License.
8 *
9 * The Original Code is "MessageQuery.java". Description:
10 * "Queries messages in an SQL-like style. "
11 *
12 * The Initial Developer of the Original Code is University Health Network. Copyright (C)
13 * 2005. All Rights Reserved.
14 *
15 * Contributor(s): ______________________________________.
16 *
17 * Alternatively, the contents of this file may be used under the terms of the
18 * GNU General Public License (the “GPL”), in which case the provisions of the GPL are
19 * applicable instead of those above. If you wish to allow use of your version of this
20 * file only under the terms of the GPL and not to allow others to use your version
21 * of this file under the MPL, indicate your decision by deleting the provisions above
22 * and replace them with the notice and other provisions required by the GPL License.
23 * If you do not delete the provisions above, a recipient may use your version of
24 * this file under either the MPL or the GPL.
25 *
26 */
27 package ca.uhn.hl7v2.util;
28
29 import java.util.ArrayList;
30 import java.util.HashMap;
31 import java.util.Map;
32 import java.util.Properties;
33 import java.util.StringTokenizer;
34 import java.util.regex.Matcher;
35 import java.util.regex.Pattern;
36
37 import ca.uhn.hl7v2.HL7Exception;
38 import ca.uhn.hl7v2.model.Message;
39
40 /**
41 * Queries messages in an SQL-like style. We get repeated row-like
42 * structures by looping over repetitions of groups, segments, or fields.
43 *
44 * This is a very advanced class ... maybe too advanced even for you. If you
45 * find it confusing, please note that there are simpler ways to get data
46 * from a message (like calling its getters or using Terser).
47 *
48 * LOOPING:
49 * You specify the loop points as part of the query. For example you could
50 * specify loop point x like this: <code>x = /.MSH-18(*)</code>. The *
51 * is replaced by numbers 0, 1, 2, etc. as you loop through the results,
52 * so this example would loop through repetitions of MSH-18. If
53 * there are multiple loop points, the loops are nested so that each possible
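 * combination is returned. Looping stops when none of the fields under a
 * loop point are valued. To keep looping past gaps, put a number after the
 * asterisk: <code>/.OBX(*2)</code> continues through up to two consecutive
 * unvalued repetitions before stopping. The name of the loop point ('x' in
 * the example above) is arbitrary.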
57 *
58 * SELECTING FIELDS:
59 * The syntax is similar to SQL, except that Terser paths are used in place
60 * of table.field. You can use the "as" keyword to give a field a name, like
61 * this: <code>select /.MSH-7 as msg_date</code>. If your field is under
62 * a loop point, replace the path up to the loop point with a loop point
63 * reference, like this: <code>select {foo}-1 loop foo = /.PID-3(*)</code>
64 *
65 * SELECTING ROWS:
66 * A "row" is a combination of all selected fields at one iteration. You
67 * can filter which rows are returned using a where clause similar to that
68 * in SQL. Use exact values or regular expressions, for example:
69 * <code>where {1} like '.*blood.*'</code> or <code>where {1}/PID-3-1 = '111'</code>
70 * Multiple filters can be separated with commas (which mean 'and'). Future
71 * versions may support 'or', negation, brackets, etc., but this version doesn't.
72 *
73 * FULL EXAMPLE:
 * select {pat-id}-1 as id loop pat-id = /.PID-3(*) where {pat-id}-2 = 'mrn'
75 *
76 * SUBTLETIES OF LOOPING:
77 * A loop point can be under another loop point. For example consider the message:
78 *
79 * MSH|etc.|etc.
80 * Z01|one~two|a
81 * Z01|three~four|b
82 *
83 * The query, "select {a}-2, {b} loop a = /Z01(*), b = {a}-1(*)" would return:
84 * a one
85 * a two
86 * b three
87 * b four
88 *
89 * While the query "select {a}-2, {b} loop a = /Z01(*), b = /Z01(1)-1(*)" would return:
90 * a one
91 * a two
92 * b one
93 * b two
94 *
95 * In the first case, one loop point refers to another. In the second case the loops
96 * are treated as independent, just as if they referred to different branches of the
97 * message.
98 *
99 * TODO: could support distinct easily by keeping record of rows and comparing each
100 * one to previous rows
101 *
102 * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
103 * @version $Revision: 1.3 $ updated on $Date: 2005/02/22 16:06:36 $ by $Author: bryan_tripp $
104 */
105 public class MessageQuery {
106
107 /**
108 * @param theMessage an HL7 message from which data are to be queried
109 * @param theQuery the query (see class docs for syntax)
110 * @return data from the message that are selected by the query
111 */
112 public static Result query(Message theMessage, String theQuery) {
113 Properties clauses = getClauses(theQuery);
114
115
116 StringTokenizer select = new StringTokenizer(clauses.getProperty("select"), ", ", false);
117 ArrayList fieldPaths = new ArrayList(10);
118 HashMap names = new HashMap(10);
119 while (select.hasMoreTokens()) {
120 String token = select.nextToken();
121 if (token.equals("as")) {
122 if (!select.hasMoreTokens()) {
123 throw new IllegalArgumentException("Keyword 'as' must be followed by a field label");
124 }
125 names.put(select.nextToken(), new Integer(fieldPaths.size()-1));
126 } else {
127 fieldPaths.add(token);
128 }
129 }
130
131
132 StringTokenizer loop = new StringTokenizer(clauses.getProperty("loop", ""), ",", false);
133 ArrayList loopPoints = new ArrayList(10);
134 HashMap loopPointNames = new HashMap(10);
135 while (loop.hasMoreTokens()) {
136 String pointDecl = loop.nextToken();
137 StringTokenizer tok = new StringTokenizer(pointDecl, "=", false);
138 String name = tok.nextToken().trim();
139 String path = tok.nextToken().trim();
140 loopPoints.add(path);
141 loopPointNames.put(name, new Integer(loopPoints.size()-1));
142 }
143
144
145
146
147 StringTokenizer where = new StringTokenizer(clauses.getProperty("where", ""), ",", false);
148 ArrayList filters = new ArrayList();
149 while (where.hasMoreTokens()) {
150 filters.add(where.nextToken());
151 }
152 String[] filterPaths = new String[filters.size()];
153 String[] filterPatterns = new String[filters.size()];
154 boolean[] exactFlags = new boolean[filters.size()];
155
156 for (int i = 0; i < filters.size(); i++) {
157 exactFlags[i] = true;
158 String filter = (String) filters.get(i);
159 String[] parts = splitFromEnd(filter, "=");
160 if (parts[1] != null) {
161 parts[1] = parts[1].substring(1);
162 } else {
163 exactFlags[i] = false;
164 parts = splitFromEnd(filter, "like");
165 parts[1] = parts[1].substring(4);
166 }
167 filterPaths[i] = parts[0].trim();
168 parts[1] = parts[1].trim();
169 filterPatterns[i] = parts[1].substring(1, parts[1].length()-1);
170 }
171
172 return new ResultImpl(theMessage,
173 (String[]) loopPoints.toArray(new String[0]),
174 loopPointNames,
175 (String[]) fieldPaths.toArray(new String[0]),
176 names,
177 filterPaths,
178 filterPatterns,
179 exactFlags);
180 }
181
182
183 private static Properties getClauses(String theQuery) {
184 Properties clauses = new Properties();
185
186 String[] split = splitFromEnd(theQuery, "where ");
187 setClause(clauses, "where", split[1]);
188
189 split = splitFromEnd(split[0], "loop ");
190 setClause(clauses, "loop", split[1]);
191 setClause(clauses, "select", split[0]);
192
193 if (clauses.getProperty("where", "").indexOf("loop ") >= 0) {
194 throw new IllegalArgumentException("The loop clause must precede the where clause");
195 }
196 if (clauses.getProperty("select") == null) {
197 throw new IllegalArgumentException("The query must begin with a select clause");
198 }
199 return clauses;
200 }
201
202 private static void setClause(Properties theClauses, String theName, String theClause) {
203 if (theClause != null) {
204 theClauses.setProperty(theName, theClause.substring(theName.length()).trim());
205 }
206 }
207
208 private static String[] splitFromEnd(String theString, String theMarker) {
209 String[] result = new String[2];
210 int begin = theString.indexOf(theMarker);
211 if (begin >= 0) {
212 result[0] = theString.substring(0, begin);
213 result[1] = theString.substring(begin);
214 } else {
215 result[0] = theString;
216 }
217 return result;
218 }
219
220 /**
221 * A result set for a message query.
222 *
223 * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
224 * @version $Revision: 1.3 $ updated on $Date: 2005/02/22 16:06:36 $ by $Author: bryan_tripp $
225 */
226 public static interface Result {
227
228 /**
229 * @param theFieldNumber numbered from zero in the order they are specified in the
230 * query
231 * @return the corresponding value in the current row
232 */
233 public String get(int theFieldNumber);
234
235 /**
236 * @param theFieldName a field name as specified in the query with the keyword "as"
237 * @return the corresponding value in the current row
238 */
239 public String get(String theFieldName);
240
241 /**
242 * @return a list of named fields as defined with 'as' in the query
243 */
244 public String[] getNamedFields();
245
246 /**
247 * Advances to the next "row" of data if one is available.
248 * @return true if another row is available
249 * @throws HL7Exception
250 */
251 public boolean next() throws HL7Exception;
252
253 }
254
255 private static class ResultImpl implements Result {
256
// Reads values out of the queried message by path.
private Terser myTerser;
// Values of the selected fields for the current row, in select-clause order.
private String[] myValues;
// Loop point paths as declared in the query; may contain a '*' placeholder
// and a {name} reference to an earlier loop point.
private String[] myLoopPoints;
// Loop point name -> Integer index into myLoopPoints.
private Map myLoopPointNames;
// Selected field paths as written in the query (loop references unexpanded).
private String[] myFieldPaths;
// 'as' label -> Integer index into myFieldPaths / myValues.
private Map myFieldNames;
// Current repetition index of each loop point; substituted for '*' in its path.
private int[] myIndices;
// Per loop point: how many rows in a row had no valued field under that loop point.
private int[] myNumEmpty;
// Per loop point: how many such rows are tolerated before the loop is
// considered exhausted (the digits following '*', or 0 if none).
private int[] myMaxNumEmpty;
// True only for a query without loop points that has not yet been advanced.
private boolean myNonLoopingQuery = false;
// Paths named in the where clause (loop references unexpanded).
private String[] myWherePaths;
// Values found at myWherePaths for the current row.
private String[] myWhereValues;
// The value or regular expression each where path is compared against.
private String[] myWherePatterns;
// Per where filter: true for an exact ('=') comparison, false for a regex ('like') match.
private boolean[] myExactMatchFlags;
271
272 public ResultImpl(Message theMessage,
273 String[] theLoopPoints,
274 Map theLoopPointNames,
275 String[] theFieldPaths,
276 Map theFieldNames,
277 String[] theWherePaths,
278 String[] theWherePatterns,
279 boolean[] theExactMatchFlags) {
280
281 myTerser = new Terser(theMessage);
282 myLoopPoints = theLoopPoints;
283 myIndices = new int[theLoopPoints.length];
284 myNumEmpty = new int[theLoopPoints.length];
285 myMaxNumEmpty = getMaxNumEmpty(theLoopPoints);
286 myLoopPointNames = theLoopPointNames;
287 myFieldPaths = theFieldPaths;
288 myValues = new String[theFieldPaths.length];
289 myFieldNames = theFieldNames;
290 myWherePaths = theWherePaths;
291 myWherePatterns = theWherePatterns;
292 myExactMatchFlags = theExactMatchFlags;
293
294 if (theLoopPoints.length == 0) {
295 myNonLoopingQuery = true;
296 } else {
297 myIndices[myIndices.length - 1] = -1;
298 }
299
300 }
301
302
303
304 private int[] getMaxNumEmpty(String[] theLoopPoints) {
305 int[] retVal = new int[theLoopPoints.length];
306 for (int i = 0; i < theLoopPoints.length; i++) {
307 retVal[i] = getMaxNumEmpty(theLoopPoints[i]);
308 }
309 return retVal;
310 }
311
312 private int getMaxNumEmpty(String theLoopPoint) {
313 int retVal = 0;
314
315 Matcher m = Pattern.compile("\\*(\\d+)").matcher(theLoopPoint);
316 if (m.find()) {
317 String num = m.group(1);
318 retVal = Integer.parseInt(num);
319 }
320
321 return retVal;
322 }
323
324
325
326 private boolean currentRowValued(int theLoopPoint) {
327 for (int i = 0; i < myFieldPaths.length; i++) {
328 if (referencesLoop(myFieldPaths[i], theLoopPoint)) {
329 String value = (String) myValues[i];
330 if (value != null && value.length() > 0) {
331 return true;
332 }
333 }
334 }
335 return false;
336 }
337
338
339 private boolean currentRowMatchesFilter() {
340 for (int i = 0; i < myWhereValues.length; i++) {
341 if (myExactMatchFlags[i]) {
342 if (!myWherePatterns[i].equals(myWhereValues[i])) {
343 return false;
344 }
345 } else {
346 if (!Pattern.matches(myWherePatterns[i], myWhereValues[i])) {
347 return false;
348 }
349 }
350 }
351 return true;
352 }
353
354
355
356 private boolean referencesLoop(String theFieldPath, int theLoopPoint) {
357 String path = theFieldPath;
358 int lp;
359 while ((lp = getLoopPointReference(path)) >= 0) {
360 if (lp == theLoopPoint) {
361 return true;
362 } else {
363 path = myLoopPoints[lp];
364 }
365 }
366 return false;
367 }
368
369
370
371 private String[] getCurrentValues(String[] thePaths) throws HL7Exception {
372 String[] paths = composePaths(thePaths);
373 String[] values = new String[paths.length];
374 for (int i = 0; i < paths.length; i++) {
375 values[i] = myTerser.get(paths[i]);
376 if (values[i] == null) {
377 values[i] = "";
378 }
379 }
380 return values;
381 }
382
383
384
385 private String[] composePaths(String[] thePaths) {
386 String[] currentLoopPoints = composeLoopPoints();
387 String[] result = new String[thePaths.length];
388 for (int i = 0; i < thePaths.length; i++) {
389 result[i] = thePaths[i];
390 int ref = getLoopPointReference(thePaths[i]);
391 if (ref >= 0) {
392 result[i] = expandLoopPointReference(result[i], currentLoopPoints[ref]);
393 }
394 }
395 return result;
396 }
397
398
399
400 private String[] composeLoopPoints() {
401 String[] result = new String[myLoopPoints.length];
402 for (int i = 0; i < myLoopPoints.length; i++) {
403 result[i] = myLoopPoints[i].replaceAll("\\*\\d*", String.valueOf(myIndices[i]));
404
405 int ref = getLoopPointReference(myLoopPoints[i]);
406 if (ref >= i) {
407 throw new IllegalStateException("Loop point must be defined after the " +
408 "one it references: " + myLoopPoints[i]);
409 } else if (ref >= 0) {
410 result[i] = expandLoopPointReference(result[i], result[ref]);
411 }
412 }
413 return result;
414 }
415
416
417 private int getLoopPointReference(String thePath) {
418 StringTokenizer tok = new StringTokenizer(thePath, "{}", false);
419 if (thePath.indexOf('{') >= 0 && tok.hasMoreTokens()) {
420 String ref = tok.nextToken();
421 return ((Integer) myLoopPointNames.get(ref)).intValue();
422 } else {
423 return -1;
424 }
425 }
426
427 private String expandLoopPointReference(String thePath, String theLoopPoint) {
428 return thePath.replaceAll("\\{.*\\}", theLoopPoint);
429 }
430
431 /**
432 * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(int)
433 */
434 public String get(int theFieldNumber) {
435 if (theFieldNumber < 0 || theFieldNumber >= myValues.length) {
436 throw new IllegalArgumentException("Field number must be between 0 and "
437 + (myValues.length - 1));
438 }
439 return (String) myValues[theFieldNumber];
440 }
441
442 /**
443 * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(java.lang.String)
444 */
445 public String get(String theFieldName) {
446 Integer fieldNum = (Integer) myFieldNames.get(theFieldName);
447 if (fieldNum == null) {
448 throw new IllegalArgumentException("Field name not recognized: " + theFieldName);
449 }
450 return get(fieldNum.intValue());
451 }
452
/**
 * Advances to the next row that passes the where filters, refreshing the
 * values returned by the <code>get</code> methods.
 *
 * A query without loop points has at most one row: the first call reports
 * whether that row passes the filters, and every later call returns false.
 *
 * For a looping query, the last-declared loop point is advanced first. When a
 * loop point is exhausted its index is reset to 0 and the loop point declared
 * before it is advanced instead.
 *
 * @return true if another row is available
 * @throws HL7Exception if a value cannot be read from the message
 * @see ca.uhn.hl7v2.util.MessageQuery.Result#next()
 */
public boolean next() throws HL7Exception {
    if (myNonLoopingQuery) {
        // Clear the flag so that the single row is offered only once; later calls
        // fall through to the loop below, which has nothing to iterate over.
        myNonLoopingQuery = false;
        myValues = getCurrentValues(myFieldPaths);
        myWhereValues = getCurrentValues(myWherePaths);
        return currentRowMatchesFilter();
    }

    boolean hasNext = false;
    // Work outward from the innermost (last-declared) loop point.
    findNext : for (int i = myIndices.length - 1; i >= 0; i--) {
        boolean gotMatch = false;
        // Step this loop point until a row passes the filters, or until more
        // consecutive unvalued rows have been seen than this loop point tolerates.
        while (!gotMatch && myNumEmpty[i] <= myMaxNumEmpty[i]) {
            myIndices[i]++;
            myValues = getCurrentValues(myFieldPaths);
            myWhereValues = getCurrentValues(myWherePaths);

            // Count consecutive unvalued rows; any valued row restarts the count.
            if (!currentRowValued(i)) {
                myNumEmpty[i]++;
            } else {
                myNumEmpty[i] = 0;
            }
            if (currentRowMatchesFilter()) {
                gotMatch = true;
            }
        }

        // Still within tolerance means the loop above stopped on a matching row.
        hasNext = myNumEmpty[i] <= myMaxNumEmpty[i];
        if (hasNext) {
            break findNext;
        }

        // This loop point is exhausted: rewind it and let the next outer loop
        // point advance on the following iteration.
        myIndices[i] = 0;
        myNumEmpty[i] = 0;
    }
    return hasNext;
}
500
501 /**
502 * @see ca.uhn.hl7v2.util.MessageQuery.Result#getNamedFields()
503 */
504 public String[] getNamedFields() {
505 return (String[]) myFieldNames.keySet().toArray(new String[0]);
506 }
507
508 }
509
510 }