View Javadoc

1   /**
2    * The contents of this file are subject to the Mozilla Public License Version 1.1
3    * (the "License"); you may not use this file except in compliance with the License.
4    * You may obtain a copy of the License at http://www.mozilla.org/MPL/
5    * Software distributed under the License is distributed on an "AS IS" basis,
6    * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
7    * specific language governing rights and limitations under the License.
8    *
9    * The Original Code is "MessageQuery.java".  Description:
10   * "Queries messages in an SQL-like style.  "
11   *
12   * The Initial Developer of the Original Code is University Health Network. Copyright (C)
13   * 2005.  All Rights Reserved.
14   *
15   * Contributor(s): ______________________________________.
16   *
17   * Alternatively, the contents of this file may be used under the terms of the
18   * GNU General Public License (the  “GPL”), in which case the provisions of the GPL are
19   * applicable instead of those above.  If you wish to allow use of your version of this
20   * file only under the terms of the GPL and not to allow others to use your version
21   * of this file under the MPL, indicate your decision by deleting  the provisions above
22   * and replace  them with the notice and other provisions required by the GPL License.
23   * If you do not delete the provisions above, a recipient may use your version of
24   * this file under either the MPL or the GPL.
25   *
26   */
27  package ca.uhn.hl7v2.util;
28  
29  import java.util.ArrayList;
30  import java.util.HashMap;
31  import java.util.Map;
32  import java.util.Properties;
33  import java.util.StringTokenizer;
34  import java.util.regex.Matcher;
35  import java.util.regex.Pattern;
36  
37  import ca.uhn.hl7v2.HL7Exception;
38  import ca.uhn.hl7v2.model.Message;
39  
40  /**
41   * Queries messages in an SQL-like style.  We get repeated row-like 
42   * structures by looping over repetitions of groups, segments, or fields. 
43   * 
44   * This is a very advanced class ... maybe too advanced even for you.  If you 
45   * find it confusing, please note that there are simpler ways to get data 
46   * from a message (like calling its getters or using Terser).   
47   * 
48   * LOOPING:    
49   * You specify the loop points as part of the query.  For example you could 
50   * specify loop point x like this: <code>x = /.MSH-18(*)</code>.  The * 
51   * is replaced by numbers 0, 1, 2, etc. as you loop through the results, 
52   * so this example would loop through repetitions of MSH-18.  If 
53   * there are multiple loop points, the loops are nested so that each possible 
54   * combination is returned.  Looping stops when none of the fields under a 
55   * loop point are valued.  The name of the loop point ('x' in the example 
56   * above) is arbitrary.        
57   * 
58   * SELECTING FIELDS: 
59   * The syntax is similar to SQL, except that Terser paths are used in place
60   * of table.field.  You can use the "as" keyword to give a field a name, like 
61   * this: <code>select /.MSH-7 as msg_date</code>.  If your field is under 
62   * a loop point, replace the path up to the loop point with a loop point 
63   * reference, like this: <code>select {foo}-1 loop foo = /.PID-3(*)</code>
64   * 
65   * SELECTING ROWS:
66   * A "row" is a combination of all selected fields at one iteration.  You 
67   * can filter which rows are returned using a where clause similar to that
68   * in SQL.  Use exact values or regular expressions, for example: 
69   * <code>where {1} like '.*blood.*'</code> or <code>where {1}/PID-3-1 = '111'</code>
70   * Multiple filters can be separated with commas (which mean 'and').  Future
71   * versions may support 'or', negation, brackets, etc., but this version doesn't.  
72   * 
73   * FULL EXAMPLE: 
74   * select {pat-id}-1 as id loop pat-id = /.PID-3(*) where {pat-id}-2 = 'mrn'
75   * 
76   * SUBTLETIES OF LOOPING: 
77   * A loop point can be under another loop point.  For example consider the message:   
78   * 
79   * MSH|etc.|etc.
80   * Z01|one~two|a
81   * Z01|three~four|b
82   * 
83   * The query, "select {a}-2, {b} loop a = /Z01(*), b = {a}-1(*)" would return: 
84   * a one
85   * a two 
86   * b three
87   * b four
88   * 
89   * While the query "select {a}-2, {b} loop a = /Z01(*), b = /Z01(1)-1(*)" would return:
90   * a one
91   * a two 
92   * b one
93   * b two
94   * 
95   * In the first case, one loop point refers to another.  In the second case the loops 
96   * are treated as independent, just as if they referred to different branches of the 
97   * message.  
98   * 
99   * TODO: could support distinct easily by keeping record of rows and comparing each 
100  * one to previous rows 
101  * 
102  * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
103  * @version $Revision: 1.3 $ updated on $Date: 2005/02/22 16:06:36 $ by $Author: bryan_tripp $
104  */
105 public class MessageQuery {
106 
107     /**
108      * @param theMessage an HL7 message from which data are to be queried 
109      * @param theQuery the query (see class docs for syntax)
110      * @return data from the message that are selected by the query 
111      */
112     public static Result query(Message theMessage, String theQuery) {
113         Properties clauses = getClauses(theQuery);
114         
115         //parse select clause
116         StringTokenizer select = new StringTokenizer(clauses.getProperty("select"), ", ", false);
117         ArrayList fieldPaths = new ArrayList(10);
118         HashMap names = new HashMap(10);
119         while (select.hasMoreTokens()) {
120             String token = select.nextToken();
121             if (token.equals("as")) {
122                 if (!select.hasMoreTokens()) {
123                     throw new IllegalArgumentException("Keyword 'as' must be followed by a field label");
124                 }
125                 names.put(select.nextToken(), new Integer(fieldPaths.size()-1));
126             } else {
127                 fieldPaths.add(token);  
128             }
129         }
130         
131         //parse loop clause 
132         StringTokenizer loop = new StringTokenizer(clauses.getProperty("loop", ""), ",", false);
133         ArrayList loopPoints = new ArrayList(10);
134         HashMap loopPointNames = new HashMap(10);
135         while (loop.hasMoreTokens()) {
136             String pointDecl = loop.nextToken();
137             StringTokenizer tok = new StringTokenizer(pointDecl, "=", false);
138             String name = tok.nextToken().trim();
139             String path = tok.nextToken().trim();
140             loopPoints.add(path);
141             loopPointNames.put(name, new Integer(loopPoints.size()-1));
142         }
143                 
144         //parse where clause 
145         //TODO: this will do for now but it should really be evaluated like an expression 
146         //rather than a list  
147         StringTokenizer where = new StringTokenizer(clauses.getProperty("where", ""), ",", false);
148         ArrayList filters = new ArrayList();
149         while (where.hasMoreTokens()) {
150             filters.add(where.nextToken());
151         }
152         String[] filterPaths = new String[filters.size()];
153         String[] filterPatterns = new String[filters.size()];
154         boolean[] exactFlags = new boolean[filters.size()];
155         
156         for (int i = 0; i < filters.size(); i++) {
157             exactFlags[i] = true;
158             String filter = (String) filters.get(i);
159             String[] parts = splitFromEnd(filter, "=");
160             if (parts[1] != null) {                
161                 parts[1] = parts[1].substring(1);
162             } else {
163                 exactFlags[i] = false;
164                 parts = splitFromEnd(filter, "like");
165                 parts[1] = parts[1].substring(4);
166             }
167             filterPaths[i] = parts[0].trim();
168             parts[1] = parts[1].trim();
169             filterPatterns[i] = parts[1].substring(1, parts[1].length()-1);
170         }        
171         
172         return new ResultImpl(theMessage, 
173                 (String[]) loopPoints.toArray(new String[0]), 
174                 loopPointNames, 
175                 (String[]) fieldPaths.toArray(new String[0]), 
176                 names,
177                 filterPaths, 
178                 filterPatterns, 
179                 exactFlags);
180     }
181     
182     
183     private static Properties getClauses(String theQuery) {
184         Properties clauses = new Properties();
185         
186         String[] split = splitFromEnd(theQuery, "where ");
187         setClause(clauses, "where", split[1]);
188                 
189         split = splitFromEnd(split[0], "loop ");
190         setClause(clauses, "loop", split[1]);
191         setClause(clauses, "select", split[0]);
192         
193         if (clauses.getProperty("where", "").indexOf("loop ") >= 0) {
194             throw new IllegalArgumentException("The loop clause must precede the where clause");            
195         }
196         if (clauses.getProperty("select") == null) {
197             throw new IllegalArgumentException("The query must begin with a select clause");                        
198         }
199         return clauses;
200     }
201     
202     private static void setClause(Properties theClauses, String theName, String theClause) {
203         if (theClause != null) {
204             theClauses.setProperty(theName, theClause.substring(theName.length()).trim());
205         }
206     }
207     
208     private static String[] splitFromEnd(String theString, String theMarker) {
209         String[] result = new String[2];
210         int begin = theString.indexOf(theMarker);
211         if (begin >= 0) {
212             result[0] = theString.substring(0, begin);
213             result[1] = theString.substring(begin);
214         } else {
215             result[0] = theString;
216         }        
217         return result;
218     }
219     
220     /**
221      * A result set for a message query.    
222      * 
223      * @author <a href="mailto:bryan.tripp@uhn.on.ca">Bryan Tripp</a>
224      * @version $Revision: 1.3 $ updated on $Date: 2005/02/22 16:06:36 $ by $Author: bryan_tripp $
225      */
226     public static interface Result {
227         
228         /**
229          * @param theFieldNumber numbered from zero in the order they are specified in the 
230          *      query
231          * @return the corresponding value in the current row 
232          */
233         public String get(int theFieldNumber);
234         
235         /**
236          * @param theFieldName a field name as specified in the query with the keyword "as" 
237          * @return the corresponding value in the current row 
238          */
239         public String get(String theFieldName);
240         
241         /**
242          * @return a list of named fields as defined with 'as' in the query
243          */
244         public String[] getNamedFields();
245         
246         /**
247          * Advances to the next "row" of data if one is available. 
248          * @return true if another row is available  
249          * @throws HL7Exception
250          */
251         public boolean next() throws HL7Exception;
252 
253     }
254     
255     private static class ResultImpl implements Result {
256 
257         private Terser myTerser;
258         private String[] myValues;
259         private String[] myLoopPoints;
260         private Map myLoopPointNames;
261         private String[] myFieldPaths;
262         private Map myFieldNames;
263         private int[] myIndices;
264         private int[] myNumEmpty; //number of empty sub-loops since last non-empty one
265         private int[] myMaxNumEmpty;
266         private boolean myNonLoopingQuery = false;
267         private String[] myWherePaths;
268         private String[] myWhereValues;
269         private String[] myWherePatterns;
270         private boolean[] myExactMatchFlags;
271         
272         public ResultImpl(Message theMessage, 
273                 String[] theLoopPoints,  
274                 Map theLoopPointNames, 
275                 String[] theFieldPaths, 
276                 Map theFieldNames, 
277                 String[] theWherePaths, 
278                 String[] theWherePatterns, 
279                 boolean[] theExactMatchFlags) {
280             
281             myTerser = new Terser(theMessage);
282             myLoopPoints = theLoopPoints;
283             myIndices = new int[theLoopPoints.length];
284             myNumEmpty = new int[theLoopPoints.length];
285             myMaxNumEmpty = getMaxNumEmpty(theLoopPoints);
286             myLoopPointNames = theLoopPointNames;
287             myFieldPaths = theFieldPaths;
288             myValues = new String[theFieldPaths.length];
289             myFieldNames = theFieldNames;
290             myWherePaths = theWherePaths;
291             myWherePatterns = theWherePatterns;
292             myExactMatchFlags = theExactMatchFlags;
293                         
294             if (theLoopPoints.length == 0) {
295                 myNonLoopingQuery = true; //if no loops, give ourselves 1 iteration
296             } else {
297                 myIndices[myIndices.length - 1] = -1; //start before 1st iteration
298             }
299             
300         }
301         
302         //extracts max number of empty iterations for each loop point (this is communicated 
303         //as an optional integer after the *, e.g. blah(*3) ... default is 0).  
304         private int[] getMaxNumEmpty(String[] theLoopPoints) {
305             int[] retVal = new int[theLoopPoints.length];
306             for (int i = 0; i < theLoopPoints.length; i++) {
307                 retVal[i] = getMaxNumEmpty(theLoopPoints[i]);
308             }
309             return retVal;
310         }
311         
312         private int getMaxNumEmpty(String theLoopPoint) {
313             int retVal = 0; //default
314             
315             Matcher m = Pattern.compile("\\*(\\d+)").matcher(theLoopPoint);
316             if (m.find()) {
317                 String num = m.group(1);
318                 retVal = Integer.parseInt(num);
319             }
320             
321             return retVal;
322         }
323         
324         //returns true if some field under the given loop point has a value at the present 
325         //iteration
326         private boolean currentRowValued(int theLoopPoint) {
327             for (int i = 0; i < myFieldPaths.length; i++) {
328                 if (referencesLoop(myFieldPaths[i], theLoopPoint)) {
329                     String value = (String) myValues[i];
330                     if (value != null && value.length() > 0) {
331                         return true;
332                     }                    
333                 }
334             }
335             return false;
336         }
337         
338         //returns true if the current row matches the where clause filters
339         private boolean currentRowMatchesFilter() {
340             for (int i = 0; i < myWhereValues.length; i++) {
341                 if (myExactMatchFlags[i]) {
342                     if (!myWherePatterns[i].equals(myWhereValues[i])) {
343                         return false;
344                     }                    
345                 } else {
346                     if (!Pattern.matches(myWherePatterns[i], myWhereValues[i])) {
347                         return false;
348                     }
349                 }
350             }
351             return true;
352         }
353         
354         //true if the given path references the given loop point (directly 
355         //or indirectly)
356         private boolean referencesLoop(String theFieldPath, int theLoopPoint) {
357             String path = theFieldPath;
358             int lp;
359             while ((lp = getLoopPointReference(path)) >= 0) {
360                 if (lp == theLoopPoint) {
361                     return true;
362                 } else {
363                     path = myLoopPoints[lp];
364                 }                
365             }
366             return false;
367         }
368         
369         //expands a set of paths to their current loop point iterations, and gets 
370         //current values from our message
371         private String[] getCurrentValues(String[] thePaths) throws HL7Exception {
372             String[] paths = composePaths(thePaths);
373             String[] values = new String[paths.length]; 
374             for (int i = 0; i < paths.length; i++) {
375                 values[i] = myTerser.get(paths[i]);
376                 if (values[i] == null) {
377                     values[i] = "";
378                 }
379             }
380             return values;
381         }               
382         
383         //creates full Terser paths from current location, loop points, and given paths 
384         //with loop point references 
385         private String[] composePaths(String[] thePaths) {
386             String[] currentLoopPoints = composeLoopPoints();
387             String[] result = new String[thePaths.length];
388             for (int i = 0; i < thePaths.length; i++) {
389                 result[i] = thePaths[i];
390                 int ref = getLoopPointReference(thePaths[i]);
391                 if (ref >= 0) {
392                     result[i] = expandLoopPointReference(result[i], currentLoopPoints[ref]);
393                 }                
394             }
395             return result;
396         }
397         
398         //parameterizes loop points with present location (i.e. replaces * with current
399         //indices)
400         private String[] composeLoopPoints() {
401             String[] result = new String[myLoopPoints.length];
402             for (int i = 0; i < myLoopPoints.length; i++) {
403                 result[i] = myLoopPoints[i].replaceAll("\\*\\d*", String.valueOf(myIndices[i]));
404 
405                 int ref = getLoopPointReference(myLoopPoints[i]);
406                 if (ref >= i) {
407                     throw new IllegalStateException("Loop point must be defined after the " +
408                             "one it references: " + myLoopPoints[i]);
409                 } else if (ref >= 0) {
410                     result[i] = expandLoopPointReference(result[i], result[ref]);
411                 }
412             }
413             return result;
414         }
415         
416         //extracts LP# of label between first '{' and first '}', or -1 if there isn't one
417         private int getLoopPointReference(String thePath) {
418             StringTokenizer tok = new StringTokenizer(thePath, "{}", false);
419             if (thePath.indexOf('{') >= 0 && tok.hasMoreTokens()) {
420                 String ref = tok.nextToken();
421                 return ((Integer) myLoopPointNames.get(ref)).intValue();
422             } else {
423                 return -1;
424             }
425         }
426         
427         private String expandLoopPointReference(String thePath, String theLoopPoint) {
428             return thePath.replaceAll("\\{.*\\}", theLoopPoint);
429         }
430                 
431         /**
432          * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(int)
433          */
434         public String get(int theFieldNumber) {
435             if (theFieldNumber < 0 || theFieldNumber >= myValues.length) {
436                 throw new IllegalArgumentException("Field number must be between 0 and " 
437                         + (myValues.length - 1));
438             }
439             return (String) myValues[theFieldNumber];
440         }
441 
442         /**
443          * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(java.lang.String)
444          */
445         public String get(String theFieldName) {
446             Integer fieldNum = (Integer) myFieldNames.get(theFieldName);
447             if (fieldNum == null) {
448                 throw new IllegalArgumentException("Field name not recognized: " + theFieldName);
449             }
450             return get(fieldNum.intValue());
451         }
452 
453         /** 
454          * @throws HL7Exception
455          * @see ca.uhn.hl7v2.util.MessageQuery.Result#next()
456          */
457         public boolean next() throws HL7Exception {
458             if (myNonLoopingQuery) {
459                 myNonLoopingQuery = false;
460                 myValues = getCurrentValues(myFieldPaths);
461                 myWhereValues = getCurrentValues(myWherePaths);
462                 return currentRowMatchesFilter();
463             }
464             
465             boolean hasNext = false;
466             findNext : for (int i = myIndices.length - 1; i >= 0; i--) {    
467                 boolean gotMatch = false;
468                 while (!gotMatch && myNumEmpty[i] <= myMaxNumEmpty[i]) {
469                     myIndices[i]++;
470                     myValues = getCurrentValues(myFieldPaths);
471                     myWhereValues = getCurrentValues(myWherePaths);
472                     
473                     if (!currentRowValued(i)) {
474                         myNumEmpty[i]++;
475                     } else {
476                         myNumEmpty[i] = 0;
477                     }
478                     if (currentRowMatchesFilter()) {
479                         gotMatch = true;
480                     }
481                 }
482                 
483                 hasNext = myNumEmpty[i] <= myMaxNumEmpty[i];// && currentRowMatchesFilter();
484                 if (hasNext) {
485                     break findNext;
486                 }                                 
487                 
488                 myIndices[i] = 0;
489                 myNumEmpty[i] = 0;
490                 
491                 //TODO: if we aren't allowing empties in this loop, and have no value, we want to 
492                 //return the null in the super-loop.  However, we don't know which loop point, if 
493                 //any, is the super-loop.  If it was the next one we could do this ... 
494                 //if (i > 0 && myMaxNumEmpty[i] == 0 && myMaxNumEmpty[i-1] > 0 && myIndices[i-1] == 0) {
495                 //    myIndices[i-1] = -1;
496                 //} ... but it may not be, so we'll ignore this problem for now.  
497             }
498             return hasNext;
499         }
500 
501         /**
502          * @see ca.uhn.hl7v2.util.MessageQuery.Result#getNamedFields()
503          */
504         public String[] getNamedFields() {
505             return (String[]) myFieldNames.keySet().toArray(new String[0]);
506         }
507         
508     }
509 
510 }