/** * The contents of this file are subject to the Mozilla Public License Version 1.1 * (the "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at http://www.mozilla.org/MPL/ * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the * specific language governing rights and limitations under the License. * * The Original Code is "MessageQuery.java". Description: * "Queries messages in an SQL-like style. " * * The Initial Developer of the Original Code is University Health Network. Copyright (C) * 2005. All Rights Reserved. * * Contributor(s): ______________________________________. * * Alternatively, the contents of this file may be used under the terms of the * GNU General Public License (the “GPL”), in which case the provisions of the GPL are * applicable instead of those above. If you wish to allow use of your version of this * file only under the terms of the GPL and not to allow others to use your version * of this file under the MPL, indicate your decision by deleting the provisions above * and replace them with the notice and other provisions required by the GPL License. * If you do not delete the provisions above, a recipient may use your version of * this file under either the MPL or the GPL. * */ package ca.uhn.hl7v2.util; import java.util.ArrayList; import java.util.HashMap; import java.util.Map; import java.util.Properties; import java.util.StringTokenizer; import java.util.regex.Matcher; import java.util.regex.Pattern; import ca.uhn.hl7v2.HL7Exception; import ca.uhn.hl7v2.model.Message; /** * Queries messages in an SQL-like style. We get repeated row-like * structures by looping over repetitions of groups, segments, or fields. * * This is a very advanced class ... maybe too advanced even for you. If you * find it confusing, please note that there are simpler ways to get data * from a message (like calling its getters or using Terser). * * LOOPING: * You specify the loop points as part of the query. For example you could * specify loop point x like this: x = /.MSH-18(*). The * * is replaced by numbers 0, 1, 2, etc. as you loop through the results, * so this example would loop through repetitions of MSH-18. If * there are multiple loop points, the loops are nested so that each possible * combination is returned. Looping stops when none of the fields under a * loop point are valued. The name of the loop point ('x' in the example * above) is arbitrary. * * SELECTING FIELDS: * The syntax is similar to SQL, except that Terser paths are used in place * of table.field. You can use the "as" keyword to give a field a name, like * this: select /.MSH-7 as msg_date. If your field is under * a loop point, replace the path up to the loop point with a loop point * reference, like this: select {foo}-1 loop foo = /.PID-3(*) * * SELECTING ROWS: * A "row" is a combination of all selected fields at one iteration. You * can filter which rows are returned using a where clause similar to that * in SQL. Use exact values or regular expressions, for example: * where {1} like '.*blood.*' or where {1}/PID-3-1 = '111' * Multiple filters can be separated with commas (which mean 'and'). Future * versions may support 'or', negation, brackets, etc., but this version doesn't. * * FULL EXAMPLE: * select {pat-id}-1 as id loop pat-id = ./PID-3(*) where {pat-id}-2 = 'mrn' * * SUBTLETIES OF LOOPING: * A loop point can be under another loop point. For example consider the message: * * MSH|etc.|etc. * Z01|one~two|a * Z01|three~four|b * * The query, "select {a}-2, {b} loop a = /Z01(*), b = {a}-1(*)" would return: * a one * a two * b three * b four * * While the query "select {a}-2, {b} loop a = /Z01(*), b = /Z01(1)-1(*)" would return: * a one * a two * b one * b two * * In the first case, one loop point refers to another. In the second case the loops * are treated as independent, just as if they referred to different branches of the * message. * * TODO: could support distinct easily by keeping record of rows and comparing each * one to previous rows * * @author Bryan Tripp * @version $Revision: 1.1 $ updated on $Date: 2007/02/19 02:24:27 $ by $Author: jamesagnew $ */ public class MessageQuery { /** * @param theMessage an HL7 message from which data are to be queried * @param theQuery the query (see class docs for syntax) * @return data from the message that are selected by the query */ public static Result query(Message theMessage, String theQuery) { Properties clauses = getClauses(theQuery); //parse select clause StringTokenizer select = new StringTokenizer(clauses.getProperty("select"), ", ", false); ArrayList fieldPaths = new ArrayList(10); HashMap names = new HashMap(10); while (select.hasMoreTokens()) { String token = select.nextToken(); if (token.equals("as")) { if (!select.hasMoreTokens()) { throw new IllegalArgumentException("Keyword 'as' must be followed by a field label"); } names.put(select.nextToken(), new Integer(fieldPaths.size()-1)); } else { fieldPaths.add(token); } } //parse loop clause StringTokenizer loop = new StringTokenizer(clauses.getProperty("loop", ""), ",", false); ArrayList loopPoints = new ArrayList(10); HashMap loopPointNames = new HashMap(10); while (loop.hasMoreTokens()) { String pointDecl = loop.nextToken(); StringTokenizer tok = new StringTokenizer(pointDecl, "=", false); String name = tok.nextToken().trim(); String path = tok.nextToken().trim(); loopPoints.add(path); loopPointNames.put(name, new Integer(loopPoints.size()-1)); } //parse where clause //TODO: this will do for now but it should really be evaluated like an expression //rather than a list StringTokenizer where = new StringTokenizer(clauses.getProperty("where", ""), ",", false); ArrayList filters = new ArrayList(); while (where.hasMoreTokens()) { filters.add(where.nextToken()); } String[] filterPaths = new String[filters.size()]; String[] filterPatterns = new String[filters.size()]; boolean[] exactFlags = new boolean[filters.size()]; for (int i = 0; i < filters.size(); i++) { exactFlags[i] = true; String filter = (String) filters.get(i); String[] parts = splitFromEnd(filter, "="); if (parts[1] != null) { parts[1] = parts[1].substring(1); } else { exactFlags[i] = false; parts = splitFromEnd(filter, "like"); parts[1] = parts[1].substring(4); } filterPaths[i] = parts[0].trim(); parts[1] = parts[1].trim(); filterPatterns[i] = parts[1].substring(1, parts[1].length()-1); } return new ResultImpl(theMessage, (String[]) loopPoints.toArray(new String[0]), loopPointNames, (String[]) fieldPaths.toArray(new String[0]), names, filterPaths, filterPatterns, exactFlags); } private static Properties getClauses(String theQuery) { Properties clauses = new Properties(); String[] split = splitFromEnd(theQuery, "where "); setClause(clauses, "where", split[1]); split = splitFromEnd(split[0], "loop "); setClause(clauses, "loop", split[1]); setClause(clauses, "select", split[0]); if (clauses.getProperty("where", "").indexOf("loop ") >= 0) { throw new IllegalArgumentException("The loop clause must precede the where clause"); } if (clauses.getProperty("select") == null) { throw new IllegalArgumentException("The query must begin with a select clause"); } return clauses; } private static void setClause(Properties theClauses, String theName, String theClause) { if (theClause != null) { theClauses.setProperty(theName, theClause.substring(theName.length()).trim()); } } private static String[] splitFromEnd(String theString, String theMarker) { String[] result = new String[2]; int begin = theString.indexOf(theMarker); if (begin >= 0) { result[0] = theString.substring(0, begin); result[1] = theString.substring(begin); } else { result[0] = theString; } return result; } /** * A result set for a message query. * * @author Bryan Tripp * @version $Revision: 1.1 $ updated on $Date: 2007/02/19 02:24:27 $ by $Author: jamesagnew $ */ public static interface Result { /** * @param theFieldNumber numbered from zero in the order they are specified in the * query * @return the corresponding value in the current row */ public String get(int theFieldNumber); /** * @param theFieldName a field name as specified in the query with the keyword "as" * @return the corresponding value in the current row */ public String get(String theFieldName); /** * @return a list of named fields as defined with 'as' in the query */ public String[] getNamedFields(); /** * Advances to the next "row" of data if one is available. * @return true if another row is available * @throws HL7Exception */ public boolean next() throws HL7Exception; } private static class ResultImpl implements Result { private Terser myTerser; private String[] myValues; private String[] myLoopPoints; private Map myLoopPointNames; private String[] myFieldPaths; private Map myFieldNames; private int[] myIndices; private int[] myNumEmpty; //number of empty sub-loops since last non-empty one private int[] myMaxNumEmpty; private boolean myNonLoopingQuery = false; private String[] myWherePaths; private String[] myWhereValues; private String[] myWherePatterns; private boolean[] myExactMatchFlags; public ResultImpl(Message theMessage, String[] theLoopPoints, Map theLoopPointNames, String[] theFieldPaths, Map theFieldNames, String[] theWherePaths, String[] theWherePatterns, boolean[] theExactMatchFlags) { myTerser = new Terser(theMessage); myLoopPoints = theLoopPoints; myIndices = new int[theLoopPoints.length]; myNumEmpty = new int[theLoopPoints.length]; myMaxNumEmpty = getMaxNumEmpty(theLoopPoints); myLoopPointNames = theLoopPointNames; myFieldPaths = theFieldPaths; myValues = new String[theFieldPaths.length]; myFieldNames = theFieldNames; myWherePaths = theWherePaths; myWherePatterns = theWherePatterns; myExactMatchFlags = theExactMatchFlags; if (theLoopPoints.length == 0) { myNonLoopingQuery = true; //if no loops, give ourselves 1 iteration } else { myIndices[myIndices.length - 1] = -1; //start before 1st iteration } } //extracts max number of empty iterations for each loop point (this is communicated //as an optional integer after the *, e.g. blah(*3) ... default is 0). private int[] getMaxNumEmpty(String[] theLoopPoints) { int[] retVal = new int[theLoopPoints.length]; for (int i = 0; i < theLoopPoints.length; i++) { retVal[i] = getMaxNumEmpty(theLoopPoints[i]); } return retVal; } private int getMaxNumEmpty(String theLoopPoint) { int retVal = 0; //default Matcher m = Pattern.compile("\\*(\\d+)").matcher(theLoopPoint); if (m.find()) { String num = m.group(1); retVal = Integer.parseInt(num); } return retVal; } //returns true if some field under the given loop point has a value at the present //iteration private boolean currentRowValued(int theLoopPoint) { for (int i = 0; i < myFieldPaths.length; i++) { if (referencesLoop(myFieldPaths[i], theLoopPoint)) { String value = (String) myValues[i]; if (value != null && value.length() > 0) { return true; } } } return false; } //returns true if the current row matches the where clause filters private boolean currentRowMatchesFilter() { for (int i = 0; i < myWhereValues.length; i++) { if (myExactMatchFlags[i]) { if (!myWherePatterns[i].equals(myWhereValues[i])) { return false; } } else { if (!Pattern.matches(myWherePatterns[i], myWhereValues[i])) { return false; } } } return true; } //true if the given path references the given loop point (directly //or indirectly) private boolean referencesLoop(String theFieldPath, int theLoopPoint) { String path = theFieldPath; int lp; while ((lp = getLoopPointReference(path)) >= 0) { if (lp == theLoopPoint) { return true; } else { path = myLoopPoints[lp]; } } return false; } //expands a set of paths to their current loop point iterations, and gets //current values from our message private String[] getCurrentValues(String[] thePaths) throws HL7Exception { String[] paths = composePaths(thePaths); String[] values = new String[paths.length]; for (int i = 0; i < paths.length; i++) { values[i] = myTerser.get(paths[i]); if (values[i] == null) { values[i] = ""; } } return values; } //creates full Terser paths from current location, loop points, and given paths //with loop point references private String[] composePaths(String[] thePaths) { String[] currentLoopPoints = composeLoopPoints(); String[] result = new String[thePaths.length]; for (int i = 0; i < thePaths.length; i++) { result[i] = thePaths[i]; int ref = getLoopPointReference(thePaths[i]); if (ref >= 0) { result[i] = expandLoopPointReference(result[i], currentLoopPoints[ref]); } } return result; } //parameterizes loop points with present location (i.e. replaces * with current //indices) private String[] composeLoopPoints() { String[] result = new String[myLoopPoints.length]; for (int i = 0; i < myLoopPoints.length; i++) { result[i] = myLoopPoints[i].replaceAll("\\*\\d*", String.valueOf(myIndices[i])); int ref = getLoopPointReference(myLoopPoints[i]); if (ref >= i) { throw new IllegalStateException("Loop point must be defined after the " + "one it references: " + myLoopPoints[i]); } else if (ref >= 0) { result[i] = expandLoopPointReference(result[i], result[ref]); } } return result; } //extracts LP# of label between first '{' and first '}', or -1 if there isn't one private int getLoopPointReference(String thePath) { StringTokenizer tok = new StringTokenizer(thePath, "{}", false); if (thePath.indexOf('{') >= 0 && tok.hasMoreTokens()) { String ref = tok.nextToken(); return ((Integer) myLoopPointNames.get(ref)).intValue(); } else { return -1; } } private String expandLoopPointReference(String thePath, String theLoopPoint) { return thePath.replaceAll("\\{.*\\}", theLoopPoint); } /** * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(int) */ public String get(int theFieldNumber) { if (theFieldNumber < 0 || theFieldNumber >= myValues.length) { throw new IllegalArgumentException("Field number must be between 0 and " + (myValues.length - 1)); } return (String) myValues[theFieldNumber]; } /** * @see ca.uhn.hl7v2.util.MessageQuery.Result#get(java.lang.String) */ public String get(String theFieldName) { Integer fieldNum = (Integer) myFieldNames.get(theFieldName); if (fieldNum == null) { throw new IllegalArgumentException("Field name not recognized: " + theFieldName); } return get(fieldNum.intValue()); } /** * @throws HL7Exception * @see ca.uhn.hl7v2.util.MessageQuery.Result#next() */ public boolean next() throws HL7Exception { if (myNonLoopingQuery) { myNonLoopingQuery = false; myValues = getCurrentValues(myFieldPaths); myWhereValues = getCurrentValues(myWherePaths); return currentRowMatchesFilter(); } boolean hasNext = false; findNext : for (int i = myIndices.length - 1; i >= 0; i--) { boolean gotMatch = false; while (!gotMatch && myNumEmpty[i] <= myMaxNumEmpty[i]) { myIndices[i]++; myValues = getCurrentValues(myFieldPaths); myWhereValues = getCurrentValues(myWherePaths); if (!currentRowValued(i)) { myNumEmpty[i]++; } else { myNumEmpty[i] = 0; } if (currentRowMatchesFilter()) { gotMatch = true; } } hasNext = myNumEmpty[i] <= myMaxNumEmpty[i];// && currentRowMatchesFilter(); if (hasNext) { break findNext; } myIndices[i] = 0; myNumEmpty[i] = 0; //TODO: if we aren't allowing empties in this loop, and have no value, we want to //return the null in the super-loop. However, we don't know which loop point, if //any, is the super-loop. If it was the next one we could do this ... //if (i > 0 && myMaxNumEmpty[i] == 0 && myMaxNumEmpty[i-1] > 0 && myIndices[i-1] == 0) { // myIndices[i-1] = -1; //} ... but it may not be, so we'll ignore this problem for now. } return hasNext; } /** * @see ca.uhn.hl7v2.util.MessageQuery.Result#getNamedFields() */ public String[] getNamedFields() { return (String[]) myFieldNames.keySet().toArray(new String[0]); } } }