/* * Hl7InputStreamReader.java */ package ca.uhn.hl7v2.util; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.PushbackReader; import java.io.Reader; import java.util.ArrayList; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.log4j.Logger; import ca.uhn.hl7v2.parser.Parser; import ca.uhn.hl7v2.parser.PipeParser; /** * Reads HL7 messages from an InputStream * * @version $Revision: 1.1 $ updated on $Date: 2007/02/19 02:24:27 $ by $Author: jamesagnew $ */ public class Hl7InputStreamReader { private static final Logger ourLog = Logger.getLogger(Hl7InputStreamReader.class); private InputStream is = null; /** * Reads HL7 messages from an InputStream and outputs an array of HL7 message strings * * @version $Revision: 1.1 $ updated on $Date: 2007/02/19 02:24:27 $ by $Author: jamesagnew $ */ public static String[] read( InputStream theMsgInputStream ) throws FileNotFoundException, IOException { Parser hapiParser = new PipeParser(); BufferedReader in = new BufferedReader( new CommentFilterReader( new InputStreamReader( theMsgInputStream ) ) ); StringBuffer rawMsgBuffer = new StringBuffer(); int c = 0; while( (c = in.read()) >= 0) { rawMsgBuffer.append( (char) c); } String[] messages = getHL7Messages(rawMsgBuffer.toString()); ourLog.info(messages.length + " messages sent."); return messages; } /** * Given a string that contains HL7 messages, and possibly other junk, * returns an array of the HL7 messages. * An attempt is made to recognize segments even if there is other * content between segments, for example if a log file logs segments * individually with timestamps between them. * * @param theSource a string containing HL7 messages * @return the HL7 messages contained in theSource */ private static String[] getHL7Messages(String theSource) { ArrayList messages = new ArrayList(20); Pattern startPattern = Pattern.compile("^MSH", Pattern.MULTILINE); Matcher startMatcher = startPattern.matcher(theSource); while (startMatcher.find()) { String messageExtent = getMessageExtent(theSource.substring(startMatcher.start()), startPattern); char fieldDelim = messageExtent.charAt(3); Pattern segmentPattern = Pattern.compile("^[A-Z]{3}\\" + fieldDelim + ".*$", Pattern.MULTILINE); Matcher segmentMatcher = segmentPattern.matcher(messageExtent); StringBuffer msg = new StringBuffer(); while (segmentMatcher.find()) { msg.append(segmentMatcher.group().trim()); msg.append('\r'); } messages.add(msg.toString()); } return (String[]) messages.toArray(new String[0]); } /** * Given a string that contains at least one HL7 message, returns the * smallest string that contains the first of these messages. */ private static String getMessageExtent(String theSource, Pattern theStartPattern) { Matcher startMatcher = theStartPattern.matcher(theSource); if (!startMatcher.find()) { throw new IllegalArgumentException(theSource + "does not contain message start pattern" + theStartPattern.toString()); } int start = startMatcher.start(); int end = theSource.length(); if (startMatcher.find()) { end = startMatcher.start(); } return theSource.substring(start, end).trim(); } /** * TODO: this code is copied from HAPI ... should make it part of HAPI public API instead * Removes C and C++ style comments from a reader stream. C style comments are * distinguished from URL protocol delimiters by the preceding colon in the * latter. */ private static class CommentFilterReader extends PushbackReader { private final char[] startCPPComment = {'/', '*'}; private final char[] endCPPComment = {'*', '/'}; private final char[] startCComment = {'/', '/'}; private final char[] endCComment = {'\n'}; private final char[] protocolDelim = {':', '/', '/'}; public CommentFilterReader(Reader in) { super(in, 5); } /** * Returns the next character, not including comments. */ public int read() throws IOException { if (atSequence(protocolDelim)) { //proceed normally } else if (atSequence(startCPPComment)) { //skip() doesn't seem to work for some reason while (!atSequence(endCPPComment)) super.read(); for (int i = 0; i < endCPPComment.length; i++) super.read(); } else if (atSequence(startCComment)) { while (!atSequence(endCComment)) super.read(); for (int i = 0; i < endCComment.length; i++) super.read(); } int ret = super.read(); if (ret == 65535) ret = -1; return ret; } public int read(char[] cbuf, int off, int len) throws IOException { int i = -1; boolean done = false; while (++i < len) { int next = read(); if (next == 65535 || next == -1) { //Pushback causes -1 to convert to 65535 done = true; break; } cbuf[off + i] = (char) next; } if (i == 0 && done) i = -1; return i; } /** * Tests incoming data for match with char sequence, resets reader when done. */ private boolean atSequence(char[] sequence) throws IOException { boolean result = true; int i = -1; int[] data = new int[sequence.length]; while (++i < sequence.length && result == true) { data[i] = super.read(); if ((char) data[i] != sequence[i]) result = false; //includes case where end of stream reached } for (int j = i-1; j >= 0; j--) { this.unread(data[j]); } return result; } } }