/**
* The contents of this file are subject to the Mozilla Public License Version 1.1
* (the "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
* specific language governing rights and limitations under the License.
*
* The Original Code is "XMLParser.java". Description:
* "Parses and encodes HL7 messages in XML form, according to HL7's normative XML encoding
* specification."
*
* The Initial Developer of the Original Code is University Health Network. Copyright (C)
* 2002. All Rights Reserved.
*
* Contributor(s): ______________________________________.
*
* Alternatively, the contents of this file may be used under the terms of the
* GNU General Public License (the "GPL"), in which case the provisions of the GPL are
* applicable instead of those above. If you wish to allow use of your version of this
* file only under the terms of the GPL and not to allow others to use your version
* of this file under the MPL, indicate your decision by deleting the provisions above
* and replace them with the notice and other provisions required by the GPL License.
* If you do not delete the provisions above, a recipient may use your version of
* this file under either the MPL or the GPL.
*/
package ca.uhn.hl7v2.parser;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.HashSet;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.xerces.parsers.DOMParser;
import org.apache.xerces.parsers.StandardParserConfiguration;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.XMLSerializer;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import ca.uhn.hl7v2.HL7Exception;
import ca.uhn.hl7v2.model.Composite;
import ca.uhn.hl7v2.model.DataTypeException;
import ca.uhn.hl7v2.model.GenericComposite;
import ca.uhn.hl7v2.model.GenericMessage;
import ca.uhn.hl7v2.model.GenericPrimitive;
import ca.uhn.hl7v2.model.Message;
import ca.uhn.hl7v2.model.Primitive;
import ca.uhn.hl7v2.model.Segment;
import ca.uhn.hl7v2.model.Structure;
import ca.uhn.hl7v2.model.Type;
import ca.uhn.hl7v2.model.Varies;
import ca.uhn.hl7v2.util.Terser;
import ca.uhn.log.HapiLog;
import ca.uhn.log.HapiLogFactory;
/**
* Parses and encodes HL7 messages in XML form, according to HL7's normative XML encoding
* specification. This is an abstract class that handles datatype and segment parsing/encoding,
* but not the parsing/encoding of entire messages. To use the XML parser, you should create a
* subclass for a certain message structure. This subclass must be able to identify the Segment
* objects that correspond to various Segment nodes in an XML document, and call the methods
* <code>parse(Segment segment, ElementNode segmentNode)</code> and
* <code>encode(Segment segment, ElementNode segmentNode)</code> as appropriate.
*
* <p>A subclass creates and populates a Message object from an XML Document that contains an
* XML-encoded HL7 message. The easiest way to implement this for a particular message
* structure is to call the methods named above for each segment.</p>
*
* <p>XMLParser uses the Xerces parser, which must be installed in your classpath.</p>
* @author Bryan Tripp, Shawn Bellina
*/
public abstract class XMLParser extends Parser {
/** Logger shared by all instances of this class. */
private static final HapiLog log = HapiLogFactory.getHapiLog(XMLParser.class);
/**
* Xerces DOM parser. It is created in the constructor, which also attempts to switch off
* the inclusion of ignorable whitespace in the DOM.
*/
private DOMParser parser;
/**
* Character encoding handed to the serializer's OutputFormat in doEncode(). When null,
* doEncode() does not set an encoding on the OutputFormat.
*/
private String textEncoding;
/**
* The nodes whose names match these strings will be kept as original,
* meaning that no white space trimming will occur on them
*/
private String[] keepAsOriginalNodes;
/**
* All keepAsOriginalNodes names, concatenated by a pipe (|).
* keepAsOriginal(Node) searches this string for the node's name.
*/
private String concatKeepAsOriginalNodes = "";
/**
* Default constructor. Delegates to the single-argument constructor, passing
* null as the model class factory.
*/
public XMLParser() {
this(null);
}
/**
* Constructor
*
* @param theFactory custom factory to use for model class lookup
*/
public XMLParser(ModelClassFactory theFactory) {
parser = new DOMParser(new StandardParserConfiguration());
try {
parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
}
catch (Exception e) {
log.error("Can't exclude whitespace from XML DOM", e);
}
}
/**
* Returns a String representing the encoding of the given message, if
* the encoding is recognized. For example if the given message appears
* to be encoded using HL7 2.x XML rules then "XML" would be returned.
* If the encoding is not recognized then null is returned. That this
* method returns a specific encoding does not guarantee that the
* message is correctly encoded (e.g. well formed XML) - just that
* it is not encoded using any other encoding than the one returned.
* Returns null if the encoding is not recognized.
*/
public String getEncoding(String message) {
String encoding = null;
//check for a number of expected strings
String[] expected = { "
* At the end of this process, your Message object should be populated with data from the XML
* Document.parse(Segment segmentObject, Element segmentElement)
,
* providing the appropriate Segment from your Message object, and the corresponding Element.
Parses a message string and returns the corresponding Message
* object. This method checks that the given message string is XML encoded, creates an
* XML Document object (using Xerces) from the given String, and calls the abstract
* method parse(Document XMLMessage)
encodeDocument(...)
in order to obtain XML Document object
* representation of the Message, then serializes it to a String.
* @throws HL7Exception if the data fields in the message do not permit encoding
* (e.g. required fields are null)
*/
/**
 * Encodes the given Message as an XML string.
 *
 * The DOM Document is obtained from encodeDocument(source), its root element is given
 * the attribute xmlns="urn:hl7-org:v2xml", and the document is then written to a String
 * with the Xerces XMLSerializer. If textEncoding is non-null it is set as the
 * encoding of the output format.
 *
 * @param source the message to encode; a GenericMessage is rejected
 * @return the serialized XML document
 * @throws HL7Exception if source is a GenericMessage, or if an IOException occurs
 *     while serializing the document
 */
protected String doEncode(Message source) throws HL7Exception {
    if (source instanceof GenericMessage) {
        throw new HL7Exception("Can't XML-encode a GenericMessage. Message must have a recognized structure.");
    }

    Document xmlDocument = encodeDocument(source);
    Element root = xmlDocument.getDocumentElement();
    root.setAttribute("xmlns", "urn:hl7-org:v2xml");

    // Indenting output format; a line width of 0 is set so long lines are not wrapped
    // (see the Xerces OutputFormat documentation).
    OutputFormat format = new OutputFormat("", null, true);
    format.setLineWidth(0);
    if (textEncoding != null) {
        format.setEncoding(textEncoding);
    }

    StringWriter sink = new StringWriter();
    XMLSerializer serializer = new XMLSerializer(sink, format);
    try {
        serializer.serialize(xmlDocument);
    }
    catch (IOException ioFailure) {
        throw new HL7Exception(
            "IOException serializing XML document to string",
            HL7Exception.APPLICATION_INTERNAL_ERROR,
            ioFailure);
    }
    return sink.toString();
}
/*
* Documentation for the document-encoding method (presumably encodeDocument(Message), which
* doEncode() calls; its declaration is not present at this point in the file):
* Creates an XML Document that corresponds to the given Message object.
* If you are implementing this method, you should create an XML Document, and insert XML Elements
* into it that correspond to the groups and segments that belong to the message type that your subclass
* of XMLParser supports. Then, for each segment in the message, call the method
* encode(Segment segmentObject, Element segmentElement) using the Element for
* that segment and the corresponding Segment object from the given Message.
*/
/**
* Checks whether Node content should be kept as original (i.e. whitespace won't be removed).
*
* @param node The target Node
* @return boolean true if whitespace should not be removed from node content,
* false otherwise
*/
protected boolean keepAsOriginal(Node node) {
    String nodeName = node.getNodeName();
    // A node without a name can never be in the keep-as-original list.
    // NOTE(review): indexOf() is a substring test, so a name that is merely contained in
    // one of the configured names also matches - confirm this is intended.
    return nodeName != null && concatKeepAsOriginalNodes.indexOf(nodeName) >= 0;
}
/**
 * Removes all unnecessary whitespace from the given String (intended to be used with
 * Primitive values).
 *
 * Carriage returns, line feeds and tabs are first treated as spaces; every run of
 * consecutive spaces is then collapsed to a single space; finally leading and trailing
 * whitespace is trimmed.
 *
 * The previous implementation searched for a single space while cutting out two
 * characters, so any input containing a space ended in a
 * StringIndexOutOfBoundsException. This version makes one pass over the input and
 * only drops a space when the character before it was also a space.
 *
 * @param s the text to normalize; must not be null
 * @return the normalized text, possibly empty
 */
protected String removeWhitespace(String s) {
    StringBuffer collapsed = new StringBuffer(s.length());
    boolean previousWasSpace = false;
    for (int i = 0; i < s.length(); i++) {
        char c = s.charAt(i);
        if (c == '\r' || c == '\n' || c == '\t') {
            c = ' ';
        }
        boolean isSpace = (c == ' ');
        // Keep the first space of a run, skip the rest.
        if (!(isSpace && previousWasSpace)) {
            collapsed.append(c);
        }
        previousWasSpace = isSpace;
    }
    return collapsed.toString().trim();
}
/**
 * Populates a Composite from the child Elements of the given Element, calling
 * parse(Type, Element) for each component that has a matching Element.
 *
 * For a GenericComposite the child elements are walked in document order. The
 * component index is taken from the number after the "." in the element's local name
 * (1-based in the XML, 0-based in the model); when the name contains no ".", the running
 * index is used instead. The running index is advanced after every element.
 *
 * For any other Composite, each component i is looked up by the element name produced
 * by makeElementName(datatypeObject, i + 1), and the first match is parsed.
 *
 * NOTE(review): Integer.parseInt throws an unchecked NumberFormatException if the text
 * after the "." is not a number; nothing here catches it.
 * NOTE(review): getElementsByTagName searches all descendants, not only direct
 * children - confirm nested elements with the same name cannot occur.
 */
private void parseComposite(Composite datatypeObject, Element datatypeElement) throws DataTypeException {
    if (!(datatypeObject instanceof GenericComposite)) {
        Type[] components = datatypeObject.getComponents();
        for (int idx = 0; idx < components.length; idx++) {
            String expectedName = makeElementName(datatypeObject, idx + 1);
            NodeList hits = datatypeElement.getElementsByTagName(expectedName);
            if (hits.getLength() > 0) {
                // components don't repeat - use the first match
                parse(components[idx], (Element) hits.item(0));
            }
        }
        return;
    }

    // Elements of a GenericComposite won't be named GenericComposite.x
    NodeList childNodes = datatypeElement.getChildNodes();
    int componentIndex = 0;
    for (int idx = 0; idx < childNodes.getLength(); idx++) {
        Node candidate = childNodes.item(idx);
        if (candidate.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        Element componentElement = (Element) candidate;
        String elementName = componentElement.getLocalName();
        int dotPosition = elementName.indexOf(".");
        if (dotPosition < 0) {
            log.debug("Datatype element " + datatypeElement.getLocalName()
                + " doesn't have a valid numbered name, usgin default index of " + componentIndex);
        }
        else {
            componentIndex = Integer.parseInt(elementName.substring(dotPosition + 1)) - 1;
        }
        Type target = datatypeObject.getComponent(componentIndex);
        parse(target, componentElement);
        componentIndex++;
    }
}
/**
* Returns the expected XML element name for the given child of a message constituent
* of the given class (the class should be a Composite or Segment class).
*/
/*private String makeElementName(Class c, int child) {
String longClassName = c.getName();
String shortClassName = longClassName.substring(longClassName.lastIndexOf('.') + 1, longClassName.length());
if (shortClassName.startsWith("Valid")) {
shortClassName = shortClassName.substring(5, shortClassName.length());
}
return shortClassName + "." + child;
}*/
/**
 * Builds the XML element name expected for a field of a Segment: the segment's name,
 * a dot, and the given child number.
 *
 * @param s the segment whose name forms the prefix
 * @param child the number appended after the dot
 * @return the element name, in the form name.child
 */
private String makeElementName(Segment s, int child) {
    String segmentName = s.getName();
    return segmentName + "." + child;
}
/**
 * Builds the XML element name expected for a component of a Composite: the composite's
 * name, a dot, and the given child number.
 *
 * @param composite the composite whose name forms the prefix
 * @param child the number appended after the dot
 * @return the element name, in the form name.child
 */
private String makeElementName(Composite composite, int child) {
    String compositeName = composite.getName();
    return compositeName + "." + child;
}
/**
 * Populates the given Element with data from the given Type by dispatching on the
 * kind of Type: Varies is checked first, then Primitive, then Composite. A Type that is
 * none of these is left unencoded.
 *
 * @param datatypeObject the value to encode
 * @param datatypeElement the Element that receives the encoded content
 * @return true if the delegate reported that the type contains a value; false
 *     otherwise, including when the type is none of the three kinds
 * @throws DataTypeException if the delegate fails to encode the value
 */
private boolean encode(Type datatypeObject, Element datatypeElement) throws DataTypeException {
    if (datatypeObject instanceof Varies) {
        return encodeVaries((Varies) datatypeObject, datatypeElement);
    }
    if (datatypeObject instanceof Primitive) {
        return encodePrimitive((Primitive) datatypeObject, datatypeElement);
    }
    if (datatypeObject instanceof Composite) {
        return encodeComposite((Composite) datatypeObject, datatypeElement);
    }
    return false;
}
/**
 * Encodes a Varies by encoding the data it wraps into the given Element.
 *
 * @param datatypeObject the Varies whose data is encoded
 * @param datatypeElement the Element that receives the encoded content
 * @return true if the Varies holds data and encoding that data reported a value;
 *     false if the data is null or contains no value
 * @throws DataTypeException if encoding the wrapped data fails
 */
private boolean encodeVaries(Varies datatypeObject, Element datatypeElement) throws DataTypeException {
    // Short-circuit: encode(...) is only reached when there is data to encode.
    return datatypeObject.getData() != null
        && encode(datatypeObject.getData(), datatypeElement);
}
/**
 * Encodes a Primitive in XML by appending its value, as a text node, to the given
 * Element.
 *
 * The value is read once. When it is null or empty nothing is created or appended;
 * the earlier code built a Text node even in that case and then discarded it.
 *
 * @param datatypeObject the Primitive whose value is encoded
 * @param datatypeElement the Element that receives the text node
 * @return true if the Primitive has a non-null, non-empty value (a text node was
 *     appended); false otherwise
 * @throws DataTypeException if the DOM rejects the new text node
 */
private boolean encodePrimitive(Primitive datatypeObject, Element datatypeElement) throws DataTypeException {
    String value = datatypeObject.getValue();
    if (value == null || value.equals("")) {
        return false;
    }
    try {
        Text textNode = datatypeElement.getOwnerDocument().createTextNode(value);
        datatypeElement.appendChild(textNode);
    }
    catch (DOMException e) {
        throw new DataTypeException("DOMException encoding Primitive: ", e);
    }
    return true;
}
/**
 * Encodes a Composite in XML. For each component a new Element is created, named by
 * makeElementName(datatypeObject, position) with a 1-based position, and filled via
 * encode(Type, Element). Only Elements whose component reported a value are appended
 * to the given Element.
 *
 * @param datatypeObject the Composite whose components are encoded
 * @param datatypeElement the Element that receives the component Elements
 * @return true if at least one component contained a value
 * @throws DataTypeException if a component cannot be encoded or the DOM rejects a
 *     new child Element
 */
private boolean encodeComposite(Composite datatypeObject, Element datatypeElement) throws DataTypeException {
    boolean anyComponentEncoded = false;
    Type[] parts = datatypeObject.getComponents();
    for (int idx = 0; idx < parts.length; idx++) {
        String childName = makeElementName(datatypeObject, idx + 1);
        Element childElement = datatypeElement.getOwnerDocument().createElement(childName);
        if (!encode(parts[idx], childElement)) {
            // Empty component: its Element is never attached to the document tree.
            continue;
        }
        try {
            datatypeElement.appendChild(childElement);
        }
        catch (DOMException domFailure) {
            throw new DataTypeException("DOMException encoding Composite: ", domFailure);
        }
        anyComponentEncoded = true;
    }
    return anyComponentEncoded;
}
/**
* Returns a minimal amount of data from a message string, including only the * data needed to send a response to the remote system. This includes the * following fields: *