View Javadoc

1   /* Copyright (c) 2008 Sascha Kohlmann
2    *
3    * This program is free software: you can redistribute it and/or modify
4    * it under the terms of the GNU Affero General Public License as published by
5    * the Free Software Foundation, either version 3 of the License, or
6    * (at your option) any later version.
7    *
8    * This program is distributed in the hope that it will be useful,
9    * but WITHOUT ANY WARRANTY; without even the implied warranty of
10   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11   * GNU Affero General Public License for more details.
12   *
13   * You should have received a copy of the GNU Affero General Public License
14   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
15   */
16  package net.sf.eos.document;
17  
18  import net.sf.eos.io.NewlineReplaceWriter;
19  
20  import org.apache.commons.lang.StringEscapeUtils;
21  import org.apache.commons.logging.Log;
22  import org.apache.commons.logging.LogFactory;
23  import org.xml.sax.Attributes;
24  import org.xml.sax.InputSource;
25  import org.xml.sax.SAXException;
26  import org.xml.sax.ext.DefaultHandler2;
27  
28  import java.io.IOException;
29  import java.io.Reader;
30  import java.io.Writer;
31  import java.util.ArrayList;
32  import java.util.Collections;
33  import java.util.List;
34  import java.util.Map;
35  import java.util.Map.Entry;
36  
37  import javax.xml.parsers.ParserConfigurationException;
38  import javax.xml.parsers.SAXParser;
39  import javax.xml.parsers.SAXParserFactory;
40  
41  /**
42   * Serializer and deserializer for an {@link EosDocument}. See
43   * {@link XmlSerializer.ElementName} for element names. The order of the elements in
44   * the root container are not defined. Also the order in the meta container
45   * is not defined. If the root container contains more than one <em>title</em>
46   * or <em>text</em> element, the latest elements may win. If the meta container
47   * contains more than one <em>key</em>, the latest may win.
48   * @author Sascha Kohlmann
49   */
50  public class XmlSerializer extends Serializer {
51  
52      static final Log LOG = LogFactory.getLog(XmlSerializer.class.getName());
53  
54      /**
55       * Represents the XML element names of a serialized &#949;&#959;s document.
56       * @author Sascha Kohlmann
57       */
58      public enum ElementName {
59          /** Root element of an &#949;&#959;s document. */
60          d,
61          /** Container for a meta data entry. */
62          m,
63          /** key of a meta data entry. There is only one key in a meta data
64           * entry. */
65          k,
66          /**value of a meta data entry. There may be more than one value. */
67          v,
68          /** Title of a document. */
69          ti,
70          /** Text of a document. */
71          te}
72  
73      @SuppressWarnings("nls")
74      private static final String XML_OPEN = "<";
75      @SuppressWarnings("nls")
76      private static final String XML_CLOSE = ">";
77      @SuppressWarnings("nls")
78      private static final String XML_CLOSE_TAG = "</";
79  
80      /*
81       * @see  net.sf.eos.document.Serializer#serialize(net.sf.eos.document.EosDocument, java.io.Writer)
82       */
83      @Override
84      @SuppressWarnings("nls")
85      public void serialize(final EosDocument doc, final Writer out)
86              throws IOException {
87          if (LOG.isDebugEnabled()) {
88              LOG.debug("start serialize EosDocument");
89          }
90          final NewlineReplaceWriter writer = new NewlineReplaceWriter(out);
91          writer.write(XML_OPEN + ElementName.d.name() + XML_CLOSE);
92          final  Map<String, List<String>> meta = doc.getMeta();
93          if (meta != null && meta.size() != 0) {
94              for (final Entry<String, List<String>> entry : meta.entrySet()) {
95                  writer.write(XML_OPEN + ElementName.m.name() + XML_CLOSE);
96  
97                  writer.write(XML_OPEN + ElementName.k.name() + XML_CLOSE);
98                  writer.write(StringEscapeUtils.escapeXml(entry.getKey()));
99                  writer.write(XML_CLOSE_TAG + ElementName.k.name() + XML_CLOSE);
100 
101                 for (final String value : entry.getValue()) {
102                     writer.write(XML_OPEN + ElementName.v.name() + XML_CLOSE);
103                     writer.write(StringEscapeUtils.escapeXml(value));
104                     writer.write(XML_CLOSE_TAG + ElementName.v.name() 
105                                  + XML_CLOSE);
106                 }
107                 writer.write(XML_CLOSE_TAG + ElementName.m.name() + XML_CLOSE);
108             }
109         }
110 
111         final CharSequence title = doc.getTitle();
112         if (title != null) {
113             final String asString = title.toString();
114             final String escaped = StringEscapeUtils.escapeXml(asString);
115             writer.write(XML_OPEN + ElementName.ti.name() + XML_CLOSE);
116             writer.write(escaped);
117             writer.write(XML_CLOSE_TAG + ElementName.ti.name() + XML_CLOSE);
118         }
119 
120         final CharSequence text = doc.getText();
121         if (text != null) {
122             final String asString = text.toString();
123             final String escaped = StringEscapeUtils.escapeXml(asString);
124             writer.write(XML_OPEN + ElementName.te.name() + XML_CLOSE);
125             writer.write(escaped);
126             writer.write(XML_CLOSE_TAG + ElementName.te.name() + XML_CLOSE);
127         }
128 
129         writer.write(XML_CLOSE_TAG + ElementName.d.name() + XML_CLOSE);
130         if (LOG.isDebugEnabled()) {
131             LOG.debug("end serialize EosDocument");
132         }
133     }
134 
135     /*
136      * @see net.sf.eos.document.Serializer#serialize(java.io.Reader)
137      */
138     @Override
139     @SuppressWarnings("nls")
140     public EosDocument deserialize(final Reader in)
141             throws IOException, ParserConfigurationException, SAXException {
142 
143         final XmlEosDocumentHandler handler = new XmlEosDocumentHandler();
144         if (LOG.isDebugEnabled()) {
145             LOG.debug(handler.id + " start loading EosDocument");
146         }
147 
148         final SAXParserFactory factory = SAXParserFactory.newInstance();
149         final SAXParser parser = factory.newSAXParser();
150 
151         final InputSource source = new InputSource(in);
152         parser.parse(source, handler);
153 
154         final EosDocument doc = handler.getDocument();
155         if (LOG.isDebugEnabled()) {
156             LOG.debug(handler.id + " start loading EosDocument - " + doc);
157         }
158 
159         return doc;
160     }
161 
162     /**
163      * Deserializes a {@link EosDocument} which is serialized by
164      * {@link XmlSerializer#serialize(EosDocument, Writer)}.
165      * @author Sascha Kohlmann
166      */
167     protected final static class XmlEosDocumentHandler extends DefaultHandler2 {
168 
169         private StringBuilder sb = null;
170 
171         private EosDocument doc = new EosDocument();
172 
173         private String key = null;
174         private List<String> value = null;
175 
176         private boolean inMeta = false;
177         private boolean inKey = false;
178         private boolean inValue = false;
179 
180         final int id = System.identityHashCode(this);
181 
182         /*
183          * @see org.xml.sax.helpers.DefaultHandler#startDocument()
184          */
185         @Override
186         @SuppressWarnings("nls")
187         public void startDocument() {
188             if (LOG.isDebugEnabled()) {
189                 LOG.debug(this.id + " start parsing EosDocument");
190             }
191         }
192 
193         /*
194          * @see org.xml.sax.helpers.DefaultHandler#endDocument()
195          */
196         @Override
197         @SuppressWarnings("nls")
198         public void endDocument() {
199             if (LOG.isDebugEnabled()) {
200                 LOG.debug(this.id + " end parsing EosDocument");
201             }
202         }
203 
204         /*
205          * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
206          */
207         @Override
208         public void startElement(final String uri,
209                                  final String localName,
210                                  final String qName,
211                                  final Attributes attributes) {
212             if (ElementName.te.name().equals(qName)) {
213                 this.sb = new StringBuilder();
214             } else if (ElementName.ti.name().equals(qName)) {
215                 this.sb = new StringBuilder();
216             } else if (ElementName.m.name().equals(qName)) {
217                 this.inMeta = true;
218                 this.key = null;
219                 this.value = new ArrayList<String>();
220             } else if (ElementName.k.name().equals(qName)) {
221                 this.inKey = true;
222                 this.sb = new StringBuilder();
223             } else if (ElementName.v.name().equals(qName)) {
224                 this.inValue = true;
225                 this.sb = new StringBuilder();
226             }
227         }
228 
229         /*
230          * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
231          */
232         @Override
233         @SuppressWarnings("nls")
234         public void endElement(final String uri,
235                                final String localName,
236                                final String qName) {
237 
238             if (ElementName.te.name().equals(qName)) {
239                 final String text = this.sb.toString();
240                 this.doc.setText(text);
241 
242                 if (LOG.isTraceEnabled()) {
243                     final StringBuilder lbuf =
244                         new StringBuilder(this.id + " text: ");
245                     lbuf.append(text);
246                     LOG.trace(lbuf.toString());
247                 }
248 
249             } else if (ElementName.ti.name().equals(qName)) {
250                 final String title = sb.toString();
251                 this.doc.setTitle(title);
252 
253                 if (LOG.isTraceEnabled()) {
254                     final StringBuilder lbuf =
255                         new StringBuilder(this.id + " title: ");
256                     lbuf.append(title);
257                     LOG.trace(lbuf.toString());
258                 }
259 
260             } else if (ElementName.m.name().equals(qName)) {
261                 assert this.inMeta == true;
262                 final Map<String, List<String>> meta = this.doc.getMeta();
263                 assert this.key != null;
264                 meta.put(this.key, this.value);
265 
266                 if (LOG.isTraceEnabled()) {
267                     final StringBuilder lbuf =
268                         new StringBuilder(this.id + " key: ");
269                     lbuf.append(this.key);
270                     lbuf.append(" values: ");
271                     for (final String v : this.value) {
272                         lbuf.append("value=");
273                         lbuf.append(v);
274                     }
275                     LOG.trace(lbuf.toString());
276                 }
277 
278                 this.inMeta = false;
279             } else if (ElementName.k.name().equals(qName)) {
280                 assert this.inKey == true;
281                 this.key = this.sb.toString();
282             } else if (ElementName.v.name().equals(qName)) {
283                 assert this.inValue == true;
284                 assert this.value != null;
285                 final String v = this.sb.toString();
286                 this.value.add(v);
287             }
288         }
289 
290         /*
291          * @see org.xml.sax.helpers.DefaultHandler#characters(char[], int, int)
292          */
293         @Override
294         public void characters(final char[] ch,
295                                final int start,
296                                final int length) {
297             if (this.sb != null) {
298                 this.sb.append(ch, start, length);
299             }
300         }
301 
302         /**
303          * Returns a document, constructed from the value of the parser.
304          * @return a document if created. may be {@code null} or an empty
305          *         document.
306          */
307         
308         @SuppressWarnings("unchecked")
309         public EosDocument getDocument() {
310             assert this.doc != null;
311             if (this.doc.getMeta() == null) {
312                 this.doc.setMeta(Collections.EMPTY_MAP);
313             }
314             return this.doc;
315         }
316     }
317 }