source: src/main/java/weka/classifiers/evaluation/output/prediction/XML.java @ 4

Last change on this file since 4 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 15.1 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * XML.java
19 * Copyright (C) 2009 University of Waikato, Hamilton, New Zealand
20 */
21
22package weka.classifiers.evaluation.output.prediction;
23
24import weka.classifiers.Classifier;
25import weka.core.Attribute;
26import weka.core.Instance;
27import weka.core.Utils;
28import weka.core.Version;
29import weka.core.xml.XMLDocument;
30
31/**
32 <!-- globalinfo-start -->
33 * Outputs the predictions in XML.<br/>
34 * <br/>
35 * The following DTD is used:<br/>
36 * <br/>
37 * &lt;!DOCTYPE predictions<br/>
38 * [<br/>
39 *   &lt;!ELEMENT predictions (prediction*)&gt;<br/>
40 *   &lt;!ATTLIST predictions version CDATA "3.5.8"&gt;<br/>
41 *   &lt;!ATTLIST predictions name CDATA #REQUIRED&gt;<br/>
42 * <br/>
43 *   &lt;!ELEMENT prediction ((actual_label,predicted_label,error,(prediction|distribution),attributes?)|(actual_value,predicted_value,error,attributes?))&gt;<br/>
44 *   &lt;!ATTLIST prediction index CDATA #REQUIRED&gt;<br/>
45 * <br/>
46 *   &lt;!ELEMENT actual_label ANY&gt;<br/>
47 *   &lt;!ATTLIST actual_label index CDATA #REQUIRED&gt;<br/>
48 *   &lt;!ELEMENT predicted_label ANY&gt;<br/>
49 *   &lt;!ATTLIST predicted_label index CDATA #REQUIRED&gt;<br/>
50 *   &lt;!ELEMENT error ANY&gt;<br/>
51 *   &lt;!ELEMENT prediction ANY&gt;<br/>
52 *   &lt;!ELEMENT distribution (class_label+)&gt;<br/>
53 *   &lt;!ELEMENT class_label ANY&gt;<br/>
54 *   &lt;!ATTLIST class_label index CDATA #REQUIRED&gt;<br/>
55 *   &lt;!ATTLIST class_label predicted (yes|no) "no"&gt;<br/>
56 *   &lt;!ELEMENT actual_value ANY&gt;<br/>
57 *   &lt;!ELEMENT predicted_value ANY&gt;<br/>
58 *   &lt;!ELEMENT attributes (attribute+)&gt;<br/>
59 *   &lt;!ELEMENT attribute ANY&gt;<br/>
60 *   &lt;!ATTLIST attribute index CDATA #REQUIRED&gt;<br/>
61 *   &lt;!ATTLIST attribute name CDATA #REQUIRED&gt;<br/>
62 *   &lt;!ATTLIST attribute type (numeric|date|nominal|string|relational) #REQUIRED&gt;<br/>
63 * ]<br/>
64 * &gt;
65 * <p/>
66 <!-- globalinfo-end -->
67 *
68 <!-- options-start -->
69 * Valid options are: <p/>
70 *
71 * <pre> -p &lt;range&gt;
72 *  The range of attributes to print in addition to the classification.
73 *  (default: none)</pre>
74 *
75 * <pre> -distribution
76 *  Whether to turn on the output of the class distribution.
77 *  Only for nominal class attributes.
78 *  (default: off)</pre>
79 *
80 * <pre> -decimals &lt;num&gt;
81 *  The number of digits after the decimal point.
82 *  (default: 3)</pre>
83 *
84 * <pre> -file &lt;path&gt;
85 *  The file to store the output in, instead of outputting it on stdout.
86 *  Gets ignored if the supplied path is a directory.
87 *  (default: .)</pre>
88 *
89 * <pre> -suppress
90 *  In case the data gets stored in a file, then this flag can be used
91 *  to suppress the regular output.
92 *  (default: not suppressed)</pre>
93 *
94 <!-- options-end -->
95 *
96 * @author  fracpete (fracpete at waikato dot ac dot nz)
97 * @version $Revision: 5987 $
98 */
99public class XML
100  extends AbstractOutput {
101 
102  /** for serialization. */
103  private static final long serialVersionUID = -3165514277316824801L;
104
105  /** the DocType definition. */
106  public final static String DTD_DOCTYPE = XMLDocument.DTD_DOCTYPE;
107 
108  /** the Element definition. */
109  public final static String DTD_ELEMENT = XMLDocument.DTD_ELEMENT;
110 
111  /** the AttList definition. */
112  public final static String DTD_ATTLIST = XMLDocument.DTD_ATTLIST;
113 
114  /** the optional marker. */
115  public final static String DTD_OPTIONAL = XMLDocument.DTD_OPTIONAL;
116 
117  /** the at least one marker. */
118  public final static String DTD_AT_LEAST_ONE = XMLDocument.DTD_AT_LEAST_ONE;
119 
120  /** the zero or more marker. */
121  public final static String DTD_ZERO_OR_MORE = XMLDocument.DTD_ZERO_OR_MORE;
122 
123  /** the option separator. */
124  public final static String DTD_SEPARATOR = XMLDocument.DTD_SEPARATOR;
125 
126  /** the CDATA placeholder. */
127  public final static String DTD_CDATA = XMLDocument.DTD_CDATA; 
128 
129  /** the ANY placeholder. */
130  public final static String DTD_ANY = XMLDocument.DTD_ANY; 
131 
132  /** the #PCDATA placeholder. */
133  public final static String DTD_PCDATA = XMLDocument.DTD_PCDATA; 
134 
135  /** the #IMPLIED placeholder. */
136  public final static String DTD_IMPLIED = XMLDocument.DTD_IMPLIED; 
137 
138  /** the #REQUIRED placeholder. */
139  public final static String DTD_REQUIRED = XMLDocument.DTD_REQUIRED; 
140
141  /** the "version" attribute. */
142  public final static String ATT_VERSION = XMLDocument.ATT_VERSION;
143 
144  /** the "name" attribute. */
145  public final static String ATT_NAME = XMLDocument.ATT_NAME;
146 
147  /** the "type" attribute. */
148  public final static String ATT_TYPE = "type";
149
150  /** the value "yes". */
151  public final static String VAL_YES = XMLDocument.VAL_YES;
152 
153  /** the value "no". */
154  public final static String VAL_NO = XMLDocument.VAL_NO;
155 
156  /** the predictions tag. */
157  public final static String TAG_PREDICTIONS = "predictions";
158 
159  /** the prediction tag. */
160  public final static String TAG_PREDICTION = "prediction";
161
162  /** the actual_nominal tag. */
163  public final static String TAG_ACTUAL_LABEL = "actual_label";
164
165  /** the predicted_nominal tag. */
166  public final static String TAG_PREDICTED_LABEL = "predicted_label";
167
168  /** the error tag. */
169  public final static String TAG_ERROR = "error";
170
171  /** the distribution tag. */
172  public final static String TAG_DISTRIBUTION = "distribution";
173
174  /** the class_label tag. */
175  public final static String TAG_CLASS_LABEL = "class_label";
176
177  /** the actual_numeric tag. */
178  public final static String TAG_ACTUAL_VALUE = "actual_value";
179
180  /** the predicted_numeric tag. */
181  public final static String TAG_PREDICTED_VALUE = "predicted_value";
182
183  /** the attributes tag. */
184  public final static String TAG_ATTRIBUTES = "attributes";
185
186  /** the attribute tag. */
187  public final static String TAG_ATTRIBUTE = "attribute";
188
189  /** the index attribute. */
190  public final static String ATT_INDEX = "index";
191
192  /** the predicted attribute. */
193  public final static String ATT_PREDICTED = "predicted";
194 
195  /** the DTD. */
196  public final static String DTD = 
197    "<!" + DTD_DOCTYPE + " " + TAG_PREDICTIONS + "\n"
198    + "[\n"
199    + "  <!" + DTD_ELEMENT + " " + TAG_PREDICTIONS + " (" + TAG_PREDICTION + DTD_ZERO_OR_MORE + ")" + ">\n"
200    + "  <!" + DTD_ATTLIST + " " + TAG_PREDICTIONS + " " + ATT_VERSION + " " + DTD_CDATA + " \"" + Version.VERSION + "\"" + ">\n"
201    + "  <!" + DTD_ATTLIST + " " + TAG_PREDICTIONS + " " + ATT_NAME + " " + DTD_CDATA + " " + DTD_REQUIRED + ">\n"
202    + "\n"
203    + "  <!" + DTD_ELEMENT + " " + TAG_PREDICTION + " " 
204             + "(" 
205             + "(" + TAG_ACTUAL_LABEL + "," + TAG_PREDICTED_LABEL + "," + TAG_ERROR + "," + "(" + TAG_PREDICTION + DTD_SEPARATOR + TAG_DISTRIBUTION + ")" + "," + TAG_ATTRIBUTES + DTD_OPTIONAL + ")" 
206             + DTD_SEPARATOR
207             + "(" + TAG_ACTUAL_VALUE + "," + TAG_PREDICTED_VALUE + "," + TAG_ERROR + "," + TAG_ATTRIBUTES + DTD_OPTIONAL + ")"
208             + ")" + ">\n"
209    + "  <!" + DTD_ATTLIST + " " + TAG_PREDICTION + " " + ATT_INDEX + " " + DTD_CDATA + " " + DTD_REQUIRED + ">\n"
210    + "\n"
211    + "  <!" + DTD_ELEMENT + " " + TAG_ACTUAL_LABEL + " " + DTD_ANY + ">\n"
212    + "  <!" + DTD_ATTLIST + " " + TAG_ACTUAL_LABEL + " " + ATT_INDEX + " " + DTD_CDATA + " " + DTD_REQUIRED + ">\n"
213    + "  <!" + DTD_ELEMENT + " " + TAG_PREDICTED_LABEL + " " + DTD_ANY + ">\n"
214    + "  <!" + DTD_ATTLIST + " " + TAG_PREDICTED_LABEL + " " + ATT_INDEX + " " + DTD_CDATA + " " + DTD_REQUIRED + ">\n"
215    + "  <!" + DTD_ELEMENT + " " + TAG_ERROR + " " + DTD_ANY + ">\n"
216    + "  <!" + DTD_ELEMENT + " " + TAG_PREDICTION + " " + DTD_ANY + ">\n"
217    + "  <!" + DTD_ELEMENT + " " + TAG_DISTRIBUTION + " (" + TAG_CLASS_LABEL + DTD_AT_LEAST_ONE + ")" + ">\n"
218    + "  <!" + DTD_ELEMENT + " " + TAG_CLASS_LABEL + " " + DTD_ANY + ">\n"
219    + "  <!" + DTD_ATTLIST + " " + TAG_CLASS_LABEL + " " + ATT_INDEX + " " + DTD_CDATA + " " + DTD_REQUIRED + ">\n"
220    + "  <!" + DTD_ATTLIST + " " + TAG_CLASS_LABEL + " " + ATT_PREDICTED + " (" + VAL_YES + DTD_SEPARATOR + VAL_NO + ") " + "\"" + VAL_NO + "\"" + ">\n"
221    + "  <!" + DTD_ELEMENT + " " + TAG_ACTUAL_VALUE + " " + DTD_ANY + ">\n"
222    + "  <!" + DTD_ELEMENT + " " + TAG_PREDICTED_VALUE + " " + DTD_ANY + ">\n"
223    + "  <!" + DTD_ELEMENT + " " + TAG_ATTRIBUTES + " (" + TAG_ATTRIBUTE + DTD_AT_LEAST_ONE + ")" + ">\n"
224    + "  <!" + DTD_ELEMENT + " " + TAG_ATTRIBUTE + " " + DTD_ANY + ">\n"
225    + "  <!" + DTD_ATTLIST + " " + TAG_ATTRIBUTE + " " + ATT_INDEX + " " + DTD_CDATA + " " + DTD_REQUIRED + ">\n"
226    + "  <!" + DTD_ATTLIST + " " + TAG_ATTRIBUTE + " " + ATT_NAME + " " + DTD_CDATA + " " + DTD_REQUIRED + ">\n"
227    + "  <!" + DTD_ATTLIST + " " + TAG_ATTRIBUTE + " " + ATT_TYPE + " " + "(" + Attribute.typeToString(Attribute.NUMERIC) + DTD_SEPARATOR + Attribute.typeToString(Attribute.DATE) + DTD_SEPARATOR + Attribute.typeToString(Attribute.NOMINAL) + DTD_SEPARATOR + Attribute.typeToString(Attribute.STRING) + DTD_SEPARATOR + Attribute.typeToString(Attribute.RELATIONAL) + ")" + " " + DTD_REQUIRED + ">\n"
228    + "]\n"
229    + ">";
230 
231  /**
232   * Returns a string describing the output generator.
233   *
234   * @return            a description suitable for
235   *                    displaying in the GUI
236   */
237  public String globalInfo() {
238    return 
239        "Outputs the predictions in XML.\n\n"
240      + "The following DTD is used:\n\n"
241      + DTD;
242  }
243 
244  /**
245   * Returns a short display text, to be used in comboboxes.
246   *
247   * @return            a short display text
248   */
249  public String getDisplay() {
250    return "XML";
251  }
252
253  /**
254   * Replaces certain characters with their XML entities.
255   *
256   * @param s           the string to process
257   * @return            the processed string
258   */
259  protected String sanitize(String s) {
260    String      result;
261   
262    result = s;
263    result = result.replaceAll("&", "&amp;");
264    result = result.replaceAll("<", "&lt;");
265    result = result.replaceAll(">", "&gt;");
266    result = result.replaceAll("\"", "&quot;");
267   
268    return result;
269  }
270 
271  /**
272   * Performs the actual printing of the header.
273   */
274  protected void doPrintHeader() {
275    append("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
276    append("\n");
277    append(DTD + "\n\n");
278    append("<" + TAG_PREDICTIONS + " " + ATT_VERSION + "=\"" + Version.VERSION + "\"" + " " + ATT_NAME + "=\"" + sanitize(m_Header.relationName()) + "\">\n");
279  }
280
281  /**
282   * Builds a string listing the attribute values in a specified range of indices,
283   * separated by commas and enclosed in brackets.
284   *
285   * @param instance    the instance to print the values from
286   * @return            a string listing values of the attributes in the range
287   */
288  protected String attributeValuesString(Instance instance) {
289    StringBuffer text = new StringBuffer();
290    if (m_Attributes != null) {
291      text.append("    <" + TAG_ATTRIBUTES + ">\n");
292      m_Attributes.setUpper(instance.numAttributes() - 1);
293      for (int i=0; i<instance.numAttributes(); i++) {
294        if (m_Attributes.isInRange(i) && i != instance.classIndex()) {
295          text.append("      <" + TAG_ATTRIBUTE + " " + ATT_INDEX + "=\"" + (i+1) + "\"" + " " + ATT_NAME + "=\"" + sanitize(instance.attribute(i).name()) + "\"" + " " + ATT_TYPE + "=\"" + Attribute.typeToString(instance.attribute(i).type()) + "\"" + ">");
296          text.append(sanitize(instance.toString(i)));
297          text.append("</" + TAG_ATTRIBUTE + ">\n");
298        }
299      }
300      text.append("    </" + TAG_ATTRIBUTES + ">\n");
301    }
302    return text.toString();
303  }
304
305  /**
306   * Store the prediction made by the classifier as a string.
307   *
308   * @param classifier  the classifier to use
309   * @param inst        the instance to generate text from
310   * @param index       the index in the dataset
311   * @throws Exception  if something goes wrong
312   */
313  protected void doPrintClassification(Classifier classifier, Instance inst, int index) throws Exception {
314    int prec = m_NumDecimals;
315
316    Instance withMissing = (Instance)inst.copy();
317    withMissing.setDataset(inst.dataset());
318    withMissing.setMissing(withMissing.classIndex());
319    double predValue = classifier.classifyInstance(withMissing);
320
321    // opening tag
322    append("  <" + TAG_PREDICTION + " " + ATT_INDEX + "=\"" + (index+1) + "\">\n");
323
324    if (inst.dataset().classAttribute().isNumeric()) {
325      // actual
326      append("    <" + TAG_ACTUAL_VALUE + ">");
327      if (inst.classIsMissing())
328        append("?");
329      else
330        append(Utils.doubleToString(inst.classValue(), prec));
331      append("</" + TAG_ACTUAL_VALUE + ">\n");
332      // predicted
333      append("    <" + TAG_PREDICTED_VALUE + ">");
334      if (inst.classIsMissing())
335        append("?");
336      else
337        append(Utils.doubleToString(predValue, prec));
338      append("</" + TAG_PREDICTED_VALUE + ">\n");
339      // error
340      append("    <" + TAG_ERROR + ">");
341      if (Utils.isMissingValue(predValue) || inst.classIsMissing())
342        append("?");
343      else
344        append(Utils.doubleToString(predValue - inst.classValue(), prec));
345      append("</" + TAG_ERROR + ">\n");
346    } else {
347      // actual
348      append("    <" + TAG_ACTUAL_LABEL + " " + ATT_INDEX + "=\"" + ((int) inst.classValue()+1) + "\"" + ">");
349      append(sanitize(inst.toString(inst.classIndex())));
350      append("</" + TAG_ACTUAL_LABEL + ">\n");
351      // predicted
352      append("    <" + TAG_PREDICTED_LABEL + " " + ATT_INDEX + "=\"" + ((int) predValue+1) + "\"" + ">");
353      if (Utils.isMissingValue(predValue))
354        append("?");
355      else
356        append(sanitize(inst.dataset().classAttribute().value((int)predValue)));
357      append("</" + TAG_PREDICTED_LABEL + ">\n");
358      // error?
359      append("    <" + TAG_ERROR + ">");
360      if (!Utils.isMissingValue(predValue) && !inst.classIsMissing() && ((int) predValue+1 != (int) inst.classValue()+1))
361        append(VAL_YES);
362      else
363        append(VAL_NO);
364      append("</" + TAG_ERROR + ">\n");
365      // prediction/distribution
366      if (m_OutputDistribution) {
367        append("    <" + TAG_DISTRIBUTION + ">\n");
368        double[] dist = classifier.distributionForInstance(withMissing);
369        for (int n = 0; n < dist.length; n++) {
370          append("      <" + TAG_CLASS_LABEL + " " + ATT_INDEX + "=\"" + (n+1) + "\"");
371          if (!Utils.isMissingValue(predValue) && (n == (int) predValue))
372            append(" " + ATT_PREDICTED + "=\"" + VAL_YES + "\"");
373          append(">");
374          append(Utils.doubleToString(dist[n], prec));
375          append("</" + TAG_CLASS_LABEL + ">\n");
376        }
377        append("    </" + TAG_DISTRIBUTION + ">\n");
378      }
379      else {
380        append("    <" + TAG_PREDICTION + ">");
381        if (Utils.isMissingValue(predValue))
382          append("?");
383        else
384          append(Utils.doubleToString(classifier.distributionForInstance(withMissing) [(int)predValue], prec));
385        append("</" + TAG_PREDICTION + ">\n");
386      }
387    }
388
389    // attributes
390    if (m_Attributes != null)
391      append(attributeValuesString(withMissing));
392   
393    // closing tag
394    append("  </" + TAG_PREDICTION + ">\n");
395  }
396 
397  /**
398   * Does nothing.
399   */
400  protected void doPrintFooter() {
401    append("</" + TAG_PREDICTIONS + ">\n");
402  }
403}
Note: See TracBrowser for help on using the repository browser.