source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/MILESFilter.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 11.0 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * MILES.java
19 * Copyright (C) 2008-09 University of Waikato, Hamilton, New Zealand
20 */
21
22package weka.filters.unsupervised.attribute;
23
24import weka.core.Attribute;
25import weka.core.Instance; 
26import weka.core.DenseInstance;
27import weka.core.Instances;
28import weka.core.Version;
29import weka.core.Capabilities.Capability;
30import weka.core.Capabilities;
31import weka.core.RevisionUtils;
32import weka.core.OptionHandler;
33import weka.core.Utils;
34import weka.core.FastVector;
35import weka.core.Option;
36import weka.core.TechnicalInformation;
37import weka.core.TechnicalInformationHandler;
38import weka.core.TechnicalInformation.Field;
39import weka.core.TechnicalInformation.Type;
40
41import weka.filters.SimpleBatchFilter;
42import weka.filters.UnsupervisedFilter;
43
44import java.util.LinkedList;
45import java.util.Enumeration;
46
47/**
48 <!-- globalinfo-start -->
49 * Implements the MILES transformation that maps multiple instance bags into a high-dimensional single-instance feature space.<br/>
50 * For more information see:<br/>
51 * <br/>
52 * Y. Chen, J. Bi, J.Z. Wang (2006). MILES: Multiple-instance learning via embedded instance selection. IEEE PAMI. 28(12):1931-1947.<br/>
53 * <br/>
54 * James Foulds, Eibe Frank: Revisiting multiple-instance learning via embedded instance selection. In: 21st Australasian Joint Conference on Artificial Intelligence, 300-310, 2008.
55 * <p/>
56 <!-- globalinfo-end -->
57 *
58 <!-- technical-bibtex-start -->
59 * BibTeX:
60 * <pre>
61 * &#64;article{Chen2006,
62 *    author = {Y. Chen and J. Bi and J.Z. Wang},
63 *    journal = {IEEE PAMI},
64 *    number = {12},
65 *    pages = {1931-1947},
66 *    title = {MILES: Multiple-instance learning via embedded instance selection},
67 *    volume = {28},
68 *    year = {2006}
69 * }
70 *
71 * &#64;inproceedings{Foulds2008,
72 *    author = {James Foulds and Eibe Frank},
73 *    booktitle = {21st Australasian Joint Conference on Artificial Intelligence},
74 *    pages = {300-310},
75 *    publisher = {Springer},
76 *    title = {Revisiting multiple-instance learning via embedded instance selection},
77 *    year = {2008}
78 * }
79 * </pre>
80 * <p/>
81 <!-- technical-bibtex-end -->
82 *
83 <!-- options-start -->
84 * Valid options are: <p/>
85 *
86 * <pre> -S &lt;num&gt;
87 *  Specify the sigma parameter (default: sqrt(800000)</pre>
88 *
89 <!-- options-end -->
90 *
91 * @author Jimmy Foulds
92 * @author Eibe Frank
93 * @version $Revision: 5987 $
94 */
95public class MILESFilter
96  extends SimpleBatchFilter implements UnsupervisedFilter, OptionHandler, TechnicalInformationHandler
97{
98
99  /** For serialization */
100  static final long serialVersionUID = 4694489111366063853L;
101 
102  /** Index of bag attribute */
103  public static final int BAG_ATTRIBUTE = 1;
104
105  /** Index of label attribute */
106  public static final int LABEL_ATTRIBUTE = 2; 
107
108  /** Sigma parameter (default: square root of 800000) */
109  private double m_sigma = Math.sqrt(800000);
110 
111  /** Linked list of all instances collected */
112  private LinkedList<Instance> m_allInsts = null;
113 
114  /**
115   * Returns the tip text for this property
116   */
117  public String sigmaTipText() {
118
119    return "The value of the sigma parameter.";
120  }
121
122  /**
123   * Sets the sigma parameter.
124   */
125  public void setSigma(double sigma)
126  {
127    m_sigma = sigma;
128  }
129 
130  /**
131   * Gets the sigma parameter.
132   */
133  public double getSigma()
134  {
135    return m_sigma;
136  }
137
138  /**
139   * Global info for the filter.
140   */
141  public String globalInfo() {
142    return   "Implements the MILES transformation that maps multiple instance bags into"
143      + " a high-dimensional single-instance feature space."
144      + "\n"
145      + "For more information see:\n\n"
146      + getTechnicalInformation().toString();
147  }
148
149  /**
150   * Returns an instance of a TechnicalInformation object, containing
151   * detailed information about the technical background of this class,
152   * e.g., paper reference or book this class is based on.
153   *
154   * @return            the technical information about this class
155   */
156  public TechnicalInformation getTechnicalInformation() {
157    TechnicalInformation        result;
158    TechnicalInformation        additional;
159     
160    result = new TechnicalInformation(Type.ARTICLE);
161    result.setValue(Field.AUTHOR, "Y. Chen and J. Bi and J.Z. Wang");
162    result.setValue(Field.TITLE, "MILES: Multiple-instance learning via embedded instance selection");
163    result.setValue(Field.JOURNAL, "IEEE PAMI");
164    result.setValue(Field.YEAR, "2006");
165    result.setValue(Field.VOLUME, "28");
166    result.setValue(Field.PAGES, "1931-1947");
167    result.setValue(Field.NUMBER, "12");
168   
169    additional = result.add(Type.INPROCEEDINGS);
170    additional.setValue(Field.AUTHOR, "James Foulds and Eibe Frank");
171    additional.setValue(Field.TITLE, "Revisiting multiple-instance learning via embedded instance selection");
172    additional.setValue(Field.BOOKTITLE, "21st Australasian Joint Conference on Artificial Intelligence");
173    additional.setValue(Field.YEAR, "2008");
174    additional.setValue(Field.PAGES, "300-310");
175    additional.setValue(Field.PUBLISHER, "Springer");
176   
177    return result;
178  }
179
180  /**
181   * Capabilities for the filter.
182   */
183  public Capabilities getCapabilities() {
184    Capabilities result = super.getCapabilities();
185    result.enable(Capability.ONLY_MULTIINSTANCE);
186    return result;
187  }
188
189  /**
190   * Determines the output format for the filter.
191   */
192  protected Instances determineOutputFormat(Instances inputFormat) {
193
194    // Create attributes
195    FastVector atts = new FastVector();
196    m_allInsts = new LinkedList<Instance>();
197    for (int i = 0; i < getInputFormat().numInstances(); i++)
198      {
199        Instances bag = getInputFormat().instance(i).relationalValue(BAG_ATTRIBUTE);
200        for (int j = 0; j < bag.numInstances(); j++) 
201          {
202            m_allInsts.add(bag.instance(j));
203          }
204      }
205    for (int i = 0; i < m_allInsts.size(); i++)
206      {
207        atts.addElement(new Attribute("" + i));
208      }
209    atts.addElement(inputFormat.attribute(LABEL_ATTRIBUTE)); //class
210 
211    //TODO set relation name properly
212    Instances returner = new Instances("", atts, 0);
213    returner.setClassIndex(returner.numAttributes() - 1);
214
215    return returner;
216  }
217
218  /**
219   * Processes a set of instances.
220   */
221  protected Instances process(Instances inst)
222  {
223   
224    // Get instances object with correct output format
225    Instances result = getOutputFormat();
226    result.setClassIndex(result.numAttributes() - 1);
227
228    // Can't do much if bag is empty
229    if (inst.numInstances() == 0)
230      {
231        return result;
232      }
233   
234    // Go through all the instances in the bag to be transformed
235    for (int i = 0; i < inst.numInstances(); i++) //for every bag
236      {
237
238        // Allocate memory for instance
239        double[] outputInstance = new double[result.numAttributes()];
240       
241        // Get the bag
242        Instances bag = inst.instance(i).relationalValue(BAG_ATTRIBUTE);
243        int k = 0;
244        for (Instance x_k : m_allInsts) //for every instance in every bag
245          {
246            //TODO handle empty bags
247            double dSquared = Double.MAX_VALUE;
248            for (int j = 0; j < bag.numInstances(); j++) //for every instance in the current bag
249              {
250                // Compute sum of squared differences
251                double total = 0;
252                Instance x_ij = bag.instance(j);
253                double numMissingValues = 0;
254                for (int l = 0; l < x_k.numAttributes(); l++) //for every attribute
255                  {
256                    // Can skip missing values in reference instance
257                    if (x_k.isMissing(l)) {
258                      continue;
259                    }
260                    // Need to keep track of how many values in current instance are missing
261                    if (!x_ij.isMissing(l)) {
262                      total += (x_ij.value(l) - x_k.value(l)) * (x_ij.value(l) - x_k.value(l));
263                    } else {
264                      numMissingValues++;
265                    }
266                  }
267                // Adjust for missing values
268                total *= x_k.numAttributes() / (x_k.numAttributes() - numMissingValues);
269
270                // Update minimum
271                if (total < dSquared || dSquared == Double.MAX_VALUE) 
272                  {
273                    dSquared = total;
274                  }
275              }
276            if (dSquared == Double.MAX_VALUE)
277              outputInstance[k] = 0; //TODO is this ok?
278            else
279              outputInstance[k] = Math.exp(-1.0 * dSquared / (m_sigma * m_sigma));
280            k++;
281          }
282
283        // Set class label
284        double label = inst.instance(i).value(LABEL_ATTRIBUTE);
285        outputInstance[outputInstance.length - 1] = label;
286     
287        // Add instance to result
288        result.add(new DenseInstance(inst.instance(i).weight(), outputInstance));
289      }
290   
291    return result;
292  }
293 
294  /**
295   * Returns an enumeration describing the available options.
296   *
297   * @return an enumeration of all the available options.
298   */
299  public Enumeration listOptions() {
300   
301    FastVector newVector = new FastVector(1);
302   
303    newVector.addElement(new Option(
304                                    "\tSpecify the sigma parameter (default: sqrt(800000)",
305                                    "S", 1, "-S <num>"));
306   
307    return newVector.elements();
308  }
309 
310 
311  /**
312   * Parses a given list of options. <p/>
313   *
314   <!-- options-start -->
315   * Valid options are: <p/>
316   *
317   * <pre> -S &lt;num&gt;
318   *  Specify the sigma parameter (default: sqrt(800000)</pre>
319   *
320   <!-- options-end -->
321   *
322   * @param options the list of options as an array of strings
323   * @throws Exception if an option is not supported
324   */
325  public void setOptions(String[] options) throws Exception {
326
327    String sigmaString = Utils.getOption('S', options);
328    if (sigmaString.length() != 0) {
329      setSigma(Double.parseDouble(sigmaString));
330    } else {
331      setSigma(Math.sqrt(800000));
332    }
333  }
334 
335  /**
336   * Gets the current settings of the filter.
337   *
338   * @return an array of strings suitable for passing to setOptions
339   */
340  public String [] getOptions() {
341   
342    String [] options = new String [2];
343    int current = 0;
344   
345    options[current++] = "-S"; options[current++] = "" + getSigma();
346   
347    while (current < options.length) {
348      options[current++] = "";
349    }
350    return options;
351  }
352 
353  public static void main(String[] args)
354  {
355    runFilter(new MILESFilter(), args);
356  }
357 
358  public String getRevision() {
359    return RevisionUtils.extract("$Revision: 5987 $");
360  }
361}
362
Note: See TracBrowser for help on using the repository browser.