source: branches/MetisMQI/src/main/java/weka/filters/unsupervised/attribute/KernelFilter.java

Last change on this file was 29, checked in by gnappo, 15 years ago

Taggata versione per la demo e aggiunto branch.

File size: 26.8 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * KernelFilter.java
19 * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.filters.unsupervised.attribute;
24
25import weka.classifiers.functions.supportVector.Kernel;
26import weka.classifiers.functions.supportVector.PolyKernel;
27import weka.classifiers.functions.supportVector.RBFKernel;
28import weka.core.Attribute;
29import weka.core.Capabilities;
30import weka.core.FastVector;
31import weka.core.Instance; 
32import weka.core.DenseInstance;
33import weka.core.Instances;
34import weka.core.MathematicalExpression;
35import weka.core.Option;
36import weka.core.OptionHandler;
37import weka.core.RevisionUtils;
38import weka.core.SingleIndex;
39import weka.core.TechnicalInformation;
40import weka.core.TechnicalInformationHandler;
41import weka.core.Utils;
42import weka.core.Capabilities.Capability;
43import weka.core.TechnicalInformation.Field;
44import weka.core.TechnicalInformation.Type;
45import weka.core.converters.ConverterUtils.DataSource;
46import weka.filters.AllFilter;
47import weka.filters.Filter;
48import weka.filters.SimpleBatchFilter;
49import weka.filters.UnsupervisedFilter;
50
51import java.io.File;
52import java.util.Enumeration;
53import java.util.HashMap;
54import java.util.Vector;
55
56/**
57 <!-- globalinfo-start -->
58 * Converts the given set of predictor variables into a kernel matrix. The class value remains unchanged, as long as the preprocessing filter doesn't change it.<br/>
59 * By default, the data is preprocessed with the Center filter, but the user can choose any filter (NB: one must be careful that the filter does not alter the class attribute unintentionally). With weka.filters.AllFilter the preprocessing gets disabled.<br/>
60 * <br/>
61 * For more information regarding preprocessing the data, see:<br/>
62 * <br/>
63 * K.P. Bennett, M.J. Embrechts: An Optimization Perspective on Kernel Partial Least Squares Regression. In: Advances in Learning Theory: Methods, Models and Applications, 227-249, 2003.
64 * <p/>
65 <!-- globalinfo-end -->
66 *
67 <!-- technical-bibtex-start -->
68 * BibTeX:
69 * <pre>
70 * &#64;inproceedings{Bennett2003,
71 *    author = {K.P. Bennett and M.J. Embrechts},
72 *    booktitle = {Advances in Learning Theory: Methods, Models and Applications},
73 *    editor = {J. Suykens et al.},
74 *    pages = {227-249},
75 *    publisher = {IOS Press, Amsterdam, The Netherlands},
76 *    series = {NATO Science Series, Series III: Computer and System Sciences},
77 *    title = {An Optimization Perspective on Kernel Partial Least Squares Regression},
78 *    volume = {190},
79 *    year = {2003}
80 * }
81 * </pre>
82 * <p/>
83 <!-- technical-bibtex-end -->
84 *
85 <!-- options-start -->
86 * Valid options are: <p/>
87 *
88 * <pre> -D
89 *  Turns on output of debugging information.</pre>
90 *
91 * <pre> -no-checks
92 *  Turns off all checks - use with caution!
93 *  Turning them off assumes that data is purely numeric, doesn't
94 *  contain any missing values, and has a nominal class. Turning them
95 *  off also means that no header information will be stored if the
96 *  machine is linear. Finally, it also assumes that no instance has
97 *  a weight equal to 0.
98 *  (default: checks on)</pre>
99 *
100 * <pre> -F &lt;filename&gt;
101 *  The file to initialize the filter with (optional).</pre>
102 *
103 * <pre> -C &lt;num&gt;
104 *  The class index for the file to initialize with,
105 *  First and last are valid (optional, default: last).</pre>
106 *
107 * <pre> -K &lt;classname and parameters&gt;
108 *  The Kernel to use.
109 *  (default: weka.classifiers.functions.supportVector.PolyKernel)</pre>
110 *
111 * <pre> -kernel-factor
112 *  Defines a factor for the kernel.
113 *   - RBFKernel: a factor for gamma
114 *    Standardize: 1/(2*N)
115 *    Normalize..: 6/N
116 *  Available parameters are:
117 *   N for # of instances, A for # of attributes
118 *  (default: 1)</pre>
119 *
120 * <pre> -P &lt;classname and parameters&gt;
121 *  The Filter used for preprocessing (use weka.filters.AllFilter
122 *  to disable preprocessing).
123 *  (default: weka.filters.unsupervised.attribute.Center)</pre>
124 *
125 * <pre>
126 * Options specific to kernel weka.classifiers.functions.supportVector.PolyKernel:
127 * </pre>
128 *
129 * <pre> -D
130 *  Enables debugging output (if available) to be printed.
131 *  (default: off)</pre>
132 *
133 * <pre> -no-checks
134 *  Turns off all checks - use with caution!
135 *  (default: checks on)</pre>
136 *
137 * <pre> -C &lt;num&gt;
138 *  The size of the cache (a prime number), 0 for full cache and
139 *  -1 to turn it off.
140 *  (default: 250007)</pre>
141 *
142 * <pre> -E &lt;num&gt;
143 *  The Exponent to use.
144 *  (default: 1.0)</pre>
145 *
146 * <pre> -L
147 *  Use lower-order terms.
148 *  (default: no)</pre>
149 *
150 * <pre>
151 * Options specific to preprocessing filter weka.filters.unsupervised.attribute.Center:
152 * </pre>
153 *
154 * <pre> -unset-class-temporarily
155 *  Unsets the class index temporarily before the filter is
156 *  applied to the data.
157 *  (default: no)</pre>
158 *
159 <!-- options-end -->
160 *
161 * @author Jonathan Miles (jdm18@cs.waikato.ac.nz)
162 * @author FracPete (fracpete at waikato dot ac dot nz)
163 * @version $Revision: 5987 $
164 */
165public class KernelFilter
166  extends SimpleBatchFilter
167  implements UnsupervisedFilter, TechnicalInformationHandler {
168
169  /** Fixed version identifier used for serialization. */
170  static final long serialVersionUID = 213800899640387499L;
171
172  /**
   * Number of instances in the (preprocessed) initialization data; set in
   * initFilter() and used by process() as the number of generated
   * "Kernel j" attributes.
   */
173  protected int m_NumTrainInstances;
174
175  /** The kernel configured by the user (template; copied in initFilter()). */
176  protected Kernel m_Kernel = new PolyKernel();
177
178  /**
   * The copy of {@link #m_Kernel} that is built in initFilter() and
   * evaluated in process().
   */
179  protected Kernel m_ActualKernel = null;
180
181  /**
   * Whether the checks are turned off. When true, initFilter() neither
   * sets up the ReplaceMissingValues filter nor scans the attributes for
   * non-numeric ones (so no NominalToBinary filter is created); the data
   * is then assumed to be numeric and free of missing values.
   */
186  protected boolean m_checksTurnedOff;
187
188  /**
   * The filter used to make attributes numeric; null unless initFilter()
   * found a non-numeric, non-class attribute.
   */
189  protected NominalToBinary m_NominalToBinary;
190
191  /**
   * The filter used to get rid of missing values; null when the checks
   * are turned off.
   */
192  protected ReplaceMissingValues m_Missing;
193
194  /**
   * The dataset to initialize the filter with. Defaults to the current
   * working directory; it is only used (and only written out by
   * getOptions()) when it points to an actual file.
   */
195  protected File m_InitFile = new File(System.getProperty("user.dir"));
196
197  /** The class index for the file to initialize with.
198   * @see #m_InitFile */
199  protected SingleIndex m_InitFileClassIndex = new SingleIndex("last");
200 
201  /**
   * Whether the filter was initialized, i.e., initFilter() completed;
   * cleared again by reset().
   */
202  protected boolean m_Initialized = false;
203
204  /** The expression for the kernel factor, evaluated in initFilter()
205   * (A = # of attributes, N = # of instances). */
206  protected String m_KernelFactorExpression = "1";
207
208  /** The calculated kernel factor; multiplied into the gamma of an RBFKernel.
209   * @see #m_KernelFactorExpression */
210  protected double m_KernelFactor = 1.0;
211 
212  /** The preprocessing filter configured by the user (template; copied in initFilter()). */
213  protected Filter m_Filter = new Center();
214 
215  /**
   * The copy of {@link #m_Filter} that is actually applied to the data;
   * null if no filter is set or the filter is an AllFilter.
   */
216  protected Filter m_ActualFilter = null;
217 
218  /**
219   * Returns a string describing this filter.
220   *
221   * @return      a description of the filter suitable for
222   *              displaying in the explorer/experimenter gui
223   */
224  public String globalInfo() {
225    return 
226        "Converts the given set of predictor variables into a kernel matrix. "
227      + "The class value remains unchangedm, as long as the preprocessing "
228      + "filter doesn't change it.\n"
229      + "By default, the data is preprocessed with the Center filter, but the "
230      + "user can choose any filter (NB: one must be careful that the filter "
231      + "does not alter the class attribute unintentionally). With "
232      + "weka.filters.AllFilter the preprocessing gets disabled.\n\n"
233      + "For more information regarding preprocessing the data, see:\n\n"
234      + getTechnicalInformation().toString();
235  }
236
237  /**
238   * Returns an instance of a TechnicalInformation object, containing
239   * detailed information about the technical background of this class,
240   * e.g., paper reference or book this class is based on.
241   *
242   * @return the technical information about this class
243   */
244  public TechnicalInformation getTechnicalInformation() {
245    TechnicalInformation        result;
246   
247    result = new TechnicalInformation(Type.INPROCEEDINGS);
248    result.setValue(Field.AUTHOR, "K.P. Bennett and M.J. Embrechts");
249    result.setValue(Field.TITLE, "An Optimization Perspective on Kernel Partial Least Squares Regression");
250    result.setValue(Field.YEAR, "2003");
251    result.setValue(Field.EDITOR, "J. Suykens et al.");
252    result.setValue(Field.BOOKTITLE, "Advances in Learning Theory: Methods, Models and Applications");
253    result.setValue(Field.PAGES, "227-249");
254    result.setValue(Field.PUBLISHER, "IOS Press, Amsterdam, The Netherlands");
255    result.setValue(Field.SERIES, "NATO Science Series, Series III: Computer and System Sciences");
256    result.setValue(Field.VOLUME, "190");
257   
258    return result;
259  }
260
261  /**
262   * Returns an enumeration describing the available options.
263   *
264   * @return an enumeration of all the available options.
265   */
266  public Enumeration listOptions() {
267    Vector        result;
268    Enumeration   enm;
269
270    result = new Vector();
271
272    enm = super.listOptions();
273    while (enm.hasMoreElements())
274      result.addElement(enm.nextElement());
275   
276    result.addElement(new Option(
277        "\tTurns off all checks - use with caution!\n"
278        + "\tTurning them off assumes that data is purely numeric, doesn't\n"
279        + "\tcontain any missing values, and has a nominal class. Turning them\n"
280        + "\toff also means that no header information will be stored if the\n"
281        + "\tmachine is linear. Finally, it also assumes that no instance has\n"
282        + "\ta weight equal to 0.\n"
283        + "\t(default: checks on)",
284        "no-checks", 0, "-no-checks"));
285
286    result.addElement(new Option(
287        "\tThe file to initialize the filter with (optional).",
288        "F", 1, "-F <filename>"));
289
290    result.addElement(new Option(
291        "\tThe class index for the file to initialize with,\n"
292        + "\tFirst and last are valid (optional, default: last).",
293        "C", 1, "-C <num>"));
294
295    result.addElement(new Option(
296        "\tThe Kernel to use.\n"
297        + "\t(default: weka.classifiers.functions.supportVector.PolyKernel)",
298        "K", 1, "-K <classname and parameters>"));
299
300    result.addElement(new Option(
301        "\tDefines a factor for the kernel.\n"
302        + "\t\t- RBFKernel: a factor for gamma\n"
303        + "\t\t\tStandardize: 1/(2*N)\n"
304        + "\t\t\tNormalize..: 6/N\n"
305        + "\tAvailable parameters are:\n"
306        + "\t\tN for # of instances, A for # of attributes\n"
307        + "\t(default: 1)",
308        "kernel-factor", 0, "-kernel-factor"));
309
310    result.addElement(new Option(
311        "\tThe Filter used for preprocessing (use weka.filters.AllFilter\n"
312        + "\tto disable preprocessing).\n"
313        + "\t(default: " + Center.class.getName() + ")",
314        "P", 1, "-P <classname and parameters>"));
315
316    // kernel options
317    result.addElement(new Option(
318        "",
319        "", 0, "\nOptions specific to kernel "
320        + getKernel().getClass().getName() + ":"));
321   
322    enm = ((OptionHandler) getKernel()).listOptions();
323    while (enm.hasMoreElements())
324      result.addElement(enm.nextElement());
325
326    // filter options
327    if (getPreprocessing() instanceof OptionHandler) {
328      result.addElement(new Option(
329          "",
330          "", 0, "\nOptions specific to preprocessing filter "
331          + getPreprocessing().getClass().getName() + ":"));
332
333      enm = ((OptionHandler) getPreprocessing()).listOptions();
334      while (enm.hasMoreElements())
335        result.addElement(enm.nextElement());
336    }
337   
338    return result.elements();
339  }       
340
341  /**
342   * Gets the current settings of the filter.
343   *
344   * @return an array of strings suitable for passing to setOptions
345   */
346  public String[] getOptions() {
347    int         i;
348    Vector      result;
349    String[]    options;
350    String      tmpStr;
351
352    result = new Vector();
353    options = super.getOptions();
354    for (i = 0; i < options.length; i++)
355      result.add(options[i]);
356   
357    if (getChecksTurnedOff())
358      result.add("-no-checks");
359
360    if ((getInitFile() != null) && getInitFile().isFile()) {
361      result.add("-F");
362      result.add("" + getInitFile().getAbsolutePath());
363
364      result.add("-C");
365      result.add("" + getInitFileClassIndex());
366    }
367
368    result.add("-K");
369    result.add("" + getKernel().getClass().getName() + " " + Utils.joinOptions(getKernel().getOptions()));
370
371    result.add("-kernel-factor");
372    result.add("" + getKernelFactorExpression());
373
374    result.add("-P");
375    tmpStr = getPreprocessing().getClass().getName();
376    if (getPreprocessing() instanceof OptionHandler)
377      tmpStr += " " + Utils.joinOptions(((OptionHandler) getPreprocessing()).getOptions());
378    result.add("" + tmpStr);
379
380    return (String[]) result.toArray(new String[result.size()]);         
381  }       
382
383  /**
384   * Parses a given list of options. <p/>
385   *
386   <!-- options-start -->
387   * Valid options are: <p/>
388   *
389   * <pre> -D
390   *  Turns on output of debugging information.</pre>
391   *
392   * <pre> -no-checks
393   *  Turns off all checks - use with caution!
394   *  Turning them off assumes that data is purely numeric, doesn't
395   *  contain any missing values, and has a nominal class. Turning them
396   *  off also means that no header information will be stored if the
397   *  machine is linear. Finally, it also assumes that no instance has
398   *  a weight equal to 0.
399   *  (default: checks on)</pre>
400   *
401   * <pre> -F &lt;filename&gt;
402   *  The file to initialize the filter with (optional).</pre>
403   *
404   * <pre> -C &lt;num&gt;
405   *  The class index for the file to initialize with,
406   *  First and last are valid (optional, default: last).</pre>
407   *
408   * <pre> -K &lt;classname and parameters&gt;
409   *  The Kernel to use.
410   *  (default: weka.classifiers.functions.supportVector.PolyKernel)</pre>
411   *
412   * <pre> -kernel-factor
413   *  Defines a factor for the kernel.
414   *   - RBFKernel: a factor for gamma
415   *    Standardize: 1/(2*N)
416   *    Normalize..: 6/N
417   *  Available parameters are:
418   *   N for # of instances, A for # of attributes
419   *  (default: 1)</pre>
420   *
421   * <pre> -P &lt;classname and parameters&gt;
422   *  The Filter used for preprocessing (use weka.filters.AllFilter
423   *  to disable preprocessing).
424   *  (default: weka.filters.unsupervised.attribute.Center)</pre>
425   *
426   * <pre>
427   * Options specific to kernel weka.classifiers.functions.supportVector.PolyKernel:
428   * </pre>
429   *
430   * <pre> -D
431   *  Enables debugging output (if available) to be printed.
432   *  (default: off)</pre>
433   *
434   * <pre> -no-checks
435   *  Turns off all checks - use with caution!
436   *  (default: checks on)</pre>
437   *
438   * <pre> -C &lt;num&gt;
439   *  The size of the cache (a prime number), 0 for full cache and
440   *  -1 to turn it off.
441   *  (default: 250007)</pre>
442   *
443   * <pre> -E &lt;num&gt;
444   *  The Exponent to use.
445   *  (default: 1.0)</pre>
446   *
447   * <pre> -L
448   *  Use lower-order terms.
449   *  (default: no)</pre>
450   *
451   * <pre>
452   * Options specific to preprocessing filter weka.filters.unsupervised.attribute.Center:
453   * </pre>
454   *
455   * <pre> -unset-class-temporarily
456   *  Unsets the class index temporarily before the filter is
457   *  applied to the data.
458   *  (default: no)</pre>
459   *
460   <!-- options-end -->
461   *
462   * @param options the list of options as an array of strings
463   * @throws Exception if an option is not supported
464   */
465  public void setOptions(String[] options) throws Exception {
466    String      tmpStr;
467    String[]    tmpOptions;
468   
469    setChecksTurnedOff(Utils.getFlag("no-checks", options));
470
471    tmpStr = Utils.getOption('F', options);
472    if (tmpStr.length() != 0)
473      setInitFile(new File(tmpStr));
474    else 
475      setInitFile(null);
476
477    tmpStr = Utils.getOption('C', options);
478    if (tmpStr.length() != 0)
479      setInitFileClassIndex(tmpStr);
480    else 
481      setInitFileClassIndex("last");
482
483    tmpStr     = Utils.getOption('K', options);
484    tmpOptions = Utils.splitOptions(tmpStr);
485    if (tmpOptions.length != 0) {
486      tmpStr        = tmpOptions[0];
487      tmpOptions[0] = "";
488      setKernel(Kernel.forName(tmpStr, tmpOptions));
489    }
490   
491    tmpStr = Utils.getOption("kernel-factor", options);
492    if (tmpStr.length() != 0)
493      setKernelFactorExpression(tmpStr);
494    else 
495      setKernelFactorExpression("1");
496   
497    tmpStr = Utils.getOption("P", options);
498    tmpOptions = Utils.splitOptions(tmpStr);
499    if (tmpOptions.length != 0) {
500      tmpStr        = tmpOptions[0];
501      tmpOptions[0] = "";
502      setPreprocessing((Filter) Utils.forName(Filter.class, tmpStr, tmpOptions));
503    }
504    else {
505      setPreprocessing(new Center());
506    }
507
508    super.setOptions(options);
509  }       
510 
511  /**
512   * Returns the tip text for this property
513   *
514   * @return            tip text for this property suitable for
515   *                    displaying in the explorer/experimenter gui
516   */
517  public String initFileTipText() {
518    return "The dataset to initialize the filter with.";
519  }
520
521  /**
522   * Gets the file to initialize the filter with, can be null.
523   *
524   * @return            the file
525   */
526  public File getInitFile() {
527    return m_InitFile;
528  }
529   
530  /**
531   * Sets the file to initialize the filter with, can be null.
532   *
533   * @param value       the file
534   */
535  public void setInitFile(File value) {
536    m_InitFile = value;
537  }
538 
539  /**
540   * Returns the tip text for this property
541   *
542   * @return            tip text for this property suitable for
543   *                    displaying in the explorer/experimenter gui
544   */
545  public String initFileClassIndexTipText() {
546    return "The class index of the dataset to initialize the filter with (first and last are valid).";
547  }
548
549  /**
550   * Gets the class index of the file to initialize the filter with.
551   *
552   * @return            the class index
553   */
554  public String getInitFileClassIndex() {
555    return m_InitFileClassIndex.getSingleIndex();
556  }
557   
558  /**
559   * Sets class index of the file to initialize the filter with.
560   *
561   * @param value       the class index
562   */
563  public void setInitFileClassIndex(String value) {
564    m_InitFileClassIndex.setSingleIndex(value);
565  }
566 
567  /**
568   * Returns the tip text for this property
569   *
570   * @return            tip text for this property suitable for
571   *                    displaying in the explorer/experimenter gui
572   */
573  public String kernelTipText() {
574    return "The kernel to use.";
575  }
576
577  /**
578   * Gets the kernel to use.
579   *
580   * @return            the kernel
581   */
582  public Kernel getKernel() {
583    return m_Kernel;
584  }
585   
586  /**
587   * Sets the kernel to use.
588   *
589   * @param value       the kernel
590   */
591  public void setKernel(Kernel value) {
592    m_Kernel = value;
593  }
594
595  /**
596   * Disables or enables the checks (which could be time-consuming). Use with
597   * caution!
598   *
599   * @param value       if true turns off all checks
600   */
601  public void setChecksTurnedOff(boolean value) {
602    m_checksTurnedOff = value;
603  }
604 
605  /**
606   * Returns whether the checks are turned off or not.
607   *
608   * @return            true if the checks are turned off
609   */
610  public boolean getChecksTurnedOff() {
611    return m_checksTurnedOff;
612  }
613
614  /**
615   * Returns the tip text for this property
616   *
617   * @return            tip text for this property suitable for
618   *                    displaying in the explorer/experimenter gui
619   */
620  public String checksTurnedOffTipText() {
621    return "Turns time-consuming checks off - use with caution.";
622  }
623 
624  /**
625   * Returns the tip text for this property
626   *
627   * @return            tip text for this property suitable for
628   *                    displaying in the explorer/experimenter gui
629   */
630  public String kernelFactorExpressionTipText() {
631    return "The factor for the kernel, with A = # of attributes and N = # of instances.";
632  }
633
634  /**
635   * Gets the expression for the kernel.
636   *
637   * @return            the expression
638   */
639  public String getKernelFactorExpression() {
640    return m_KernelFactorExpression;
641  }
642   
643  /**
644   * Sets the expression for the kernel.
645   *
646   * @param value       the file
647   */
648  public void setKernelFactorExpression(String value) {
649    m_KernelFactorExpression = value;
650  }
651
652  /**
653   * Returns the tip text for this property
654   *
655   * @return            tip text for this property suitable for
656   *                    displaying in the explorer/experimenter gui
657   */
658  public String preprocessingTipText() {
659    return "Sets the filter to use for preprocessing (use the AllFilter for no preprocessing).";
660  }
661
662  /**
663   * Sets the filter to use for preprocessing (use the AllFilter for no
664   * preprocessing)
665   *
666   * @param value       the preprocessing filter
667   */
668  public void setPreprocessing(Filter value) {
669    m_Filter       = value;
670    m_ActualFilter = null;
671  }
672
673  /**
674   * Gets the filter used for preprocessing
675   *
676   * @return            the current preprocessing filter.
677   */
678  public Filter getPreprocessing() {
679    return m_Filter;
680  }
681
682  /**
683   * resets the filter, i.e., m_NewBatch to true and m_FirstBatchDone to
684   * false.
685   */
686  protected void reset() {
687    super.reset();
688   
689    m_Initialized = false;
690  }
691
692  /**
693   * Determines the output format based on the input format and returns
694   * this. In case the output format cannot be returned immediately, i.e.,
695   * immediateOutputFormat() returns false, then this method will be called
696   * from batchFinished().
697   *
698   * @param inputFormat     the input format to base the output format on
699   * @return                the output format
700   * @throws Exception      in case the determination goes wrong
701   * @see   #hasImmediateOutputFormat()
702   * @see   #batchFinished()
703   */
704  protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
705    return new Instances(inputFormat);
706  }
707 
708  /**
709   * initializes the filter with the given dataset, i.e., the kernel gets
710   * built. Needs to be called before the first call of Filter.useFilter or
711   * batchFinished(), if not the -F option (or setInitFile(File) is used).
712   *
713   * @param instances   the data to initialize with
714   * @throws Exception  if building of kernel fails
715   */
716  public void initFilter(Instances instances) throws Exception {
717    HashMap     symbols;
718   
719    // determine kernel factor
720    symbols = new HashMap();
721    symbols.put("A", new Double(instances.numAttributes()));
722    symbols.put("N", new Double(instances.numInstances()));
723    m_KernelFactor = MathematicalExpression.evaluate(getKernelFactorExpression(), symbols);
724   
725    // init filters
726    if (!m_checksTurnedOff) {
727      m_Missing = new ReplaceMissingValues();
728      m_Missing.setInputFormat(instances);
729      instances = Filter.useFilter(instances, m_Missing); 
730    } 
731    else {
732      m_Missing = null;
733    }
734
735    if (getKernel().getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {
736        boolean onlyNumeric = true;
737        if (!m_checksTurnedOff) {
738          for (int i = 0; i < instances.numAttributes(); i++) {
739            if (i != instances.classIndex()) {
740              if (!instances.attribute(i).isNumeric()) {
741                onlyNumeric = false;
742                break;
743              }
744            }
745          }
746        }
747       
748        if (!onlyNumeric) {
749          m_NominalToBinary = new NominalToBinary();
750          m_NominalToBinary.setInputFormat(instances);
751          instances = Filter.useFilter(instances, m_NominalToBinary);
752        } 
753        else {
754          m_NominalToBinary = null;
755        }
756    }
757    else {
758      m_NominalToBinary = null;
759    }
760
761    if ((m_Filter != null) && (m_Filter.getClass() != AllFilter.class)) {
762      m_ActualFilter = Filter.makeCopy(m_Filter);
763      m_ActualFilter.setInputFormat(instances);
764      instances = Filter.useFilter(instances, m_ActualFilter);
765    }
766    else {
767      m_ActualFilter = null;
768    }
769
770    m_NumTrainInstances = instances.numInstances();
771
772    // set factor for kernel
773    m_ActualKernel = Kernel.makeCopy(m_Kernel);
774    if (m_ActualKernel instanceof RBFKernel)
775      ((RBFKernel) m_ActualKernel).setGamma(
776          m_KernelFactor * ((RBFKernel) m_ActualKernel).getGamma());
777    // build kernel
778    m_ActualKernel.buildKernel(instances);
779
780    m_Initialized = true;
781  }
782
783  /**
784   * Returns the Capabilities of this filter.
785   *
786   * @return            the capabilities of this object
787   * @see               Capabilities
788   */
789  public Capabilities getCapabilities() {
790    Capabilities        result;
791   
792    if (getKernel() == null) {
793      result = super.getCapabilities();
794      result.disableAll();
795    } else {
796      result = getKernel().getCapabilities();
797    }
798
799    result.setMinimumNumberInstances(0);
800   
801    return result;
802  }
803
804  /**
805   * Processes the given data (may change the provided dataset) and returns
806   * the modified version. This method is called in batchFinished().
807   *
808   * @param instances   the data to process
809   * @return            the modified data
810   * @throws Exception  in case the processing goes wrong
811   * @see               #batchFinished()
812   */
813  protected Instances process(Instances instances) throws Exception {
814    // initializing necessary?
815    if (!m_Initialized) {
816      // do we have a file to initialize with?
817      if ((getInitFile() != null) && getInitFile().isFile()) {
818        DataSource source = new DataSource(getInitFile().getAbsolutePath());
819        Instances data = source.getDataSet();
820        m_InitFileClassIndex.setUpper(data.numAttributes() - 1);
821        data.setClassIndex(m_InitFileClassIndex.getIndex());
822        initFilter(data);
823      }
824      else {
825        initFilter(instances);
826      }
827    }
828
829    // apply filters
830    if (m_Missing != null)
831      instances = Filter.useFilter(instances, m_Missing); 
832    if (m_NominalToBinary != null)
833      instances = Filter.useFilter(instances, m_NominalToBinary); 
834    if (m_ActualFilter != null)
835      instances = Filter.useFilter(instances, m_ActualFilter);
836
837    // backup class attribute and remove it
838    double[] classes = instances.attributeToDoubleArray(instances.classIndex());
839    int classIndex = instances.classIndex();
840    instances.setClassIndex(-1);
841    instances.deleteAttributeAt(classIndex);
842
843    // generate new header
844    FastVector atts = new FastVector();
845    for (int j = 0; j < m_NumTrainInstances; j++)
846      atts.addElement(new Attribute("Kernel " + j));
847    atts.addElement(new Attribute("Class"));
848    Instances result = new Instances("Kernel", atts, 0);
849    result.setClassIndex(result.numAttributes() - 1);
850
851    // compute matrix
852    for (int i = 0; i < instances.numInstances(); i++) {
853      double[] k = new double[m_NumTrainInstances + 1];
854     
855      for (int j = 0; j < m_NumTrainInstances; j++) {
856        double v = m_ActualKernel.eval(-1, j, instances.instance(i));
857        k[j] = v;
858      }
859      k[k.length - 1] = classes[i];
860
861      // create new instance
862      Instance in = new DenseInstance(1.0, k);
863      result.add(in);   
864    }
865
866    if (!isFirstBatchDone())
867      setOutputFormat(result);
868   
869    return result;
870  }
871 
872  /**
873   * Returns the revision string.
874   *
875   * @return            the revision
876   */
877  public String getRevision() {
878    return RevisionUtils.extract("$Revision: 5987 $");
879  }
880
881  /**
882   * runs the filter with the given arguments
883   *
884   * @param args      the commandline arguments
885   */
886  public static void main(String[] args) {
887    runFilter(new KernelFilter(), args);
888  }
889}
Note: See TracBrowser for help on using the repository browser.