source: src/main/java/weka/classifiers/mi/CitationKNN.java @ 24

Last change on this file since 24 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 33.3 KB
RevLine 
[4]1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 * CitationKNN.java
19 * Copyright (C) 2005 Miguel Garcia Torres
20 */
21
22package weka.classifiers.mi;
23
24import weka.classifiers.Classifier;
25import weka.classifiers.AbstractClassifier;
26import weka.core.Capabilities;
27import weka.core.Instance;
28import weka.core.Instances;
29import weka.core.MultiInstanceCapabilitiesHandler;
30import weka.core.Option;
31import weka.core.OptionHandler;
32import weka.core.RevisionHandler;
33import weka.core.RevisionUtils;
34import weka.core.TechnicalInformation;
35import weka.core.TechnicalInformationHandler;
36import weka.core.Utils;
37import weka.core.Capabilities.Capability;
38import weka.core.TechnicalInformation.Field;
39import weka.core.TechnicalInformation.Type;
40
41import java.io.Serializable;
42import java.util.Enumeration;
43import java.util.Vector;
44/**
45 <!-- globalinfo-start -->
46 * Modified version of the Citation kNN multi instance classifier.<br/>
47 * <br/>
48 * For more information see:<br/>
49 * <br/>
50 * Jun Wang, Zucker, Jean-Daniel: Solving Multiple-Instance Problem: A Lazy Learning Approach. In: 17th International Conference on Machine Learning, 1119-1125, 2000.
51 * <p/>
52 <!-- globalinfo-end -->
53 *
54 <!-- technical-bibtex-start -->
55 * BibTeX:
56 * <pre>
57 * &#64;inproceedings{Wang2000,
58 *    author = {Jun Wang and Zucker and Jean-Daniel},
59 *    booktitle = {17th International Conference on Machine Learning},
60 *    editor = {Pat Langley},
61 *    pages = {1119-1125},
62 *    title = {Solving Multiple-Instance Problem: A Lazy Learning Approach},
63 *    year = {2000}
64 * }
65 * </pre>
66 * <p/>
67 <!-- technical-bibtex-end -->
68 *
69 <!-- options-start -->
70 * Valid options are: <p/>
71 *
72 * <pre> -R &lt;number of references&gt;
73 *  Number of Nearest References (default 1)</pre>
74 *
75 * <pre> -C &lt;number of citers&gt;
76 *  Number of Nearest Citers (default 1)</pre>
77 *
78 * <pre> -H &lt;rank&gt;
79 *  Rank of the Hausdorff Distance (default 1)</pre>
80 *
81 <!-- options-end -->
82 *
83 * @author Miguel Garcia Torres (mgarciat@ull.es)
84 * @version $Revision: 5928 $
85 */
86public class CitationKNN 
87  extends AbstractClassifier
88  implements OptionHandler, MultiInstanceCapabilitiesHandler,
89             TechnicalInformationHandler {
90
91  /** for serialization */
92  static final long serialVersionUID = -8435377743874094852L;
93 
94  /** The index of the class attribute */
95  protected int m_ClassIndex;
96
97  /** The number of the class labels */
98  protected int m_NumClasses;
99
100  /** */
101  protected int m_IdIndex;   
102
103  /** Debugging output */
104  protected boolean m_Debug;
105
106  /** Class labels for each bag */
107  protected int[] m_Classes;
108
109  /** attribute name structure of the relational attribute*/
110  protected Instances m_Attributes;
111
112  /** Number of references */
113  protected int m_NumReferences = 1;
114
115  /** Number of citers*/
116  protected int m_NumCiters = 1;
117
118  /** Training bags*/
119  protected Instances m_TrainBags;
120
121  /** Different debugging output */
122  protected boolean m_CNNDebug = false;
123
124  protected boolean m_CitersDebug = false;
125
126  protected boolean m_ReferencesDebug = false;
127
128  protected boolean m_HDistanceDebug = false;
129
130  protected boolean m_NeighborListDebug = false;
131
132  /** C nearest neighbors considering all the bags*/
133  protected NeighborList[] m_CNN;
134
135  /** C nearest citers */
136  protected int[] m_Citers;
137
138  /** R nearest references */
139  protected int[] m_References;
140
141  /** Rank associated to the Hausdorff distance*/
142  protected int m_HDRank = 1;
143
144  /** Normalization of the euclidean distance */
145  private double[] m_Diffs;
146
147  private double[] m_Min;
148
149  private double m_MinNorm = 0.95;
150
151  private double[] m_Max;
152
153  private double m_MaxNorm = 1.05;
154
155  /**
156   * Returns a string describing this filter
157   *
158   * @return a description of the filter suitable for
159   * displaying in the explorer/experimenter gui
160   */
161  public String globalInfo() {
162    return 
163        "Modified version of the Citation kNN multi instance classifier.\n\n"
164      + "For more information see:\n\n"
165      + getTechnicalInformation().toString();
166  }
167
168  /**
169   * Returns an instance of a TechnicalInformation object, containing
170   * detailed information about the technical background of this class,
171   * e.g., paper reference or book this class is based on.
172   *
173   * @return the technical information about this class
174   */
175  public TechnicalInformation getTechnicalInformation() {
176    TechnicalInformation        result;
177   
178    result = new TechnicalInformation(Type.INPROCEEDINGS);
179    result.setValue(Field.AUTHOR, "Jun Wang and Zucker and Jean-Daniel");
180    result.setValue(Field.TITLE, "Solving Multiple-Instance Problem: A Lazy Learning Approach");
181    result.setValue(Field.BOOKTITLE, "17th International Conference on Machine Learning");
182    result.setValue(Field.EDITOR, "Pat Langley");
183    result.setValue(Field.YEAR, "2000");
184    result.setValue(Field.PAGES, "1119-1125");
185   
186    return result;
187  }
188
189  /**
190   * Calculates the normalization of each attribute.
191   */
192  public void preprocessData(){
193    int i,j, k;
194    double min, max;
195    Instances instances;
196    Instance instance;
197    // compute the min/max of each feature
198
199    for (i=0;i<m_Attributes.numAttributes();i++) {
200      min=Double.POSITIVE_INFINITY ;
201      max=Double.NEGATIVE_INFINITY ;
202      for(j = 0; j < m_TrainBags.numInstances(); j++){
203        instances = m_TrainBags.instance(j).relationalValue(1);
204        for (k=0;k<instances.numInstances();k++) {
205          instance = instances.instance(k);
206          if(instance.value(i) < min)
207            min= instance.value(i);
208          if(instance.value(i) > max)
209            max= instance.value(i);
210        }
211      }
212      m_Min[i] = min * m_MinNorm;
213      m_Max[i] = max * m_MaxNorm;
214      m_Diffs[i]= max * m_MaxNorm - min * m_MinNorm;
215    }       
216
217  }
218
219  /**
220   * Returns the tip text for this property
221   *
222   * @return tip text for this property suitable for
223   * displaying in the explorer/experimenter gui
224   */
225  public String HDRankTipText() {
226    return "The rank associated to the Hausdorff distance.";
227  }
228
229  /**
230   * Sets the rank associated to the Hausdorff distance
231   * @param hDRank the rank of the Hausdorff distance
232   */
233  public void setHDRank(int hDRank){
234    m_HDRank = hDRank;
235  }
236
237  /**
238   * Returns the rank associated to the Hausdorff distance
239   * @return the rank number
240   */
241  public int getHDRank(){
242    return m_HDRank;
243  }
244
245  /**
246   * Returns the tip text for this property
247   *
248   * @return tip text for this property suitable for
249   * displaying in the explorer/experimenter gui
250   */
251  public String numReferencesTipText() {
252    return 
253        "The number of references considered to estimate the class "
254      + "prediction of tests bags.";
255  }
256
257  /**
258   * Sets the number of references considered to estimate
259   * the class prediction of tests bags
260   * @param numReferences the number of references
261   */
262  public void setNumReferences(int numReferences){
263    m_NumReferences = numReferences;
264  }
265
266  /**
267   * Returns the number of references considered to estimate
268   * the class prediction of tests bags
269   * @return the number of references
270   */
271  public int getNumReferences(){
272    return m_NumReferences;
273  }
274
275  /**
276   * Returns the tip text for this property
277   *
278   * @return tip text for this property suitable for
279   * displaying in the explorer/experimenter gui
280   */
281  public String numCitersTipText() {
282    return 
283        "The number of citers considered to estimate the class "
284      + "prediction of test bags.";
285  }
286
287  /**
288   * Sets the number of citers considered to estimate
289   * the class prediction of tests bags
290   * @param numCiters the number of citers
291   */
292  public void setNumCiters(int numCiters){
293    m_NumCiters = numCiters;
294  }
295
296  /**
297   * Returns the number of citers considered to estimate
298   * the class prediction of tests bags
299   * @return the number of citers
300   */
301  public int getNumCiters(){
302    return m_NumCiters;
303  }
304
305  /**
306   * Returns default capabilities of the classifier.
307   *
308   * @return      the capabilities of this classifier
309   */
310  public Capabilities getCapabilities() {
311    Capabilities result = super.getCapabilities();
312    result.disableAll();
313
314    // attributes
315    result.enable(Capability.NOMINAL_ATTRIBUTES);
316    result.enable(Capability.NUMERIC_ATTRIBUTES);
317    result.enable(Capability.DATE_ATTRIBUTES);
318    result.enable(Capability.RELATIONAL_ATTRIBUTES);
319    result.enable(Capability.MISSING_VALUES);
320
321    // class
322    result.enable(Capability.NOMINAL_CLASS);
323    result.enable(Capability.MISSING_CLASS_VALUES);
324   
325    // other
326    result.enable(Capability.ONLY_MULTIINSTANCE);
327   
328    return result;
329  }
330
331  /**
332   * Returns the capabilities of this multi-instance classifier for the
333   * relational data.
334   *
335   * @return            the capabilities of this object
336   * @see               Capabilities
337   */
338  public Capabilities getMultiInstanceCapabilities() {
339    Capabilities result = super.getCapabilities();
340    result.disableAll();
341   
342    // attributes
343    result.enable(Capability.NOMINAL_ATTRIBUTES);
344    result.enable(Capability.NUMERIC_ATTRIBUTES);
345    result.enable(Capability.DATE_ATTRIBUTES);
346    result.enable(Capability.MISSING_VALUES);
347
348    // class
349    result.disableAllClasses();
350    result.enable(Capability.NO_CLASS);
351   
352    return result;
353  }
354
355  /**
356   * Builds the classifier
357   *
358   * @param train the training data to be used for generating the
359   * boosted classifier.
360   * @throws Exception if the classifier could not be built successfully
361   */
362  public void buildClassifier(Instances train) throws Exception {
363    // can classifier handle the data?
364    getCapabilities().testWithFail(train);
365
366    // remove instances with missing class
367    train = new Instances(train);
368    train.deleteWithMissingClass();
369   
370    m_TrainBags = train;
371    m_ClassIndex = train.classIndex();
372    m_IdIndex = 0;
373    m_NumClasses = train.numClasses();
374
375    m_Classes  = new int [train.numInstances()]; // Class values
376    m_Attributes = train.instance(0).relationalValue(1).stringFreeStructure();
377
378    m_Citers = new int[train.numClasses()];
379    m_References = new int[train.numClasses()];
380
381    m_Diffs = new double[m_Attributes.numAttributes()];
382    m_Min = new double[m_Attributes.numAttributes()];
383    m_Max = new double[m_Attributes.numAttributes()];   
384
385    preprocessData();
386
387    buildCNN();
388
389    if(m_CNNDebug){
390      System.out.println("########################################### ");
391      System.out.println("###########CITATION######################## ");
392      System.out.println("########################################### ");
393      for(int i = 0; i < m_CNN.length; i++){
394        System.out.println("Bag: " + i);
395        m_CNN[i].printReducedList();
396      }
397    }           
398  }
399
400  /**
401   * generates all the variables associated to the citation
402   * classifier
403   *
404   * @throws Exception if generation fails
405   */
406  public void buildCNN() throws Exception {
407
408    int numCiters = 0;
409
410    if((m_NumCiters >= m_TrainBags.numInstances()) ||
411        (m_NumCiters < 0))
412      throw new Exception("Number of citers is out of the range [0, numInstances)");
413    else
414      numCiters = m_NumCiters;
415
416    m_CNN = new NeighborList[m_TrainBags.numInstances()]; 
417    Instance bag;
418
419    for(int i = 0; i< m_TrainBags.numInstances(); i++){
420      bag = m_TrainBags.instance(i);
421      //first we find its neighbors
422      NeighborList neighborList = findNeighbors(bag, numCiters, m_TrainBags);
423      m_CNN[i] = neighborList;
424    }
425  }
426
427  /**
428   * calculates the citers associated to a bag
429   * @param bag the bag cited
430   */
431  public void countBagCiters(Instance bag){
432
433    //Initialization of the vector
434    for(int i = 0; i < m_TrainBags.numClasses(); i++)
435      m_Citers[i] = 0;
436    //
437    if(m_CitersDebug == true)
438      System.out.println("-------CITERS--------");
439
440    NeighborList neighborList;
441    NeighborNode current;
442    boolean stopSearch = false;
443    int index;
444
445    // compute the distance between the test bag and each training bag. Update
446    // the bagCiter count in case it be a neighbour
447
448    double bagDistance = 0;
449    for(int i = 0; i < m_TrainBags.numInstances(); i++){
450      //measure the distance
451      bagDistance =  distanceSet(bag, m_TrainBags.instance(i));
452      if(m_CitersDebug == true){
453        System.out.print("bag - bag(" + i + "): " + bagDistance);
454        System.out.println("   <" + m_TrainBags.instance(i).classValue() + ">");
455      }
456      //compare the distance to see if it would belong to the
457      // neighborhood of each training exemplar
458      neighborList = m_CNN[i];
459      current = neighborList.mFirst;
460
461      while((current != null) && (!stopSearch)) {
462        if(m_CitersDebug == true)
463          System.out.println("\t\tciter Distance: " + current.mDistance);
464        if(current.mDistance < bagDistance){
465          current = current.mNext;
466        } else{ 
467          stopSearch = true;               
468          if(m_CitersDebug == true){
469            System.out.println("\t***");
470          }
471        }
472      } 
473
474      if(stopSearch == true){
475        stopSearch = false;
476        index = (int)(m_TrainBags.instance(i)).classValue();
477        m_Citers[index] += 1;
478      }
479
480    }
481
482    if(m_CitersDebug == true){
483      for(int i= 0; i < m_Citers.length; i++){
484        System.out.println("[" + i + "]: " + m_Citers[i]);
485      }
486    }
487
488  }
489
490  /**
491   * Calculates the references of the exemplar bag
492   * @param bag the exemplar to which the nearest references
493   * will be calculated
494   */
495  public void countBagReferences(Instance bag){
496    int index = 0, referencesIndex = 0;
497
498    if(m_TrainBags.numInstances() < m_NumReferences)
499      referencesIndex = m_TrainBags.numInstances() - 1;
500    else
501      referencesIndex = m_NumReferences;
502
503    if(m_CitersDebug == true){
504      System.out.println("-------References (" + referencesIndex+ ")--------");
505    }
506    //Initialization of the vector
507    for(int i = 0; i < m_References.length; i++)
508      m_References[i] = 0;
509
510    if(referencesIndex > 0){
511      //first we find its neighbors
512      NeighborList neighborList = findNeighbors(bag, referencesIndex, m_TrainBags);
513      if(m_ReferencesDebug == true){
514        System.out.println("Bag: " + bag + " Neighbors: ");
515        neighborList.printReducedList();
516      }
517      NeighborNode current = neighborList.mFirst;
518      while(current != null){
519        index = (int) current.mBag.classValue();
520        m_References[index] += 1;
521        current = current.mNext;
522      }
523    }
524    if(m_ReferencesDebug == true){
525      System.out.println("References:");
526      for(int j = 0; j < m_References.length; j++)
527        System.out.println("[" + j + "]: " + m_References[j]);
528    }
529  }
530
531  /**
532   * Build the list of nearest k neighbors to the given test instance.
533   * @param bag the bag to search for neighbors of
534   * @param kNN the number of nearest neighbors
535   * @param bags the data
536   * @return a list of neighbors
537   */
538  protected NeighborList findNeighbors(Instance bag, int kNN, Instances bags){
539    double distance;
540    int index = 0;
541
542    if(kNN > bags.numInstances())
543      kNN = bags.numInstances() - 1;
544
545    NeighborList neighborList = new NeighborList(kNN);
546    for(int i = 0; i < bags.numInstances(); i++){
547      if(bag != bags.instance(i)){ // for hold-one-out cross-validation
548        distance =  distanceSet(bag, bags.instance(i)) ; //mDistanceSet.distance(bag, mInstances, bags.exemplar(i), mInstances);
549        if(m_NeighborListDebug)
550          System.out.println("distance(bag, " + i + "): " + distance);
551        if(neighborList.isEmpty() || (index < kNN) || (distance <= neighborList.mLast.mDistance))
552          neighborList.insertSorted(distance, bags.instance(i), i);
553        index++;
554      } 
555    }
556
557    if(m_NeighborListDebug){
558      System.out.println("bag neighbors:");
559      neighborList.printReducedList();
560    }
561
562    return neighborList;
563  }
564
565  /**
566   * Calculates the distance between two instances
567   * @param first instance
568   * @param second instance
569   * @return the distance value
570   */
571  public double distanceSet(Instance first, Instance second){
572    double[] h_f = new double[first.relationalValue(1).numInstances()];
573    double distance;
574
575    //initilization
576    for(int i = 0; i < h_f.length; i++)
577      h_f[i] = Double.MAX_VALUE;
578
579
580    int rank;
581
582
583    if(m_HDRank >= first.relationalValue(1).numInstances())
584      rank = first.relationalValue(1).numInstances();
585    else if(m_HDRank < 1)
586      rank = 1;
587    else 
588      rank = m_HDRank;
589
590    if(m_HDistanceDebug){
591      System.out.println("-------HAUSDORFF DISTANCE--------");
592      System.out.println("rank: " + rank + "\nset of instances:");
593      System.out.println("\tset 1:");
594      for(int i = 0; i < first.relationalValue(1).numInstances(); i++)
595        System.out.println(first.relationalValue(1).instance(i));
596
597      System.out.println("\n\tset 2:");
598      for(int i = 0; i < second.relationalValue(1).numInstances(); i++)
599        System.out.println(second.relationalValue(1).instance(i));
600
601      System.out.println("\n");
602    }
603
604    //for each instance in bag first
605    for(int i = 0; i < first.relationalValue(1).numInstances(); i++){
606      // calculate the distance to each instance in
607      // bag second
608      if(m_HDistanceDebug){
609        System.out.println("\nDistances:");
610      }
611      for(int j = 0; j < second.relationalValue(1).numInstances(); j++){
612        distance = distance(first.relationalValue(1).instance(i), second.relationalValue(1).instance(j));
613        if(distance < h_f[i])
614          h_f[i] = distance;
615        if(m_HDistanceDebug){
616          System.out.println("\tdist(" + i + ", "+ j + "): " + distance + "  --> h_f[" + i + "]: " + h_f[i]);
617        }
618      }
619    }
620    int[] index_f = Utils.stableSort(h_f);
621
622    if(m_HDistanceDebug){
623      System.out.println("\nRanks:\n");
624      for(int i = 0; i < index_f.length; i++)
625        System.out.println("\trank " + (i + 1) + ": " + h_f[index_f[i]]);
626
627      System.out.println("\n\t\t>>>>> rank " + rank + ": " + h_f[index_f[rank - 1]] + " <<<<<");
628    }
629
630    return h_f[index_f[rank - 1]];
631  }
632
633  /**
634   * distance between two instances
635   * @param first the first instance
636   * @param second the other instance
637   * @return the distance in double precision
638   */
639  public double distance(Instance first, Instance second){
640
641    double sum = 0, diff;
642    for(int i = 0; i < m_Attributes.numAttributes(); i++){
643      diff = (first.value(i) - m_Min[i])/ m_Diffs[i] - 
644        (second.value(i) - m_Min[i])/ m_Diffs[i];
645      sum += diff * diff;
646    }
647    return sum = Math.sqrt(sum);
648  }
649
650  /**
651   * Computes the distribution for a given exemplar
652   *
653   * @param bag the exemplar for which distribution is computed
654   * @return the distribution
655   * @throws Exception if the distribution can't be computed successfully
656   */
657  public double[] distributionForInstance(Instance bag) 
658    throws Exception {
659
660    if(m_TrainBags.numInstances() == 0)
661      throw new Exception("No training bags!");
662
663    updateNormalization(bag);
664
665    //build references (R nearest neighbors)
666    countBagReferences(bag);
667
668    //build citers
669    countBagCiters(bag);
670
671    return makeDistribution();
672  }
673
674  /**
675   * Updates the normalization of each attribute.
676   *
677   * @param bag the exemplar to update the normalization for
678   */
679  public void updateNormalization(Instance bag){
680    int i, k;
681    double min, max;
682    Instances instances;
683    Instance instance;
684    // compute the min/max of each feature
685    for (i = 0; i < m_TrainBags.attribute(1).relation().numAttributes(); i++) {
686      min = m_Min[i] / m_MinNorm;
687      max = m_Max[i] / m_MaxNorm;
688
689      instances = bag.relationalValue(1);
690      for (k=0;k<instances.numInstances();k++) {
691        instance = instances.instance(k);
692        if(instance.value(i) < min)
693          min = instance.value(i);
694        if(instance.value(i) > max)
695          max = instance.value(i);
696      }
697      m_Min[i] = min * m_MinNorm;
698      m_Max[i] = max * m_MaxNorm;
699      m_Diffs[i]= max * m_MaxNorm - min * m_MinNorm;
700    }
701  }
702
703  /**
704   * Wether the instances of two exemplars are or  are not equal
705   * @param exemplar1 first exemplar
706   * @param exemplar2 second exemplar
707   * @return if the instances of the exemplars are equal or not
708   */
709  public boolean equalExemplars(Instance exemplar1, Instance exemplar2){
710    if(exemplar1.relationalValue(1).numInstances() == 
711        exemplar2.relationalValue(1).numInstances()){
712      Instances instances1 = exemplar1.relationalValue(1);
713      Instances instances2 = exemplar2.relationalValue(1);
714      for(int i = 0; i < instances1.numInstances(); i++){
715        Instance instance1 = instances1.instance(i);
716        Instance instance2 = instances2.instance(i);
717        for(int j = 0; j < instance1.numAttributes(); j++){
718          if(instance1.value(j) != instance2.value(j)){
719            return false;
720          }
721        }
722      }
723      return true;
724        }
725    return false;
726  }
727
728  /**
729   * Turn the references and citers list into a probability distribution
730   *
731   * @return the probability distribution
732   * @throws Exception if computation of distribution fails
733   */
734  protected double[] makeDistribution() throws Exception {
735   
736    double total = 0;
737    double[] distribution = new double[m_TrainBags.numClasses()];
738    boolean debug = false;
739
740    total = (double)m_TrainBags.numClasses() / Math.max(1, m_TrainBags.numInstances());
741
742    for(int i = 0; i < m_TrainBags.numClasses(); i++){
743      distribution[i] = 1.0 / Math.max(1, m_TrainBags.numInstances());
744      if(debug) System.out.println("distribution[" + i + "]: " + distribution[i]);
745    }
746
747    if(debug)System.out.println("total: " + total);
748
749    for(int i = 0; i < m_TrainBags.numClasses(); i++){
750      distribution[i] += m_References[i];
751      distribution[i] += m_Citers[i];
752    }
753
754    total = 0;
755    //total
756    for(int i = 0; i < m_TrainBags.numClasses(); i++){
757      total += distribution[i];
758      if(debug)System.out.println("distribution[" + i + "]: " + distribution[i]);
759    }
760
761    for(int i = 0; i < m_TrainBags.numClasses(); i++){
762      distribution[i] = distribution[i] / total;
763      if(debug)System.out.println("distribution[" + i + "]: " + distribution[i]);
764
765    }
766
767    return distribution;
768  }
769
770  /**
771   * Returns an enumeration of all the available options..
772   *
773   * @return an enumeration of all available options.
774   */
775  public Enumeration listOptions(){
776    Vector result = new Vector();
777
778    result.addElement(new Option(
779          "\tNumber of Nearest References (default 1)",
780          "R", 0, "-R <number of references>"));
781   
782    result.addElement(new Option(
783          "\tNumber of Nearest Citers (default 1)",
784          "C", 0, "-C <number of citers>"));
785   
786    result.addElement(new Option(
787          "\tRank of the Hausdorff Distance (default 1)",
788          "H", 0, "-H <rank>"));
789
790    return result.elements();
791  }
792
793  /**
794   * Sets the OptionHandler's options using the given list. All options
795   * will be set (or reset) during this call (i.e. incremental setting
796   * of options is not possible). <p/>
797   *
798   <!-- options-start -->
799   * Valid options are: <p/>
800   *
801   * <pre> -R &lt;number of references&gt;
802   *  Number of Nearest References (default 1)</pre>
803   *
804   * <pre> -C &lt;number of citers&gt;
805   *  Number of Nearest Citers (default 1)</pre>
806   *
807   * <pre> -H &lt;rank&gt;
808   *  Rank of the Hausdorff Distance (default 1)</pre>
809   *
810   <!-- options-end -->
811   *
812   * @param options the list of options as an array of strings
813   * @throws Exception if an option is not supported
814   */
815  public void setOptions(String[] options) throws Exception{
816    setDebug(Utils.getFlag('D', options));
817
818    String option = Utils.getOption('R', options);
819    if(option.length() != 0)
820      setNumReferences(Integer.parseInt(option));
821    else
822      setNumReferences(1);
823
824    option = Utils.getOption('C', options);
825    if(option.length() != 0)
826      setNumCiters(Integer.parseInt(option));
827    else
828      setNumCiters(1);
829
830    option = Utils.getOption('H', options);
831    if(option.length() != 0)
832      setHDRank(Integer.parseInt(option));
833    else
834      setHDRank(1);
835  }
836  /**
837   * Gets the current option settings for the OptionHandler.
838   *
839   * @return the list of current option settings as an array of strings
840   */
841  public String[] getOptions() {
842    Vector        result;
843   
844    result = new Vector();
845
846    if (getDebug())
847      result.add("-D");
848   
849    result.add("-R");
850    result.add("" + getNumReferences());
851   
852    result.add("-C");
853    result.add("" + getNumCiters());
854   
855    result.add("-H");
856    result.add("" + getHDRank());
857
858    return (String[]) result.toArray(new String[result.size()]);
859  }
860
861  /**
862   * returns a string representation of the classifier
863   *
864   * @return            the string representation
865   */
866  public String toString() {
867    StringBuffer        result;
868    int                 i;
869   
870    result = new StringBuffer();
871   
872    // title
873    result.append(this.getClass().getName().replaceAll(".*\\.", "") + "\n");
874    result.append(this.getClass().getName().replaceAll(".*\\.", "").replaceAll(".", "=") + "\n\n");
875
876    if (m_Citers == null) {
877      result.append("no model built yet!\n");
878    }
879    else {
880      // internal representation
881      result.append("Citers....: " + Utils.arrayToString(m_Citers) + "\n");
882
883      result.append("References: " + Utils.arrayToString(m_References) + "\n");
884
885      result.append("Min.......: ");
886      for (i = 0; i < m_Min.length; i++) {
887        if (i > 0)
888          result.append(",");
889        result.append(Utils.doubleToString(m_Min[i], 3));
890      }
891      result.append("\n");
892
893      result.append("Max.......: ");
894      for (i = 0; i < m_Max.length; i++) {
895        if (i > 0)
896          result.append(",");
897        result.append(Utils.doubleToString(m_Max[i], 3));
898      }
899      result.append("\n");
900
901      result.append("Diffs.....: ");
902      for (i = 0; i < m_Diffs.length; i++) {
903        if (i > 0)
904          result.append(",");
905        result.append(Utils.doubleToString(m_Diffs[i], 3));
906      }
907      result.append("\n");
908    }
909   
910    return result.toString();
911  }
912 
913  /**
914   * Returns the revision string.
915   *
916   * @return            the revision
917   */
918  public String getRevision() {
919    return RevisionUtils.extract("$Revision: 5928 $");
920  }
921 
922  /**
923   * Main method for testing this class.
924   *
925   * @param argv should contain the command line arguments to the
926   * scheme (see Evaluation)
927   */
928  public static void main(String[] argv) {
929    runClassifier(new CitationKNN(), argv);
930  }
931
932  //########################################################################
933  //########################################################################
934  //########################################################################
935  //########################################################################
936  //########################################################################
937
938  /**
939   * A class for storing data about a neighboring instance
940   */
941  private class NeighborNode 
942    implements Serializable, RevisionHandler {
943
944    /** for serialization */
945    static final long serialVersionUID = -3947320761906511289L;
946   
947    /** The neighbor bag */
948    private Instance mBag;
949
950    /** The distance from the current instance to this neighbor */
951    private double mDistance;
952
953    /** A link to the next neighbor instance */
954    private NeighborNode mNext;
955
956    /** the position in the bag */
957    private int mBagPosition;   
958   
959    /**
960     * Create a new neighbor node.
961     *
962     * @param distance the distance to the neighbor
963     * @param bag the bag instance
964     * @param position the position in the bag
965     * @param next the next neighbor node
966     */
967    public NeighborNode(double distance, Instance bag, int position, NeighborNode next){
968      mDistance = distance;
969      mBag = bag;
970      mNext = next;
971      mBagPosition = position;
972    }
973
974    /**
975     * Create a new neighbor node that doesn't link to any other nodes.
976     *
977     * @param distance the distance to the neighbor
978     * @param bag the neighbor instance
979     * @param position the position in the bag
980     */
981    public NeighborNode(double distance, Instance bag, int position) {
982      this(distance, bag, position, null);
983    }
984   
985    /**
986     * Returns the revision string.
987     *
988     * @return          the revision
989     */
990    public String getRevision() {
991      return RevisionUtils.extract("$Revision: 5928 $");
992    }
993  }
994 
995  //##################################################
996  /**
997   * A class for a linked list to store the nearest k neighbours
998   * to an instance. We use a list so that we can take care of
999   * cases where multiple neighbours are the same distance away.
1000   * i.e. the minimum length of the list is k.
1001   */
1002  private class NeighborList 
1003    implements Serializable, RevisionHandler {
1004   
1005    /** for serialization */
1006    static final long serialVersionUID = 3432555644456217394L;
1007   
1008    /** The first node in the list */
1009    private NeighborNode mFirst;
1010    /** The last node in the list */
1011    private NeighborNode mLast;
1012
1013    /** The number of nodes to attempt to maintain in the list */
1014    private int mLength = 1;
1015
1016    /**
1017     * Creates the neighborlist with a desired length
1018     *
1019     * @param length the length of list to attempt to maintain
1020     */
1021    public NeighborList(int length) {
1022      mLength = length;
1023    }
1024    /**
1025     * Gets whether the list is empty.
1026     *
1027     * @return true if so
1028     */
1029    public boolean isEmpty() {
1030      return (mFirst == null);
1031    }
1032    /**
1033     * Gets the current length of the list.
1034     *
1035     * @return the current length of the list
1036     */
1037    public int currentLength() {
1038
1039      int i = 0;
1040      NeighborNode current = mFirst;
1041      while (current != null) {
1042        i++;
1043        current = current.mNext;
1044      }
1045      return i;
1046    }
1047
1048    /**
1049     * Inserts an instance neighbor into the list, maintaining the list
1050     * sorted by distance.
1051     *
1052     * @param distance the distance to the instance
1053     * @param bag the neighboring instance
1054     * @param position the position in the bag
1055     */
1056    public void insertSorted(double distance, Instance bag, int position) {
1057
1058      if (isEmpty()) {
1059        mFirst = mLast = new NeighborNode(distance, bag, position);
1060      } else {
1061        NeighborNode current = mFirst;
1062        if (distance < mFirst.mDistance) {// Insert at head
1063          mFirst = new NeighborNode(distance, bag, position, mFirst);
1064        } else { // Insert further down the list
1065          for( ;(current.mNext != null) && 
1066              (current.mNext.mDistance < distance); 
1067              current = current.mNext);
1068          current.mNext = new NeighborNode(distance, bag, position, current.mNext);
1069          if (current.equals(mLast)) {
1070            mLast = current.mNext;
1071          }
1072        }
1073
1074        // Trip down the list until we've got k list elements (or more if the
1075        // distance to the last elements is the same).
1076        int valcount = 0;
1077        for(current = mFirst; current.mNext != null; 
1078            current = current.mNext) {
1079          valcount++;
1080          if ((valcount >= mLength) && (current.mDistance != 
1081                current.mNext.mDistance)) {
1082            mLast = current;
1083            current.mNext = null;
1084            break;
1085                }
1086            }
1087      }
1088    }
1089
1090    /**
1091     * Prunes the list to contain the k nearest neighbors. If there are
1092     * multiple neighbors at the k'th distance, all will be kept.
1093     *
1094     * @param k the number of neighbors to keep in the list.
1095     */
1096    public void pruneToK(int k) {
1097      if (isEmpty())
1098        return;
1099      if (k < 1)
1100        k = 1;
1101
1102      int currentK = 0;
1103      double currentDist = mFirst.mDistance;
1104      NeighborNode current = mFirst;
1105      for(; current.mNext != null; current = current.mNext) {
1106        currentK++;
1107        currentDist = current.mDistance;
1108        if ((currentK >= k) && (currentDist != current.mNext.mDistance)) {
1109          mLast = current;
1110          current.mNext = null;
1111          break;
1112        }
1113      }
1114    }
1115
1116    /**
1117     * Prints out the contents of the neighborlist
1118     */
1119    public void printList() {
1120
1121      if (isEmpty()) {
1122        System.out.println("Empty list");
1123      } else {
1124        NeighborNode current = mFirst;
1125        while (current != null) {
1126          System.out.print("Node: instance " + current.mBagPosition + "\n");
1127          System.out.println(current.mBag);
1128          System.out.println(", distance " + current.mDistance);
1129          current = current.mNext;
1130        }
1131        System.out.println();
1132      }
1133    }
1134    /**
1135     * Prints out the contents of the neighborlist
1136     */
1137    public void printReducedList() {
1138
1139      if (isEmpty()) {
1140        System.out.println("Empty list");
1141      } else {
1142        NeighborNode current = mFirst;
1143        while (current != null) {
1144          System.out.print("Node: bag " + current.mBagPosition + "  (" + current.mBag.relationalValue(1).numInstances() +"): ");
1145          //for(int i = 0; i < current.mBag.getInstances().numInstances(); i++){
1146          //System.out.print(" " + (current.mBag).getInstances().instance(i));
1147          //}
1148          System.out.print("   <" + current.mBag.classValue() + ">");
1149          System.out.println("  (d: " + current.mDistance + ")");
1150          current = current.mNext;
1151        }
1152        System.out.println();
1153      }
1154    }
1155   
1156    /**
1157     * Returns the revision string.
1158     *
1159     * @return          the revision
1160     */
1161    public String getRevision() {
1162      return RevisionUtils.extract("$Revision: 5928 $");
1163    }
1164  }
1165}
Note: See TracBrowser for help on using the repository browser.