source: src/main/java/weka/core/Instances.java @ 26

Last change on this file since 26 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 69.4 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    Instances.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.core;
24
25import weka.core.converters.ArffLoader.ArffReader;
26import weka.core.converters.ConverterUtils.DataSource;
27
28import java.io.FileReader;
29import java.io.IOException;
30import java.io.Reader;
31import java.io.Serializable;
32import java.util.Enumeration;
33import java.util.Random;
34import java.util.List;
35import java.util.AbstractList;
36import java.util.ArrayList;
37
38/**
39 * Class for handling an ordered set of weighted instances. <p>
40 *
41 * Typical usage: <p>
42 * <pre>
43 * import weka.core.converters.ConverterUtils.DataSource;
44 * ...
45 *
46 * // Read all the instances in the file (ARFF, CSV, XRFF, ...)
47 * DataSource source = new DataSource(filename);
48 * Instances instances = source.getDataSet();
49 *
50 * // Make the last attribute be the class
51 * instances.setClassIndex(instances.numAttributes() - 1);
52 *
53 * // Print header and instances.
54 * System.out.println("\nDataset:\n");
55 * System.out.println(instances);
56 *
57 * ...
58 * </pre><p>
59 *
60 * All methods that change a set of instances are safe, ie. a change
61 * of a set of instances does not affect any other sets of
62 * instances. All methods that change a datasets's attribute
63 * information clone the dataset before it is changed.
64 *
65 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
66 * @author Len Trigg (trigg@cs.waikato.ac.nz)
67 * @author FracPete (fracpete at waikato dot ac dot nz)
68 * @version $Revision: 5987 $
69 */
70public class Instances extends AbstractList<Instance>
71  implements Serializable, RevisionHandler {
72 
73  /** for serialization */
74  static final long serialVersionUID = -19412345060742748L;
75 
76  /** The filename extension that should be used for arff files */
77  public final static String FILE_EXTENSION = ".arff";
78
79  /** The filename extension that should be used for bin. serialized instances files */
80  public final static String SERIALIZED_OBJ_FILE_EXTENSION = ".bsi";
81
82  /** The keyword used to denote the start of an arff header */
83  public final static String ARFF_RELATION = "@relation";
84
85  /** The keyword used to denote the start of the arff data section */
86  public final static String ARFF_DATA = "@data";
87
88  /** The dataset's name. */
89  protected /*@spec_public non_null@*/ String m_RelationName;         
90
91  /** The attribute information. */
92  protected /*@spec_public non_null@*/ ArrayList<Attribute> m_Attributes;
93  /*  public invariant (\forall int i; 0 <= i && i < m_Attributes.size();
94                    m_Attributes.get(i) != null);
95  */
96
97  /** The instances. */
98  protected /*@spec_public non_null@*/ ArrayList<Instance> m_Instances;
99
100  /** The class attribute's index */
101  protected int m_ClassIndex;
102  //@ protected invariant classIndex() == m_ClassIndex;
103
104  /** The lines read so far in case of incremental loading. Since the
105   * StreamTokenizer will be re-initialized with every instance that is read,
106   * we have to keep track of the number of lines read so far.
107   * @see #readInstance(Reader) */
108  protected int m_Lines = 0;
109 
110  /**
111   * Reads an ARFF file from a reader, and assigns a weight of
112   * one to each instance. Lets the index of the class
113   * attribute be undefined (negative).
114   *
115   * @param reader the reader
116   * @throws IOException if the ARFF file is not read
117   * successfully
118   */
119  public Instances(/*@non_null@*/Reader reader) throws IOException {
120    ArffReader arff = new ArffReader(reader);
121    Instances dataset = arff.getData();
122    initialize(dataset, dataset.numInstances());
123    dataset.copyInstances(0, this, dataset.numInstances());
124    compactify();
125  }
126 
127  /**
128   * Reads the header of an ARFF file from a reader and
129   * reserves space for the given number of instances. Lets
130   * the class index be undefined (negative).
131   *
132   * @param reader the reader
133   * @param capacity the capacity
134   * @throws IllegalArgumentException if the header is not read successfully
135   * or the capacity is negative.
136   * @throws IOException if there is a problem with the reader.
137   * @deprecated instead of using this method in conjunction with the
138   * <code>readInstance(Reader)</code> method, one should use the
139   * <code>ArffLoader</code> or <code>DataSource</code> class instead.
140   * @see weka.core.converters.ArffLoader
141   * @see weka.core.converters.ConverterUtils.DataSource
142   */
143  //@ requires capacity >= 0;
144  //@ ensures classIndex() == -1;
145  @Deprecated public Instances(/*@non_null@*/Reader reader, int capacity)
146    throws IOException {
147
148    ArffReader arff = new ArffReader(reader, 0);
149    Instances header = arff.getStructure();
150    initialize(header, capacity);
151    m_Lines = arff.getLineNo();
152  }
153
154  /**
155   * Constructor copying all instances and references to
156   * the header information from the given set of instances.
157   *
158   * @param dataset the set to be copied
159   */
160  public Instances(/*@non_null@*/Instances dataset) {
161
162    this(dataset, dataset.numInstances());
163
164    dataset.copyInstances(0, this, dataset.numInstances());
165  }
166
167  /**
168   * Constructor creating an empty set of instances. Copies references
169   * to the header information from the given set of instances. Sets
170   * the capacity of the set of instances to 0 if its negative.
171   *
172   * @param dataset the instances from which the header
173   * information is to be taken
174   * @param capacity the capacity of the new dataset
175   */
176  public Instances(/*@non_null@*/Instances dataset, int capacity) {
177    initialize(dataset, capacity);
178  }
179
180  /**
181   * initializes with the header information of the given dataset and sets
182   * the capacity of the set of instances.
183   *
184   * @param dataset the dataset to use as template
185   * @param capacity the number of rows to reserve
186   */
187  protected void initialize(Instances dataset, int capacity) {
188    if (capacity < 0)
189      capacity = 0;
190   
191    // Strings only have to be "shallow" copied because
192    // they can't be modified.
193    m_ClassIndex   = dataset.m_ClassIndex;
194    m_RelationName = dataset.m_RelationName;
195    m_Attributes   = dataset.m_Attributes;
196    m_Instances    = new ArrayList<Instance>(capacity);
197  }
198 
199  /**
200   * Creates a new set of instances by copying a
201   * subset of another set.
202   *
203   * @param source the set of instances from which a subset
204   * is to be created
205   * @param first the index of the first instance to be copied
206   * @param toCopy the number of instances to be copied
207   * @throws IllegalArgumentException if first and toCopy are out of range
208   */
209  //@ requires 0 <= first;
210  //@ requires 0 <= toCopy;
211  //@ requires first + toCopy <= source.numInstances();
212  public Instances(/*@non_null@*/Instances source, int first, int toCopy) {
213   
214    this(source, toCopy);
215
216    if ((first < 0) || ((first + toCopy) > source.numInstances())) {
217      throw new IllegalArgumentException("Parameters first and/or toCopy out "+
218                                         "of range");
219    }
220    source.copyInstances(first, this, toCopy);
221  }
222
223  /**
224   * Creates an empty set of instances. Uses the given
225   * attribute information. Sets the capacity of the set of
226   * instances to 0 if its negative. Given attribute information
227   * must not be changed after this constructor has been used.
228   *
229   * @param name the name of the relation
230   * @param attInfo the attribute information
231   * @param capacity the capacity of the set
232   */
233  public Instances(/*@non_null@*/String name, 
234                   /*@non_null@*/ArrayList<Attribute> attInfo, int capacity) {
235
236    m_RelationName = name;
237    m_ClassIndex = -1;
238    m_Attributes = attInfo;
239    for (int i = 0; i < numAttributes(); i++) {
240      attribute(i).setIndex(i);
241    }
242    m_Instances = new ArrayList<Instance>(capacity);
243  }
244
245  /**
246   * Create a copy of the structure if the data has string or
247   * relational attributes, "cleanses" string types (i.e. doesn't
248   * contain references to the strings seen in the past) and all
249   * relational attributes.
250   *
251   * @return a copy of the instance structure.
252   */
253  public Instances stringFreeStructure() {
254
255    ArrayList<Attribute> newAtts = new ArrayList<Attribute>();
256    for (int i = 0 ; i < m_Attributes.size(); i++) {
257      Attribute att = (Attribute)m_Attributes.get(i);
258      if (att.type() == Attribute.STRING) {
259        newAtts.add(new Attribute(att.name(), (List<String>)null, i));
260      } else if (att.type() == Attribute.RELATIONAL) {
261        newAtts.add(new Attribute(att.name(), new Instances(att.relation(), 0), i));
262      }
263    }
264    if (newAtts.size() == 0) {
265      return new Instances(this, 0);
266    }
267    ArrayList<Attribute> atts = Utils.cast(m_Attributes.clone());
268    for (int i = 0; i < newAtts.size(); i++) {
269      atts.set(((Attribute)newAtts.get(i)).index(), newAtts.get(i));
270    }
271    Instances result = new Instances(this, 0);
272    result.m_Attributes = atts;
273    return result;
274  }
275
276  /**
277   * Adds one instance to the end of the set.
278   * Shallow copies instance before it is added. Increases the
279   * size of the dataset if it is not large enough. Does not
280   * check if the instance is compatible with the dataset.
281   * Note: String or relational values are not transferred.
282   *
283   * @param instance the instance to be added
284   */
285  public boolean add(/*@non_null@*/ Instance instance) {
286
287    Instance newInstance = (Instance)instance.copy();
288
289    newInstance.setDataset(this);
290    m_Instances.add(newInstance);
291
292    return true;
293  }
294
295  /**
296   * Adds one instance to the end of the set.
297   * Shallow copies instance before it is added. Increases the
298   * size of the dataset if it is not large enough. Does not
299   * check if the instance is compatible with the dataset.
300   * Note: String or relational values are not transferred.
301   *
302   * @param index position where instance is to be inserted
303   * @param instance the instance to be added
304   */
305  //@ requires 0 <= index;
306  //@ requires index < m_Instances.size();
307  public void add(int index, /*@non_null@*/ Instance instance) {
308
309    Instance newInstance = (Instance)instance.copy();
310
311    newInstance.setDataset(this);
312    m_Instances.add(index, newInstance);
313  }
314
315  /**
316   * Returns an attribute.
317   *
318   * @param index the attribute's index (index starts with 0)
319   * @return the attribute at the given position
320   */
321  //@ requires 0 <= index;
322  //@ requires index < m_Attributes.size();
323  //@ ensures \result != null;
324  public /*@pure@*/ Attribute attribute(int index) {
325   
326    return (Attribute) m_Attributes.get(index);
327  }
328
329  /**
330   * Returns an attribute given its name. If there is more than
331   * one attribute with the same name, it returns the first one.
332   * Returns null if the attribute can't be found.
333   *
334   * @param name the attribute's name
335   * @return the attribute with the given name, null if the
336   * attribute can't be found
337   */ 
338  public /*@pure@*/ Attribute attribute(String name) {
339   
340    for (int i = 0; i < numAttributes(); i++) {
341      if (attribute(i).name().equals(name)) {
342        return attribute(i);
343      }
344    }
345    return null;
346  }
347
348  /**
349   * Checks for attributes of the given type in the dataset
350   *
351   * @param attType  the attribute type to look for
352   * @return         true if attributes of the given type are present
353   */
354  public boolean checkForAttributeType(int attType) {
355   
356    int i = 0;
357   
358    while (i < m_Attributes.size()) {
359      if (attribute(i++).type() == attType) {
360        return true;
361      }
362    }
363    return false;
364  }
365
366  /**
367   * Checks for string attributes in the dataset
368   *
369   * @return true if string attributes are present, false otherwise
370   */
371  public /*@pure@*/ boolean checkForStringAttributes() {
372    return checkForAttributeType(Attribute.STRING);
373  }
374
375  /**
376   * Checks if the given instance is compatible
377   * with this dataset. Only looks at the size of
378   * the instance and the ranges of the values for
379   * nominal and string attributes.
380   *
381   * @param instance the instance to check
382   * @return true if the instance is compatible with the dataset
383   */
384  public /*@pure@*/ boolean checkInstance(Instance instance) {
385
386    if (instance.numAttributes() != numAttributes()) {
387      return false;
388    }
389    for (int i = 0; i < numAttributes(); i++) {
390      if (instance.isMissing(i)) {
391        continue;
392      } else if (attribute(i).isNominal() ||
393                 attribute(i).isString()) {
394        if (!(Utils.eq(instance.value(i),
395                       (double)(int)instance.value(i)))) {
396          return false;
397        } else if (Utils.sm(instance.value(i), 0) ||
398                   Utils.gr(instance.value(i),
399                            attribute(i).numValues())) {
400          return false;
401        }
402      }
403    }
404    return true;
405  }
406       
407  /**
408   * Returns the class attribute.
409   *
410   * @return the class attribute
411   * @throws UnassignedClassException if the class is not set
412   */
413  //@ requires classIndex() >= 0;
414  public /*@pure@*/ Attribute classAttribute() {
415
416    if (m_ClassIndex < 0) {
417      throw new UnassignedClassException("Class index is negative (not set)!");
418    }
419    return attribute(m_ClassIndex);
420  }
421
422  /**
423   * Returns the class attribute's index. Returns negative number
424   * if it's undefined.
425   *
426   * @return the class index as an integer
427   */
428  // ensures \result == m_ClassIndex;
429  public /*@pure@*/ int classIndex() {
430   
431    return m_ClassIndex;
432  }
433 
434  /**
435   * Compactifies the set of instances. Decreases the capacity of
436   * the set so that it matches the number of instances in the set.
437   */
438  public void compactify() {
439
440    m_Instances.trimToSize();
441  }
442
443  /**
444   * Removes all instances from the set.
445   */
446  public void delete() {
447   
448    m_Instances = new ArrayList<Instance>();
449  }
450
451  /**
452   * Removes an instance at the given position from the set.
453   *
454   * @param index the instance's position (index starts with 0)
455   */
456  //@ requires 0 <= index && index < numInstances();
457  public void delete(int index) {
458   
459    m_Instances.remove(index);
460  }
461
462  /**
463   * Deletes an attribute at the given position
464   * (0 to numAttributes() - 1). A deep copy of the attribute
465   * information is performed before the attribute is deleted.
466   *
467   * @param position the attribute's position (position starts with 0)
468   * @throws IllegalArgumentException if the given index is out of range
469   *            or the class attribute is being deleted
470   */
471  //@ requires 0 <= position && position < numAttributes();
472  //@ requires position != classIndex();
473  public void deleteAttributeAt(int position) {
474         
475    if ((position < 0) || (position >= m_Attributes.size())) {
476      throw new IllegalArgumentException("Index out of range");
477    }
478    if (position == m_ClassIndex) {
479      throw new IllegalArgumentException("Can't delete class attribute");
480    }
481    freshAttributeInfo();
482    if (m_ClassIndex > position) {
483      m_ClassIndex--;
484    }
485    m_Attributes.remove(position);
486    for (int i = position; i < m_Attributes.size(); i++) {
487      Attribute current = (Attribute)m_Attributes.get(i);
488      current.setIndex(current.index() - 1);
489    }
490    for (int i = 0; i < numInstances(); i++) {
491      instance(i).setDataset(null);
492      instance(i).deleteAttributeAt(position); 
493      instance(i).setDataset(this);
494    }
495  }
496
497  /**
498   * Deletes all attributes of the given type in the dataset. A deep copy of
499   * the attribute information is performed before an attribute is deleted.
500   *
501   * @param attType the attribute type to delete
502   * @throws IllegalArgumentException if attribute couldn't be
503   * successfully deleted (probably because it is the class attribute).
504   */
505  public void deleteAttributeType(int attType) {
506    int i = 0;
507    while (i < m_Attributes.size()) {
508      if (attribute(i).type() == attType) {
509        deleteAttributeAt(i);
510      } else {
511        i++;
512      }
513    }
514  }
515
516  /**
517   * Deletes all string attributes in the dataset. A deep copy of the attribute
518   * information is performed before an attribute is deleted.
519   *
520   * @throws IllegalArgumentException if string attribute couldn't be
521   * successfully deleted (probably because it is the class attribute).
522   * @see #deleteAttributeType(int)
523   */
524  public void deleteStringAttributes() {
525    deleteAttributeType(Attribute.STRING);
526  }
527
528  /**
529   * Removes all instances with missing values for a particular
530   * attribute from the dataset.
531   *
532   * @param attIndex the attribute's index (index starts with 0)
533   */
534  //@ requires 0 <= attIndex && attIndex < numAttributes();
535  public void deleteWithMissing(int attIndex) {
536
537    ArrayList<Instance> newInstances = new ArrayList<Instance>(numInstances());
538
539    for (int i = 0; i < numInstances(); i++) {
540      if (!instance(i).isMissing(attIndex)) {
541        newInstances.add(instance(i));
542      }
543    }
544    m_Instances = newInstances;
545  }
546
547  /**
548   * Removes all instances with missing values for a particular
549   * attribute from the dataset.
550   *
551   * @param att the attribute
552   */
553  public void deleteWithMissing(/*@non_null@*/ Attribute att) {
554
555    deleteWithMissing(att.index());
556  }
557
558  /**
559   * Removes all instances with a missing class value
560   * from the dataset.
561   *
562   * @throws UnassignedClassException if class is not set
563   */
564  public void deleteWithMissingClass() {
565
566    if (m_ClassIndex < 0) {
567      throw new UnassignedClassException("Class index is negative (not set)!");
568    }
569    deleteWithMissing(m_ClassIndex);
570  }
571
572  /**
573   * Returns an enumeration of all the attributes.
574   *
575   * @return enumeration of all the attributes.
576   */
577  public /*@non_null pure@*/ Enumeration enumerateAttributes() {
578
579    return new WekaEnumeration(m_Attributes, m_ClassIndex);
580  }
581
582  /**
583   * Returns an enumeration of all instances in the dataset.
584   *
585   * @return enumeration of all instances in the dataset
586   */
587  public /*@non_null pure@*/ Enumeration enumerateInstances() {
588
589    return new WekaEnumeration(m_Instances);
590  }
591
592  /**
593   * Checks if two headers are equivalent. If not, then returns a message why
594   * they differ.
595   *
596   * @param dataset     another dataset
597   * @return            null if the header of the given dataset is equivalent
598   *                    to this header, otherwise a message with details on
599   *                    why they differ
600   */
601  public String equalHeadersMsg(Instances dataset) {
602    // Check class and all attributes
603    if (m_ClassIndex != dataset.m_ClassIndex)
604      return "Class index differ: " + (m_ClassIndex+1) + " != " + (dataset.m_ClassIndex+1);
605
606    if (m_Attributes.size() != dataset.m_Attributes.size())
607      return "Different number of attributes: " + m_Attributes.size() + " != " + dataset.m_Attributes.size();
608   
609    for (int i = 0; i < m_Attributes.size(); i++) {
610      String msg = attribute(i).equalsMsg(dataset.attribute(i));
611      if (msg != null)
612        return "Attributes differ at position " + (i+1) + ":\n" + msg;
613    }
614   
615    return null;
616  }
617
618  /**
619   * Checks if two headers are equivalent.
620   *
621   * @param dataset another dataset
622   * @return true if the header of the given dataset is equivalent
623   * to this header
624   */
625  public /*@pure@*/ boolean equalHeaders(Instances dataset){
626    return (equalHeadersMsg(dataset) == null);
627  }
628 
629  /**
630   * Returns the first instance in the set.
631   *
632   * @return the first instance in the set
633   */
634  //@ requires numInstances() > 0;
635  public /*@non_null pure@*/ Instance firstInstance() {
636   
637    return (Instance)m_Instances.get(0);
638  }
639
640  /**
641   * Returns a random number generator. The initial seed of the random
642   * number generator depends on the given seed and the hash code of
643   * a string representation of a instances chosen based on the given
644   * seed.
645   *
646   * @param seed the given seed
647   * @return the random number generator
648   */
649  public Random getRandomNumberGenerator(long seed) {
650
651    Random r = new Random(seed);
652    r.setSeed(instance(r.nextInt(numInstances())).toStringNoWeight().hashCode() + seed);
653    return r;
654  }
655 
656  /**
657   * Inserts an attribute at the given position (0 to
658   * numAttributes()) and sets all values to be missing.
659   * Shallow copies the attribute before it is inserted, and performs
660   * a deep copy of the existing attribute information.
661   *
662   * @param att the attribute to be inserted
663   * @param position the attribute's position (position starts with 0)
664   * @throws IllegalArgumentException if the given index is out of range
665   */
666  //@ requires 0 <= position;
667  //@ requires position <= numAttributes();
668  public void insertAttributeAt(/*@non_null@*/ Attribute att, int position) {
669         
670    if ((position < 0) ||
671        (position > m_Attributes.size())) {
672      throw new IllegalArgumentException("Index out of range");
673    }
674    att = (Attribute)att.copy();
675    freshAttributeInfo();
676    att.setIndex(position);
677    m_Attributes.add(position, att);
678    for (int i = position + 1; i < m_Attributes.size(); i++) {
679      Attribute current = (Attribute)m_Attributes.get(i);
680      current.setIndex(current.index() + 1);
681    }
682    for (int i = 0; i < numInstances(); i++) {
683      instance(i).setDataset(null);
684      instance(i).insertAttributeAt(position);
685      instance(i).setDataset(this);
686    }
687    if (m_ClassIndex >= position) {
688      m_ClassIndex++;
689    }
690  }
691
692  /**
693   * Returns the instance at the given position.
694   *
695   * @param index the instance's index (index starts with 0)
696   * @return the instance at the given position
697   */
698  //@ requires 0 <= index;
699  //@ requires index < numInstances();
700  public /*@non_null pure@*/ Instance instance(int index) {
701
702    return m_Instances.get(index);
703  }
704
705  /**
706   * Returns the instance at the given position.
707   *
708   * @param index the instance's index (index starts with 0)
709   * @return the instance at the given position
710   */
711  //@ requires 0 <= index;
712  //@ requires index < numInstances();
713  public /*@non_null pure@*/ Instance get(int index) {
714
715    return m_Instances.get(index);
716  }
717
718  /**
719   * Returns the kth-smallest attribute value of a numeric attribute.
720   * Note that calling this method will change the order of the data!
721   *
722   * @param att the Attribute object
723   * @param k the value of k
724   * @return the kth-smallest value
725   */
726  public double kthSmallestValue(Attribute att, int k) {
727
728    return kthSmallestValue(att.index(), k);
729  }
730
731  /**
732   * Returns the kth-smallest attribute value of a numeric attribute.
733   * Note that calling this method will change the order of the data!
734   * The number of non-missing values in the data must be as least
735   * as last as k for this to work.
736   *
737   * @param attIndex the attribute's index
738   * @param k the value of k
739   * @return the kth-smallest value
740   */
741  public double kthSmallestValue(int attIndex, int k) {
742   
743    if (!attribute(attIndex).isNumeric()) {
744      throw new IllegalArgumentException("Instances: attribute must be numeric to compute kth-smallest value.");
745    }
746
747    int i,j;
748
749    // move all instances with missing values to end
750    j = numInstances() - 1;
751    i = 0;
752    while (i <= j) {
753      if (instance(j).isMissing(attIndex)) {
754        j--;
755      } else {
756        if (instance(i).isMissing(attIndex)) {
757          swap(i,j);
758          j--;
759        }
760        i++;
761      }
762    }
763
764    if ((k < 1) || (k > j+1)) {
765      throw new IllegalArgumentException("Instances: value for k for computing kth-smallest value too large.");
766    }
767
768    return instance(select(attIndex, 0, j, k)).value(attIndex);
769  }
770
771  /**
772   * Returns the last instance in the set.
773   *
774   * @return the last instance in the set
775   */
776  //@ requires numInstances() > 0;
777  public /*@non_null pure@*/ Instance lastInstance() {
778   
779    return (Instance)m_Instances.get(m_Instances.size() - 1);
780  }
781
782  /**
783   * Returns the mean (mode) for a numeric (nominal) attribute as
784   * a floating-point value. Returns 0 if the attribute is neither nominal nor
785   * numeric. If all values are missing it returns zero.
786   *
787   * @param attIndex the attribute's index (index starts with 0)
788   * @return the mean or the mode
789   */
790  public /*@pure@*/ double meanOrMode(int attIndex) {
791
792    double result, found;
793    int [] counts;
794
795    if (attribute(attIndex).isNumeric()) {
796      result = found = 0;
797      for (int j = 0; j < numInstances(); j++) {
798        if (!instance(j).isMissing(attIndex)) {
799          found += instance(j).weight();
800          result += instance(j).weight()*instance(j).value(attIndex);
801        }
802      }
803      if (found <= 0) {
804        return 0;
805      } else {
806        return result / found;
807      }
808    } else if (attribute(attIndex).isNominal()) {
809      counts = new int[attribute(attIndex).numValues()];
810      for (int j = 0; j < numInstances(); j++) {
811        if (!instance(j).isMissing(attIndex)) {
812          counts[(int) instance(j).value(attIndex)] += instance(j).weight();
813        }
814      }
815      return (double)Utils.maxIndex(counts);
816    } else {
817      return 0;
818    }
819  }
820
821  /**
822   * Returns the mean (mode) for a numeric (nominal) attribute as a
823   * floating-point value.  Returns 0 if the attribute is neither
824   * nominal nor numeric.  If all values are missing it returns zero.
825   *
826   * @param att the attribute
827   * @return the mean or the mode
828   */
829  public /*@pure@*/ double meanOrMode(Attribute att) {
830
831    return meanOrMode(att.index());
832  }
833
834  /**
835   * Returns the number of attributes.
836   *
837   * @return the number of attributes as an integer
838   */
839  //@ ensures \result == m_Attributes.size();
840  public /*@pure@*/ int numAttributes() {
841
842    return m_Attributes.size();
843  }
844
845  /**
846   * Returns the number of class labels.
847   *
848   * @return the number of class labels as an integer if the class
849   * attribute is nominal, 1 otherwise.
850   * @throws UnassignedClassException if the class is not set
851   */
852  //@ requires classIndex() >= 0;
853  public /*@pure@*/ int numClasses() {
854   
855    if (m_ClassIndex < 0) {
856      throw new UnassignedClassException("Class index is negative (not set)!");
857    }
858    if (!classAttribute().isNominal()) {
859      return 1;
860    } else {
861      return classAttribute().numValues();
862    }
863  }
864
865  /**
866   * Returns the number of distinct values of a given attribute.
867   * Returns the number of instances if the attribute is a
868   * string attribute. The value 'missing' is not counted.
869   *
870   * @param attIndex the attribute (index starts with 0)
871   * @return the number of distinct values of a given attribute
872   */
873  //@ requires 0 <= attIndex;
874  //@ requires attIndex < numAttributes();
875  public /*@pure@*/ int numDistinctValues(int attIndex) {
876
877    if (attribute(attIndex).isNumeric()) {
878      double [] attVals = attributeToDoubleArray(attIndex);
879      int [] sorted = Utils.sort(attVals);
880      double prev = 0;
881      int counter = 0;
882      for (int i = 0; i < sorted.length; i++) {
883        Instance current = instance(sorted[i]);
884        if (current.isMissing(attIndex)) {
885          break;
886        }
887        if ((i == 0) || 
888            (current.value(attIndex) > prev)) {
889          prev = current.value(attIndex);
890          counter++;
891        }
892      }
893      return counter;
894    } else {
895      return attribute(attIndex).numValues();
896    }
897  }
898
899  /**
900   * Returns the number of distinct values of a given attribute.
901   * Returns the number of instances if the attribute is a
902   * string attribute. The value 'missing' is not counted.
903   *
904   * @param att the attribute
905   * @return the number of distinct values of a given attribute
906   */
907  public /*@pure@*/ int numDistinctValues(/*@non_null@*/Attribute att) {
908
909    return numDistinctValues(att.index());
910  }
911 
912  /**
913   * Returns the number of instances in the dataset.
914   *
915   * @return the number of instances in the dataset as an integer
916   */
917  //@ ensures \result == m_Instances.size();
918  public /*@pure@*/ int numInstances() {
919
920    return m_Instances.size();
921  }
922 
923  /**
924   * Returns the number of instances in the dataset.
925   *
926   * @return the number of instances in the dataset as an integer
927   */
928  //@ ensures \result == m_Instances.size();
929  public /*@pure@*/ int size() {
930
931    return m_Instances.size();
932  }
933
934  /**
935   * Shuffles the instances in the set so that they are ordered
936   * randomly.
937   *
938   * @param random a random number generator
939   */
940  public void randomize(Random random) {
941
942    for (int j = numInstances() - 1; j > 0; j--)
943      swap(j, random.nextInt(j+1));
944  }
945 
946  /**
947   * Reads a single instance from the reader and appends it
948   * to the dataset.  Automatically expands the dataset if it
949   * is not large enough to hold the instance. This method does
950   * not check for carriage return at the end of the line.
951   *
952   * @param reader the reader
953   * @return false if end of file has been reached
954   * @throws IOException if the information is not read
955   * successfully
956   * @deprecated instead of using this method in conjunction with the
957   * <code>readInstance(Reader)</code> method, one should use the
958   * <code>ArffLoader</code> or <code>DataSource</code> class instead.
959   * @see weka.core.converters.ArffLoader
960   * @see weka.core.converters.ConverterUtils.DataSource
961   */ 
962  @Deprecated public boolean readInstance(Reader reader) throws IOException {
963
964    ArffReader arff = new ArffReader(reader, this, m_Lines, 1);
965    Instance inst = arff.readInstance(arff.getData(), false);
966    m_Lines = arff.getLineNo();
967    if (inst != null) {
968      add(inst);
969      return true;
970    }
971    else {
972      return false;
973    }
974  }   
975
976  /**
977   * Returns the relation's name.
978   *
979   * @return the relation's name as a string
980   */
981  //@ ensures \result == m_RelationName;
982  public /*@pure@*/ String relationName() {
983
984    return m_RelationName;
985  }
986
987  /**
988   * Removes the instance at the given position.
989   *
990   * @param index the instance's index (index starts with 0)
991   * @return the instance at the given position
992   */
993  //@ requires 0 <= index;
994  //@ requires index < numInstances();
995  public Instance remove(int index) {
996
997    return m_Instances.remove(index);
998  }
999
1000  /**
1001   * Renames an attribute. This change only affects this
1002   * dataset.
1003   *
1004   * @param att the attribute's index (index starts with 0)
1005   * @param name the new name
1006   */
1007  public void renameAttribute(int att, String name) {
1008
1009    Attribute newAtt = attribute(att).copy(name);
1010    ArrayList<Attribute> newVec = new ArrayList<Attribute>(numAttributes());
1011
1012    for (int i = 0; i < numAttributes(); i++) {
1013      if (i == att) {
1014        newVec.add(newAtt);
1015      } else {
1016        newVec.add(attribute(i));
1017      }
1018    }
1019    m_Attributes = newVec;
1020  }
1021
1022  /**
1023   * Renames an attribute. This change only affects this
1024   * dataset.
1025   *
1026   * @param att the attribute
1027   * @param name the new name
1028   */
1029  public void renameAttribute(Attribute att, String name) {
1030
1031    renameAttribute(att.index(), name);
1032  }
1033
1034  /**
1035   * Renames the value of a nominal (or string) attribute value. This
1036   * change only affects this dataset.
1037   *
1038   * @param att the attribute's index (index starts with 0)
1039   * @param val the value's index (index starts with 0)
1040   * @param name the new name
1041   */
1042  public void renameAttributeValue(int att, int val, String name) {
1043
1044    Attribute newAtt = (Attribute)attribute(att).copy();
1045    ArrayList<Attribute> newVec = new ArrayList<Attribute>(numAttributes());
1046
1047    newAtt.setValue(val, name);
1048    for (int i = 0; i < numAttributes(); i++) {
1049      if (i == att) {
1050        newVec.add(newAtt);
1051      } else {
1052        newVec.add(attribute(i));
1053      }
1054    }
1055    m_Attributes = newVec;
1056  }
1057
1058  /**
1059   * Renames the value of a nominal (or string) attribute value. This
1060   * change only affects this dataset.
1061   *
1062   * @param att the attribute
1063   * @param val the value
1064   * @param name the new name
1065   */
1066  public void renameAttributeValue(Attribute att, String val, 
1067                                         String name) {
1068
1069    int v = att.indexOfValue(val);
1070    if (v == -1) throw new IllegalArgumentException(val + " not found");
1071    renameAttributeValue(att.index(), v, name);
1072  }
1073
1074  /**
1075   * Creates a new dataset of the same size using random sampling
1076   * with replacement.
1077   *
1078   * @param random a random number generator
1079   * @return the new dataset
1080   */
1081  public Instances resample(Random random) {
1082
1083    Instances newData = new Instances(this, numInstances());
1084    while (newData.numInstances() < numInstances()) {
1085      newData.add(instance(random.nextInt(numInstances())));
1086    }
1087    return newData;
1088  }
1089
1090  /**
1091   * Creates a new dataset of the same size using random sampling
1092   * with replacement according to the current instance weights. The
1093   * weights of the instances in the new dataset are set to one.
1094   *
1095   * @param random a random number generator
1096   * @return the new dataset
1097   */
1098  public Instances resampleWithWeights(Random random) {
1099
1100    double [] weights = new double[numInstances()];
1101    for (int i = 0; i < weights.length; i++) {
1102      weights[i] = instance(i).weight();
1103    }
1104    return resampleWithWeights(random, weights);
1105  }
1106
1107
1108  /**
1109   * Creates a new dataset of the same size using random sampling
1110   * with replacement according to the given weight vector. The
1111   * weights of the instances in the new dataset are set to one.
1112   * The length of the weight vector has to be the same as the
1113   * number of instances in the dataset, and all weights have to
1114   * be positive.
1115   *
1116   * @param random a random number generator
1117   * @param weights the weight vector
1118   * @return the new dataset
1119   * @throws IllegalArgumentException if the weights array is of the wrong
1120   * length or contains negative weights.
1121   */
1122  public Instances resampleWithWeights(Random random, 
1123                                             double[] weights) {
1124
1125    if (weights.length != numInstances()) {
1126      throw new IllegalArgumentException("weights.length != numInstances.");
1127    }
1128    Instances newData = new Instances(this, numInstances());
1129    if (numInstances() == 0) {
1130      return newData;
1131    }
1132    double[] probabilities = new double[numInstances()];
1133    double sumProbs = 0, sumOfWeights = Utils.sum(weights);
1134    for (int i = 0; i < numInstances(); i++) {
1135      sumProbs += random.nextDouble();
1136      probabilities[i] = sumProbs;
1137    }
1138    Utils.normalize(probabilities, sumProbs / sumOfWeights);
1139
1140    // Make sure that rounding errors don't mess things up
1141    probabilities[numInstances() - 1] = sumOfWeights;
1142    int k = 0; int l = 0;
1143    sumProbs = 0;
1144    while ((k < numInstances() && (l < numInstances()))) {
1145      if (weights[l] < 0) {
1146        throw new IllegalArgumentException("Weights have to be positive.");
1147      }
1148      sumProbs += weights[l];
1149      while ((k < numInstances()) &&
1150             (probabilities[k] <= sumProbs)) { 
1151        newData.add(instance(l));
1152        newData.instance(k).setWeight(1);
1153        k++;
1154      }
1155      l++;
1156    }
1157    return newData;
1158  }
1159
1160  /**
1161   * Replaces the instance at the given position.
1162   * Shallow copies instance before it is added. Does not
1163   * check if the instance is compatible with the dataset.
1164   * Note: String or relational values are not transferred.
1165   *
1166   * @param index position where instance is to be inserted
1167   * @param instance the instance to be inserted
1168   * @return the instance previously at that position
1169   */
1170  //@ requires 0 <= index;
1171  //@ requires index < m_Instances.size();
1172  public Instance set(int index, /*@non_null@*/ Instance instance) {
1173
1174    Instance newInstance = (Instance)instance.copy();
1175    Instance oldInstance = m_Instances.get(index);
1176
1177    newInstance.setDataset(this);
1178    m_Instances.set(index, newInstance);
1179
1180    return oldInstance;
1181  }
1182
1183  /**
1184   * Sets the class attribute.
1185   *
1186   * @param att attribute to be the class
1187   */
1188  public void setClass(Attribute att) {
1189
1190    m_ClassIndex = att.index();
1191  }
1192
1193  /**
1194   * Sets the class index of the set.
1195   * If the class index is negative there is assumed to be no class.
1196   * (ie. it is undefined)
1197   *
1198   * @param classIndex the new class index (index starts with 0)
1199   * @throws IllegalArgumentException if the class index is too big or < 0
1200   */
1201  public void setClassIndex(int classIndex) {
1202
1203    if (classIndex >= numAttributes()) {
1204      throw new IllegalArgumentException("Invalid class index: " + classIndex);
1205    }
1206    m_ClassIndex = classIndex;
1207  }
1208
1209  /**
1210   * Sets the relation's name.
1211   *
1212   * @param newName the new relation name.
1213   */
1214  public void setRelationName(/*@non_null@*/String newName) {
1215   
1216    m_RelationName = newName;
1217  }
1218
1219  /**
1220   * Sorts the instances based on an attribute. For numeric attributes,
1221   * instances are sorted in ascending order. For nominal attributes,
1222   * instances are sorted based on the attribute label ordering
1223   * specified in the header. Instances with missing values for the
1224   * attribute are placed at the end of the dataset.
1225   *
1226   * @param attIndex the attribute's index (index starts with 0)
1227   */
1228  public void sort(int attIndex) {
1229
1230    int i,j;
1231
1232    // move all instances with missing values to end
1233    j = numInstances() - 1;
1234    i = 0;
1235    while (i <= j) {
1236      if (instance(j).isMissing(attIndex)) {
1237        j--;
1238      } else {
1239        if (instance(i).isMissing(attIndex)) {
1240          swap(i,j);
1241          j--;
1242        }
1243        i++;
1244      }
1245    }
1246    quickSort(attIndex, 0, j);
1247  }
1248
1249  /**
1250   * Sorts the instances based on an attribute. For numeric attributes,
1251   * instances are sorted into ascending order. For nominal attributes,
1252   * instances are sorted based on the attribute label ordering
1253   * specified in the header. Instances with missing values for the
1254   * attribute are placed at the end of the dataset.
1255   *
1256   * @param att the attribute
1257   */
1258  public void sort(Attribute att) {
1259
1260    sort(att.index());
1261  }
1262
  /**
   * Stratifies a set of instances according to its class values
   * if the class attribute is nominal (so that afterwards a
   * stratified cross-validation can be performed). The instances are
   * first grouped by class value and then reordered by stratStep.
   * Nothing is changed if the class attribute is not nominal.
   *
   * @param numFolds the number of folds in the cross-validation
   * @throws IllegalArgumentException if the number of folds is not
   * greater than 1
   * @throws UnassignedClassException if the class is not set
   */
  public void stratify(int numFolds) {
    
    if (numFolds <= 1) {
      throw new IllegalArgumentException("Number of folds must be greater than 1");
    }
    if (m_ClassIndex < 0) {
      throw new UnassignedClassException("Class index is negative (not set)!");
    }
    if (classAttribute().isNominal()) {

      // sort by class: "index" marks the end of the group that is
      // currently being collected
      int index = 1;
      while (index < numInstances()) {
        // the instance just before "index" determines the class of the
        // current group
        Instance instance1 = instance(index - 1);
        for (int j = index; j < numInstances(); j++) {
          Instance instance2 = instance(j);
          // missing class values are compared separately (presumably
          // because they are encoded as NaN, which never equals itself)
          if ((instance1.classValue() == instance2.classValue()) ||
              (instance1.classIsMissing() && 
               instance2.classIsMissing())) {
            // pull the matching instance to the end of the group
            swap(index,j);
            index++;
          }
        }
        // step over the first instance of the next group; it becomes
        // instance(index - 1) in the next iteration
        index++;
      }
      stratStep(numFolds);
    }
  }
1299 
1300  /**
1301   * Computes the sum of all the instances' weights.
1302   *
1303   * @return the sum of all the instances' weights as a double
1304   */
1305  public /*@pure@*/ double sumOfWeights() {
1306   
1307    double sum = 0;
1308
1309    for (int i = 0; i < numInstances(); i++) {
1310      sum += instance(i).weight();
1311    }
1312    return sum;
1313  }
1314
1315  /**
1316   * Creates the test set for one fold of a cross-validation on
1317   * the dataset.
1318   *
1319   * @param numFolds the number of folds in the cross-validation. Must
1320   * be greater than 1.
1321   * @param numFold 0 for the first fold, 1 for the second, ...
1322   * @return the test set as a set of weighted instances
1323   * @throws IllegalArgumentException if the number of folds is less than 2
1324   * or greater than the number of instances.
1325   */
1326  //@ requires 2 <= numFolds && numFolds < numInstances();
1327  //@ requires 0 <= numFold && numFold < numFolds;
1328  public Instances testCV(int numFolds, int numFold) {
1329
1330    int numInstForFold, first, offset;
1331    Instances test;
1332   
1333    if (numFolds < 2) {
1334      throw new IllegalArgumentException("Number of folds must be at least 2!");
1335    }
1336    if (numFolds > numInstances()) {
1337      throw new IllegalArgumentException("Can't have more folds than instances!");
1338    }
1339    numInstForFold = numInstances() / numFolds;
1340    if (numFold < numInstances() % numFolds){
1341      numInstForFold++;
1342      offset = numFold;
1343    }else
1344      offset = numInstances() % numFolds;
1345    test = new Instances(this, numInstForFold);
1346    first = numFold * (numInstances() / numFolds) + offset;
1347    copyInstances(first, test, numInstForFold);
1348    return test;
1349  }
1350 
1351  /**
1352   * Returns the dataset as a string in ARFF format. Strings
1353   * are quoted if they contain whitespace characters, or if they
1354   * are a question mark.
1355   *
1356   * @return the dataset in ARFF format as a string
1357   */
1358  public String toString() {
1359   
1360    StringBuffer text = new StringBuffer();
1361   
1362    text.append(ARFF_RELATION).append(" ").
1363      append(Utils.quote(m_RelationName)).append("\n\n");
1364    for (int i = 0; i < numAttributes(); i++) {
1365      text.append(attribute(i)).append("\n");
1366    }
1367    text.append("\n").append(ARFF_DATA).append("\n");
1368
1369    text.append(stringWithoutHeader());
1370    return text.toString();
1371  }
1372
1373  /**
1374   * Returns the instances in the dataset as a string in ARFF format. Strings
1375   * are quoted if they contain whitespace characters, or if they
1376   * are a question mark.
1377   *
1378   * @return the dataset in ARFF format as a string
1379   */
1380  protected String stringWithoutHeader() {
1381   
1382    StringBuffer text = new StringBuffer();
1383
1384    for (int i = 0; i < numInstances(); i++) {
1385      text.append(instance(i));
1386      if (i < numInstances() - 1) {
1387        text.append('\n');
1388      }
1389    }
1390    return text.toString();
1391  }
1392
1393  /**
1394   * Creates the training set for one fold of a cross-validation
1395   * on the dataset.
1396   *
1397   * @param numFolds the number of folds in the cross-validation. Must
1398   * be greater than 1.
1399   * @param numFold 0 for the first fold, 1 for the second, ...
1400   * @return the training set
1401   * @throws IllegalArgumentException if the number of folds is less than 2
1402   * or greater than the number of instances.
1403   */
1404  //@ requires 2 <= numFolds && numFolds < numInstances();
1405  //@ requires 0 <= numFold && numFold < numFolds;
1406  public Instances trainCV(int numFolds, int numFold) {
1407
1408    int numInstForFold, first, offset;
1409    Instances train;
1410 
1411    if (numFolds < 2) {
1412      throw new IllegalArgumentException("Number of folds must be at least 2!");
1413    }
1414    if (numFolds > numInstances()) {
1415      throw new IllegalArgumentException("Can't have more folds than instances!");
1416    }
1417    numInstForFold = numInstances() / numFolds;
1418    if (numFold < numInstances() % numFolds) {
1419      numInstForFold++;
1420      offset = numFold;
1421    }else
1422      offset = numInstances() % numFolds;
1423    train = new Instances(this, numInstances() - numInstForFold);
1424    first = numFold * (numInstances() / numFolds) + offset;
1425    copyInstances(0, train, first);
1426    copyInstances(first + numInstForFold, train,
1427                  numInstances() - first - numInstForFold);
1428
1429    return train;
1430  }
1431
1432  /**
1433   * Creates the training set for one fold of a cross-validation
1434   * on the dataset. The data is subsequently randomized based
1435   * on the given random number generator.
1436   *
1437   * @param numFolds the number of folds in the cross-validation. Must
1438   * be greater than 1.
1439   * @param numFold 0 for the first fold, 1 for the second, ...
1440   * @param random the random number generator
1441   * @return the training set
1442   * @throws IllegalArgumentException if the number of folds is less than 2
1443   * or greater than the number of instances.
1444   */
1445  //@ requires 2 <= numFolds && numFolds < numInstances();
1446  //@ requires 0 <= numFold && numFold < numFolds;
1447  public Instances trainCV(int numFolds, int numFold, Random random) {
1448
1449    Instances train = trainCV(numFolds, numFold);
1450    train.randomize(random);
1451    return train;
1452  }
1453
1454  /**
1455   * Computes the variance for a numeric attribute.
1456   *
1457   * @param attIndex the numeric attribute (index starts with 0)
1458   * @return the variance if the attribute is numeric
1459   * @throws IllegalArgumentException if the attribute is not numeric
1460   */
1461  public /*@pure@*/ double variance(int attIndex) {
1462 
1463    double sum = 0, sumSquared = 0, sumOfWeights = 0;
1464
1465    if (!attribute(attIndex).isNumeric()) {
1466      throw new IllegalArgumentException("Can't compute variance because attribute is " +
1467                          "not numeric!");
1468    }
1469    for (int i = 0; i < numInstances(); i++) {
1470      if (!instance(i).isMissing(attIndex)) {
1471        sum += instance(i).weight() * 
1472          instance(i).value(attIndex);
1473        sumSquared += instance(i).weight() * 
1474          instance(i).value(attIndex) *
1475          instance(i).value(attIndex);
1476        sumOfWeights += instance(i).weight();
1477      }
1478    }
1479    if (sumOfWeights <= 1) {
1480      return 0;
1481    }
1482    double result = (sumSquared - (sum * sum / sumOfWeights)) / 
1483      (sumOfWeights - 1);
1484
1485    // We don't like negative variance
1486    if (result < 0) {
1487      return 0;
1488    } else {
1489      return result;
1490    }
1491  }
1492
1493  /**
1494   * Computes the variance for a numeric attribute.
1495   *
1496   * @param att the numeric attribute
1497   * @return the variance if the attribute is numeric
1498   * @throws IllegalArgumentException if the attribute is not numeric
1499   */
1500  public /*@pure@*/ double variance(Attribute att) {
1501   
1502    return variance(att.index());
1503  }
1504 
1505  /**
1506   * Calculates summary statistics on the values that appear in this
1507   * set of instances for a specified attribute.
1508   *
1509   * @param index the index of the attribute to summarize (index starts with 0)
1510   * @return an AttributeStats object with it's fields calculated.
1511   */
1512  //@ requires 0 <= index && index < numAttributes();
1513  public AttributeStats attributeStats(int index) {
1514
1515    AttributeStats result = new AttributeStats();
1516    if (attribute(index).isNominal()) {
1517      result.nominalCounts = new int [attribute(index).numValues()];
1518      result.nominalWeights = new double[attribute(index).numValues()];
1519    }
1520    if (attribute(index).isNumeric()) {
1521      result.numericStats = new weka.experiment.Stats();
1522    }
1523    result.totalCount = numInstances();
1524
1525    double [] attVals = attributeToDoubleArray(index);
1526    int [] sorted = Utils.sort(attVals);
1527    int currentCount = 0;
1528    double currentWeight = 0;
1529    double prev = Double.NaN;
1530    for (int j = 0; j < numInstances(); j++) {
1531      Instance current = instance(sorted[j]);
1532      if (current.isMissing(index)) {
1533        result.missingCount = numInstances() - j;
1534        break;
1535      }
1536      if (current.value(index) == prev) {
1537        currentCount++;
1538        currentWeight += current.weight();
1539      } else {
1540        result.addDistinct(prev, currentCount, currentWeight);
1541        currentCount = 1;
1542        currentWeight = current.weight();
1543        prev = current.value(index);
1544      }
1545    }
1546    result.addDistinct(prev, currentCount, currentWeight);
1547    result.distinctCount--; // So we don't count "missing" as a value
1548    return result;
1549  }
1550 
1551  /**
1552   * Gets the value of all instances in this dataset for a particular
1553   * attribute. Useful in conjunction with Utils.sort to allow iterating
1554   * through the dataset in sorted order for some attribute.
1555   *
1556   * @param index the index of the attribute.
1557   * @return an array containing the value of the desired attribute for
1558   * each instance in the dataset.
1559   */
1560  //@ requires 0 <= index && index < numAttributes();
1561  public /*@pure@*/ double [] attributeToDoubleArray(int index) {
1562
1563    double [] result = new double[numInstances()];
1564    for (int i = 0; i < result.length; i++) {
1565      result[i] = instance(i).value(index);
1566    }
1567    return result;
1568  }
1569
1570  /**
1571   * Generates a string summarizing the set of instances. Gives a breakdown
1572   * for each attribute indicating the number of missing/discrete/unique
1573   * values and other information.
1574   *
1575   * @return a string summarizing the dataset
1576   */
1577  public String toSummaryString() {
1578
1579    StringBuffer result = new StringBuffer();
1580    result.append("Relation Name:  ").append(relationName()).append('\n');
1581    result.append("Num Instances:  ").append(numInstances()).append('\n');
1582    result.append("Num Attributes: ").append(numAttributes()).append('\n');
1583    result.append('\n');
1584
1585    result.append(Utils.padLeft("", 5)).append(Utils.padRight("Name", 25));
1586    result.append(Utils.padLeft("Type", 5)).append(Utils.padLeft("Nom", 5));
1587    result.append(Utils.padLeft("Int", 5)).append(Utils.padLeft("Real", 5));
1588    result.append(Utils.padLeft("Missing", 12));
1589    result.append(Utils.padLeft("Unique", 12));
1590    result.append(Utils.padLeft("Dist", 6)).append('\n');
1591    for (int i = 0; i < numAttributes(); i++) {
1592      Attribute a = attribute(i);
1593      AttributeStats as = attributeStats(i);
1594      result.append(Utils.padLeft("" + (i + 1), 4)).append(' ');
1595      result.append(Utils.padRight(a.name(), 25)).append(' ');
1596      long percent;
1597      switch (a.type()) {
1598      case Attribute.NOMINAL:
1599        result.append(Utils.padLeft("Nom", 4)).append(' ');
1600        percent = Math.round(100.0 * as.intCount / as.totalCount);
1601        result.append(Utils.padLeft("" + percent, 3)).append("% ");
1602        result.append(Utils.padLeft("" + 0, 3)).append("% ");
1603        percent = Math.round(100.0 * as.realCount / as.totalCount);
1604        result.append(Utils.padLeft("" + percent, 3)).append("% ");
1605        break;
1606      case Attribute.NUMERIC:
1607        result.append(Utils.padLeft("Num", 4)).append(' ');
1608        result.append(Utils.padLeft("" + 0, 3)).append("% ");
1609        percent = Math.round(100.0 * as.intCount / as.totalCount);
1610        result.append(Utils.padLeft("" + percent, 3)).append("% ");
1611        percent = Math.round(100.0 * as.realCount / as.totalCount);
1612        result.append(Utils.padLeft("" + percent, 3)).append("% ");
1613        break;
1614      case Attribute.DATE:
1615        result.append(Utils.padLeft("Dat", 4)).append(' ');
1616        result.append(Utils.padLeft("" + 0, 3)).append("% ");
1617        percent = Math.round(100.0 * as.intCount / as.totalCount);
1618        result.append(Utils.padLeft("" + percent, 3)).append("% ");
1619        percent = Math.round(100.0 * as.realCount / as.totalCount);
1620        result.append(Utils.padLeft("" + percent, 3)).append("% ");
1621        break;
1622      case Attribute.STRING:
1623        result.append(Utils.padLeft("Str", 4)).append(' ');
1624        percent = Math.round(100.0 * as.intCount / as.totalCount);
1625        result.append(Utils.padLeft("" + percent, 3)).append("% ");
1626        result.append(Utils.padLeft("" + 0, 3)).append("% ");
1627        percent = Math.round(100.0 * as.realCount / as.totalCount);
1628        result.append(Utils.padLeft("" + percent, 3)).append("% ");
1629        break;
1630      case Attribute.RELATIONAL:
1631        result.append(Utils.padLeft("Rel", 4)).append(' ');
1632        percent = Math.round(100.0 * as.intCount / as.totalCount);
1633        result.append(Utils.padLeft("" + percent, 3)).append("% ");
1634        result.append(Utils.padLeft("" + 0, 3)).append("% ");
1635        percent = Math.round(100.0 * as.realCount / as.totalCount);
1636        result.append(Utils.padLeft("" + percent, 3)).append("% ");
1637        break;
1638      default:
1639        result.append(Utils.padLeft("???", 4)).append(' ');
1640        result.append(Utils.padLeft("" + 0, 3)).append("% ");
1641        percent = Math.round(100.0 * as.intCount / as.totalCount);
1642        result.append(Utils.padLeft("" + percent, 3)).append("% ");
1643        percent = Math.round(100.0 * as.realCount / as.totalCount);
1644        result.append(Utils.padLeft("" + percent, 3)).append("% ");
1645        break;
1646      }
1647      result.append(Utils.padLeft("" + as.missingCount, 5)).append(" /");
1648      percent = Math.round(100.0 * as.missingCount / as.totalCount);
1649      result.append(Utils.padLeft("" + percent, 3)).append("% ");
1650      result.append(Utils.padLeft("" + as.uniqueCount, 5)).append(" /");
1651      percent = Math.round(100.0 * as.uniqueCount / as.totalCount);
1652      result.append(Utils.padLeft("" + percent, 3)).append("% ");
1653      result.append(Utils.padLeft("" + as.distinctCount, 5)).append(' ');
1654      result.append('\n');
1655    }
1656    return result.toString();
1657  }
1658
1659  /**
1660   * Copies instances from one set to the end of another
1661   * one.
1662   *
1663   * @param from the position of the first instance to be copied
1664   * @param dest the destination for the instances
1665   * @param num the number of instances to be copied
1666   */
1667  //@ requires 0 <= from && from <= numInstances() - num;
1668  //@ requires 0 <= num;
1669  protected void copyInstances(int from, /*@non_null@*/ Instances dest, int num) {
1670   
1671    for (int i = 0; i < num; i++) {
1672      dest.add(instance(from + i));
1673    }
1674  }
1675 
1676  /**
1677   * Replaces the attribute information by a clone of
1678   * itself.
1679   */
1680  protected void freshAttributeInfo() {
1681
1682    ArrayList<Attribute> newList = new ArrayList<Attribute>(m_Attributes.size());
1683    for (Attribute att : m_Attributes) {
1684      newList.add((Attribute)att.copy());
1685    }
1686    m_Attributes = newList;
1687  }
1688 
1689  /**
1690   * Returns string including all instances, their weights and
1691   * their indices in the original dataset.
1692   *
1693   * @return description of instance and its weight as a string
1694   */
1695  protected /*@pure@*/ String instancesAndWeights(){
1696
1697    StringBuffer text = new StringBuffer();
1698
1699    for (int i = 0; i < numInstances(); i++) {
1700      text.append(instance(i) + " " + instance(i).weight());
1701      if (i < numInstances() - 1) {
1702        text.append("\n");
1703      }
1704    }
1705    return text.toString();
1706  }
1707 
  /**
   * Partitions the instances in the index range [l, r] around a pivot.
   * The pivot is the attribute value of the instance in the middle of
   * the range. Instances are rearranged in place by swapping. Used by
   * quickSort and select.
   *
   * @param attIndex the attribute's index (index starts with 0)
   * @param l the first index of the subset (index starts with 0)
   * @param r the last index of the subset (index starts with 0)
   *
   * @return the final position of the right scan index; quickSort and
   * select use it as the last index of the first part and continue with
   * the ranges [l, result] and [result + 1, r]
   */
  //@ requires 0 <= attIndex && attIndex < numAttributes();
  //@ requires 0 <= l && l <= r && r < numInstances();
  protected int partition(int attIndex, int l, int r) {
    
    // pivot value is taken from the middle of the range
    double pivot = instance((l + r) / 2).value(attIndex);

    while (l < r) {
      // advance from the left over values smaller than the pivot
      while ((instance(l).value(attIndex) < pivot) && (l < r)) {
        l++;
      }
      // retreat from the right over values larger than the pivot
      while ((instance(r).value(attIndex) > pivot) && (l < r)) {
        r--;
      }
      // both scans stopped at an instance that is on the wrong side
      // (or equal to the pivot): exchange them and move on
      if (l < r) {
        swap(l, r);
        l++;
        r--;
      }
    }
    // the scans met on a single instance: if its value is larger than
    // the pivot it must not be part of the first range
    if ((l == r) && (instance(r).value(attIndex) > pivot)) {
      r--;
    } 

    return r;
  }
1743 
1744  /**
1745   * Implements quicksort according to Manber's "Introduction to
1746   * Algorithms".
1747   *
1748   * @param attIndex the attribute's index (index starts with 0)
1749   * @param left the first index of the subset to be sorted (index starts with 0)
1750   * @param right the last index of the subset to be sorted (index starts with 0)
1751   */
1752  //@ requires 0 <= attIndex && attIndex < numAttributes();
1753  //@ requires 0 <= first && first <= right && right < numInstances();
1754  protected void quickSort(int attIndex, int left, int right) {
1755
1756    if (left < right) {
1757      int middle = partition(attIndex, left, right);
1758      quickSort(attIndex, left, middle);
1759      quickSort(attIndex, middle + 1, right);
1760    }
1761  }
1762 
1763  /**
1764   * Implements computation of the kth-smallest element according
1765   * to Manber's "Introduction to Algorithms".
1766   *
1767   * @param attIndex the attribute's index (index starts with 0)
1768   * @param left the first index of the subset (index starts with 0)
1769   * @param right the last index of the subset (index starts with 0)
1770   * @param k the value of k
1771   *
1772   * @return the index of the kth-smallest element
1773   */
1774  //@ requires 0 <= attIndex && attIndex < numAttributes();
1775  //@ requires 0 <= first && first <= right && right < numInstances();
1776  protected int select(int attIndex, int left, int right, int k) {
1777   
1778    if (left == right) {
1779      return left;
1780    } else {
1781      int middle = partition(attIndex, left, right);
1782      if ((middle - left + 1) >= k) {
1783        return select(attIndex, left, middle, k);
1784      } else {
1785        return select(attIndex, middle + 1, right, k - (middle - left + 1));
1786      }
1787    }
1788  }
1789
1790  /**
1791   * Help function needed for stratification of set.
1792   *
1793   * @param numFolds the number of folds for the stratification
1794   */
1795  protected void stratStep (int numFolds){
1796   
1797    ArrayList<Instance> newVec = new ArrayList<Instance>(m_Instances.size());
1798    int start = 0, j;
1799
1800    // create stratified batch
1801    while (newVec.size() < numInstances()) {
1802      j = start;
1803      while (j < numInstances()) {
1804        newVec.add(instance(j));
1805        j = j + numFolds;
1806      }
1807      start++;
1808    }
1809    m_Instances = newVec;
1810  }
1811 
1812  /**
1813   * Swaps two instances in the set.
1814   *
1815   * @param i the first instance's index (index starts with 0)
1816   * @param j the second instance's index (index starts with 0)
1817   */
1818  //@ requires 0 <= i && i < numInstances();
1819  //@ requires 0 <= j && j < numInstances();
1820  public void swap(int i, int j){
1821   
1822    Instance in = m_Instances.get(i);
1823    m_Instances.set(i, m_Instances.get(j));
1824    m_Instances.set(j, in);
1825  }
1826
1827  /**
1828   * Merges two sets of Instances together. The resulting set will have
1829   * all the attributes of the first set plus all the attributes of the
1830   * second set. The number of instances in both sets must be the same.
1831   *
1832   * @param first the first set of Instances
1833   * @param second the second set of Instances
1834   * @return the merged set of Instances
1835   * @throws IllegalArgumentException if the datasets are not the same size
1836   */
1837  public static Instances mergeInstances(Instances first, Instances second) {
1838
1839    if (first.numInstances() != second.numInstances()) {
1840      throw new IllegalArgumentException("Instance sets must be of the same size");
1841    }
1842
1843    // Create the vector of merged attributes
1844    ArrayList<Attribute> newAttributes = new ArrayList<Attribute>();
1845    for (int i = 0; i < first.numAttributes(); i++) {
1846      newAttributes.add(first.attribute(i));
1847    }
1848    for (int i = 0; i < second.numAttributes(); i++) {
1849      newAttributes.add(second.attribute(i));
1850    }
1851   
1852    // Create the set of Instances
1853    Instances merged = new Instances(first.relationName() + '_'
1854                                     + second.relationName(), 
1855                                     newAttributes, 
1856                                     first.numInstances());
1857    // Merge each instance
1858    for (int i = 0; i < first.numInstances(); i++) {
1859      merged.add(first.instance(i).mergeInstance(second.instance(i)));
1860    }
1861    return merged;
1862  }
1863
1864  /**
1865   * Method for testing this class.
1866   *
1867   * @param argv should contain one element: the name of an ARFF file
1868   */
1869  //@ requires argv != null;
1870  //@ requires argv.length == 1;
1871  //@ requires argv[0] != null;
1872  public static void test(String [] argv) {
1873
1874    Instances instances, secondInstances, train, test, empty;
1875    Random random = new Random(2);
1876    Reader reader;
1877    int start, num;
1878    ArrayList<Attribute> testAtts;
1879    ArrayList<String> testVals;
1880    int i,j;
1881   
1882    try{
1883      if (argv.length > 1) {
1884        throw (new Exception("Usage: Instances [<filename>]"));
1885      }
1886     
1887      // Creating set of instances from scratch
1888      testVals = new ArrayList<String>(2);
1889      testVals.add("first_value");
1890      testVals.add("second_value");
1891      testAtts = new ArrayList<Attribute>(2);
1892      testAtts.add(new Attribute("nominal_attribute", testVals));
1893      testAtts.add(new Attribute("numeric_attribute"));
1894      instances = new Instances("test_set", testAtts, 10);
1895      instances.add(new DenseInstance(instances.numAttributes()));
1896      instances.add(new DenseInstance(instances.numAttributes()));
1897      instances.add(new DenseInstance(instances.numAttributes()));
1898      instances.setClassIndex(0);
1899      System.out.println("\nSet of instances created from scratch:\n");
1900      System.out.println(instances);
1901     
1902      if (argv.length == 1) {
1903        String filename = argv[0];
1904        reader = new FileReader(filename);
1905       
1906        // Read first five instances and print them
1907        System.out.println("\nFirst five instances from file:\n");
1908        instances = new Instances(reader, 1);
1909        instances.setClassIndex(instances.numAttributes() - 1);
1910        i = 0;
1911        while ((i < 5) && (instances.readInstance(reader))) {
1912          i++;
1913        }
1914        System.out.println(instances);
1915
1916        // Read all the instances in the file
1917        reader = new FileReader(filename);
1918        instances = new Instances(reader);
1919
1920        // Make the last attribute be the class
1921        instances.setClassIndex(instances.numAttributes() - 1);
1922       
1923        // Print header and instances.
1924        System.out.println("\nDataset:\n");
1925        System.out.println(instances);
1926        System.out.println("\nClass index: "+instances.classIndex());
1927      }
1928     
1929      // Test basic methods based on class index.
1930      System.out.println("\nClass name: "+instances.classAttribute().name());
1931      System.out.println("\nClass index: "+instances.classIndex());
1932      System.out.println("\nClass is nominal: " +
1933                         instances.classAttribute().isNominal());
1934      System.out.println("\nClass is numeric: " +
1935                         instances.classAttribute().isNumeric());
1936      System.out.println("\nClasses:\n");
1937      for (i = 0; i < instances.numClasses(); i++) {
1938        System.out.println(instances.classAttribute().value(i));
1939      }
1940      System.out.println("\nClass values and labels of instances:\n");
1941      for (i = 0; i < instances.numInstances(); i++) {
1942        Instance inst = instances.instance(i);
1943        System.out.print(inst.classValue() + "\t");
1944        System.out.print(inst.toString(inst.classIndex()));
1945        if (instances.instance(i).classIsMissing()) {
1946          System.out.println("\tis missing");
1947        } else {
1948          System.out.println();
1949        }
1950      }
1951     
1952      // Create random weights.
1953      System.out.println("\nCreating random weights for instances.");
1954      for (i = 0; i < instances.numInstances(); i++) {
1955        instances.instance(i).setWeight(random.nextDouble()); 
1956      }
1957     
1958      // Print all instances and their weights (and the sum of weights).
1959      System.out.println("\nInstances and their weights:\n");
1960      System.out.println(instances.instancesAndWeights());
1961      System.out.print("\nSum of weights: ");
1962      System.out.println(instances.sumOfWeights());
1963     
1964      // Insert an attribute
1965      secondInstances = new Instances(instances);
1966      Attribute testAtt = new Attribute("Inserted");
1967      secondInstances.insertAttributeAt(testAtt, 0);
1968      System.out.println("\nSet with inserted attribute:\n");
1969      System.out.println(secondInstances);
1970      System.out.println("\nClass name: "
1971                         + secondInstances.classAttribute().name());
1972     
1973      // Delete the attribute
1974      secondInstances.deleteAttributeAt(0);
1975      System.out.println("\nSet with attribute deleted:\n");
1976      System.out.println(secondInstances);
1977      System.out.println("\nClass name: "
1978                         + secondInstances.classAttribute().name());
1979     
1980      // Test if headers are equal
1981      System.out.println("\nHeaders equal: "+
1982                         instances.equalHeaders(secondInstances) + "\n");
1983     
1984      // Print data in internal format.
1985      System.out.println("\nData (internal values):\n");
1986      for (i = 0; i < instances.numInstances(); i++) {
1987        for (j = 0; j < instances.numAttributes(); j++) {
1988          if (instances.instance(i).isMissing(j)) {
1989            System.out.print("? ");
1990          } else {
1991            System.out.print(instances.instance(i).value(j) + " ");
1992          }
1993        }
1994        System.out.println();
1995      }
1996     
1997      // Just print header
1998      System.out.println("\nEmpty dataset:\n");
1999      empty = new Instances(instances, 0);
2000      System.out.println(empty);
2001      System.out.println("\nClass name: "+empty.classAttribute().name());
2002
2003      // Create copy and rename an attribute and a value (if possible)
2004      if (empty.classAttribute().isNominal()) {
2005        Instances copy = new Instances(empty, 0);
2006        copy.renameAttribute(copy.classAttribute(), "new_name");
2007        copy.renameAttributeValue(copy.classAttribute(), 
2008                                  copy.classAttribute().value(0), 
2009                                  "new_val_name");
2010        System.out.println("\nDataset with names changed:\n" + copy);
2011        System.out.println("\nOriginal dataset:\n" + empty);
2012      }
2013
2014      // Create and prints subset of instances.
2015      start = instances.numInstances() / 4;
2016      num = instances.numInstances() / 2;
2017      System.out.print("\nSubset of dataset: ");
2018      System.out.println(num + " instances from " + (start + 1) 
2019                         + ". instance");
2020      secondInstances = new Instances(instances, start, num);
2021      System.out.println("\nClass name: "
2022                         + secondInstances.classAttribute().name());
2023
2024      // Print all instances and their weights (and the sum of weights).
2025      System.out.println("\nInstances and their weights:\n");
2026      System.out.println(secondInstances.instancesAndWeights());
2027      System.out.print("\nSum of weights: ");
2028      System.out.println(secondInstances.sumOfWeights());
2029     
2030      // Create and print training and test sets for 3-fold
2031      // cross-validation.
2032      System.out.println("\nTrain and test folds for 3-fold CV:");
2033      if (instances.classAttribute().isNominal()) {
2034        instances.stratify(3);
2035      }
2036      for (j = 0; j < 3; j++) {
2037        train = instances.trainCV(3,j, new Random(1));
2038        test = instances.testCV(3,j);
2039                     
2040        // Print all instances and their weights (and the sum of weights).
2041        System.out.println("\nTrain: ");
2042        System.out.println("\nInstances and their weights:\n");
2043        System.out.println(train.instancesAndWeights());
2044        System.out.print("\nSum of weights: ");
2045        System.out.println(train.sumOfWeights());
2046        System.out.println("\nClass name: "+train.classAttribute().name());
2047        System.out.println("\nTest: ");
2048        System.out.println("\nInstances and their weights:\n");
2049        System.out.println(test.instancesAndWeights());
2050        System.out.print("\nSum of weights: ");
2051        System.out.println(test.sumOfWeights());
2052        System.out.println("\nClass name: "+test.classAttribute().name());
2053      }
2054
2055      // Randomize instances and print them.
2056      System.out.println("\nRandomized dataset:");
2057      instances.randomize(random);
2058     
2059      // Print all instances and their weights (and the sum of weights).
2060      System.out.println("\nInstances and their weights:\n");
2061      System.out.println(instances.instancesAndWeights());
2062      System.out.print("\nSum of weights: ");
2063      System.out.println(instances.sumOfWeights());
2064
2065      // Sort instances according to first attribute and
2066      // print them.
2067      System.out.print("\nInstances sorted according to first attribute:\n ");
2068      instances.sort(0);
2069       
2070      // Print all instances and their weights (and the sum of weights).
2071      System.out.println("\nInstances and their weights:\n");
2072      System.out.println(instances.instancesAndWeights());
2073      System.out.print("\nSum of weights: ");
2074      System.out.println(instances.sumOfWeights());
2075    } catch (Exception e) {
2076      e.printStackTrace(); 
2077    }
2078  }
2079
2080  /**
2081   * Main method for this class. The following calls are possible:
2082   * <ul>
2083   *   <li>
2084   *     <code>weka.core.Instances</code> help<br/>
2085   *     prints a short list of possible commands.
2086   *   </li>
2087   *   <li>
2088   *     <code>weka.core.Instances</code> &lt;filename&gt;<br/>
2089   *     prints a summary of a set of instances.
2090   *   </li>
2091   *   <li>
2092   *     <code>weka.core.Instances</code> merge &lt;filename1&gt; &lt;filename2&gt;<br/>
2093   *     merges the two datasets (must have same number of instances) and
2094   *     outputs the results on stdout.
2095   *   </li>
2096   *   <li>
2097   *     <code>weka.core.Instances</code> append &lt;filename1&gt; &lt;filename2&gt;<br/>
2098   *     appends the second dataset to the first one (must have same headers) and
2099   *     outputs the results on stdout.
2100   *   </li>
2101   *   <li>
2102   *     <code>weka.core.Instances</code> headers &lt;filename1&gt; &lt;filename2&gt;<br/>
2103   *     Compares the headers of the two datasets and prints whether they match
2104   *     or not.
2105   *   </li>
2106   *   <li>
2107   *     <code>weka.core.Instances</code> randomize &lt;seed&gt; &lt;filename&gt;<br/>
2108   *     randomizes the dataset with the given seed and outputs the result on stdout.
2109   *   </li>
2110   * </ul>
2111   *
2112   * @param args        the commandline parameters
2113   */
2114  public static void main(String[] args) {
2115
2116    try {
2117      Instances i;
2118      // read from stdin and print statistics
2119      if (args.length == 0) {
2120        DataSource source = new DataSource(System.in);
2121        i = source.getDataSet();
2122        System.out.println(i.toSummaryString());
2123      }
2124      // read file and print statistics
2125      else if ((args.length == 1) && (!args[0].equals("-h")) && (!args[0].equals("help"))) {
2126        DataSource source = new DataSource(args[0]);
2127        i = source.getDataSet();
2128        System.out.println(i.toSummaryString());
2129      }
2130      // read two files, merge them and print result to stdout
2131      else if ((args.length == 3) && (args[0].toLowerCase().equals("merge"))) {
2132        DataSource source1 = new DataSource(args[1]);
2133        DataSource source2 = new DataSource(args[2]);
2134        i = Instances.mergeInstances(source1.getDataSet(), source2.getDataSet());
2135        System.out.println(i);
2136      }
2137      // read two files, append them and print result to stdout
2138      else if ((args.length == 3) && (args[0].toLowerCase().equals("append"))) {
2139        DataSource source1 = new DataSource(args[1]);
2140        DataSource source2 = new DataSource(args[2]);
2141        String msg = source1.getStructure().equalHeadersMsg(source2.getStructure());
2142        if (msg != null)
2143          throw new Exception("The two datasets have different headers:\n" + msg);
2144        Instances structure = source1.getStructure();
2145        System.out.println(source1.getStructure());
2146        while (source1.hasMoreElements(structure))
2147          System.out.println(source1.nextElement(structure));
2148        structure = source2.getStructure();
2149        while (source2.hasMoreElements(structure))
2150          System.out.println(source2.nextElement(structure));
2151      }
2152      // read two files and compare their headers
2153      else if ((args.length == 3) && (args[0].toLowerCase().equals("headers"))) {
2154        DataSource source1 = new DataSource(args[1]);
2155        DataSource source2 = new DataSource(args[2]);
2156        String msg = source1.getStructure().equalHeadersMsg(source2.getStructure());
2157        if (msg == null)
2158          System.out.println("Headers match");
2159        else
2160          System.out.println("Headers don't match:\n" + msg);
2161      }
2162      // read file and seed value, randomize data and print result to stdout
2163      else if ((args.length == 3) && (args[0].toLowerCase().equals("randomize"))) {
2164        DataSource source = new DataSource(args[2]);
2165        i = source.getDataSet();
2166        i.randomize(new Random(Integer.parseInt(args[1])));
2167        System.out.println(i);
2168      }
2169      // wrong parameters or help
2170      else {
2171        System.err.println(
2172            "\nUsage:\n"
2173            // help
2174            + "\tweka.core.Instances help\n"
2175            + "\t\tPrints this help\n"
2176            // stats
2177            + "\tweka.core.Instances <filename>\n"
2178            + "\t\tOutputs dataset statistics\n"
2179            // merge
2180            + "\tweka.core.Instances merge <filename1> <filename2>\n"
2181            + "\t\tMerges the datasets (must have same number of rows).\n"
2182            + "\t\tGenerated dataset gets output on stdout.\n"
2183            // append
2184            + "\tweka.core.Instances append <filename1> <filename2>\n"
2185            + "\t\tAppends the second dataset to the first (must have same number of attributes).\n"
2186            + "\t\tGenerated dataset gets output on stdout.\n"
2187            // headers
2188            + "\tweka.core.Instances headers <filename1> <filename2>\n"
2189            + "\t\tCompares the structure of the two datasets and outputs whether they\n"
2190            + "\t\tdiffer or not.\n"
2191            // randomize
2192            + "\tweka.core.Instances randomize <seed> <filename>\n"
2193            + "\t\tRandomizes the dataset and outputs it on stdout.\n"
2194        );
2195      }
2196    }
2197    catch (Exception ex) {
2198      ex.printStackTrace();
2199      System.err.println(ex.getMessage());
2200    }
2201  }
2202 
2203  /**
2204   * Returns the revision string.
2205   *
2206   * @return            the revision
2207   */
2208  public String getRevision() {
2209    return RevisionUtils.extract("$Revision: 5987 $");
2210  }
2211}
Note: See TracBrowser for help on using the repository browser.