| 1 | /* | 
|---|
| 2 | *    This program is free software; you can redistribute it and/or modify | 
|---|
| 3 | *    it under the terms of the GNU General Public License as published by | 
|---|
| 4 | *    the Free Software Foundation; either version 2 of the License, or | 
|---|
| 5 | *    (at your option) any later version. | 
|---|
| 6 | * | 
|---|
| 7 | *    This program is distributed in the hope that it will be useful, | 
|---|
| 8 | *    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 9 | *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 10 | *    GNU General Public License for more details. | 
|---|
| 11 | * | 
|---|
| 12 | *    You should have received a copy of the GNU General Public License | 
|---|
| 13 | *    along with this program; if not, write to the Free Software | 
|---|
| 14 | *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 
|---|
| 15 | */ | 
|---|
| 16 |  | 
|---|
| 17 | /* | 
|---|
| 18 | *    Instances.java | 
|---|
| 19 | *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand | 
|---|
| 20 | * | 
|---|
| 21 | */ | 
|---|
| 22 |  | 
|---|
| 23 | package weka.core; | 
|---|
| 24 |  | 
|---|
| 25 | import weka.core.converters.ArffLoader.ArffReader; | 
|---|
| 26 | import weka.core.converters.ConverterUtils.DataSource; | 
|---|
| 27 |  | 
|---|
| 28 | import java.io.FileReader; | 
|---|
| 29 | import java.io.IOException; | 
|---|
| 30 | import java.io.Reader; | 
|---|
| 31 | import java.io.Serializable; | 
|---|
| 32 | import java.util.Enumeration; | 
|---|
| 33 | import java.util.Random; | 
|---|
| 34 | import java.util.List; | 
|---|
| 35 | import java.util.AbstractList; | 
|---|
| 36 | import java.util.ArrayList; | 
|---|
| 37 |  | 
|---|
| 38 | /** | 
|---|
| 39 | * Class for handling an ordered set of weighted instances. <p> | 
|---|
| 40 | * | 
|---|
| 41 | * Typical usage: <p> | 
|---|
| 42 | * <pre> | 
|---|
| 43 | * import weka.core.converters.ConverterUtils.DataSource; | 
|---|
| 44 | * ... | 
|---|
| 45 | * | 
|---|
| 46 | * // Read all the instances in the file (ARFF, CSV, XRFF, ...) | 
|---|
| 47 | * DataSource source = new DataSource(filename); | 
|---|
| 48 | * Instances instances = source.getDataSet(); | 
|---|
| 49 | * | 
|---|
| 50 | * // Make the last attribute be the class | 
|---|
| 51 | * instances.setClassIndex(instances.numAttributes() - 1); | 
|---|
| 52 | * | 
|---|
| 53 | * // Print header and instances. | 
|---|
| 54 | * System.out.println("\nDataset:\n"); | 
|---|
| 55 | * System.out.println(instances); | 
|---|
| 56 | * | 
|---|
| 57 | * ... | 
|---|
| 58 | * </pre><p> | 
|---|
| 59 | * | 
|---|
| 60 | * All methods that change a set of instances are safe, ie. a change | 
|---|
| 61 | * of a set of instances does not affect any other sets of | 
|---|
| 62 | * instances. All methods that change a dataset's attribute | 
|---|
| 63 | * information clone the dataset before it is changed. | 
|---|
| 64 | * | 
|---|
| 65 | * @author Eibe Frank (eibe@cs.waikato.ac.nz) | 
|---|
| 66 | * @author Len Trigg (trigg@cs.waikato.ac.nz) | 
|---|
| 67 | * @author FracPete (fracpete at waikato dot ac dot nz) | 
|---|
| 68 | * @version $Revision: 5987 $ | 
|---|
| 69 | */ | 
|---|
| 70 | public class Instances extends AbstractList<Instance> | 
|---|
| 71 | implements Serializable, RevisionHandler { | 
|---|
| 72 |  | 
|---|
| 73 | /** for serialization */ | 
|---|
| 74 | static final long serialVersionUID = -19412345060742748L; | 
|---|
| 75 |  | 
|---|
| 76 | /** The filename extension that should be used for arff files */ | 
|---|
| 77 | public final static String FILE_EXTENSION = ".arff"; | 
|---|
| 78 |  | 
|---|
| 79 | /** The filename extension that should be used for bin. serialized instances files */ | 
|---|
| 80 | public final static String SERIALIZED_OBJ_FILE_EXTENSION = ".bsi"; | 
|---|
| 81 |  | 
|---|
| 82 | /** The keyword used to denote the start of an arff header */ | 
|---|
| 83 | public final static String ARFF_RELATION = "@relation"; | 
|---|
| 84 |  | 
|---|
| 85 | /** The keyword used to denote the start of the arff data section */ | 
|---|
| 86 | public final static String ARFF_DATA = "@data"; | 
|---|
| 87 |  | 
|---|
| 88 | /** The dataset's name. */ | 
|---|
| 89 | protected /*@spec_public non_null@*/ String m_RelationName; | 
|---|
| 90 |  | 
|---|
| 91 | /** The attribute information. */ | 
|---|
| 92 | protected /*@spec_public non_null@*/ ArrayList<Attribute> m_Attributes; | 
|---|
| 93 | /*  public invariant (\forall int i; 0 <= i && i < m_Attributes.size(); | 
|---|
| 94 | m_Attributes.get(i) != null); | 
|---|
| 95 | */ | 
|---|
| 96 |  | 
|---|
| 97 | /** The instances. */ | 
|---|
| 98 | protected /*@spec_public non_null@*/ ArrayList<Instance> m_Instances; | 
|---|
| 99 |  | 
|---|
| 100 | /** The class attribute's index */ | 
|---|
| 101 | protected int m_ClassIndex; | 
|---|
| 102 | //@ protected invariant classIndex() == m_ClassIndex; | 
|---|
| 103 |  | 
|---|
| 104 | /** The lines read so far in case of incremental loading. Since the | 
|---|
| 105 | * StreamTokenizer will be re-initialized with every instance that is read, | 
|---|
| 106 | * we have to keep track of the number of lines read so far. | 
|---|
| 107 | * @see #readInstance(Reader) */ | 
|---|
| 108 | protected int m_Lines = 0; | 
|---|
| 109 |  | 
|---|
| 110 | /** | 
|---|
| 111 | * Reads an ARFF file from a reader, and assigns a weight of | 
|---|
| 112 | * one to each instance. Lets the index of the class | 
|---|
| 113 | * attribute be undefined (negative). | 
|---|
| 114 | * | 
|---|
| 115 | * @param reader the reader | 
|---|
| 116 | * @throws IOException if the ARFF file is not read | 
|---|
| 117 | * successfully | 
|---|
| 118 | */ | 
|---|
| 119 | public Instances(/*@non_null@*/Reader reader) throws IOException { | 
|---|
| 120 | ArffReader arff = new ArffReader(reader); | 
|---|
| 121 | Instances dataset = arff.getData(); | 
|---|
| 122 | initialize(dataset, dataset.numInstances()); | 
|---|
| 123 | dataset.copyInstances(0, this, dataset.numInstances()); | 
|---|
| 124 | compactify(); | 
|---|
| 125 | } | 
|---|
| 126 |  | 
|---|
| 127 | /** | 
|---|
| 128 | * Reads the header of an ARFF file from a reader and | 
|---|
| 129 | * reserves space for the given number of instances. Lets | 
|---|
| 130 | * the class index be undefined (negative). | 
|---|
| 131 | * | 
|---|
| 132 | * @param reader the reader | 
|---|
| 133 | * @param capacity the capacity | 
|---|
| 134 | * @throws IllegalArgumentException if the header is not read successfully | 
|---|
| 135 | * or the capacity is negative. | 
|---|
| 136 | * @throws IOException if there is a problem with the reader. | 
|---|
| 137 | * @deprecated instead of using this method in conjunction with the | 
|---|
| 138 | * <code>readInstance(Reader)</code> method, one should use the | 
|---|
| 139 | * <code>ArffLoader</code> or <code>DataSource</code> class instead. | 
|---|
| 140 | * @see weka.core.converters.ArffLoader | 
|---|
| 141 | * @see weka.core.converters.ConverterUtils.DataSource | 
|---|
| 142 | */ | 
|---|
| 143 | //@ requires capacity >= 0; | 
|---|
| 144 | //@ ensures classIndex() == -1; | 
|---|
| 145 | @Deprecated public Instances(/*@non_null@*/Reader reader, int capacity) | 
|---|
| 146 | throws IOException { | 
|---|
| 147 |  | 
|---|
| 148 | ArffReader arff = new ArffReader(reader, 0); | 
|---|
| 149 | Instances header = arff.getStructure(); | 
|---|
| 150 | initialize(header, capacity); | 
|---|
| 151 | m_Lines = arff.getLineNo(); | 
|---|
| 152 | } | 
|---|
| 153 |  | 
|---|
| 154 | /** | 
|---|
| 155 | * Constructor copying all instances and references to | 
|---|
| 156 | * the header information from the given set of instances. | 
|---|
| 157 | * | 
|---|
| 158 | * @param dataset the set to be copied | 
|---|
| 159 | */ | 
|---|
| 160 | public Instances(/*@non_null@*/Instances dataset) { | 
|---|
| 161 |  | 
|---|
| 162 | this(dataset, dataset.numInstances()); | 
|---|
| 163 |  | 
|---|
| 164 | dataset.copyInstances(0, this, dataset.numInstances()); | 
|---|
| 165 | } | 
|---|
| 166 |  | 
|---|
/**
 * Creates an empty set of instances that shares the header information
 * (by reference) of the given set. A negative capacity is treated as 0.
 *
 * @param dataset the instances from which the header
 * information is to be taken
 * @param capacity the capacity of the new dataset
 */
public Instances(/*@non_null@*/Instances dataset, int capacity) {
  // All of the work is delegated to the shared initialization routine.
  this.initialize(dataset, capacity);
}
|---|
| 179 |  | 
|---|
| 180 | /** | 
|---|
| 181 | * initializes with the header information of the given dataset and sets | 
|---|
| 182 | * the capacity of the set of instances. | 
|---|
| 183 | * | 
|---|
| 184 | * @param dataset the dataset to use as template | 
|---|
| 185 | * @param capacity the number of rows to reserve | 
|---|
| 186 | */ | 
|---|
| 187 | protected void initialize(Instances dataset, int capacity) { | 
|---|
| 188 | if (capacity < 0) | 
|---|
| 189 | capacity = 0; | 
|---|
| 190 |  | 
|---|
| 191 | // Strings only have to be "shallow" copied because | 
|---|
| 192 | // they can't be modified. | 
|---|
| 193 | m_ClassIndex   = dataset.m_ClassIndex; | 
|---|
| 194 | m_RelationName = dataset.m_RelationName; | 
|---|
| 195 | m_Attributes   = dataset.m_Attributes; | 
|---|
| 196 | m_Instances    = new ArrayList<Instance>(capacity); | 
|---|
| 197 | } | 
|---|
| 198 |  | 
|---|
/**
 * Creates a new set of instances from a contiguous subset of another
 * set. The header information is shared with the source set.
 *
 * @param source the set of instances from which a subset
 * is to be created
 * @param first the index of the first instance to be copied
 * @param toCopy the number of instances to be copied
 * @throws IllegalArgumentException if first is negative or
 * first + toCopy exceeds the number of instances in source
 */
//@ requires 0 <= first;
//@ requires 0 <= toCopy;
//@ requires first + toCopy <= source.numInstances();
public Instances(/*@non_null@*/Instances source, int first, int toCopy) {
  this(source, toCopy);

  // The range can only be validated after the mandatory this(...) call.
  if (first < 0 || first + toCopy > source.numInstances()) {
    throw new IllegalArgumentException(
      "Parameters first and/or toCopy out of range");
  }
  source.copyInstances(first, this, toCopy);
}
|---|
| 222 |  | 
|---|
/**
 * Creates an empty set of instances that uses the given attribute
 * information. Each attribute's index is set to its position in
 * <code>attInfo</code> and the class index is left undefined (-1).
 * A negative capacity is treated as 0. The attribute list is stored by
 * reference, so the given attribute information must not be changed
 * after this constructor has been used.
 *
 * @param name the name of the relation
 * @param attInfo the attribute information
 * @param capacity the capacity of the set
 */
public Instances(/*@non_null@*/String name,
  /*@non_null@*/ArrayList<Attribute> attInfo, int capacity) {

  m_RelationName = name;
  m_ClassIndex = -1;
  m_Attributes = attInfo;
  for (int i = 0; i < numAttributes(); i++) {
    attribute(i).setIndex(i);
  }
  // ArrayList's capacity constructor rejects negative values, so clamp to
  // 0 to honour the documented contract (same as initialize()).
  m_Instances = new ArrayList<Instance>(Math.max(capacity, 0));
}
|---|
| 244 |  | 
|---|
| 245 | /** | 
|---|
| 246 | * Create a copy of the structure if the data has string or | 
|---|
| 247 | * relational attributes, "cleanses" string types (i.e. doesn't | 
|---|
| 248 | * contain references to the strings seen in the past) and all | 
|---|
| 249 | * relational attributes. | 
|---|
| 250 | * | 
|---|
| 251 | * @return a copy of the instance structure. | 
|---|
| 252 | */ | 
|---|
| 253 | public Instances stringFreeStructure() { | 
|---|
| 254 |  | 
|---|
| 255 | ArrayList<Attribute> newAtts = new ArrayList<Attribute>(); | 
|---|
| 256 | for (int i = 0 ; i < m_Attributes.size(); i++) { | 
|---|
| 257 | Attribute att = (Attribute)m_Attributes.get(i); | 
|---|
| 258 | if (att.type() == Attribute.STRING) { | 
|---|
| 259 | newAtts.add(new Attribute(att.name(), (List<String>)null, i)); | 
|---|
| 260 | } else if (att.type() == Attribute.RELATIONAL) { | 
|---|
| 261 | newAtts.add(new Attribute(att.name(), new Instances(att.relation(), 0), i)); | 
|---|
| 262 | } | 
|---|
| 263 | } | 
|---|
| 264 | if (newAtts.size() == 0) { | 
|---|
| 265 | return new Instances(this, 0); | 
|---|
| 266 | } | 
|---|
| 267 | ArrayList<Attribute> atts = Utils.cast(m_Attributes.clone()); | 
|---|
| 268 | for (int i = 0; i < newAtts.size(); i++) { | 
|---|
| 269 | atts.set(((Attribute)newAtts.get(i)).index(), newAtts.get(i)); | 
|---|
| 270 | } | 
|---|
| 271 | Instances result = new Instances(this, 0); | 
|---|
| 272 | result.m_Attributes = atts; | 
|---|
| 273 | return result; | 
|---|
| 274 | } | 
|---|
| 275 |  | 
|---|
| 276 | /** | 
|---|
| 277 | * Adds one instance to the end of the set. | 
|---|
| 278 | * Shallow copies instance before it is added. Increases the | 
|---|
| 279 | * size of the dataset if it is not large enough. Does not | 
|---|
| 280 | * check if the instance is compatible with the dataset. | 
|---|
| 281 | * Note: String or relational values are not transferred. | 
|---|
| 282 | * | 
|---|
| 283 | * @param instance the instance to be added | 
|---|
| 284 | */ | 
|---|
| 285 | public boolean add(/*@non_null@*/ Instance instance) { | 
|---|
| 286 |  | 
|---|
| 287 | Instance newInstance = (Instance)instance.copy(); | 
|---|
| 288 |  | 
|---|
| 289 | newInstance.setDataset(this); | 
|---|
| 290 | m_Instances.add(newInstance); | 
|---|
| 291 |  | 
|---|
| 292 | return true; | 
|---|
| 293 | } | 
|---|
| 294 |  | 
|---|
| 295 | /** | 
|---|
| 296 | * Adds one instance to the end of the set. | 
|---|
| 297 | * Shallow copies instance before it is added. Increases the | 
|---|
| 298 | * size of the dataset if it is not large enough. Does not | 
|---|
| 299 | * check if the instance is compatible with the dataset. | 
|---|
| 300 | * Note: String or relational values are not transferred. | 
|---|
| 301 | * | 
|---|
| 302 | * @param index position where instance is to be inserted | 
|---|
| 303 | * @param instance the instance to be added | 
|---|
| 304 | */ | 
|---|
| 305 | //@ requires 0 <= index; | 
|---|
| 306 | //@ requires index < m_Instances.size(); | 
|---|
| 307 | public void add(int index, /*@non_null@*/ Instance instance) { | 
|---|
| 308 |  | 
|---|
| 309 | Instance newInstance = (Instance)instance.copy(); | 
|---|
| 310 |  | 
|---|
| 311 | newInstance.setDataset(this); | 
|---|
| 312 | m_Instances.add(index, newInstance); | 
|---|
| 313 | } | 
|---|
| 314 |  | 
|---|
| 315 | /** | 
|---|
| 316 | * Returns an attribute. | 
|---|
| 317 | * | 
|---|
| 318 | * @param index the attribute's index (index starts with 0) | 
|---|
| 319 | * @return the attribute at the given position | 
|---|
| 320 | */ | 
|---|
| 321 | //@ requires 0 <= index; | 
|---|
| 322 | //@ requires index < m_Attributes.size(); | 
|---|
| 323 | //@ ensures \result != null; | 
|---|
| 324 | public /*@pure@*/ Attribute attribute(int index) { | 
|---|
| 325 |  | 
|---|
| 326 | return (Attribute) m_Attributes.get(index); | 
|---|
| 327 | } | 
|---|
| 328 |  | 
|---|
| 329 | /** | 
|---|
| 330 | * Returns an attribute given its name. If there is more than | 
|---|
| 331 | * one attribute with the same name, it returns the first one. | 
|---|
| 332 | * Returns null if the attribute can't be found. | 
|---|
| 333 | * | 
|---|
| 334 | * @param name the attribute's name | 
|---|
| 335 | * @return the attribute with the given name, null if the | 
|---|
| 336 | * attribute can't be found | 
|---|
| 337 | */ | 
|---|
| 338 | public /*@pure@*/ Attribute attribute(String name) { | 
|---|
| 339 |  | 
|---|
| 340 | for (int i = 0; i < numAttributes(); i++) { | 
|---|
| 341 | if (attribute(i).name().equals(name)) { | 
|---|
| 342 | return attribute(i); | 
|---|
| 343 | } | 
|---|
| 344 | } | 
|---|
| 345 | return null; | 
|---|
| 346 | } | 
|---|
| 347 |  | 
|---|
| 348 | /** | 
|---|
| 349 | * Checks for attributes of the given type in the dataset | 
|---|
| 350 | * | 
|---|
| 351 | * @param attType  the attribute type to look for | 
|---|
| 352 | * @return         true if attributes of the given type are present | 
|---|
| 353 | */ | 
|---|
| 354 | public boolean checkForAttributeType(int attType) { | 
|---|
| 355 |  | 
|---|
| 356 | int i = 0; | 
|---|
| 357 |  | 
|---|
| 358 | while (i < m_Attributes.size()) { | 
|---|
| 359 | if (attribute(i++).type() == attType) { | 
|---|
| 360 | return true; | 
|---|
| 361 | } | 
|---|
| 362 | } | 
|---|
| 363 | return false; | 
|---|
| 364 | } | 
|---|
| 365 |  | 
|---|
| 366 | /** | 
|---|
| 367 | * Checks for string attributes in the dataset | 
|---|
| 368 | * | 
|---|
| 369 | * @return true if string attributes are present, false otherwise | 
|---|
| 370 | */ | 
|---|
| 371 | public /*@pure@*/ boolean checkForStringAttributes() { | 
|---|
| 372 | return checkForAttributeType(Attribute.STRING); | 
|---|
| 373 | } | 
|---|
| 374 |  | 
|---|
| 375 | /** | 
|---|
| 376 | * Checks if the given instance is compatible | 
|---|
| 377 | * with this dataset. Only looks at the size of | 
|---|
| 378 | * the instance and the ranges of the values for | 
|---|
| 379 | * nominal and string attributes. | 
|---|
| 380 | * | 
|---|
| 381 | * @param instance the instance to check | 
|---|
| 382 | * @return true if the instance is compatible with the dataset | 
|---|
| 383 | */ | 
|---|
| 384 | public /*@pure@*/ boolean checkInstance(Instance instance) { | 
|---|
| 385 |  | 
|---|
| 386 | if (instance.numAttributes() != numAttributes()) { | 
|---|
| 387 | return false; | 
|---|
| 388 | } | 
|---|
| 389 | for (int i = 0; i < numAttributes(); i++) { | 
|---|
| 390 | if (instance.isMissing(i)) { | 
|---|
| 391 | continue; | 
|---|
| 392 | } else if (attribute(i).isNominal() || | 
|---|
| 393 | attribute(i).isString()) { | 
|---|
| 394 | if (!(Utils.eq(instance.value(i), | 
|---|
| 395 | (double)(int)instance.value(i)))) { | 
|---|
| 396 | return false; | 
|---|
| 397 | } else if (Utils.sm(instance.value(i), 0) || | 
|---|
| 398 | Utils.gr(instance.value(i), | 
|---|
| 399 | attribute(i).numValues())) { | 
|---|
| 400 | return false; | 
|---|
| 401 | } | 
|---|
| 402 | } | 
|---|
| 403 | } | 
|---|
| 404 | return true; | 
|---|
| 405 | } | 
|---|
| 406 |  | 
|---|
| 407 | /** | 
|---|
| 408 | * Returns the class attribute. | 
|---|
| 409 | * | 
|---|
| 410 | * @return the class attribute | 
|---|
| 411 | * @throws UnassignedClassException if the class is not set | 
|---|
| 412 | */ | 
|---|
| 413 | //@ requires classIndex() >= 0; | 
|---|
| 414 | public /*@pure@*/ Attribute classAttribute() { | 
|---|
| 415 |  | 
|---|
| 416 | if (m_ClassIndex < 0) { | 
|---|
| 417 | throw new UnassignedClassException("Class index is negative (not set)!"); | 
|---|
| 418 | } | 
|---|
| 419 | return attribute(m_ClassIndex); | 
|---|
| 420 | } | 
|---|
| 421 |  | 
|---|
| 422 | /** | 
|---|
| 423 | * Returns the class attribute's index. Returns negative number | 
|---|
| 424 | * if it's undefined. | 
|---|
| 425 | * | 
|---|
| 426 | * @return the class index as an integer | 
|---|
| 427 | */ | 
|---|
| 428 | // ensures \result == m_ClassIndex; | 
|---|
| 429 | public /*@pure@*/ int classIndex() { | 
|---|
| 430 |  | 
|---|
| 431 | return m_ClassIndex; | 
|---|
| 432 | } | 
|---|
| 433 |  | 
|---|
| 434 | /** | 
|---|
| 435 | * Compactifies the set of instances. Decreases the capacity of | 
|---|
| 436 | * the set so that it matches the number of instances in the set. | 
|---|
| 437 | */ | 
|---|
| 438 | public void compactify() { | 
|---|
| 439 |  | 
|---|
| 440 | m_Instances.trimToSize(); | 
|---|
| 441 | } | 
|---|
| 442 |  | 
|---|
| 443 | /** | 
|---|
| 444 | * Removes all instances from the set. | 
|---|
| 445 | */ | 
|---|
| 446 | public void delete() { | 
|---|
| 447 |  | 
|---|
| 448 | m_Instances = new ArrayList<Instance>(); | 
|---|
| 449 | } | 
|---|
| 450 |  | 
|---|
| 451 | /** | 
|---|
| 452 | * Removes an instance at the given position from the set. | 
|---|
| 453 | * | 
|---|
| 454 | * @param index the instance's position (index starts with 0) | 
|---|
| 455 | */ | 
|---|
| 456 | //@ requires 0 <= index && index < numInstances(); | 
|---|
| 457 | public void delete(int index) { | 
|---|
| 458 |  | 
|---|
| 459 | m_Instances.remove(index); | 
|---|
| 460 | } | 
|---|
| 461 |  | 
|---|
| 462 | /** | 
|---|
| 463 | * Deletes an attribute at the given position | 
|---|
| 464 | * (0 to numAttributes() - 1). A deep copy of the attribute | 
|---|
| 465 | * information is performed before the attribute is deleted. | 
|---|
| 466 | * | 
|---|
| 467 | * @param position the attribute's position (position starts with 0) | 
|---|
| 468 | * @throws IllegalArgumentException if the given index is out of range | 
|---|
| 469 | *            or the class attribute is being deleted | 
|---|
| 470 | */ | 
|---|
| 471 | //@ requires 0 <= position && position < numAttributes(); | 
|---|
| 472 | //@ requires position != classIndex(); | 
|---|
| 473 | public void deleteAttributeAt(int position) { | 
|---|
| 474 |  | 
|---|
| 475 | if ((position < 0) || (position >= m_Attributes.size())) { | 
|---|
| 476 | throw new IllegalArgumentException("Index out of range"); | 
|---|
| 477 | } | 
|---|
| 478 | if (position == m_ClassIndex) { | 
|---|
| 479 | throw new IllegalArgumentException("Can't delete class attribute"); | 
|---|
| 480 | } | 
|---|
| 481 | freshAttributeInfo(); | 
|---|
| 482 | if (m_ClassIndex > position) { | 
|---|
| 483 | m_ClassIndex--; | 
|---|
| 484 | } | 
|---|
| 485 | m_Attributes.remove(position); | 
|---|
| 486 | for (int i = position; i < m_Attributes.size(); i++) { | 
|---|
| 487 | Attribute current = (Attribute)m_Attributes.get(i); | 
|---|
| 488 | current.setIndex(current.index() - 1); | 
|---|
| 489 | } | 
|---|
| 490 | for (int i = 0; i < numInstances(); i++) { | 
|---|
| 491 | instance(i).setDataset(null); | 
|---|
| 492 | instance(i).deleteAttributeAt(position); | 
|---|
| 493 | instance(i).setDataset(this); | 
|---|
| 494 | } | 
|---|
| 495 | } | 
|---|
| 496 |  | 
|---|
| 497 | /** | 
|---|
| 498 | * Deletes all attributes of the given type in the dataset. A deep copy of | 
|---|
| 499 | * the attribute information is performed before an attribute is deleted. | 
|---|
| 500 | * | 
|---|
| 501 | * @param attType the attribute type to delete | 
|---|
| 502 | * @throws IllegalArgumentException if attribute couldn't be | 
|---|
| 503 | * successfully deleted (probably because it is the class attribute). | 
|---|
| 504 | */ | 
|---|
| 505 | public void deleteAttributeType(int attType) { | 
|---|
| 506 | int i = 0; | 
|---|
| 507 | while (i < m_Attributes.size()) { | 
|---|
| 508 | if (attribute(i).type() == attType) { | 
|---|
| 509 | deleteAttributeAt(i); | 
|---|
| 510 | } else { | 
|---|
| 511 | i++; | 
|---|
| 512 | } | 
|---|
| 513 | } | 
|---|
| 514 | } | 
|---|
| 515 |  | 
|---|
| 516 | /** | 
|---|
| 517 | * Deletes all string attributes in the dataset. A deep copy of the attribute | 
|---|
| 518 | * information is performed before an attribute is deleted. | 
|---|
| 519 | * | 
|---|
| 520 | * @throws IllegalArgumentException if string attribute couldn't be | 
|---|
| 521 | * successfully deleted (probably because it is the class attribute). | 
|---|
| 522 | * @see #deleteAttributeType(int) | 
|---|
| 523 | */ | 
|---|
| 524 | public void deleteStringAttributes() { | 
|---|
| 525 | deleteAttributeType(Attribute.STRING); | 
|---|
| 526 | } | 
|---|
| 527 |  | 
|---|
| 528 | /** | 
|---|
| 529 | * Removes all instances with missing values for a particular | 
|---|
| 530 | * attribute from the dataset. | 
|---|
| 531 | * | 
|---|
| 532 | * @param attIndex the attribute's index (index starts with 0) | 
|---|
| 533 | */ | 
|---|
| 534 | //@ requires 0 <= attIndex && attIndex < numAttributes(); | 
|---|
| 535 | public void deleteWithMissing(int attIndex) { | 
|---|
| 536 |  | 
|---|
| 537 | ArrayList<Instance> newInstances = new ArrayList<Instance>(numInstances()); | 
|---|
| 538 |  | 
|---|
| 539 | for (int i = 0; i < numInstances(); i++) { | 
|---|
| 540 | if (!instance(i).isMissing(attIndex)) { | 
|---|
| 541 | newInstances.add(instance(i)); | 
|---|
| 542 | } | 
|---|
| 543 | } | 
|---|
| 544 | m_Instances = newInstances; | 
|---|
| 545 | } | 
|---|
| 546 |  | 
|---|
| 547 | /** | 
|---|
| 548 | * Removes all instances with missing values for a particular | 
|---|
| 549 | * attribute from the dataset. | 
|---|
| 550 | * | 
|---|
| 551 | * @param att the attribute | 
|---|
| 552 | */ | 
|---|
| 553 | public void deleteWithMissing(/*@non_null@*/ Attribute att) { | 
|---|
| 554 |  | 
|---|
| 555 | deleteWithMissing(att.index()); | 
|---|
| 556 | } | 
|---|
| 557 |  | 
|---|
| 558 | /** | 
|---|
| 559 | * Removes all instances with a missing class value | 
|---|
| 560 | * from the dataset. | 
|---|
| 561 | * | 
|---|
| 562 | * @throws UnassignedClassException if class is not set | 
|---|
| 563 | */ | 
|---|
| 564 | public void deleteWithMissingClass() { | 
|---|
| 565 |  | 
|---|
| 566 | if (m_ClassIndex < 0) { | 
|---|
| 567 | throw new UnassignedClassException("Class index is negative (not set)!"); | 
|---|
| 568 | } | 
|---|
| 569 | deleteWithMissing(m_ClassIndex); | 
|---|
| 570 | } | 
|---|
| 571 |  | 
|---|
| 572 | /** | 
|---|
| 573 | * Returns an enumeration of all the attributes. | 
|---|
| 574 | * | 
|---|
| 575 | * @return enumeration of all the attributes. | 
|---|
| 576 | */ | 
|---|
| 577 | public /*@non_null pure@*/ Enumeration enumerateAttributes() { | 
|---|
| 578 |  | 
|---|
| 579 | return new WekaEnumeration(m_Attributes, m_ClassIndex); | 
|---|
| 580 | } | 
|---|
| 581 |  | 
|---|
| 582 | /** | 
|---|
| 583 | * Returns an enumeration of all instances in the dataset. | 
|---|
| 584 | * | 
|---|
| 585 | * @return enumeration of all instances in the dataset | 
|---|
| 586 | */ | 
|---|
| 587 | public /*@non_null pure@*/ Enumeration enumerateInstances() { | 
|---|
| 588 |  | 
|---|
| 589 | return new WekaEnumeration(m_Instances); | 
|---|
| 590 | } | 
|---|
| 591 |  | 
|---|
| 592 | /** | 
|---|
| 593 | * Checks if two headers are equivalent. If not, then returns a message why | 
|---|
| 594 | * they differ. | 
|---|
| 595 | * | 
|---|
| 596 | * @param dataset     another dataset | 
|---|
| 597 | * @return            null if the header of the given dataset is equivalent | 
|---|
| 598 | *                    to this header, otherwise a message with details on | 
|---|
| 599 | *                    why they differ | 
|---|
| 600 | */ | 
|---|
| 601 | public String equalHeadersMsg(Instances dataset) { | 
|---|
| 602 | // Check class and all attributes | 
|---|
| 603 | if (m_ClassIndex != dataset.m_ClassIndex) | 
|---|
| 604 | return "Class index differ: " + (m_ClassIndex+1) + " != " + (dataset.m_ClassIndex+1); | 
|---|
| 605 |  | 
|---|
| 606 | if (m_Attributes.size() != dataset.m_Attributes.size()) | 
|---|
| 607 | return "Different number of attributes: " + m_Attributes.size() + " != " + dataset.m_Attributes.size(); | 
|---|
| 608 |  | 
|---|
| 609 | for (int i = 0; i < m_Attributes.size(); i++) { | 
|---|
| 610 | String msg = attribute(i).equalsMsg(dataset.attribute(i)); | 
|---|
| 611 | if (msg != null) | 
|---|
| 612 | return "Attributes differ at position " + (i+1) + ":\n" + msg; | 
|---|
| 613 | } | 
|---|
| 614 |  | 
|---|
| 615 | return null; | 
|---|
| 616 | } | 
|---|
| 617 |  | 
|---|
| 618 | /** | 
|---|
| 619 | * Checks if two headers are equivalent. | 
|---|
| 620 | * | 
|---|
| 621 | * @param dataset another dataset | 
|---|
| 622 | * @return true if the header of the given dataset is equivalent | 
|---|
| 623 | * to this header | 
|---|
| 624 | */ | 
|---|
| 625 | public /*@pure@*/ boolean equalHeaders(Instances dataset){ | 
|---|
| 626 | return (equalHeadersMsg(dataset) == null); | 
|---|
| 627 | } | 
|---|
| 628 |  | 
|---|
| 629 | /** | 
|---|
| 630 | * Returns the first instance in the set. | 
|---|
| 631 | * | 
|---|
| 632 | * @return the first instance in the set | 
|---|
| 633 | */ | 
|---|
| 634 | //@ requires numInstances() > 0; | 
|---|
| 635 | public /*@non_null pure@*/ Instance firstInstance() { | 
|---|
| 636 |  | 
|---|
| 637 | return (Instance)m_Instances.get(0); | 
|---|
| 638 | } | 
|---|
| 639 |  | 
|---|
| 640 | /** | 
|---|
| 641 | * Returns a random number generator. The initial seed of the random | 
|---|
| 642 | * number generator depends on the given seed and the hash code of | 
|---|
| 643 | * a string representation of a instances chosen based on the given | 
|---|
| 644 | * seed. | 
|---|
| 645 | * | 
|---|
| 646 | * @param seed the given seed | 
|---|
| 647 | * @return the random number generator | 
|---|
| 648 | */ | 
|---|
| 649 | public Random getRandomNumberGenerator(long seed) { | 
|---|
| 650 |  | 
|---|
| 651 | Random r = new Random(seed); | 
|---|
| 652 | r.setSeed(instance(r.nextInt(numInstances())).toStringNoWeight().hashCode() + seed); | 
|---|
| 653 | return r; | 
|---|
| 654 | } | 
|---|
| 655 |  | 
|---|
| 656 | /** | 
|---|
| 657 | * Inserts an attribute at the given position (0 to | 
|---|
| 658 | * numAttributes()) and sets all values to be missing. | 
|---|
| 659 | * Shallow copies the attribute before it is inserted, and performs | 
|---|
| 660 | * a deep copy of the existing attribute information. | 
|---|
| 661 | * | 
|---|
| 662 | * @param att the attribute to be inserted | 
|---|
| 663 | * @param position the attribute's position (position starts with 0) | 
|---|
| 664 | * @throws IllegalArgumentException if the given index is out of range | 
|---|
| 665 | */ | 
|---|
| 666 | //@ requires 0 <= position; | 
|---|
| 667 | //@ requires position <= numAttributes(); | 
|---|
| 668 | public void insertAttributeAt(/*@non_null@*/ Attribute att, int position) { | 
|---|
| 669 |  | 
|---|
| 670 | if ((position < 0) || | 
|---|
| 671 | (position > m_Attributes.size())) { | 
|---|
| 672 | throw new IllegalArgumentException("Index out of range"); | 
|---|
| 673 | } | 
|---|
| 674 | att = (Attribute)att.copy(); | 
|---|
| 675 | freshAttributeInfo(); | 
|---|
| 676 | att.setIndex(position); | 
|---|
| 677 | m_Attributes.add(position, att); | 
|---|
| 678 | for (int i = position + 1; i < m_Attributes.size(); i++) { | 
|---|
| 679 | Attribute current = (Attribute)m_Attributes.get(i); | 
|---|
| 680 | current.setIndex(current.index() + 1); | 
|---|
| 681 | } | 
|---|
| 682 | for (int i = 0; i < numInstances(); i++) { | 
|---|
| 683 | instance(i).setDataset(null); | 
|---|
| 684 | instance(i).insertAttributeAt(position); | 
|---|
| 685 | instance(i).setDataset(this); | 
|---|
| 686 | } | 
|---|
| 687 | if (m_ClassIndex >= position) { | 
|---|
| 688 | m_ClassIndex++; | 
|---|
| 689 | } | 
|---|
| 690 | } | 
|---|
| 691 |  | 
|---|
| 692 | /** | 
|---|
| 693 | * Returns the instance at the given position. | 
|---|
| 694 | * | 
|---|
| 695 | * @param index the instance's index (index starts with 0) | 
|---|
| 696 | * @return the instance at the given position | 
|---|
| 697 | */ | 
|---|
| 698 | //@ requires 0 <= index; | 
|---|
| 699 | //@ requires index < numInstances(); | 
|---|
| 700 | public /*@non_null pure@*/ Instance instance(int index) { | 
|---|
| 701 |  | 
|---|
| 702 | return m_Instances.get(index); | 
|---|
| 703 | } | 
|---|
| 704 |  | 
|---|
| 705 | /** | 
|---|
| 706 | * Returns the instance at the given position. | 
|---|
| 707 | * | 
|---|
| 708 | * @param index the instance's index (index starts with 0) | 
|---|
| 709 | * @return the instance at the given position | 
|---|
| 710 | */ | 
|---|
| 711 | //@ requires 0 <= index; | 
|---|
| 712 | //@ requires index < numInstances(); | 
|---|
| 713 | public /*@non_null pure@*/ Instance get(int index) { | 
|---|
| 714 |  | 
|---|
| 715 | return m_Instances.get(index); | 
|---|
| 716 | } | 
|---|
| 717 |  | 
|---|
| 718 | /** | 
|---|
| 719 | * Returns the kth-smallest attribute value of a numeric attribute. | 
|---|
| 720 | * Note that calling this method will change the order of the data! | 
|---|
| 721 | * | 
|---|
| 722 | * @param att the Attribute object | 
|---|
| 723 | * @param k the value of k | 
|---|
| 724 | * @return the kth-smallest value | 
|---|
| 725 | */ | 
|---|
| 726 | public double kthSmallestValue(Attribute att, int k) { | 
|---|
| 727 |  | 
|---|
| 728 | return kthSmallestValue(att.index(), k); | 
|---|
| 729 | } | 
|---|
| 730 |  | 
|---|
| 731 | /** | 
|---|
| 732 | * Returns the kth-smallest attribute value of a numeric attribute. | 
|---|
| 733 | * Note that calling this method will change the order of the data! | 
|---|
| 734 | * The number of non-missing values in the data must be as least | 
|---|
| 735 | * as last as k for this to work. | 
|---|
| 736 | * | 
|---|
| 737 | * @param attIndex the attribute's index | 
|---|
| 738 | * @param k the value of k | 
|---|
| 739 | * @return the kth-smallest value | 
|---|
| 740 | */ | 
|---|
| 741 | public double kthSmallestValue(int attIndex, int k) { | 
|---|
| 742 |  | 
|---|
| 743 | if (!attribute(attIndex).isNumeric()) { | 
|---|
| 744 | throw new IllegalArgumentException("Instances: attribute must be numeric to compute kth-smallest value."); | 
|---|
| 745 | } | 
|---|
| 746 |  | 
|---|
| 747 | int i,j; | 
|---|
| 748 |  | 
|---|
| 749 | // move all instances with missing values to end | 
|---|
| 750 | j = numInstances() - 1; | 
|---|
| 751 | i = 0; | 
|---|
| 752 | while (i <= j) { | 
|---|
| 753 | if (instance(j).isMissing(attIndex)) { | 
|---|
| 754 | j--; | 
|---|
| 755 | } else { | 
|---|
| 756 | if (instance(i).isMissing(attIndex)) { | 
|---|
| 757 | swap(i,j); | 
|---|
| 758 | j--; | 
|---|
| 759 | } | 
|---|
| 760 | i++; | 
|---|
| 761 | } | 
|---|
| 762 | } | 
|---|
| 763 |  | 
|---|
| 764 | if ((k < 1) || (k > j+1)) { | 
|---|
| 765 | throw new IllegalArgumentException("Instances: value for k for computing kth-smallest value too large."); | 
|---|
| 766 | } | 
|---|
| 767 |  | 
|---|
| 768 | return instance(select(attIndex, 0, j, k)).value(attIndex); | 
|---|
| 769 | } | 
|---|
| 770 |  | 
|---|
| 771 | /** | 
|---|
| 772 | * Returns the last instance in the set. | 
|---|
| 773 | * | 
|---|
| 774 | * @return the last instance in the set | 
|---|
| 775 | */ | 
|---|
| 776 | //@ requires numInstances() > 0; | 
|---|
| 777 | public /*@non_null pure@*/ Instance lastInstance() { | 
|---|
| 778 |  | 
|---|
| 779 | return (Instance)m_Instances.get(m_Instances.size() - 1); | 
|---|
| 780 | } | 
|---|
| 781 |  | 
|---|
| 782 | /** | 
|---|
| 783 | * Returns the mean (mode) for a numeric (nominal) attribute as | 
|---|
| 784 | * a floating-point value. Returns 0 if the attribute is neither nominal nor | 
|---|
| 785 | * numeric. If all values are missing it returns zero. | 
|---|
| 786 | * | 
|---|
| 787 | * @param attIndex the attribute's index (index starts with 0) | 
|---|
| 788 | * @return the mean or the mode | 
|---|
| 789 | */ | 
|---|
| 790 | public /*@pure@*/ double meanOrMode(int attIndex) { | 
|---|
| 791 |  | 
|---|
| 792 | double result, found; | 
|---|
| 793 | int [] counts; | 
|---|
| 794 |  | 
|---|
| 795 | if (attribute(attIndex).isNumeric()) { | 
|---|
| 796 | result = found = 0; | 
|---|
| 797 | for (int j = 0; j < numInstances(); j++) { | 
|---|
| 798 | if (!instance(j).isMissing(attIndex)) { | 
|---|
| 799 | found += instance(j).weight(); | 
|---|
| 800 | result += instance(j).weight()*instance(j).value(attIndex); | 
|---|
| 801 | } | 
|---|
| 802 | } | 
|---|
| 803 | if (found <= 0) { | 
|---|
| 804 | return 0; | 
|---|
| 805 | } else { | 
|---|
| 806 | return result / found; | 
|---|
| 807 | } | 
|---|
| 808 | } else if (attribute(attIndex).isNominal()) { | 
|---|
| 809 | counts = new int[attribute(attIndex).numValues()]; | 
|---|
| 810 | for (int j = 0; j < numInstances(); j++) { | 
|---|
| 811 | if (!instance(j).isMissing(attIndex)) { | 
|---|
| 812 | counts[(int) instance(j).value(attIndex)] += instance(j).weight(); | 
|---|
| 813 | } | 
|---|
| 814 | } | 
|---|
| 815 | return (double)Utils.maxIndex(counts); | 
|---|
| 816 | } else { | 
|---|
| 817 | return 0; | 
|---|
| 818 | } | 
|---|
| 819 | } | 
|---|
| 820 |  | 
|---|
| 821 | /** | 
|---|
| 822 | * Returns the mean (mode) for a numeric (nominal) attribute as a | 
|---|
| 823 | * floating-point value.  Returns 0 if the attribute is neither | 
|---|
| 824 | * nominal nor numeric.  If all values are missing it returns zero. | 
|---|
| 825 | * | 
|---|
| 826 | * @param att the attribute | 
|---|
| 827 | * @return the mean or the mode | 
|---|
| 828 | */ | 
|---|
| 829 | public /*@pure@*/ double meanOrMode(Attribute att) { | 
|---|
| 830 |  | 
|---|
| 831 | return meanOrMode(att.index()); | 
|---|
| 832 | } | 
|---|
| 833 |  | 
|---|
| 834 | /** | 
|---|
| 835 | * Returns the number of attributes. | 
|---|
| 836 | * | 
|---|
| 837 | * @return the number of attributes as an integer | 
|---|
| 838 | */ | 
|---|
| 839 | //@ ensures \result == m_Attributes.size(); | 
|---|
| 840 | public /*@pure@*/ int numAttributes() { | 
|---|
| 841 |  | 
|---|
| 842 | return m_Attributes.size(); | 
|---|
| 843 | } | 
|---|
| 844 |  | 
|---|
| 845 | /** | 
|---|
| 846 | * Returns the number of class labels. | 
|---|
| 847 | * | 
|---|
| 848 | * @return the number of class labels as an integer if the class | 
|---|
| 849 | * attribute is nominal, 1 otherwise. | 
|---|
| 850 | * @throws UnassignedClassException if the class is not set | 
|---|
| 851 | */ | 
|---|
| 852 | //@ requires classIndex() >= 0; | 
|---|
| 853 | public /*@pure@*/ int numClasses() { | 
|---|
| 854 |  | 
|---|
| 855 | if (m_ClassIndex < 0) { | 
|---|
| 856 | throw new UnassignedClassException("Class index is negative (not set)!"); | 
|---|
| 857 | } | 
|---|
| 858 | if (!classAttribute().isNominal()) { | 
|---|
| 859 | return 1; | 
|---|
| 860 | } else { | 
|---|
| 861 | return classAttribute().numValues(); | 
|---|
| 862 | } | 
|---|
| 863 | } | 
|---|
| 864 |  | 
|---|
| 865 | /** | 
|---|
| 866 | * Returns the number of distinct values of a given attribute. | 
|---|
| 867 | * Returns the number of instances if the attribute is a | 
|---|
| 868 | * string attribute. The value 'missing' is not counted. | 
|---|
| 869 | * | 
|---|
| 870 | * @param attIndex the attribute (index starts with 0) | 
|---|
| 871 | * @return the number of distinct values of a given attribute | 
|---|
| 872 | */ | 
|---|
| 873 | //@ requires 0 <= attIndex; | 
|---|
| 874 | //@ requires attIndex < numAttributes(); | 
|---|
| 875 | public /*@pure@*/ int numDistinctValues(int attIndex) { | 
|---|
| 876 |  | 
|---|
| 877 | if (attribute(attIndex).isNumeric()) { | 
|---|
| 878 | double [] attVals = attributeToDoubleArray(attIndex); | 
|---|
| 879 | int [] sorted = Utils.sort(attVals); | 
|---|
| 880 | double prev = 0; | 
|---|
| 881 | int counter = 0; | 
|---|
| 882 | for (int i = 0; i < sorted.length; i++) { | 
|---|
| 883 | Instance current = instance(sorted[i]); | 
|---|
| 884 | if (current.isMissing(attIndex)) { | 
|---|
| 885 | break; | 
|---|
| 886 | } | 
|---|
| 887 | if ((i == 0) || | 
|---|
| 888 | (current.value(attIndex) > prev)) { | 
|---|
| 889 | prev = current.value(attIndex); | 
|---|
| 890 | counter++; | 
|---|
| 891 | } | 
|---|
| 892 | } | 
|---|
| 893 | return counter; | 
|---|
| 894 | } else { | 
|---|
| 895 | return attribute(attIndex).numValues(); | 
|---|
| 896 | } | 
|---|
| 897 | } | 
|---|
| 898 |  | 
|---|
| 899 | /** | 
|---|
| 900 | * Returns the number of distinct values of a given attribute. | 
|---|
| 901 | * Returns the number of instances if the attribute is a | 
|---|
| 902 | * string attribute. The value 'missing' is not counted. | 
|---|
| 903 | * | 
|---|
| 904 | * @param att the attribute | 
|---|
| 905 | * @return the number of distinct values of a given attribute | 
|---|
| 906 | */ | 
|---|
| 907 | public /*@pure@*/ int numDistinctValues(/*@non_null@*/Attribute att) { | 
|---|
| 908 |  | 
|---|
| 909 | return numDistinctValues(att.index()); | 
|---|
| 910 | } | 
|---|
| 911 |  | 
|---|
| 912 | /** | 
|---|
| 913 | * Returns the number of instances in the dataset. | 
|---|
| 914 | * | 
|---|
| 915 | * @return the number of instances in the dataset as an integer | 
|---|
| 916 | */ | 
|---|
| 917 | //@ ensures \result == m_Instances.size(); | 
|---|
| 918 | public /*@pure@*/ int numInstances() { | 
|---|
| 919 |  | 
|---|
| 920 | return m_Instances.size(); | 
|---|
| 921 | } | 
|---|
| 922 |  | 
|---|
| 923 | /** | 
|---|
| 924 | * Returns the number of instances in the dataset. | 
|---|
| 925 | * | 
|---|
| 926 | * @return the number of instances in the dataset as an integer | 
|---|
| 927 | */ | 
|---|
| 928 | //@ ensures \result == m_Instances.size(); | 
|---|
| 929 | public /*@pure@*/ int size() { | 
|---|
| 930 |  | 
|---|
| 931 | return m_Instances.size(); | 
|---|
| 932 | } | 
|---|
| 933 |  | 
|---|
| 934 | /** | 
|---|
| 935 | * Shuffles the instances in the set so that they are ordered | 
|---|
| 936 | * randomly. | 
|---|
| 937 | * | 
|---|
| 938 | * @param random a random number generator | 
|---|
| 939 | */ | 
|---|
| 940 | public void randomize(Random random) { | 
|---|
| 941 |  | 
|---|
| 942 | for (int j = numInstances() - 1; j > 0; j--) | 
|---|
| 943 | swap(j, random.nextInt(j+1)); | 
|---|
| 944 | } | 
|---|
| 945 |  | 
|---|
| 946 | /** | 
|---|
| 947 | * Reads a single instance from the reader and appends it | 
|---|
| 948 | * to the dataset.  Automatically expands the dataset if it | 
|---|
| 949 | * is not large enough to hold the instance. This method does | 
|---|
| 950 | * not check for carriage return at the end of the line. | 
|---|
| 951 | * | 
|---|
| 952 | * @param reader the reader | 
|---|
| 953 | * @return false if end of file has been reached | 
|---|
| 954 | * @throws IOException if the information is not read | 
|---|
| 955 | * successfully | 
|---|
| 956 | * @deprecated instead of using this method in conjunction with the | 
|---|
| 957 | * <code>readInstance(Reader)</code> method, one should use the | 
|---|
| 958 | * <code>ArffLoader</code> or <code>DataSource</code> class instead. | 
|---|
| 959 | * @see weka.core.converters.ArffLoader | 
|---|
| 960 | * @see weka.core.converters.ConverterUtils.DataSource | 
|---|
| 961 | */ | 
|---|
| 962 | @Deprecated public boolean readInstance(Reader reader) throws IOException { | 
|---|
| 963 |  | 
|---|
| 964 | ArffReader arff = new ArffReader(reader, this, m_Lines, 1); | 
|---|
| 965 | Instance inst = arff.readInstance(arff.getData(), false); | 
|---|
| 966 | m_Lines = arff.getLineNo(); | 
|---|
| 967 | if (inst != null) { | 
|---|
| 968 | add(inst); | 
|---|
| 969 | return true; | 
|---|
| 970 | } | 
|---|
| 971 | else { | 
|---|
| 972 | return false; | 
|---|
| 973 | } | 
|---|
| 974 | } | 
|---|
| 975 |  | 
|---|
| 976 | /** | 
|---|
| 977 | * Returns the relation's name. | 
|---|
| 978 | * | 
|---|
| 979 | * @return the relation's name as a string | 
|---|
| 980 | */ | 
|---|
| 981 | //@ ensures \result == m_RelationName; | 
|---|
| 982 | public /*@pure@*/ String relationName() { | 
|---|
| 983 |  | 
|---|
| 984 | return m_RelationName; | 
|---|
| 985 | } | 
|---|
| 986 |  | 
|---|
| 987 | /** | 
|---|
| 988 | * Removes the instance at the given position. | 
|---|
| 989 | * | 
|---|
| 990 | * @param index the instance's index (index starts with 0) | 
|---|
| 991 | * @return the instance at the given position | 
|---|
| 992 | */ | 
|---|
| 993 | //@ requires 0 <= index; | 
|---|
| 994 | //@ requires index < numInstances(); | 
|---|
| 995 | public Instance remove(int index) { | 
|---|
| 996 |  | 
|---|
| 997 | return m_Instances.remove(index); | 
|---|
| 998 | } | 
|---|
| 999 |  | 
|---|
| 1000 | /** | 
|---|
| 1001 | * Renames an attribute. This change only affects this | 
|---|
| 1002 | * dataset. | 
|---|
| 1003 | * | 
|---|
| 1004 | * @param att the attribute's index (index starts with 0) | 
|---|
| 1005 | * @param name the new name | 
|---|
| 1006 | */ | 
|---|
| 1007 | public void renameAttribute(int att, String name) { | 
|---|
| 1008 |  | 
|---|
| 1009 | Attribute newAtt = attribute(att).copy(name); | 
|---|
| 1010 | ArrayList<Attribute> newVec = new ArrayList<Attribute>(numAttributes()); | 
|---|
| 1011 |  | 
|---|
| 1012 | for (int i = 0; i < numAttributes(); i++) { | 
|---|
| 1013 | if (i == att) { | 
|---|
| 1014 | newVec.add(newAtt); | 
|---|
| 1015 | } else { | 
|---|
| 1016 | newVec.add(attribute(i)); | 
|---|
| 1017 | } | 
|---|
| 1018 | } | 
|---|
| 1019 | m_Attributes = newVec; | 
|---|
| 1020 | } | 
|---|
| 1021 |  | 
|---|
| 1022 | /** | 
|---|
| 1023 | * Renames an attribute. This change only affects this | 
|---|
| 1024 | * dataset. | 
|---|
| 1025 | * | 
|---|
| 1026 | * @param att the attribute | 
|---|
| 1027 | * @param name the new name | 
|---|
| 1028 | */ | 
|---|
| 1029 | public void renameAttribute(Attribute att, String name) { | 
|---|
| 1030 |  | 
|---|
| 1031 | renameAttribute(att.index(), name); | 
|---|
| 1032 | } | 
|---|
| 1033 |  | 
|---|
| 1034 | /** | 
|---|
| 1035 | * Renames the value of a nominal (or string) attribute value. This | 
|---|
| 1036 | * change only affects this dataset. | 
|---|
| 1037 | * | 
|---|
| 1038 | * @param att the attribute's index (index starts with 0) | 
|---|
| 1039 | * @param val the value's index (index starts with 0) | 
|---|
| 1040 | * @param name the new name | 
|---|
| 1041 | */ | 
|---|
| 1042 | public void renameAttributeValue(int att, int val, String name) { | 
|---|
| 1043 |  | 
|---|
| 1044 | Attribute newAtt = (Attribute)attribute(att).copy(); | 
|---|
| 1045 | ArrayList<Attribute> newVec = new ArrayList<Attribute>(numAttributes()); | 
|---|
| 1046 |  | 
|---|
| 1047 | newAtt.setValue(val, name); | 
|---|
| 1048 | for (int i = 0; i < numAttributes(); i++) { | 
|---|
| 1049 | if (i == att) { | 
|---|
| 1050 | newVec.add(newAtt); | 
|---|
| 1051 | } else { | 
|---|
| 1052 | newVec.add(attribute(i)); | 
|---|
| 1053 | } | 
|---|
| 1054 | } | 
|---|
| 1055 | m_Attributes = newVec; | 
|---|
| 1056 | } | 
|---|
| 1057 |  | 
|---|
| 1058 | /** | 
|---|
| 1059 | * Renames the value of a nominal (or string) attribute value. This | 
|---|
| 1060 | * change only affects this dataset. | 
|---|
| 1061 | * | 
|---|
| 1062 | * @param att the attribute | 
|---|
| 1063 | * @param val the value | 
|---|
| 1064 | * @param name the new name | 
|---|
| 1065 | */ | 
|---|
| 1066 | public void renameAttributeValue(Attribute att, String val, | 
|---|
| 1067 | String name) { | 
|---|
| 1068 |  | 
|---|
| 1069 | int v = att.indexOfValue(val); | 
|---|
| 1070 | if (v == -1) throw new IllegalArgumentException(val + " not found"); | 
|---|
| 1071 | renameAttributeValue(att.index(), v, name); | 
|---|
| 1072 | } | 
|---|
| 1073 |  | 
|---|
| 1074 | /** | 
|---|
| 1075 | * Creates a new dataset of the same size using random sampling | 
|---|
| 1076 | * with replacement. | 
|---|
| 1077 | * | 
|---|
| 1078 | * @param random a random number generator | 
|---|
| 1079 | * @return the new dataset | 
|---|
| 1080 | */ | 
|---|
| 1081 | public Instances resample(Random random) { | 
|---|
| 1082 |  | 
|---|
| 1083 | Instances newData = new Instances(this, numInstances()); | 
|---|
| 1084 | while (newData.numInstances() < numInstances()) { | 
|---|
| 1085 | newData.add(instance(random.nextInt(numInstances()))); | 
|---|
| 1086 | } | 
|---|
| 1087 | return newData; | 
|---|
| 1088 | } | 
|---|
| 1089 |  | 
|---|
| 1090 | /** | 
|---|
| 1091 | * Creates a new dataset of the same size using random sampling | 
|---|
| 1092 | * with replacement according to the current instance weights. The | 
|---|
| 1093 | * weights of the instances in the new dataset are set to one. | 
|---|
| 1094 | * | 
|---|
| 1095 | * @param random a random number generator | 
|---|
| 1096 | * @return the new dataset | 
|---|
| 1097 | */ | 
|---|
| 1098 | public Instances resampleWithWeights(Random random) { | 
|---|
| 1099 |  | 
|---|
| 1100 | double [] weights = new double[numInstances()]; | 
|---|
| 1101 | for (int i = 0; i < weights.length; i++) { | 
|---|
| 1102 | weights[i] = instance(i).weight(); | 
|---|
| 1103 | } | 
|---|
| 1104 | return resampleWithWeights(random, weights); | 
|---|
| 1105 | } | 
|---|
| 1106 |  | 
|---|
| 1107 |  | 
|---|
| 1108 | /** | 
|---|
| 1109 | * Creates a new dataset of the same size using random sampling | 
|---|
| 1110 | * with replacement according to the given weight vector. The | 
|---|
| 1111 | * weights of the instances in the new dataset are set to one. | 
|---|
| 1112 | * The length of the weight vector has to be the same as the | 
|---|
| 1113 | * number of instances in the dataset, and all weights have to | 
|---|
| 1114 | * be positive. | 
|---|
| 1115 | * | 
|---|
| 1116 | * @param random a random number generator | 
|---|
| 1117 | * @param weights the weight vector | 
|---|
| 1118 | * @return the new dataset | 
|---|
| 1119 | * @throws IllegalArgumentException if the weights array is of the wrong | 
|---|
| 1120 | * length or contains negative weights. | 
|---|
| 1121 | */ | 
|---|
| 1122 | public Instances resampleWithWeights(Random random, | 
|---|
| 1123 | double[] weights) { | 
|---|
| 1124 |  | 
|---|
| 1125 | if (weights.length != numInstances()) { | 
|---|
| 1126 | throw new IllegalArgumentException("weights.length != numInstances."); | 
|---|
| 1127 | } | 
|---|
| 1128 | Instances newData = new Instances(this, numInstances()); | 
|---|
| 1129 | if (numInstances() == 0) { | 
|---|
| 1130 | return newData; | 
|---|
| 1131 | } | 
|---|
| 1132 | double[] probabilities = new double[numInstances()]; | 
|---|
| 1133 | double sumProbs = 0, sumOfWeights = Utils.sum(weights); | 
|---|
| 1134 | for (int i = 0; i < numInstances(); i++) { | 
|---|
| 1135 | sumProbs += random.nextDouble(); | 
|---|
| 1136 | probabilities[i] = sumProbs; | 
|---|
| 1137 | } | 
|---|
| 1138 | Utils.normalize(probabilities, sumProbs / sumOfWeights); | 
|---|
| 1139 |  | 
|---|
| 1140 | // Make sure that rounding errors don't mess things up | 
|---|
| 1141 | probabilities[numInstances() - 1] = sumOfWeights; | 
|---|
| 1142 | int k = 0; int l = 0; | 
|---|
| 1143 | sumProbs = 0; | 
|---|
| 1144 | while ((k < numInstances() && (l < numInstances()))) { | 
|---|
| 1145 | if (weights[l] < 0) { | 
|---|
| 1146 | throw new IllegalArgumentException("Weights have to be positive."); | 
|---|
| 1147 | } | 
|---|
| 1148 | sumProbs += weights[l]; | 
|---|
| 1149 | while ((k < numInstances()) && | 
|---|
| 1150 | (probabilities[k] <= sumProbs)) { | 
|---|
| 1151 | newData.add(instance(l)); | 
|---|
| 1152 | newData.instance(k).setWeight(1); | 
|---|
| 1153 | k++; | 
|---|
| 1154 | } | 
|---|
| 1155 | l++; | 
|---|
| 1156 | } | 
|---|
| 1157 | return newData; | 
|---|
| 1158 | } | 
|---|
| 1159 |  | 
|---|
| 1160 | /** | 
|---|
| 1161 | * Replaces the instance at the given position. | 
|---|
| 1162 | * Shallow copies instance before it is added. Does not | 
|---|
| 1163 | * check if the instance is compatible with the dataset. | 
|---|
| 1164 | * Note: String or relational values are not transferred. | 
|---|
| 1165 | * | 
|---|
| 1166 | * @param index position where instance is to be inserted | 
|---|
| 1167 | * @param instance the instance to be inserted | 
|---|
| 1168 | * @return the instance previously at that position | 
|---|
| 1169 | */ | 
|---|
| 1170 | //@ requires 0 <= index; | 
|---|
| 1171 | //@ requires index < m_Instances.size(); | 
|---|
| 1172 | public Instance set(int index, /*@non_null@*/ Instance instance) { | 
|---|
| 1173 |  | 
|---|
| 1174 | Instance newInstance = (Instance)instance.copy(); | 
|---|
| 1175 | Instance oldInstance = m_Instances.get(index); | 
|---|
| 1176 |  | 
|---|
| 1177 | newInstance.setDataset(this); | 
|---|
| 1178 | m_Instances.set(index, newInstance); | 
|---|
| 1179 |  | 
|---|
| 1180 | return oldInstance; | 
|---|
| 1181 | } | 
|---|
| 1182 |  | 
|---|
| 1183 | /** | 
|---|
| 1184 | * Sets the class attribute. | 
|---|
| 1185 | * | 
|---|
| 1186 | * @param att attribute to be the class | 
|---|
| 1187 | */ | 
|---|
| 1188 | public void setClass(Attribute att) { | 
|---|
| 1189 |  | 
|---|
| 1190 | m_ClassIndex = att.index(); | 
|---|
| 1191 | } | 
|---|
| 1192 |  | 
|---|
| 1193 | /** | 
|---|
| 1194 | * Sets the class index of the set. | 
|---|
| 1195 | * If the class index is negative there is assumed to be no class. | 
|---|
| 1196 | * (ie. it is undefined) | 
|---|
| 1197 | * | 
|---|
| 1198 | * @param classIndex the new class index (index starts with 0) | 
|---|
| 1199 | * @throws IllegalArgumentException if the class index is too big or < 0 | 
|---|
| 1200 | */ | 
|---|
| 1201 | public void setClassIndex(int classIndex) { | 
|---|
| 1202 |  | 
|---|
| 1203 | if (classIndex >= numAttributes()) { | 
|---|
| 1204 | throw new IllegalArgumentException("Invalid class index: " + classIndex); | 
|---|
| 1205 | } | 
|---|
| 1206 | m_ClassIndex = classIndex; | 
|---|
| 1207 | } | 
|---|
| 1208 |  | 
|---|
| 1209 | /** | 
|---|
| 1210 | * Sets the relation's name. | 
|---|
| 1211 | * | 
|---|
| 1212 | * @param newName the new relation name. | 
|---|
| 1213 | */ | 
|---|
| 1214 | public void setRelationName(/*@non_null@*/String newName) { | 
|---|
| 1215 |  | 
|---|
| 1216 | m_RelationName = newName; | 
|---|
| 1217 | } | 
|---|
| 1218 |  | 
|---|
| 1219 | /** | 
|---|
| 1220 | * Sorts the instances based on an attribute. For numeric attributes, | 
|---|
| 1221 | * instances are sorted in ascending order. For nominal attributes, | 
|---|
| 1222 | * instances are sorted based on the attribute label ordering | 
|---|
| 1223 | * specified in the header. Instances with missing values for the | 
|---|
| 1224 | * attribute are placed at the end of the dataset. | 
|---|
| 1225 | * | 
|---|
| 1226 | * @param attIndex the attribute's index (index starts with 0) | 
|---|
| 1227 | */ | 
|---|
| 1228 | public void sort(int attIndex) { | 
|---|
| 1229 |  | 
|---|
| 1230 | int i,j; | 
|---|
| 1231 |  | 
|---|
| 1232 | // move all instances with missing values to end | 
|---|
| 1233 | j = numInstances() - 1; | 
|---|
| 1234 | i = 0; | 
|---|
| 1235 | while (i <= j) { | 
|---|
| 1236 | if (instance(j).isMissing(attIndex)) { | 
|---|
| 1237 | j--; | 
|---|
| 1238 | } else { | 
|---|
| 1239 | if (instance(i).isMissing(attIndex)) { | 
|---|
| 1240 | swap(i,j); | 
|---|
| 1241 | j--; | 
|---|
| 1242 | } | 
|---|
| 1243 | i++; | 
|---|
| 1244 | } | 
|---|
| 1245 | } | 
|---|
| 1246 | quickSort(attIndex, 0, j); | 
|---|
| 1247 | } | 
|---|
| 1248 |  | 
|---|
| 1249 | /** | 
|---|
| 1250 | * Sorts the instances based on an attribute. For numeric attributes, | 
|---|
| 1251 | * instances are sorted into ascending order. For nominal attributes, | 
|---|
| 1252 | * instances are sorted based on the attribute label ordering | 
|---|
| 1253 | * specified in the header. Instances with missing values for the | 
|---|
| 1254 | * attribute are placed at the end of the dataset. | 
|---|
| 1255 | * | 
|---|
| 1256 | * @param att the attribute | 
|---|
| 1257 | */ | 
|---|
| 1258 | public void sort(Attribute att) { | 
|---|
| 1259 |  | 
|---|
| 1260 | sort(att.index()); | 
|---|
| 1261 | } | 
|---|
| 1262 |  | 
|---|
| 1263 | /** | 
|---|
| 1264 | * Stratifies a set of instances according to its class values | 
|---|
| 1265 | * if the class attribute is nominal (so that afterwards a | 
|---|
| 1266 | * stratified cross-validation can be performed). | 
|---|
| 1267 | * | 
|---|
| 1268 | * @param numFolds the number of folds in the cross-validation | 
|---|
| 1269 | * @throws UnassignedClassException if the class is not set | 
|---|
| 1270 | */ | 
|---|
| 1271 | public void stratify(int numFolds) { | 
|---|
| 1272 |  | 
|---|
| 1273 | if (numFolds <= 1) { | 
|---|
| 1274 | throw new IllegalArgumentException("Number of folds must be greater than 1"); | 
|---|
| 1275 | } | 
|---|
| 1276 | if (m_ClassIndex < 0) { | 
|---|
| 1277 | throw new UnassignedClassException("Class index is negative (not set)!"); | 
|---|
| 1278 | } | 
|---|
| 1279 | if (classAttribute().isNominal()) { | 
|---|
| 1280 |  | 
|---|
| 1281 | // sort by class | 
|---|
| 1282 | int index = 1; | 
|---|
| 1283 | while (index < numInstances()) { | 
|---|
| 1284 | Instance instance1 = instance(index - 1); | 
|---|
| 1285 | for (int j = index; j < numInstances(); j++) { | 
|---|
| 1286 | Instance instance2 = instance(j); | 
|---|
| 1287 | if ((instance1.classValue() == instance2.classValue()) || | 
|---|
| 1288 | (instance1.classIsMissing() && | 
|---|
| 1289 | instance2.classIsMissing())) { | 
|---|
| 1290 | swap(index,j); | 
|---|
| 1291 | index++; | 
|---|
| 1292 | } | 
|---|
| 1293 | } | 
|---|
| 1294 | index++; | 
|---|
| 1295 | } | 
|---|
| 1296 | stratStep(numFolds); | 
|---|
| 1297 | } | 
|---|
| 1298 | } | 
|---|
| 1299 |  | 
|---|
| 1300 | /** | 
|---|
| 1301 | * Computes the sum of all the instances' weights. | 
|---|
| 1302 | * | 
|---|
| 1303 | * @return the sum of all the instances' weights as a double | 
|---|
| 1304 | */ | 
|---|
| 1305 | public /*@pure@*/ double sumOfWeights() { | 
|---|
| 1306 |  | 
|---|
| 1307 | double sum = 0; | 
|---|
| 1308 |  | 
|---|
| 1309 | for (int i = 0; i < numInstances(); i++) { | 
|---|
| 1310 | sum += instance(i).weight(); | 
|---|
| 1311 | } | 
|---|
| 1312 | return sum; | 
|---|
| 1313 | } | 
|---|
| 1314 |  | 
|---|
| 1315 | /** | 
|---|
| 1316 | * Creates the test set for one fold of a cross-validation on | 
|---|
| 1317 | * the dataset. | 
|---|
| 1318 | * | 
|---|
| 1319 | * @param numFolds the number of folds in the cross-validation. Must | 
|---|
| 1320 | * be greater than 1. | 
|---|
| 1321 | * @param numFold 0 for the first fold, 1 for the second, ... | 
|---|
| 1322 | * @return the test set as a set of weighted instances | 
|---|
| 1323 | * @throws IllegalArgumentException if the number of folds is less than 2 | 
|---|
| 1324 | * or greater than the number of instances. | 
|---|
| 1325 | */ | 
|---|
| 1326 | //@ requires 2 <= numFolds && numFolds < numInstances(); | 
|---|
| 1327 | //@ requires 0 <= numFold && numFold < numFolds; | 
|---|
| 1328 | public Instances testCV(int numFolds, int numFold) { | 
|---|
| 1329 |  | 
|---|
| 1330 | int numInstForFold, first, offset; | 
|---|
| 1331 | Instances test; | 
|---|
| 1332 |  | 
|---|
| 1333 | if (numFolds < 2) { | 
|---|
| 1334 | throw new IllegalArgumentException("Number of folds must be at least 2!"); | 
|---|
| 1335 | } | 
|---|
| 1336 | if (numFolds > numInstances()) { | 
|---|
| 1337 | throw new IllegalArgumentException("Can't have more folds than instances!"); | 
|---|
| 1338 | } | 
|---|
| 1339 | numInstForFold = numInstances() / numFolds; | 
|---|
| 1340 | if (numFold < numInstances() % numFolds){ | 
|---|
| 1341 | numInstForFold++; | 
|---|
| 1342 | offset = numFold; | 
|---|
| 1343 | }else | 
|---|
| 1344 | offset = numInstances() % numFolds; | 
|---|
| 1345 | test = new Instances(this, numInstForFold); | 
|---|
| 1346 | first = numFold * (numInstances() / numFolds) + offset; | 
|---|
| 1347 | copyInstances(first, test, numInstForFold); | 
|---|
| 1348 | return test; | 
|---|
| 1349 | } | 
|---|
| 1350 |  | 
|---|
| 1351 | /** | 
|---|
| 1352 | * Returns the dataset as a string in ARFF format. Strings | 
|---|
| 1353 | * are quoted if they contain whitespace characters, or if they | 
|---|
| 1354 | * are a question mark. | 
|---|
| 1355 | * | 
|---|
| 1356 | * @return the dataset in ARFF format as a string | 
|---|
| 1357 | */ | 
|---|
| 1358 | public String toString() { | 
|---|
| 1359 |  | 
|---|
| 1360 | StringBuffer text = new StringBuffer(); | 
|---|
| 1361 |  | 
|---|
| 1362 | text.append(ARFF_RELATION).append(" "). | 
|---|
| 1363 | append(Utils.quote(m_RelationName)).append("\n\n"); | 
|---|
| 1364 | for (int i = 0; i < numAttributes(); i++) { | 
|---|
| 1365 | text.append(attribute(i)).append("\n"); | 
|---|
| 1366 | } | 
|---|
| 1367 | text.append("\n").append(ARFF_DATA).append("\n"); | 
|---|
| 1368 |  | 
|---|
| 1369 | text.append(stringWithoutHeader()); | 
|---|
| 1370 | return text.toString(); | 
|---|
| 1371 | } | 
|---|
| 1372 |  | 
|---|
| 1373 | /** | 
|---|
| 1374 | * Returns the instances in the dataset as a string in ARFF format. Strings | 
|---|
| 1375 | * are quoted if they contain whitespace characters, or if they | 
|---|
| 1376 | * are a question mark. | 
|---|
| 1377 | * | 
|---|
| 1378 | * @return the dataset in ARFF format as a string | 
|---|
| 1379 | */ | 
|---|
| 1380 | protected String stringWithoutHeader() { | 
|---|
| 1381 |  | 
|---|
| 1382 | StringBuffer text = new StringBuffer(); | 
|---|
| 1383 |  | 
|---|
| 1384 | for (int i = 0; i < numInstances(); i++) { | 
|---|
| 1385 | text.append(instance(i)); | 
|---|
| 1386 | if (i < numInstances() - 1) { | 
|---|
| 1387 | text.append('\n'); | 
|---|
| 1388 | } | 
|---|
| 1389 | } | 
|---|
| 1390 | return text.toString(); | 
|---|
| 1391 | } | 
|---|
| 1392 |  | 
|---|
| 1393 | /** | 
|---|
| 1394 | * Creates the training set for one fold of a cross-validation | 
|---|
| 1395 | * on the dataset. | 
|---|
| 1396 | * | 
|---|
| 1397 | * @param numFolds the number of folds in the cross-validation. Must | 
|---|
| 1398 | * be greater than 1. | 
|---|
| 1399 | * @param numFold 0 for the first fold, 1 for the second, ... | 
|---|
| 1400 | * @return the training set | 
|---|
| 1401 | * @throws IllegalArgumentException if the number of folds is less than 2 | 
|---|
| 1402 | * or greater than the number of instances. | 
|---|
| 1403 | */ | 
|---|
| 1404 | //@ requires 2 <= numFolds && numFolds < numInstances(); | 
|---|
| 1405 | //@ requires 0 <= numFold && numFold < numFolds; | 
|---|
| 1406 | public Instances trainCV(int numFolds, int numFold) { | 
|---|
| 1407 |  | 
|---|
| 1408 | int numInstForFold, first, offset; | 
|---|
| 1409 | Instances train; | 
|---|
| 1410 |  | 
|---|
| 1411 | if (numFolds < 2) { | 
|---|
| 1412 | throw new IllegalArgumentException("Number of folds must be at least 2!"); | 
|---|
| 1413 | } | 
|---|
| 1414 | if (numFolds > numInstances()) { | 
|---|
| 1415 | throw new IllegalArgumentException("Can't have more folds than instances!"); | 
|---|
| 1416 | } | 
|---|
| 1417 | numInstForFold = numInstances() / numFolds; | 
|---|
| 1418 | if (numFold < numInstances() % numFolds) { | 
|---|
| 1419 | numInstForFold++; | 
|---|
| 1420 | offset = numFold; | 
|---|
| 1421 | }else | 
|---|
| 1422 | offset = numInstances() % numFolds; | 
|---|
| 1423 | train = new Instances(this, numInstances() - numInstForFold); | 
|---|
| 1424 | first = numFold * (numInstances() / numFolds) + offset; | 
|---|
| 1425 | copyInstances(0, train, first); | 
|---|
| 1426 | copyInstances(first + numInstForFold, train, | 
|---|
| 1427 | numInstances() - first - numInstForFold); | 
|---|
| 1428 |  | 
|---|
| 1429 | return train; | 
|---|
| 1430 | } | 
|---|
| 1431 |  | 
|---|
| 1432 | /** | 
|---|
| 1433 | * Creates the training set for one fold of a cross-validation | 
|---|
| 1434 | * on the dataset. The data is subsequently randomized based | 
|---|
| 1435 | * on the given random number generator. | 
|---|
| 1436 | * | 
|---|
| 1437 | * @param numFolds the number of folds in the cross-validation. Must | 
|---|
| 1438 | * be greater than 1. | 
|---|
| 1439 | * @param numFold 0 for the first fold, 1 for the second, ... | 
|---|
| 1440 | * @param random the random number generator | 
|---|
| 1441 | * @return the training set | 
|---|
| 1442 | * @throws IllegalArgumentException if the number of folds is less than 2 | 
|---|
| 1443 | * or greater than the number of instances. | 
|---|
| 1444 | */ | 
|---|
| 1445 | //@ requires 2 <= numFolds && numFolds < numInstances(); | 
|---|
| 1446 | //@ requires 0 <= numFold && numFold < numFolds; | 
|---|
| 1447 | public Instances trainCV(int numFolds, int numFold, Random random) { | 
|---|
| 1448 |  | 
|---|
| 1449 | Instances train = trainCV(numFolds, numFold); | 
|---|
| 1450 | train.randomize(random); | 
|---|
| 1451 | return train; | 
|---|
| 1452 | } | 
|---|
| 1453 |  | 
|---|
| 1454 | /** | 
|---|
| 1455 | * Computes the variance for a numeric attribute. | 
|---|
| 1456 | * | 
|---|
| 1457 | * @param attIndex the numeric attribute (index starts with 0) | 
|---|
| 1458 | * @return the variance if the attribute is numeric | 
|---|
| 1459 | * @throws IllegalArgumentException if the attribute is not numeric | 
|---|
| 1460 | */ | 
|---|
| 1461 | public /*@pure@*/ double variance(int attIndex) { | 
|---|
| 1462 |  | 
|---|
| 1463 | double sum = 0, sumSquared = 0, sumOfWeights = 0; | 
|---|
| 1464 |  | 
|---|
| 1465 | if (!attribute(attIndex).isNumeric()) { | 
|---|
| 1466 | throw new IllegalArgumentException("Can't compute variance because attribute is " + | 
|---|
| 1467 | "not numeric!"); | 
|---|
| 1468 | } | 
|---|
| 1469 | for (int i = 0; i < numInstances(); i++) { | 
|---|
| 1470 | if (!instance(i).isMissing(attIndex)) { | 
|---|
| 1471 | sum += instance(i).weight() * | 
|---|
| 1472 | instance(i).value(attIndex); | 
|---|
| 1473 | sumSquared += instance(i).weight() * | 
|---|
| 1474 | instance(i).value(attIndex) * | 
|---|
| 1475 | instance(i).value(attIndex); | 
|---|
| 1476 | sumOfWeights += instance(i).weight(); | 
|---|
| 1477 | } | 
|---|
| 1478 | } | 
|---|
| 1479 | if (sumOfWeights <= 1) { | 
|---|
| 1480 | return 0; | 
|---|
| 1481 | } | 
|---|
| 1482 | double result = (sumSquared - (sum * sum / sumOfWeights)) / | 
|---|
| 1483 | (sumOfWeights - 1); | 
|---|
| 1484 |  | 
|---|
| 1485 | // We don't like negative variance | 
|---|
| 1486 | if (result < 0) { | 
|---|
| 1487 | return 0; | 
|---|
| 1488 | } else { | 
|---|
| 1489 | return result; | 
|---|
| 1490 | } | 
|---|
| 1491 | } | 
|---|
| 1492 |  | 
|---|
| 1493 | /** | 
|---|
| 1494 | * Computes the variance for a numeric attribute. | 
|---|
| 1495 | * | 
|---|
| 1496 | * @param att the numeric attribute | 
|---|
| 1497 | * @return the variance if the attribute is numeric | 
|---|
| 1498 | * @throws IllegalArgumentException if the attribute is not numeric | 
|---|
| 1499 | */ | 
|---|
| 1500 | public /*@pure@*/ double variance(Attribute att) { | 
|---|
| 1501 |  | 
|---|
| 1502 | return variance(att.index()); | 
|---|
| 1503 | } | 
|---|
| 1504 |  | 
|---|
| 1505 | /** | 
|---|
| 1506 | * Calculates summary statistics on the values that appear in this | 
|---|
| 1507 | * set of instances for a specified attribute. | 
|---|
| 1508 | * | 
|---|
| 1509 | * @param index the index of the attribute to summarize (index starts with 0) | 
|---|
| 1510 | * @return an AttributeStats object with it's fields calculated. | 
|---|
| 1511 | */ | 
|---|
| 1512 | //@ requires 0 <= index && index < numAttributes(); | 
|---|
| 1513 | public AttributeStats attributeStats(int index) { | 
|---|
| 1514 |  | 
|---|
| 1515 | AttributeStats result = new AttributeStats(); | 
|---|
| 1516 | if (attribute(index).isNominal()) { | 
|---|
| 1517 | result.nominalCounts = new int [attribute(index).numValues()]; | 
|---|
| 1518 | result.nominalWeights = new double[attribute(index).numValues()]; | 
|---|
| 1519 | } | 
|---|
| 1520 | if (attribute(index).isNumeric()) { | 
|---|
| 1521 | result.numericStats = new weka.experiment.Stats(); | 
|---|
| 1522 | } | 
|---|
| 1523 | result.totalCount = numInstances(); | 
|---|
| 1524 |  | 
|---|
| 1525 | double [] attVals = attributeToDoubleArray(index); | 
|---|
| 1526 | int [] sorted = Utils.sort(attVals); | 
|---|
| 1527 | int currentCount = 0; | 
|---|
| 1528 | double currentWeight = 0; | 
|---|
| 1529 | double prev = Double.NaN; | 
|---|
| 1530 | for (int j = 0; j < numInstances(); j++) { | 
|---|
| 1531 | Instance current = instance(sorted[j]); | 
|---|
| 1532 | if (current.isMissing(index)) { | 
|---|
| 1533 | result.missingCount = numInstances() - j; | 
|---|
| 1534 | break; | 
|---|
| 1535 | } | 
|---|
| 1536 | if (current.value(index) == prev) { | 
|---|
| 1537 | currentCount++; | 
|---|
| 1538 | currentWeight += current.weight(); | 
|---|
| 1539 | } else { | 
|---|
| 1540 | result.addDistinct(prev, currentCount, currentWeight); | 
|---|
| 1541 | currentCount = 1; | 
|---|
| 1542 | currentWeight = current.weight(); | 
|---|
| 1543 | prev = current.value(index); | 
|---|
| 1544 | } | 
|---|
| 1545 | } | 
|---|
| 1546 | result.addDistinct(prev, currentCount, currentWeight); | 
|---|
| 1547 | result.distinctCount--; // So we don't count "missing" as a value | 
|---|
| 1548 | return result; | 
|---|
| 1549 | } | 
|---|
| 1550 |  | 
|---|
| 1551 | /** | 
|---|
| 1552 | * Gets the value of all instances in this dataset for a particular | 
|---|
| 1553 | * attribute. Useful in conjunction with Utils.sort to allow iterating | 
|---|
| 1554 | * through the dataset in sorted order for some attribute. | 
|---|
| 1555 | * | 
|---|
| 1556 | * @param index the index of the attribute. | 
|---|
| 1557 | * @return an array containing the value of the desired attribute for | 
|---|
| 1558 | * each instance in the dataset. | 
|---|
| 1559 | */ | 
|---|
| 1560 | //@ requires 0 <= index && index < numAttributes(); | 
|---|
| 1561 | public /*@pure@*/ double [] attributeToDoubleArray(int index) { | 
|---|
| 1562 |  | 
|---|
| 1563 | double [] result = new double[numInstances()]; | 
|---|
| 1564 | for (int i = 0; i < result.length; i++) { | 
|---|
| 1565 | result[i] = instance(i).value(index); | 
|---|
| 1566 | } | 
|---|
| 1567 | return result; | 
|---|
| 1568 | } | 
|---|
| 1569 |  | 
|---|
| 1570 | /** | 
|---|
| 1571 | * Generates a string summarizing the set of instances. Gives a breakdown | 
|---|
| 1572 | * for each attribute indicating the number of missing/discrete/unique | 
|---|
| 1573 | * values and other information. | 
|---|
| 1574 | * | 
|---|
| 1575 | * @return a string summarizing the dataset | 
|---|
| 1576 | */ | 
|---|
| 1577 | public String toSummaryString() { | 
|---|
| 1578 |  | 
|---|
| 1579 | StringBuffer result = new StringBuffer(); | 
|---|
| 1580 | result.append("Relation Name:  ").append(relationName()).append('\n'); | 
|---|
| 1581 | result.append("Num Instances:  ").append(numInstances()).append('\n'); | 
|---|
| 1582 | result.append("Num Attributes: ").append(numAttributes()).append('\n'); | 
|---|
| 1583 | result.append('\n'); | 
|---|
| 1584 |  | 
|---|
| 1585 | result.append(Utils.padLeft("", 5)).append(Utils.padRight("Name", 25)); | 
|---|
| 1586 | result.append(Utils.padLeft("Type", 5)).append(Utils.padLeft("Nom", 5)); | 
|---|
| 1587 | result.append(Utils.padLeft("Int", 5)).append(Utils.padLeft("Real", 5)); | 
|---|
| 1588 | result.append(Utils.padLeft("Missing", 12)); | 
|---|
| 1589 | result.append(Utils.padLeft("Unique", 12)); | 
|---|
| 1590 | result.append(Utils.padLeft("Dist", 6)).append('\n'); | 
|---|
| 1591 | for (int i = 0; i < numAttributes(); i++) { | 
|---|
| 1592 | Attribute a = attribute(i); | 
|---|
| 1593 | AttributeStats as = attributeStats(i); | 
|---|
| 1594 | result.append(Utils.padLeft("" + (i + 1), 4)).append(' '); | 
|---|
| 1595 | result.append(Utils.padRight(a.name(), 25)).append(' '); | 
|---|
| 1596 | long percent; | 
|---|
| 1597 | switch (a.type()) { | 
|---|
| 1598 | case Attribute.NOMINAL: | 
|---|
| 1599 | result.append(Utils.padLeft("Nom", 4)).append(' '); | 
|---|
| 1600 | percent = Math.round(100.0 * as.intCount / as.totalCount); | 
|---|
| 1601 | result.append(Utils.padLeft("" + percent, 3)).append("% "); | 
|---|
| 1602 | result.append(Utils.padLeft("" + 0, 3)).append("% "); | 
|---|
| 1603 | percent = Math.round(100.0 * as.realCount / as.totalCount); | 
|---|
| 1604 | result.append(Utils.padLeft("" + percent, 3)).append("% "); | 
|---|
| 1605 | break; | 
|---|
| 1606 | case Attribute.NUMERIC: | 
|---|
| 1607 | result.append(Utils.padLeft("Num", 4)).append(' '); | 
|---|
| 1608 | result.append(Utils.padLeft("" + 0, 3)).append("% "); | 
|---|
| 1609 | percent = Math.round(100.0 * as.intCount / as.totalCount); | 
|---|
| 1610 | result.append(Utils.padLeft("" + percent, 3)).append("% "); | 
|---|
| 1611 | percent = Math.round(100.0 * as.realCount / as.totalCount); | 
|---|
| 1612 | result.append(Utils.padLeft("" + percent, 3)).append("% "); | 
|---|
| 1613 | break; | 
|---|
| 1614 | case Attribute.DATE: | 
|---|
| 1615 | result.append(Utils.padLeft("Dat", 4)).append(' '); | 
|---|
| 1616 | result.append(Utils.padLeft("" + 0, 3)).append("% "); | 
|---|
| 1617 | percent = Math.round(100.0 * as.intCount / as.totalCount); | 
|---|
| 1618 | result.append(Utils.padLeft("" + percent, 3)).append("% "); | 
|---|
| 1619 | percent = Math.round(100.0 * as.realCount / as.totalCount); | 
|---|
| 1620 | result.append(Utils.padLeft("" + percent, 3)).append("% "); | 
|---|
| 1621 | break; | 
|---|
| 1622 | case Attribute.STRING: | 
|---|
| 1623 | result.append(Utils.padLeft("Str", 4)).append(' '); | 
|---|
| 1624 | percent = Math.round(100.0 * as.intCount / as.totalCount); | 
|---|
| 1625 | result.append(Utils.padLeft("" + percent, 3)).append("% "); | 
|---|
| 1626 | result.append(Utils.padLeft("" + 0, 3)).append("% "); | 
|---|
| 1627 | percent = Math.round(100.0 * as.realCount / as.totalCount); | 
|---|
| 1628 | result.append(Utils.padLeft("" + percent, 3)).append("% "); | 
|---|
| 1629 | break; | 
|---|
| 1630 | case Attribute.RELATIONAL: | 
|---|
| 1631 | result.append(Utils.padLeft("Rel", 4)).append(' '); | 
|---|
| 1632 | percent = Math.round(100.0 * as.intCount / as.totalCount); | 
|---|
| 1633 | result.append(Utils.padLeft("" + percent, 3)).append("% "); | 
|---|
| 1634 | result.append(Utils.padLeft("" + 0, 3)).append("% "); | 
|---|
| 1635 | percent = Math.round(100.0 * as.realCount / as.totalCount); | 
|---|
| 1636 | result.append(Utils.padLeft("" + percent, 3)).append("% "); | 
|---|
| 1637 | break; | 
|---|
| 1638 | default: | 
|---|
| 1639 | result.append(Utils.padLeft("???", 4)).append(' '); | 
|---|
| 1640 | result.append(Utils.padLeft("" + 0, 3)).append("% "); | 
|---|
| 1641 | percent = Math.round(100.0 * as.intCount / as.totalCount); | 
|---|
| 1642 | result.append(Utils.padLeft("" + percent, 3)).append("% "); | 
|---|
| 1643 | percent = Math.round(100.0 * as.realCount / as.totalCount); | 
|---|
| 1644 | result.append(Utils.padLeft("" + percent, 3)).append("% "); | 
|---|
| 1645 | break; | 
|---|
| 1646 | } | 
|---|
| 1647 | result.append(Utils.padLeft("" + as.missingCount, 5)).append(" /"); | 
|---|
| 1648 | percent = Math.round(100.0 * as.missingCount / as.totalCount); | 
|---|
| 1649 | result.append(Utils.padLeft("" + percent, 3)).append("% "); | 
|---|
| 1650 | result.append(Utils.padLeft("" + as.uniqueCount, 5)).append(" /"); | 
|---|
| 1651 | percent = Math.round(100.0 * as.uniqueCount / as.totalCount); | 
|---|
| 1652 | result.append(Utils.padLeft("" + percent, 3)).append("% "); | 
|---|
| 1653 | result.append(Utils.padLeft("" + as.distinctCount, 5)).append(' '); | 
|---|
| 1654 | result.append('\n'); | 
|---|
| 1655 | } | 
|---|
| 1656 | return result.toString(); | 
|---|
| 1657 | } | 
|---|
| 1658 |  | 
|---|
| 1659 | /** | 
|---|
| 1660 | * Copies instances from one set to the end of another | 
|---|
| 1661 | * one. | 
|---|
| 1662 | * | 
|---|
| 1663 | * @param from the position of the first instance to be copied | 
|---|
| 1664 | * @param dest the destination for the instances | 
|---|
| 1665 | * @param num the number of instances to be copied | 
|---|
| 1666 | */ | 
|---|
| 1667 | //@ requires 0 <= from && from <= numInstances() - num; | 
|---|
| 1668 | //@ requires 0 <= num; | 
|---|
| 1669 | protected void copyInstances(int from, /*@non_null@*/ Instances dest, int num) { | 
|---|
| 1670 |  | 
|---|
| 1671 | for (int i = 0; i < num; i++) { | 
|---|
| 1672 | dest.add(instance(from + i)); | 
|---|
| 1673 | } | 
|---|
| 1674 | } | 
|---|
| 1675 |  | 
|---|
| 1676 | /** | 
|---|
| 1677 | * Replaces the attribute information by a clone of | 
|---|
| 1678 | * itself. | 
|---|
| 1679 | */ | 
|---|
| 1680 | protected void freshAttributeInfo() { | 
|---|
| 1681 |  | 
|---|
| 1682 | ArrayList<Attribute> newList = new ArrayList<Attribute>(m_Attributes.size()); | 
|---|
| 1683 | for (Attribute att : m_Attributes) { | 
|---|
| 1684 | newList.add((Attribute)att.copy()); | 
|---|
| 1685 | } | 
|---|
| 1686 | m_Attributes = newList; | 
|---|
| 1687 | } | 
|---|
| 1688 |  | 
|---|
| 1689 | /** | 
|---|
| 1690 | * Returns string including all instances, their weights and | 
|---|
| 1691 | * their indices in the original dataset. | 
|---|
| 1692 | * | 
|---|
| 1693 | * @return description of instance and its weight as a string | 
|---|
| 1694 | */ | 
|---|
| 1695 | protected /*@pure@*/ String instancesAndWeights(){ | 
|---|
| 1696 |  | 
|---|
| 1697 | StringBuffer text = new StringBuffer(); | 
|---|
| 1698 |  | 
|---|
| 1699 | for (int i = 0; i < numInstances(); i++) { | 
|---|
| 1700 | text.append(instance(i) + " " + instance(i).weight()); | 
|---|
| 1701 | if (i < numInstances() - 1) { | 
|---|
| 1702 | text.append("\n"); | 
|---|
| 1703 | } | 
|---|
| 1704 | } | 
|---|
| 1705 | return text.toString(); | 
|---|
| 1706 | } | 
|---|
| 1707 |  | 
|---|
| 1708 | /** | 
|---|
| 1709 | * Partitions the instances around a pivot. Used by quicksort and | 
|---|
| 1710 | * kthSmallestValue. | 
|---|
| 1711 | * | 
|---|
| 1712 | * @param attIndex the attribute's index (index starts with 0) | 
|---|
| 1713 | * @param l the first index of the subset (index starts with 0) | 
|---|
| 1714 | * @param r the last index of the subset (index starts with 0) | 
|---|
| 1715 | * | 
|---|
| 1716 | * @return the index of the middle element | 
|---|
| 1717 | */ | 
|---|
| 1718 | //@ requires 0 <= attIndex && attIndex < numAttributes(); | 
|---|
| 1719 | //@ requires 0 <= left && left <= right && right < numInstances(); | 
|---|
| 1720 | protected int partition(int attIndex, int l, int r) { | 
|---|
| 1721 |  | 
|---|
| 1722 | double pivot = instance((l + r) / 2).value(attIndex); | 
|---|
| 1723 |  | 
|---|
| 1724 | while (l < r) { | 
|---|
| 1725 | while ((instance(l).value(attIndex) < pivot) && (l < r)) { | 
|---|
| 1726 | l++; | 
|---|
| 1727 | } | 
|---|
| 1728 | while ((instance(r).value(attIndex) > pivot) && (l < r)) { | 
|---|
| 1729 | r--; | 
|---|
| 1730 | } | 
|---|
| 1731 | if (l < r) { | 
|---|
| 1732 | swap(l, r); | 
|---|
| 1733 | l++; | 
|---|
| 1734 | r--; | 
|---|
| 1735 | } | 
|---|
| 1736 | } | 
|---|
| 1737 | if ((l == r) && (instance(r).value(attIndex) > pivot)) { | 
|---|
| 1738 | r--; | 
|---|
| 1739 | } | 
|---|
| 1740 |  | 
|---|
| 1741 | return r; | 
|---|
| 1742 | } | 
|---|
| 1743 |  | 
|---|
| 1744 | /** | 
|---|
| 1745 | * Implements quicksort according to Manber's "Introduction to | 
|---|
| 1746 | * Algorithms". | 
|---|
| 1747 | * | 
|---|
| 1748 | * @param attIndex the attribute's index (index starts with 0) | 
|---|
| 1749 | * @param left the first index of the subset to be sorted (index starts with 0) | 
|---|
| 1750 | * @param right the last index of the subset to be sorted (index starts with 0) | 
|---|
| 1751 | */ | 
|---|
| 1752 | //@ requires 0 <= attIndex && attIndex < numAttributes(); | 
|---|
| 1753 | //@ requires 0 <= first && first <= right && right < numInstances(); | 
|---|
| 1754 | protected void quickSort(int attIndex, int left, int right) { | 
|---|
| 1755 |  | 
|---|
| 1756 | if (left < right) { | 
|---|
| 1757 | int middle = partition(attIndex, left, right); | 
|---|
| 1758 | quickSort(attIndex, left, middle); | 
|---|
| 1759 | quickSort(attIndex, middle + 1, right); | 
|---|
| 1760 | } | 
|---|
| 1761 | } | 
|---|
| 1762 |  | 
|---|
| 1763 | /** | 
|---|
| 1764 | * Implements computation of the kth-smallest element according | 
|---|
| 1765 | * to Manber's "Introduction to Algorithms". | 
|---|
| 1766 | * | 
|---|
| 1767 | * @param attIndex the attribute's index (index starts with 0) | 
|---|
| 1768 | * @param left the first index of the subset (index starts with 0) | 
|---|
| 1769 | * @param right the last index of the subset (index starts with 0) | 
|---|
| 1770 | * @param k the value of k | 
|---|
| 1771 | * | 
|---|
| 1772 | * @return the index of the kth-smallest element | 
|---|
| 1773 | */ | 
|---|
| 1774 | //@ requires 0 <= attIndex && attIndex < numAttributes(); | 
|---|
| 1775 | //@ requires 0 <= first && first <= right && right < numInstances(); | 
|---|
| 1776 | protected int select(int attIndex, int left, int right, int k) { | 
|---|
| 1777 |  | 
|---|
| 1778 | if (left == right) { | 
|---|
| 1779 | return left; | 
|---|
| 1780 | } else { | 
|---|
| 1781 | int middle = partition(attIndex, left, right); | 
|---|
| 1782 | if ((middle - left + 1) >= k) { | 
|---|
| 1783 | return select(attIndex, left, middle, k); | 
|---|
| 1784 | } else { | 
|---|
| 1785 | return select(attIndex, middle + 1, right, k - (middle - left + 1)); | 
|---|
| 1786 | } | 
|---|
| 1787 | } | 
|---|
| 1788 | } | 
|---|
| 1789 |  | 
|---|
| 1790 | /** | 
|---|
| 1791 | * Help function needed for stratification of set. | 
|---|
| 1792 | * | 
|---|
| 1793 | * @param numFolds the number of folds for the stratification | 
|---|
| 1794 | */ | 
|---|
| 1795 | protected void stratStep (int numFolds){ | 
|---|
| 1796 |  | 
|---|
| 1797 | ArrayList<Instance> newVec = new ArrayList<Instance>(m_Instances.size()); | 
|---|
| 1798 | int start = 0, j; | 
|---|
| 1799 |  | 
|---|
| 1800 | // create stratified batch | 
|---|
| 1801 | while (newVec.size() < numInstances()) { | 
|---|
| 1802 | j = start; | 
|---|
| 1803 | while (j < numInstances()) { | 
|---|
| 1804 | newVec.add(instance(j)); | 
|---|
| 1805 | j = j + numFolds; | 
|---|
| 1806 | } | 
|---|
| 1807 | start++; | 
|---|
| 1808 | } | 
|---|
| 1809 | m_Instances = newVec; | 
|---|
| 1810 | } | 
|---|
| 1811 |  | 
|---|
| 1812 | /** | 
|---|
| 1813 | * Swaps two instances in the set. | 
|---|
| 1814 | * | 
|---|
| 1815 | * @param i the first instance's index (index starts with 0) | 
|---|
| 1816 | * @param j the second instance's index (index starts with 0) | 
|---|
| 1817 | */ | 
|---|
| 1818 | //@ requires 0 <= i && i < numInstances(); | 
|---|
| 1819 | //@ requires 0 <= j && j < numInstances(); | 
|---|
| 1820 | public void swap(int i, int j){ | 
|---|
| 1821 |  | 
|---|
| 1822 | Instance in = m_Instances.get(i); | 
|---|
| 1823 | m_Instances.set(i, m_Instances.get(j)); | 
|---|
| 1824 | m_Instances.set(j, in); | 
|---|
| 1825 | } | 
|---|
| 1826 |  | 
|---|
| 1827 | /** | 
|---|
| 1828 | * Merges two sets of Instances together. The resulting set will have | 
|---|
| 1829 | * all the attributes of the first set plus all the attributes of the | 
|---|
| 1830 | * second set. The number of instances in both sets must be the same. | 
|---|
| 1831 | * | 
|---|
| 1832 | * @param first the first set of Instances | 
|---|
| 1833 | * @param second the second set of Instances | 
|---|
| 1834 | * @return the merged set of Instances | 
|---|
| 1835 | * @throws IllegalArgumentException if the datasets are not the same size | 
|---|
| 1836 | */ | 
|---|
| 1837 | public static Instances mergeInstances(Instances first, Instances second) { | 
|---|
| 1838 |  | 
|---|
| 1839 | if (first.numInstances() != second.numInstances()) { | 
|---|
| 1840 | throw new IllegalArgumentException("Instance sets must be of the same size"); | 
|---|
| 1841 | } | 
|---|
| 1842 |  | 
|---|
| 1843 | // Create the vector of merged attributes | 
|---|
| 1844 | ArrayList<Attribute> newAttributes = new ArrayList<Attribute>(); | 
|---|
| 1845 | for (int i = 0; i < first.numAttributes(); i++) { | 
|---|
| 1846 | newAttributes.add(first.attribute(i)); | 
|---|
| 1847 | } | 
|---|
| 1848 | for (int i = 0; i < second.numAttributes(); i++) { | 
|---|
| 1849 | newAttributes.add(second.attribute(i)); | 
|---|
| 1850 | } | 
|---|
| 1851 |  | 
|---|
| 1852 | // Create the set of Instances | 
|---|
| 1853 | Instances merged = new Instances(first.relationName() + '_' | 
|---|
| 1854 | + second.relationName(), | 
|---|
| 1855 | newAttributes, | 
|---|
| 1856 | first.numInstances()); | 
|---|
| 1857 | // Merge each instance | 
|---|
| 1858 | for (int i = 0; i < first.numInstances(); i++) { | 
|---|
| 1859 | merged.add(first.instance(i).mergeInstance(second.instance(i))); | 
|---|
| 1860 | } | 
|---|
| 1861 | return merged; | 
|---|
| 1862 | } | 
|---|
| 1863 |  | 
|---|
| 1864 | /** | 
|---|
| 1865 | * Method for testing this class. | 
|---|
| 1866 | * | 
|---|
| 1867 | * @param argv should contain one element: the name of an ARFF file | 
|---|
| 1868 | */ | 
|---|
| 1869 | //@ requires argv != null; | 
|---|
| 1870 | //@ requires argv.length == 1; | 
|---|
| 1871 | //@ requires argv[0] != null; | 
|---|
| 1872 | public static void test(String [] argv) { | 
|---|
| 1873 |  | 
|---|
| 1874 | Instances instances, secondInstances, train, test, empty; | 
|---|
| 1875 | Random random = new Random(2); | 
|---|
| 1876 | Reader reader; | 
|---|
| 1877 | int start, num; | 
|---|
| 1878 | ArrayList<Attribute> testAtts; | 
|---|
| 1879 | ArrayList<String> testVals; | 
|---|
| 1880 | int i,j; | 
|---|
| 1881 |  | 
|---|
| 1882 | try{ | 
|---|
| 1883 | if (argv.length > 1) { | 
|---|
| 1884 | throw (new Exception("Usage: Instances [<filename>]")); | 
|---|
| 1885 | } | 
|---|
| 1886 |  | 
|---|
| 1887 | // Creating set of instances from scratch | 
|---|
| 1888 | testVals = new ArrayList<String>(2); | 
|---|
| 1889 | testVals.add("first_value"); | 
|---|
| 1890 | testVals.add("second_value"); | 
|---|
| 1891 | testAtts = new ArrayList<Attribute>(2); | 
|---|
| 1892 | testAtts.add(new Attribute("nominal_attribute", testVals)); | 
|---|
| 1893 | testAtts.add(new Attribute("numeric_attribute")); | 
|---|
| 1894 | instances = new Instances("test_set", testAtts, 10); | 
|---|
| 1895 | instances.add(new DenseInstance(instances.numAttributes())); | 
|---|
| 1896 | instances.add(new DenseInstance(instances.numAttributes())); | 
|---|
| 1897 | instances.add(new DenseInstance(instances.numAttributes())); | 
|---|
| 1898 | instances.setClassIndex(0); | 
|---|
| 1899 | System.out.println("\nSet of instances created from scratch:\n"); | 
|---|
| 1900 | System.out.println(instances); | 
|---|
| 1901 |  | 
|---|
| 1902 | if (argv.length == 1) { | 
|---|
| 1903 | String filename = argv[0]; | 
|---|
| 1904 | reader = new FileReader(filename); | 
|---|
| 1905 |  | 
|---|
| 1906 | // Read first five instances and print them | 
|---|
| 1907 | System.out.println("\nFirst five instances from file:\n"); | 
|---|
| 1908 | instances = new Instances(reader, 1); | 
|---|
| 1909 | instances.setClassIndex(instances.numAttributes() - 1); | 
|---|
| 1910 | i = 0; | 
|---|
| 1911 | while ((i < 5) && (instances.readInstance(reader))) { | 
|---|
| 1912 | i++; | 
|---|
| 1913 | } | 
|---|
| 1914 | System.out.println(instances); | 
|---|
| 1915 |  | 
|---|
| 1916 | // Read all the instances in the file | 
|---|
| 1917 | reader = new FileReader(filename); | 
|---|
| 1918 | instances = new Instances(reader); | 
|---|
| 1919 |  | 
|---|
| 1920 | // Make the last attribute be the class | 
|---|
| 1921 | instances.setClassIndex(instances.numAttributes() - 1); | 
|---|
| 1922 |  | 
|---|
| 1923 | // Print header and instances. | 
|---|
| 1924 | System.out.println("\nDataset:\n"); | 
|---|
| 1925 | System.out.println(instances); | 
|---|
| 1926 | System.out.println("\nClass index: "+instances.classIndex()); | 
|---|
| 1927 | } | 
|---|
| 1928 |  | 
|---|
| 1929 | // Test basic methods based on class index. | 
|---|
| 1930 | System.out.println("\nClass name: "+instances.classAttribute().name()); | 
|---|
| 1931 | System.out.println("\nClass index: "+instances.classIndex()); | 
|---|
| 1932 | System.out.println("\nClass is nominal: " + | 
|---|
| 1933 | instances.classAttribute().isNominal()); | 
|---|
| 1934 | System.out.println("\nClass is numeric: " + | 
|---|
| 1935 | instances.classAttribute().isNumeric()); | 
|---|
| 1936 | System.out.println("\nClasses:\n"); | 
|---|
| 1937 | for (i = 0; i < instances.numClasses(); i++) { | 
|---|
| 1938 | System.out.println(instances.classAttribute().value(i)); | 
|---|
| 1939 | } | 
|---|
| 1940 | System.out.println("\nClass values and labels of instances:\n"); | 
|---|
| 1941 | for (i = 0; i < instances.numInstances(); i++) { | 
|---|
| 1942 | Instance inst = instances.instance(i); | 
|---|
| 1943 | System.out.print(inst.classValue() + "\t"); | 
|---|
| 1944 | System.out.print(inst.toString(inst.classIndex())); | 
|---|
| 1945 | if (instances.instance(i).classIsMissing()) { | 
|---|
| 1946 | System.out.println("\tis missing"); | 
|---|
| 1947 | } else { | 
|---|
| 1948 | System.out.println(); | 
|---|
| 1949 | } | 
|---|
| 1950 | } | 
|---|
| 1951 |  | 
|---|
| 1952 | // Create random weights. | 
|---|
| 1953 | System.out.println("\nCreating random weights for instances."); | 
|---|
| 1954 | for (i = 0; i < instances.numInstances(); i++) { | 
|---|
| 1955 | instances.instance(i).setWeight(random.nextDouble()); | 
|---|
| 1956 | } | 
|---|
| 1957 |  | 
|---|
| 1958 | // Print all instances and their weights (and the sum of weights). | 
|---|
| 1959 | System.out.println("\nInstances and their weights:\n"); | 
|---|
| 1960 | System.out.println(instances.instancesAndWeights()); | 
|---|
| 1961 | System.out.print("\nSum of weights: "); | 
|---|
| 1962 | System.out.println(instances.sumOfWeights()); | 
|---|
| 1963 |  | 
|---|
| 1964 | // Insert an attribute | 
|---|
| 1965 | secondInstances = new Instances(instances); | 
|---|
| 1966 | Attribute testAtt = new Attribute("Inserted"); | 
|---|
| 1967 | secondInstances.insertAttributeAt(testAtt, 0); | 
|---|
| 1968 | System.out.println("\nSet with inserted attribute:\n"); | 
|---|
| 1969 | System.out.println(secondInstances); | 
|---|
| 1970 | System.out.println("\nClass name: " | 
|---|
| 1971 | + secondInstances.classAttribute().name()); | 
|---|
| 1972 |  | 
|---|
| 1973 | // Delete the attribute | 
|---|
| 1974 | secondInstances.deleteAttributeAt(0); | 
|---|
| 1975 | System.out.println("\nSet with attribute deleted:\n"); | 
|---|
| 1976 | System.out.println(secondInstances); | 
|---|
| 1977 | System.out.println("\nClass name: " | 
|---|
| 1978 | + secondInstances.classAttribute().name()); | 
|---|
| 1979 |  | 
|---|
| 1980 | // Test if headers are equal | 
|---|
| 1981 | System.out.println("\nHeaders equal: "+ | 
|---|
| 1982 | instances.equalHeaders(secondInstances) + "\n"); | 
|---|
| 1983 |  | 
|---|
| 1984 | // Print data in internal format. | 
|---|
| 1985 | System.out.println("\nData (internal values):\n"); | 
|---|
| 1986 | for (i = 0; i < instances.numInstances(); i++) { | 
|---|
| 1987 | for (j = 0; j < instances.numAttributes(); j++) { | 
|---|
| 1988 | if (instances.instance(i).isMissing(j)) { | 
|---|
| 1989 | System.out.print("? "); | 
|---|
| 1990 | } else { | 
|---|
| 1991 | System.out.print(instances.instance(i).value(j) + " "); | 
|---|
| 1992 | } | 
|---|
| 1993 | } | 
|---|
| 1994 | System.out.println(); | 
|---|
| 1995 | } | 
|---|
| 1996 |  | 
|---|
| 1997 | // Just print header | 
|---|
| 1998 | System.out.println("\nEmpty dataset:\n"); | 
|---|
| 1999 | empty = new Instances(instances, 0); | 
|---|
| 2000 | System.out.println(empty); | 
|---|
| 2001 | System.out.println("\nClass name: "+empty.classAttribute().name()); | 
|---|
| 2002 |  | 
|---|
| 2003 | // Create copy and rename an attribute and a value (if possible) | 
|---|
| 2004 | if (empty.classAttribute().isNominal()) { | 
|---|
| 2005 | Instances copy = new Instances(empty, 0); | 
|---|
| 2006 | copy.renameAttribute(copy.classAttribute(), "new_name"); | 
|---|
| 2007 | copy.renameAttributeValue(copy.classAttribute(), | 
|---|
| 2008 | copy.classAttribute().value(0), | 
|---|
| 2009 | "new_val_name"); | 
|---|
| 2010 | System.out.println("\nDataset with names changed:\n" + copy); | 
|---|
| 2011 | System.out.println("\nOriginal dataset:\n" + empty); | 
|---|
| 2012 | } | 
|---|
| 2013 |  | 
|---|
| 2014 | // Create and prints subset of instances. | 
|---|
| 2015 | start = instances.numInstances() / 4; | 
|---|
| 2016 | num = instances.numInstances() / 2; | 
|---|
| 2017 | System.out.print("\nSubset of dataset: "); | 
|---|
| 2018 | System.out.println(num + " instances from " + (start + 1) | 
|---|
| 2019 | + ". instance"); | 
|---|
| 2020 | secondInstances = new Instances(instances, start, num); | 
|---|
| 2021 | System.out.println("\nClass name: " | 
|---|
| 2022 | + secondInstances.classAttribute().name()); | 
|---|
| 2023 |  | 
|---|
| 2024 | // Print all instances and their weights (and the sum of weights). | 
|---|
| 2025 | System.out.println("\nInstances and their weights:\n"); | 
|---|
| 2026 | System.out.println(secondInstances.instancesAndWeights()); | 
|---|
| 2027 | System.out.print("\nSum of weights: "); | 
|---|
| 2028 | System.out.println(secondInstances.sumOfWeights()); | 
|---|
| 2029 |  | 
|---|
| 2030 | // Create and print training and test sets for 3-fold | 
|---|
| 2031 | // cross-validation. | 
|---|
| 2032 | System.out.println("\nTrain and test folds for 3-fold CV:"); | 
|---|
| 2033 | if (instances.classAttribute().isNominal()) { | 
|---|
| 2034 | instances.stratify(3); | 
|---|
| 2035 | } | 
|---|
| 2036 | for (j = 0; j < 3; j++) { | 
|---|
| 2037 | train = instances.trainCV(3,j, new Random(1)); | 
|---|
| 2038 | test = instances.testCV(3,j); | 
|---|
| 2039 |  | 
|---|
| 2040 | // Print all instances and their weights (and the sum of weights). | 
|---|
| 2041 | System.out.println("\nTrain: "); | 
|---|
| 2042 | System.out.println("\nInstances and their weights:\n"); | 
|---|
| 2043 | System.out.println(train.instancesAndWeights()); | 
|---|
| 2044 | System.out.print("\nSum of weights: "); | 
|---|
| 2045 | System.out.println(train.sumOfWeights()); | 
|---|
| 2046 | System.out.println("\nClass name: "+train.classAttribute().name()); | 
|---|
| 2047 | System.out.println("\nTest: "); | 
|---|
| 2048 | System.out.println("\nInstances and their weights:\n"); | 
|---|
| 2049 | System.out.println(test.instancesAndWeights()); | 
|---|
| 2050 | System.out.print("\nSum of weights: "); | 
|---|
| 2051 | System.out.println(test.sumOfWeights()); | 
|---|
| 2052 | System.out.println("\nClass name: "+test.classAttribute().name()); | 
|---|
| 2053 | } | 
|---|
| 2054 |  | 
|---|
| 2055 | // Randomize instances and print them. | 
|---|
| 2056 | System.out.println("\nRandomized dataset:"); | 
|---|
| 2057 | instances.randomize(random); | 
|---|
| 2058 |  | 
|---|
| 2059 | // Print all instances and their weights (and the sum of weights). | 
|---|
| 2060 | System.out.println("\nInstances and their weights:\n"); | 
|---|
| 2061 | System.out.println(instances.instancesAndWeights()); | 
|---|
| 2062 | System.out.print("\nSum of weights: "); | 
|---|
| 2063 | System.out.println(instances.sumOfWeights()); | 
|---|
| 2064 |  | 
|---|
| 2065 | // Sort instances according to first attribute and | 
|---|
| 2066 | // print them. | 
|---|
| 2067 | System.out.print("\nInstances sorted according to first attribute:\n "); | 
|---|
| 2068 | instances.sort(0); | 
|---|
| 2069 |  | 
|---|
| 2070 | // Print all instances and their weights (and the sum of weights). | 
|---|
| 2071 | System.out.println("\nInstances and their weights:\n"); | 
|---|
| 2072 | System.out.println(instances.instancesAndWeights()); | 
|---|
| 2073 | System.out.print("\nSum of weights: "); | 
|---|
| 2074 | System.out.println(instances.sumOfWeights()); | 
|---|
| 2075 | } catch (Exception e) { | 
|---|
| 2076 | e.printStackTrace(); | 
|---|
| 2077 | } | 
|---|
| 2078 | } | 
|---|
| 2079 |  | 
|---|
| 2080 | /** | 
|---|
| 2081 | * Main method for this class. The following calls are possible: | 
|---|
| 2082 | * <ul> | 
|---|
| 2083 | *   <li> | 
|---|
| 2084 | *     <code>weka.core.Instances</code> help<br/> | 
|---|
| 2085 | *     prints a short list of possible commands. | 
|---|
| 2086 | *   </li> | 
|---|
| 2087 | *   <li> | 
|---|
| 2088 | *     <code>weka.core.Instances</code> <filename><br/> | 
|---|
| 2089 | *     prints a summary of a set of instances. | 
|---|
| 2090 | *   </li> | 
|---|
| 2091 | *   <li> | 
|---|
| 2092 | *     <code>weka.core.Instances</code> merge <filename1> <filename2><br/> | 
|---|
| 2093 | *     merges the two datasets (must have same number of instances) and | 
|---|
| 2094 | *     outputs the results on stdout. | 
|---|
| 2095 | *   </li> | 
|---|
| 2096 | *   <li> | 
|---|
| 2097 | *     <code>weka.core.Instances</code> append <filename1> <filename2><br/> | 
|---|
| 2098 | *     appends the second dataset to the first one (must have same headers) and | 
|---|
| 2099 | *     outputs the results on stdout. | 
|---|
| 2100 | *   </li> | 
|---|
| 2101 | *   <li> | 
|---|
| 2102 | *     <code>weka.core.Instances</code> headers <filename1> <filename2><br/> | 
|---|
| 2103 | *     Compares the headers of the two datasets and prints whether they match | 
|---|
| 2104 | *     or not. | 
|---|
| 2105 | *   </li> | 
|---|
| 2106 | *   <li> | 
|---|
| 2107 | *     <code>weka.core.Instances</code> randomize <seed> <filename><br/> | 
|---|
| 2108 | *     randomizes the dataset with the given seed and outputs the result on stdout. | 
|---|
| 2109 | *   </li> | 
|---|
| 2110 | * </ul> | 
|---|
| 2111 | * | 
|---|
| 2112 | * @param args        the commandline parameters | 
|---|
| 2113 | */ | 
|---|
| 2114 | public static void main(String[] args) { | 
|---|
| 2115 |  | 
|---|
| 2116 | try { | 
|---|
| 2117 | Instances i; | 
|---|
| 2118 | // read from stdin and print statistics | 
|---|
| 2119 | if (args.length == 0) { | 
|---|
| 2120 | DataSource source = new DataSource(System.in); | 
|---|
| 2121 | i = source.getDataSet(); | 
|---|
| 2122 | System.out.println(i.toSummaryString()); | 
|---|
| 2123 | } | 
|---|
| 2124 | // read file and print statistics | 
|---|
| 2125 | else if ((args.length == 1) && (!args[0].equals("-h")) && (!args[0].equals("help"))) { | 
|---|
| 2126 | DataSource source = new DataSource(args[0]); | 
|---|
| 2127 | i = source.getDataSet(); | 
|---|
| 2128 | System.out.println(i.toSummaryString()); | 
|---|
| 2129 | } | 
|---|
| 2130 | // read two files, merge them and print result to stdout | 
|---|
| 2131 | else if ((args.length == 3) && (args[0].toLowerCase().equals("merge"))) { | 
|---|
| 2132 | DataSource source1 = new DataSource(args[1]); | 
|---|
| 2133 | DataSource source2 = new DataSource(args[2]); | 
|---|
| 2134 | i = Instances.mergeInstances(source1.getDataSet(), source2.getDataSet()); | 
|---|
| 2135 | System.out.println(i); | 
|---|
| 2136 | } | 
|---|
| 2137 | // read two files, append them and print result to stdout | 
|---|
| 2138 | else if ((args.length == 3) && (args[0].toLowerCase().equals("append"))) { | 
|---|
| 2139 | DataSource source1 = new DataSource(args[1]); | 
|---|
| 2140 | DataSource source2 = new DataSource(args[2]); | 
|---|
| 2141 | String msg = source1.getStructure().equalHeadersMsg(source2.getStructure()); | 
|---|
| 2142 | if (msg != null) | 
|---|
| 2143 | throw new Exception("The two datasets have different headers:\n" + msg); | 
|---|
| 2144 | Instances structure = source1.getStructure(); | 
|---|
| 2145 | System.out.println(source1.getStructure()); | 
|---|
| 2146 | while (source1.hasMoreElements(structure)) | 
|---|
| 2147 | System.out.println(source1.nextElement(structure)); | 
|---|
| 2148 | structure = source2.getStructure(); | 
|---|
| 2149 | while (source2.hasMoreElements(structure)) | 
|---|
| 2150 | System.out.println(source2.nextElement(structure)); | 
|---|
| 2151 | } | 
|---|
| 2152 | // read two files and compare their headers | 
|---|
| 2153 | else if ((args.length == 3) && (args[0].toLowerCase().equals("headers"))) { | 
|---|
| 2154 | DataSource source1 = new DataSource(args[1]); | 
|---|
| 2155 | DataSource source2 = new DataSource(args[2]); | 
|---|
| 2156 | String msg = source1.getStructure().equalHeadersMsg(source2.getStructure()); | 
|---|
| 2157 | if (msg == null) | 
|---|
| 2158 | System.out.println("Headers match"); | 
|---|
| 2159 | else | 
|---|
| 2160 | System.out.println("Headers don't match:\n" + msg); | 
|---|
| 2161 | } | 
|---|
| 2162 | // read file and seed value, randomize data and print result to stdout | 
|---|
| 2163 | else if ((args.length == 3) && (args[0].toLowerCase().equals("randomize"))) { | 
|---|
| 2164 | DataSource source = new DataSource(args[2]); | 
|---|
| 2165 | i = source.getDataSet(); | 
|---|
| 2166 | i.randomize(new Random(Integer.parseInt(args[1]))); | 
|---|
| 2167 | System.out.println(i); | 
|---|
| 2168 | } | 
|---|
| 2169 | // wrong parameters or help | 
|---|
| 2170 | else { | 
|---|
| 2171 | System.err.println( | 
|---|
| 2172 | "\nUsage:\n" | 
|---|
| 2173 | // help | 
|---|
| 2174 | + "\tweka.core.Instances help\n" | 
|---|
| 2175 | + "\t\tPrints this help\n" | 
|---|
| 2176 | // stats | 
|---|
| 2177 | + "\tweka.core.Instances <filename>\n" | 
|---|
| 2178 | + "\t\tOutputs dataset statistics\n" | 
|---|
| 2179 | // merge | 
|---|
| 2180 | + "\tweka.core.Instances merge <filename1> <filename2>\n" | 
|---|
| 2181 | + "\t\tMerges the datasets (must have same number of rows).\n" | 
|---|
| 2182 | + "\t\tGenerated dataset gets output on stdout.\n" | 
|---|
| 2183 | // append | 
|---|
| 2184 | + "\tweka.core.Instances append <filename1> <filename2>\n" | 
|---|
| 2185 | + "\t\tAppends the second dataset to the first (must have same number of attributes).\n" | 
|---|
| 2186 | + "\t\tGenerated dataset gets output on stdout.\n" | 
|---|
| 2187 | // headers | 
|---|
| 2188 | + "\tweka.core.Instances headers <filename1> <filename2>\n" | 
|---|
| 2189 | + "\t\tCompares the structure of the two datasets and outputs whether they\n" | 
|---|
| 2190 | + "\t\tdiffer or not.\n" | 
|---|
| 2191 | // randomize | 
|---|
| 2192 | + "\tweka.core.Instances randomize <seed> <filename>\n" | 
|---|
| 2193 | + "\t\tRandomizes the dataset and outputs it on stdout.\n" | 
|---|
| 2194 | ); | 
|---|
| 2195 | } | 
|---|
| 2196 | } | 
|---|
| 2197 | catch (Exception ex) { | 
|---|
| 2198 | ex.printStackTrace(); | 
|---|
| 2199 | System.err.println(ex.getMessage()); | 
|---|
| 2200 | } | 
|---|
| 2201 | } | 
|---|
| 2202 |  | 
|---|
| 2203 | /** | 
|---|
| 2204 | * Returns the revision string. | 
|---|
| 2205 | * | 
|---|
| 2206 | * @return            the revision | 
|---|
| 2207 | */ | 
|---|
| 2208 | public String getRevision() { | 
|---|
| 2209 | return RevisionUtils.extract("$Revision: 5987 $"); | 
|---|
| 2210 | } | 
|---|
| 2211 | } | 
|---|