Context Navigation

source: src/main/java/weka/core/Instances.java @ 18

Last change on this file since 18 was 4, checked in by gnappo, 14 years ago
Import di weka.
File size: 69.4 KB

Line
1	/*
2	* This program is free software; you can redistribute it and/or modify
3	* it under the terms of the GNU General Public License as published by
4	* the Free Software Foundation; either version 2 of the License, or
5	* (at your option) any later version.
6	*
7	* This program is distributed in the hope that it will be useful,
8	* but WITHOUT ANY WARRANTY; without even the implied warranty of
9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	* GNU General Public License for more details.
11	*
12	* You should have received a copy of the GNU General Public License
13	* along with this program; if not, write to the Free Software
14	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15	*/
16
17	/*
18	* Instances.java
19	* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20	*
21	*/
22
23	package weka.core;
24
25	import weka.core.converters.ArffLoader.ArffReader;
26	import weka.core.converters.ConverterUtils.DataSource;
27
28	import java.io.FileReader;
29	import java.io.IOException;
30	import java.io.Reader;
31	import java.io.Serializable;
32	import java.util.Enumeration;
33	import java.util.Random;
34	import java.util.List;
35	import java.util.AbstractList;
36	import java.util.ArrayList;
37
38	/**
39	* Class for handling an ordered set of weighted instances. <p>
40	*
41	* Typical usage: <p>
42	* <pre>
43	* import weka.core.converters.ConverterUtils.DataSource;
44	* ...
45	*
46	* // Read all the instances in the file (ARFF, CSV, XRFF, ...)
47	* DataSource source = new DataSource(filename);
48	* Instances instances = source.getDataSet();
49	*
50	* // Make the last attribute be the class
51	* instances.setClassIndex(instances.numAttributes() - 1);
52	*
53	* // Print header and instances.
54	* System.out.println("\nDataset:\n");
55	* System.out.println(instances);
56	*
57	* ...
58	* </pre><p>
59	*
60	* All methods that change a set of instances are safe, ie. a change
61	* of a set of instances does not affect any other sets of
62	* instances. All methods that change a dataset's attribute
63	* information clone the dataset before it is changed.
64	*
65	* @author Eibe Frank (eibe@cs.waikato.ac.nz)
66	* @author Len Trigg (trigg@cs.waikato.ac.nz)
67	* @author FracPete (fracpete at waikato dot ac dot nz)
68	* @version $Revision: 5987 $
69	*/
70	public class Instances extends AbstractList<Instance>
71	implements Serializable, RevisionHandler {
72
73	/** for serialization */
74	static final long serialVersionUID = -19412345060742748L;
75
76	/** The filename extension that should be used for arff files */
77	public final static String FILE_EXTENSION = ".arff";
78
79	/** The filename extension that should be used for bin. serialized instances files */
80	public final static String SERIALIZED_OBJ_FILE_EXTENSION = ".bsi";
81
82	/** The keyword used to denote the start of an arff header */
83	public final static String ARFF_RELATION = "@relation";
84
85	/** The keyword used to denote the start of the arff data section */
86	public final static String ARFF_DATA = "@data";
87
88	/** The dataset's name. */
89	protected /@spec_public non_null@/ String m_RelationName;
90
91	/** The attribute information. */
92	protected /@spec_public non_null@/ ArrayList<Attribute> m_Attributes;
93	/* public invariant (\forall int i; 0 <= i && i < m_Attributes.size();
94	m_Attributes.get(i) != null);
95	*/
96
97	/** The instances. */
98	protected /@spec_public non_null@/ ArrayList<Instance> m_Instances;
99
100	/** The class attribute's index */
101	protected int m_ClassIndex;
102	//@ protected invariant classIndex() == m_ClassIndex;
103
104	/** The lines read so far in case of incremental loading. Since the
105	* StreamTokenizer will be re-initialized with every instance that is read,
106	* we have to keep track of the number of lines read so far.
107	* @see #readInstance(Reader) */
108	protected int m_Lines = 0;
109
110	/**
111	* Reads an ARFF file from a reader, and assigns a weight of
112	* one to each instance. Lets the index of the class
113	* attribute be undefined (negative).
114	*
115	* @param reader the reader
116	* @throws IOException if the ARFF file is not read
117	* successfully
118	*/
119	public Instances(/@non_null@/Reader reader) throws IOException {
120	ArffReader arff = new ArffReader(reader);
121	Instances dataset = arff.getData();
122	initialize(dataset, dataset.numInstances());
123	dataset.copyInstances(0, this, dataset.numInstances());
124	compactify();
125	}
126
127	/**
128	* Reads the header of an ARFF file from a reader and
129	* reserves space for the given number of instances. Lets
130	* the class index be undefined (negative).
131	*
132	* @param reader the reader
133	* @param capacity the capacity
134	* @throws IllegalArgumentException if the header is not read successfully
135	* or the capacity is negative.
136	* @throws IOException if there is a problem with the reader.
137	* @deprecated instead of using this method in conjunction with the
138	* <code>readInstance(Reader)</code> method, one should use the
139	* <code>ArffLoader</code> or <code>DataSource</code> class instead.
140	* @see weka.core.converters.ArffLoader
141	* @see weka.core.converters.ConverterUtils.DataSource
142	*/
143	//@ requires capacity >= 0;
144	//@ ensures classIndex() == -1;
145	@Deprecated public Instances(/@non_null@/Reader reader, int capacity)
146	throws IOException {
147
148	ArffReader arff = new ArffReader(reader, 0);
149	Instances header = arff.getStructure();
150	initialize(header, capacity);
151	m_Lines = arff.getLineNo();
152	}
153
154	/**
155	* Constructor copying all instances and references to
156	* the header information from the given set of instances.
157	*
158	* @param dataset the set to be copied
159	*/
160	public Instances(/@non_null@/Instances dataset) {
161
162	this(dataset, dataset.numInstances());
163
164	dataset.copyInstances(0, this, dataset.numInstances());
165	}
166
167	/**
168	* Constructor creating an empty set of instances. Copies references
169	* to the header information from the given set of instances. Sets
170	* the capacity of the set of instances to 0 if it's negative.
171	*
172	* @param dataset the instances from which the header
173	* information is to be taken
174	* @param capacity the capacity of the new dataset
175	*/
public Instances(/*@non_null@*/Instances dataset, int capacity) {
  // All work (header sharing, capacity clamping) is done by initialize().
  this.initialize(dataset, capacity);
}
179
180	/**
181	* initializes with the header information of the given dataset and sets
182	* the capacity of the set of instances.
183	*
184	* @param dataset the dataset to use as template
185	* @param capacity the number of rows to reserve
186	*/
187	protected void initialize(Instances dataset, int capacity) {
188	if (capacity < 0)
189	capacity = 0;
190
191	// Strings only have to be "shallow" copied because
192	// they can't be modified.
193	m_ClassIndex = dataset.m_ClassIndex;
194	m_RelationName = dataset.m_RelationName;
195	m_Attributes = dataset.m_Attributes;
196	m_Instances = new ArrayList<Instance>(capacity);
197	}
198
199	/**
200	* Creates a new set of instances by copying a
201	* subset of another set.
202	*
203	* @param source the set of instances from which a subset
204	* is to be created
205	* @param first the index of the first instance to be copied
206	* @param toCopy the number of instances to be copied
207	* @throws IllegalArgumentException if first and toCopy are out of range
208	*/
209	//@ requires 0 <= first;
210	//@ requires 0 <= toCopy;
211	//@ requires first + toCopy <= source.numInstances();
212	public Instances(/@non_null@/Instances source, int first, int toCopy) {
213
214	this(source, toCopy);
215
216	if ((first < 0) \|\| ((first + toCopy) > source.numInstances())) {
217	throw new IllegalArgumentException("Parameters first and/or toCopy out "+
218	"of range");
219	}
220	source.copyInstances(first, this, toCopy);
221	}
222
223	/**
224	* Creates an empty set of instances. Uses the given
225	* attribute information. Sets the capacity of the set of
226	* instances to 0 if it's negative. Given attribute information
227	* must not be changed after this constructor has been used.
228	*
229	* @param name the name of the relation
230	* @param attInfo the attribute information
231	* @param capacity the capacity of the set
232	*/
public Instances(/*@non_null@*/String name,
                 /*@non_null@*/ArrayList<Attribute> attInfo, int capacity) {

  m_RelationName = name;
  // No class attribute is assigned yet.
  m_ClassIndex = -1;
  // The given list is stored by reference, not copied.
  m_Attributes = attInfo;
  // Make every attribute's stored index agree with its position in the list.
  for (int i = 0; i < numAttributes(); i++) {
    attribute(i).setIndex(i);
  }
  // A negative capacity is documented to mean "0"; ArrayList's constructor
  // would otherwise reject it with an IllegalArgumentException.
  m_Instances = new ArrayList<Instance>(Math.max(capacity, 0));
}
244
245	/**
246	* Create a copy of the structure if the data has string or
247	* relational attributes, "cleanses" string types (i.e. doesn't
248	* contain references to the strings seen in the past) and all
249	* relational attributes.
250	*
251	* @return a copy of the instance structure.
252	*/
253	public Instances stringFreeStructure() {
254
255	ArrayList<Attribute> newAtts = new ArrayList<Attribute>();
256	for (int i = 0 ; i < m_Attributes.size(); i++) {
257	Attribute att = (Attribute)m_Attributes.get(i);
258	if (att.type() == Attribute.STRING) {
259	newAtts.add(new Attribute(att.name(), (List<String>)null, i));
260	} else if (att.type() == Attribute.RELATIONAL) {
261	newAtts.add(new Attribute(att.name(), new Instances(att.relation(), 0), i));
262	}
263	}
264	if (newAtts.size() == 0) {
265	return new Instances(this, 0);
266	}
267	ArrayList<Attribute> atts = Utils.cast(m_Attributes.clone());
268	for (int i = 0; i < newAtts.size(); i++) {
269	atts.set(((Attribute)newAtts.get(i)).index(), newAtts.get(i));
270	}
271	Instances result = new Instances(this, 0);
272	result.m_Attributes = atts;
273	return result;
274	}
275
276	/**
277	* Adds one instance to the end of the set.
278	* Shallow copies instance before it is added. Increases the
279	* size of the dataset if it is not large enough. Does not
280	* check if the instance is compatible with the dataset.
281	* Note: String or relational values are not transferred.
282	*
283	* @param instance the instance to be added
284	*/
285	public boolean add(/@non_null@/ Instance instance) {
286
287	Instance newInstance = (Instance)instance.copy();
288
289	newInstance.setDataset(this);
290	m_Instances.add(newInstance);
291
292	return true;
293	}
294
295	/**
296	* Adds one instance to the end of the set.
297	* Shallow copies instance before it is added. Increases the
298	* size of the dataset if it is not large enough. Does not
299	* check if the instance is compatible with the dataset.
300	* Note: String or relational values are not transferred.
301	*
302	* @param index position where instance is to be inserted
303	* @param instance the instance to be added
304	*/
305	//@ requires 0 <= index;
306	//@ requires index < m_Instances.size();
307	public void add(int index, /@non_null@/ Instance instance) {
308
309	Instance newInstance = (Instance)instance.copy();
310
311	newInstance.setDataset(this);
312	m_Instances.add(index, newInstance);
313	}
314
315	/**
316	* Returns an attribute.
317	*
318	* @param index the attribute's index (index starts with 0)
319	* @return the attribute at the given position
320	*/
321	//@ requires 0 <= index;
322	//@ requires index < m_Attributes.size();
323	//@ ensures \result != null;
324	public /@pure@/ Attribute attribute(int index) {
325
326	return (Attribute) m_Attributes.get(index);
327	}
328
329	/**
330	* Returns an attribute given its name. If there is more than
331	* one attribute with the same name, it returns the first one.
332	* Returns null if the attribute can't be found.
333	*
334	* @param name the attribute's name
335	* @return the attribute with the given name, null if the
336	* attribute can't be found
337	*/
338	public /@pure@/ Attribute attribute(String name) {
339
340	for (int i = 0; i < numAttributes(); i++) {
341	if (attribute(i).name().equals(name)) {
342	return attribute(i);
343	}
344	}
345	return null;
346	}
347
348	/**
349	* Checks for attributes of the given type in the dataset
350	*
351	* @param attType the attribute type to look for
352	* @return true if attributes of the given type are present
353	*/
354	public boolean checkForAttributeType(int attType) {
355
356	int i = 0;
357
358	while (i < m_Attributes.size()) {
359	if (attribute(i++).type() == attType) {
360	return true;
361	}
362	}
363	return false;
364	}
365
366	/**
367	* Checks for string attributes in the dataset
368	*
369	* @return true if string attributes are present, false otherwise
370	*/
371	public /@pure@/ boolean checkForStringAttributes() {
372	return checkForAttributeType(Attribute.STRING);
373	}
374
375	/**
376	* Checks if the given instance is compatible
377	* with this dataset. Only looks at the size of
378	* the instance and the ranges of the values for
379	* nominal and string attributes.
380	*
381	* @param instance the instance to check
382	* @return true if the instance is compatible with the dataset
383	*/
384	public /@pure@/ boolean checkInstance(Instance instance) {
385
386	if (instance.numAttributes() != numAttributes()) {
387	return false;
388	}
389	for (int i = 0; i < numAttributes(); i++) {
390	if (instance.isMissing(i)) {
391	continue;
392	} else if (attribute(i).isNominal() \|\|
393	attribute(i).isString()) {
394	if (!(Utils.eq(instance.value(i),
395	(double)(int)instance.value(i)))) {
396	return false;
397	} else if (Utils.sm(instance.value(i), 0) \|\|
398	Utils.gr(instance.value(i),
399	attribute(i).numValues())) {
400	return false;
401	}
402	}
403	}
404	return true;
405	}
406
407	/**
408	* Returns the class attribute.
409	*
410	* @return the class attribute
411	* @throws UnassignedClassException if the class is not set
412	*/
413	//@ requires classIndex() >= 0;
414	public /@pure@/ Attribute classAttribute() {
415
416	if (m_ClassIndex < 0) {
417	throw new UnassignedClassException("Class index is negative (not set)!");
418	}
419	return attribute(m_ClassIndex);
420	}
421
422	/**
423	* Returns the class attribute's index. Returns negative number
424	* if it's undefined.
425	*
426	* @return the class index as an integer
427	*/
428	// ensures \result == m_ClassIndex;
429	public /@pure@/ int classIndex() {
430
431	return m_ClassIndex;
432	}
433
434	/**
435	* Compactifies the set of instances. Decreases the capacity of
436	* the set so that it matches the number of instances in the set.
437	*/
438	public void compactify() {
439
440	m_Instances.trimToSize();
441	}
442
443	/**
444	* Removes all instances from the set.
445	*/
446	public void delete() {
447
448	m_Instances = new ArrayList<Instance>();
449	}
450
451	/**
452	* Removes an instance at the given position from the set.
453	*
454	* @param index the instance's position (index starts with 0)
455	*/
456	//@ requires 0 <= index && index < numInstances();
457	public void delete(int index) {
458
459	m_Instances.remove(index);
460	}
461
462	/**
463	* Deletes an attribute at the given position
464	* (0 to numAttributes() - 1). A deep copy of the attribute
465	* information is performed before the attribute is deleted.
466	*
467	* @param position the attribute's position (position starts with 0)
468	* @throws IllegalArgumentException if the given index is out of range
469	* or the class attribute is being deleted
470	*/
471	//@ requires 0 <= position && position < numAttributes();
472	//@ requires position != classIndex();
473	public void deleteAttributeAt(int position) {
474
475	if ((position < 0) \|\| (position >= m_Attributes.size())) {
476	throw new IllegalArgumentException("Index out of range");
477	}
478	if (position == m_ClassIndex) {
479	throw new IllegalArgumentException("Can't delete class attribute");
480	}
481	freshAttributeInfo();
482	if (m_ClassIndex > position) {
483	m_ClassIndex--;
484	}
485	m_Attributes.remove(position);
486	for (int i = position; i < m_Attributes.size(); i++) {
487	Attribute current = (Attribute)m_Attributes.get(i);
488	current.setIndex(current.index() - 1);
489	}
490	for (int i = 0; i < numInstances(); i++) {
491	instance(i).setDataset(null);
492	instance(i).deleteAttributeAt(position);
493	instance(i).setDataset(this);
494	}
495	}
496
497	/**
498	* Deletes all attributes of the given type in the dataset. A deep copy of
499	* the attribute information is performed before an attribute is deleted.
500	*
501	* @param attType the attribute type to delete
502	* @throws IllegalArgumentException if attribute couldn't be
503	* successfully deleted (probably because it is the class attribute).
504	*/
505	public void deleteAttributeType(int attType) {
506	int i = 0;
507	while (i < m_Attributes.size()) {
508	if (attribute(i).type() == attType) {
509	deleteAttributeAt(i);
510	} else {
511	i++;
512	}
513	}
514	}
515
516	/**
517	* Deletes all string attributes in the dataset. A deep copy of the attribute
518	* information is performed before an attribute is deleted.
519	*
520	* @throws IllegalArgumentException if string attribute couldn't be
521	* successfully deleted (probably because it is the class attribute).
522	* @see #deleteAttributeType(int)
523	*/
524	public void deleteStringAttributes() {
525	deleteAttributeType(Attribute.STRING);
526	}
527
528	/**
529	* Removes all instances with missing values for a particular
530	* attribute from the dataset.
531	*
532	* @param attIndex the attribute's index (index starts with 0)
533	*/
534	//@ requires 0 <= attIndex && attIndex < numAttributes();
535	public void deleteWithMissing(int attIndex) {
536
537	ArrayList<Instance> newInstances = new ArrayList<Instance>(numInstances());
538
539	for (int i = 0; i < numInstances(); i++) {
540	if (!instance(i).isMissing(attIndex)) {
541	newInstances.add(instance(i));
542	}
543	}
544	m_Instances = newInstances;
545	}
546
547	/**
548	* Removes all instances with missing values for a particular
549	* attribute from the dataset.
550	*
551	* @param att the attribute
552	*/
553	public void deleteWithMissing(/@non_null@/ Attribute att) {
554
555	deleteWithMissing(att.index());
556	}
557
558	/**
559	* Removes all instances with a missing class value
560	* from the dataset.
561	*
562	* @throws UnassignedClassException if class is not set
563	*/
564	public void deleteWithMissingClass() {
565
566	if (m_ClassIndex < 0) {
567	throw new UnassignedClassException("Class index is negative (not set)!");
568	}
569	deleteWithMissing(m_ClassIndex);
570	}
571
572	/**
573	* Returns an enumeration of all the attributes.
574	*
575	* @return enumeration of all the attributes.
576	*/
577	public /@non_null pure@/ Enumeration enumerateAttributes() {
578
579	return new WekaEnumeration(m_Attributes, m_ClassIndex);
580	}
581
582	/**
583	* Returns an enumeration of all instances in the dataset.
584	*
585	* @return enumeration of all instances in the dataset
586	*/
587	public /@non_null pure@/ Enumeration enumerateInstances() {
588
589	return new WekaEnumeration(m_Instances);
590	}
591
592	/**
593	* Checks if two headers are equivalent. If not, then returns a message why
594	* they differ.
595	*
596	* @param dataset another dataset
597	* @return null if the header of the given dataset is equivalent
598	* to this header, otherwise a message with details on
599	* why they differ
600	*/
601	public String equalHeadersMsg(Instances dataset) {
602	// Check class and all attributes
603	if (m_ClassIndex != dataset.m_ClassIndex)
604	return "Class index differ: " + (m_ClassIndex+1) + " != " + (dataset.m_ClassIndex+1);
605
606	if (m_Attributes.size() != dataset.m_Attributes.size())
607	return "Different number of attributes: " + m_Attributes.size() + " != " + dataset.m_Attributes.size();
608
609	for (int i = 0; i < m_Attributes.size(); i++) {
610	String msg = attribute(i).equalsMsg(dataset.attribute(i));
611	if (msg != null)
612	return "Attributes differ at position " + (i+1) + ":\n" + msg;
613	}
614
615	return null;
616	}
617
618	/**
619	* Checks if two headers are equivalent.
620	*
621	* @param dataset another dataset
622	* @return true if the header of the given dataset is equivalent
623	* to this header
624	*/
625	public /@pure@/ boolean equalHeaders(Instances dataset){
626	return (equalHeadersMsg(dataset) == null);
627	}
628
629	/**
630	* Returns the first instance in the set.
631	*
632	* @return the first instance in the set
633	*/
634	//@ requires numInstances() > 0;
635	public /@non_null pure@/ Instance firstInstance() {
636
637	return (Instance)m_Instances.get(0);
638	}
639
640	/**
641	* Returns a random number generator. The initial seed of the random
642	* number generator depends on the given seed and the hash code of
643	* a string representation of an instance chosen based on the given
644	* seed.
645	*
646	* @param seed the given seed
647	* @return the random number generator
648	*/
649	public Random getRandomNumberGenerator(long seed) {
650
651	Random r = new Random(seed);
652	r.setSeed(instance(r.nextInt(numInstances())).toStringNoWeight().hashCode() + seed);
653	return r;
654	}
655
656	/**
657	* Inserts an attribute at the given position (0 to
658	* numAttributes()) and sets all values to be missing.
659	* Shallow copies the attribute before it is inserted, and performs
660	* a deep copy of the existing attribute information.
661	*
662	* @param att the attribute to be inserted
663	* @param position the attribute's position (position starts with 0)
664	* @throws IllegalArgumentException if the given index is out of range
665	*/
666	//@ requires 0 <= position;
667	//@ requires position <= numAttributes();
668	public void insertAttributeAt(/@non_null@/ Attribute att, int position) {
669
670	if ((position < 0) \|\|
671	(position > m_Attributes.size())) {
672	throw new IllegalArgumentException("Index out of range");
673	}
674	att = (Attribute)att.copy();
675	freshAttributeInfo();
676	att.setIndex(position);
677	m_Attributes.add(position, att);
678	for (int i = position + 1; i < m_Attributes.size(); i++) {
679	Attribute current = (Attribute)m_Attributes.get(i);
680	current.setIndex(current.index() + 1);
681	}
682	for (int i = 0; i < numInstances(); i++) {
683	instance(i).setDataset(null);
684	instance(i).insertAttributeAt(position);
685	instance(i).setDataset(this);
686	}
687	if (m_ClassIndex >= position) {
688	m_ClassIndex++;
689	}
690	}
691
692	/**
693	* Returns the instance at the given position.
694	*
695	* @param index the instance's index (index starts with 0)
696	* @return the instance at the given position
697	*/
698	//@ requires 0 <= index;
699	//@ requires index < numInstances();
700	public /@non_null pure@/ Instance instance(int index) {
701
702	return m_Instances.get(index);
703	}
704
705	/**
706	* Returns the instance at the given position.
707	*
708	* @param index the instance's index (index starts with 0)
709	* @return the instance at the given position
710	*/
711	//@ requires 0 <= index;
712	//@ requires index < numInstances();
713	public /@non_null pure@/ Instance get(int index) {
714
715	return m_Instances.get(index);
716	}
717
718	/**
719	* Returns the kth-smallest attribute value of a numeric attribute.
720	* Note that calling this method will change the order of the data!
721	*
722	* @param att the Attribute object
723	* @param k the value of k
724	* @return the kth-smallest value
725	*/
726	public double kthSmallestValue(Attribute att, int k) {
727
728	return kthSmallestValue(att.index(), k);
729	}
730
731	/**
732	* Returns the kth-smallest attribute value of a numeric attribute.
733	* Note that calling this method will change the order of the data!
734	* The number of non-missing values in the data must be at least
735	* as large as k for this to work.
736	*
737	* @param attIndex the attribute's index
738	* @param k the value of k
739	* @return the kth-smallest value
740	*/
741	public double kthSmallestValue(int attIndex, int k) {
742
743	if (!attribute(attIndex).isNumeric()) {
744	throw new IllegalArgumentException("Instances: attribute must be numeric to compute kth-smallest value.");
745	}
746
747	int i,j;
748
749	// move all instances with missing values to end
750	j = numInstances() - 1;
751	i = 0;
752	while (i <= j) {
753	if (instance(j).isMissing(attIndex)) {
754	j--;
755	} else {
756	if (instance(i).isMissing(attIndex)) {
757	swap(i,j);
758	j--;
759	}
760	i++;
761	}
762	}
763
764	if ((k < 1) \|\| (k > j+1)) {
765	throw new IllegalArgumentException("Instances: value for k for computing kth-smallest value too large.");
766	}
767
768	return instance(select(attIndex, 0, j, k)).value(attIndex);
769	}
770
771	/**
772	* Returns the last instance in the set.
773	*
774	* @return the last instance in the set
775	*/
776	//@ requires numInstances() > 0;
777	public /@non_null pure@/ Instance lastInstance() {
778
779	return (Instance)m_Instances.get(m_Instances.size() - 1);
780	}
781
782	/**
783	* Returns the mean (mode) for a numeric (nominal) attribute as
784	* a floating-point value. Returns 0 if the attribute is neither nominal nor
785	* numeric. If all values are missing it returns zero.
786	*
787	* @param attIndex the attribute's index (index starts with 0)
788	* @return the mean or the mode
789	*/
790	public /@pure@/ double meanOrMode(int attIndex) {
791
792	double result, found;
793	int [] counts;
794
795	if (attribute(attIndex).isNumeric()) {
796	result = found = 0;
797	for (int j = 0; j < numInstances(); j++) {
798	if (!instance(j).isMissing(attIndex)) {
799	found += instance(j).weight();
800	result += instance(j).weight()*instance(j).value(attIndex);
801	}
802	}
803	if (found <= 0) {
804	return 0;
805	} else {
806	return result / found;
807	}
808	} else if (attribute(attIndex).isNominal()) {
809	counts = new int[attribute(attIndex).numValues()];
810	for (int j = 0; j < numInstances(); j++) {
811	if (!instance(j).isMissing(attIndex)) {
812	counts[(int) instance(j).value(attIndex)] += instance(j).weight();
813	}
814	}
815	return (double)Utils.maxIndex(counts);
816	} else {
817	return 0;
818	}
819	}
820
821	/**
822	* Returns the mean (mode) for a numeric (nominal) attribute as a
823	* floating-point value. Returns 0 if the attribute is neither
824	* nominal nor numeric. If all values are missing it returns zero.
825	*
826	* @param att the attribute
827	* @return the mean or the mode
828	*/
829	public /@pure@/ double meanOrMode(Attribute att) {
830
831	return meanOrMode(att.index());
832	}
833
834	/**
835	* Returns the number of attributes.
836	*
837	* @return the number of attributes as an integer
838	*/
839	//@ ensures \result == m_Attributes.size();
840	public /@pure@/ int numAttributes() {
841
842	return m_Attributes.size();
843	}
844
845	/**
846	* Returns the number of class labels.
847	*
848	* @return the number of class labels as an integer if the class
849	* attribute is nominal, 1 otherwise.
850	* @throws UnassignedClassException if the class is not set
851	*/
852	//@ requires classIndex() >= 0;
853	public /@pure@/ int numClasses() {
854
855	if (m_ClassIndex < 0) {
856	throw new UnassignedClassException("Class index is negative (not set)!");
857	}
858	if (!classAttribute().isNominal()) {
859	return 1;
860	} else {
861	return classAttribute().numValues();
862	}
863	}
864
865	/**
866	* Returns the number of distinct values of a given attribute.
867	* Returns the number of instances if the attribute is a
868	* string attribute. The value 'missing' is not counted.
869	*
870	* @param attIndex the attribute (index starts with 0)
871	* @return the number of distinct values of a given attribute
872	*/
873	//@ requires 0 <= attIndex;
874	//@ requires attIndex < numAttributes();
875	public /@pure@/ int numDistinctValues(int attIndex) {
876
877	if (attribute(attIndex).isNumeric()) {
878	double [] attVals = attributeToDoubleArray(attIndex);
879	int [] sorted = Utils.sort(attVals);
880	double prev = 0;
881	int counter = 0;
882	for (int i = 0; i < sorted.length; i++) {
883	Instance current = instance(sorted[i]);
884	if (current.isMissing(attIndex)) {
885	break;
886	}
887	if ((i == 0) \|\|
888	(current.value(attIndex) > prev)) {
889	prev = current.value(attIndex);
890	counter++;
891	}
892	}
893	return counter;
894	} else {
895	return attribute(attIndex).numValues();
896	}
897	}
898
899	/**
900	* Returns the number of distinct values of a given attribute.
901	* Returns the number of instances if the attribute is a
902	* string attribute. The value 'missing' is not counted.
903	*
904	* @param att the attribute
905	* @return the number of distinct values of a given attribute
906	*/
907	public /@pure@/ int numDistinctValues(/@non_null@/Attribute att) {
908
909	return numDistinctValues(att.index());
910	}
911
912	/**
913	* Returns the number of instances in the dataset.
914	*
915	* @return the number of instances in the dataset as an integer
916	*/
917	//@ ensures \result == m_Instances.size();
918	public /@pure@/ int numInstances() {
919
920	return m_Instances.size();
921	}
922
923	/**
924	* Returns the number of instances in the dataset.
925	*
926	* @return the number of instances in the dataset as an integer
927	*/
928	//@ ensures \result == m_Instances.size();
929	public /@pure@/ int size() {
930
931	return m_Instances.size();
932	}
933
934	/**
935	* Shuffles the instances in the set so that they are ordered
936	* randomly.
937	*
938	* @param random a random number generator
939	*/
940	public void randomize(Random random) {
941
942	for (int j = numInstances() - 1; j > 0; j--)
943	swap(j, random.nextInt(j+1));
944	}
945
946	/**
947	* Reads a single instance from the reader and appends it
948	* to the dataset. Automatically expands the dataset if it
949	* is not large enough to hold the instance. This method does
950	* not check for carriage return at the end of the line.
951	*
952	* @param reader the reader
953	* @return false if end of file has been reached
954	* @throws IOException if the information is not read
955	* successfully
956	* @deprecated instead of using this method in conjunction with the
957	* <code>Instances(Reader, int)</code> constructor, one should use the
958	* <code>ArffLoader</code> or <code>DataSource</code> class instead.
959	* @see weka.core.converters.ArffLoader
960	* @see weka.core.converters.ConverterUtils.DataSource
961	*/
962	@Deprecated public boolean readInstance(Reader reader) throws IOException {
963
964	ArffReader arff = new ArffReader(reader, this, m_Lines, 1);
965	Instance inst = arff.readInstance(arff.getData(), false);
966	m_Lines = arff.getLineNo();
967	if (inst != null) {
968	add(inst);
969	return true;
970	}
971	else {
972	return false;
973	}
974	}
975
976	/**
977	* Returns the relation's name.
978	*
979	* @return the relation's name as a string
980	*/
981	//@ ensures \result == m_RelationName;
982	public /@pure@/ String relationName() {
983
984	return m_RelationName;
985	}
986
987	/**
988	* Removes the instance at the given position.
989	*
990	* @param index the instance's index (index starts with 0)
991	* @return the instance at the given position
992	*/
993	//@ requires 0 <= index;
994	//@ requires index < numInstances();
995	public Instance remove(int index) {
996
997	return m_Instances.remove(index);
998	}
999
1000	/**
1001	* Renames an attribute. This change only affects this
1002	* dataset.
1003	*
1004	* @param att the attribute's index (index starts with 0)
1005	* @param name the new name
1006	*/
1007	public void renameAttribute(int att, String name) {
1008
1009	Attribute newAtt = attribute(att).copy(name);
1010	ArrayList<Attribute> newVec = new ArrayList<Attribute>(numAttributes());
1011
1012	for (int i = 0; i < numAttributes(); i++) {
1013	if (i == att) {
1014	newVec.add(newAtt);
1015	} else {
1016	newVec.add(attribute(i));
1017	}
1018	}
1019	m_Attributes = newVec;
1020	}
1021
1022	/**
1023	* Renames an attribute. This change only affects this
1024	* dataset.
1025	*
1026	* @param att the attribute
1027	* @param name the new name
1028	*/
1029	public void renameAttribute(Attribute att, String name) {
1030
1031	renameAttribute(att.index(), name);
1032	}
1033
1034	/**
1035	* Renames the value of a nominal (or string) attribute value. This
1036	* change only affects this dataset.
1037	*
1038	* @param att the attribute's index (index starts with 0)
1039	* @param val the value's index (index starts with 0)
1040	* @param name the new name
1041	*/
1042	public void renameAttributeValue(int att, int val, String name) {
1043
1044	Attribute newAtt = (Attribute)attribute(att).copy();
1045	ArrayList<Attribute> newVec = new ArrayList<Attribute>(numAttributes());
1046
1047	newAtt.setValue(val, name);
1048	for (int i = 0; i < numAttributes(); i++) {
1049	if (i == att) {
1050	newVec.add(newAtt);
1051	} else {
1052	newVec.add(attribute(i));
1053	}
1054	}
1055	m_Attributes = newVec;
1056	}
1057
1058	/**
1059	* Renames the value of a nominal (or string) attribute value. This
1060	* change only affects this dataset.
1061	*
1062	* @param att the attribute
1063	* @param val the value
1064	* @param name the new name
1065	*/
1066	public void renameAttributeValue(Attribute att, String val,
1067	String name) {
1068
1069	int v = att.indexOfValue(val);
1070	if (v == -1) throw new IllegalArgumentException(val + " not found");
1071	renameAttributeValue(att.index(), v, name);
1072	}
1073
1074	/**
1075	* Creates a new dataset of the same size using random sampling
1076	* with replacement.
1077	*
1078	* @param random a random number generator
1079	* @return the new dataset
1080	*/
1081	public Instances resample(Random random) {
1082
1083	Instances newData = new Instances(this, numInstances());
1084	while (newData.numInstances() < numInstances()) {
1085	newData.add(instance(random.nextInt(numInstances())));
1086	}
1087	return newData;
1088	}
1089
1090	/**
1091	* Creates a new dataset of the same size using random sampling
1092	* with replacement according to the current instance weights. The
1093	* weights of the instances in the new dataset are set to one.
1094	*
1095	* @param random a random number generator
1096	* @return the new dataset
1097	*/
1098	public Instances resampleWithWeights(Random random) {
1099
1100	double [] weights = new double[numInstances()];
1101	for (int i = 0; i < weights.length; i++) {
1102	weights[i] = instance(i).weight();
1103	}
1104	return resampleWithWeights(random, weights);
1105	}
1106
1107
1108	/**
1109	* Creates a new dataset of the same size using random sampling
1110	* with replacement according to the given weight vector. The
1111	* weights of the instances in the new dataset are set to one.
1112	* The length of the weight vector has to be the same as the
1113	* number of instances in the dataset, and all weights have to
1114	* be positive.
1115	*
1116	* @param random a random number generator
1117	* @param weights the weight vector
1118	* @return the new dataset
1119	* @throws IllegalArgumentException if the weights array is of the wrong
1120	* length or contains negative weights.
1121	*/
1122	public Instances resampleWithWeights(Random random,
1123	double[] weights) {
1124
1125	if (weights.length != numInstances()) {
1126	throw new IllegalArgumentException("weights.length != numInstances.");
1127	}
1128	Instances newData = new Instances(this, numInstances());
1129	if (numInstances() == 0) {
1130	return newData;
1131	}
1132	double[] probabilities = new double[numInstances()];
1133	double sumProbs = 0, sumOfWeights = Utils.sum(weights);
1134	for (int i = 0; i < numInstances(); i++) {
1135	sumProbs += random.nextDouble();
1136	probabilities[i] = sumProbs;
1137	}
1138	Utils.normalize(probabilities, sumProbs / sumOfWeights);
1139
1140	// Make sure that rounding errors don't mess things up
1141	probabilities[numInstances() - 1] = sumOfWeights;
1142	int k = 0; int l = 0;
1143	sumProbs = 0;
1144	while ((k < numInstances() && (l < numInstances()))) {
1145	if (weights[l] < 0) {
1146	throw new IllegalArgumentException("Weights have to be positive.");
1147	}
1148	sumProbs += weights[l];
1149	while ((k < numInstances()) &&
1150	(probabilities[k] <= sumProbs)) {
1151	newData.add(instance(l));
1152	newData.instance(k).setWeight(1);
1153	k++;
1154	}
1155	l++;
1156	}
1157	return newData;
1158	}
1159
1160	/**
1161	* Replaces the instance at the given position.
1162	* Shallow copies instance before it is added. Does not
1163	* check if the instance is compatible with the dataset.
1164	* Note: String or relational values are not transferred.
1165	*
1166	* @param index position where instance is to be inserted
1167	* @param instance the instance to be inserted
1168	* @return the instance previously at that position
1169	*/
1170	//@ requires 0 <= index;
1171	//@ requires index < m_Instances.size();
1172	public Instance set(int index, /@non_null@/ Instance instance) {
1173
1174	Instance newInstance = (Instance)instance.copy();
1175	Instance oldInstance = m_Instances.get(index);
1176
1177	newInstance.setDataset(this);
1178	m_Instances.set(index, newInstance);
1179
1180	return oldInstance;
1181	}
1182
1183	/**
1184	* Sets the class attribute.
1185	*
1186	* @param att attribute to be the class
1187	*/
1188	public void setClass(Attribute att) {
1189
1190	m_ClassIndex = att.index();
1191	}
1192
1193	/**
1194	* Sets the class index of the set.
1195	* If the class index is negative there is assumed to be no class.
1196	* (ie. it is undefined)
1197	*
1198	* @param classIndex the new class index (index starts with 0)
1199	* @throws IllegalArgumentException if the class index is too big or < 0
1200	*/
1201	public void setClassIndex(int classIndex) {
1202
1203	if (classIndex >= numAttributes()) {
1204	throw new IllegalArgumentException("Invalid class index: " + classIndex);
1205	}
1206	m_ClassIndex = classIndex;
1207	}
1208
1209	/**
1210	* Sets the relation's name.
1211	*
1212	* @param newName the new relation name.
1213	*/
1214	public void setRelationName(/@non_null@/String newName) {
1215
1216	m_RelationName = newName;
1217	}
1218
1219	/**
1220	* Sorts the instances based on an attribute. For numeric attributes,
1221	* instances are sorted in ascending order. For nominal attributes,
1222	* instances are sorted based on the attribute label ordering
1223	* specified in the header. Instances with missing values for the
1224	* attribute are placed at the end of the dataset.
1225	*
1226	* @param attIndex the attribute's index (index starts with 0)
1227	*/
1228	public void sort(int attIndex) {
1229
1230	int i,j;
1231
1232	// move all instances with missing values to end
1233	j = numInstances() - 1;
1234	i = 0;
1235	while (i <= j) {
1236	if (instance(j).isMissing(attIndex)) {
1237	j--;
1238	} else {
1239	if (instance(i).isMissing(attIndex)) {
1240	swap(i,j);
1241	j--;
1242	}
1243	i++;
1244	}
1245	}
1246	quickSort(attIndex, 0, j);
1247	}
1248
1249	/**
1250	* Sorts the instances based on an attribute. For numeric attributes,
1251	* instances are sorted into ascending order. For nominal attributes,
1252	* instances are sorted based on the attribute label ordering
1253	* specified in the header. Instances with missing values for the
1254	* attribute are placed at the end of the dataset.
1255	*
1256	* @param att the attribute
1257	*/
1258	public void sort(Attribute att) {
1259
1260	sort(att.index());
1261	}
1262
1263	/**
1264	* Stratifies a set of instances according to its class values
1265	* if the class attribute is nominal (so that afterwards a
1266	* stratified cross-validation can be performed).
1267	*
1268	* @param numFolds the number of folds in the cross-validation
1269	* @throws UnassignedClassException if the class is not set
1270	*/
1271	public void stratify(int numFolds) {
1272
1273	if (numFolds <= 1) {
1274	throw new IllegalArgumentException("Number of folds must be greater than 1");
1275	}
1276	if (m_ClassIndex < 0) {
1277	throw new UnassignedClassException("Class index is negative (not set)!");
1278	}
1279	if (classAttribute().isNominal()) {
1280
1281	// sort by class
1282	int index = 1;
1283	while (index < numInstances()) {
1284	Instance instance1 = instance(index - 1);
1285	for (int j = index; j < numInstances(); j++) {
1286	Instance instance2 = instance(j);
1287	if ((instance1.classValue() == instance2.classValue()) \|\|
1288	(instance1.classIsMissing() &&
1289	instance2.classIsMissing())) {
1290	swap(index,j);
1291	index++;
1292	}
1293	}
1294	index++;
1295	}
1296	stratStep(numFolds);
1297	}
1298	}
1299
1300	/**
1301	* Computes the sum of all the instances' weights.
1302	*
1303	* @return the sum of all the instances' weights as a double
1304	*/
1305	public /@pure@/ double sumOfWeights() {
1306
1307	double sum = 0;
1308
1309	for (int i = 0; i < numInstances(); i++) {
1310	sum += instance(i).weight();
1311	}
1312	return sum;
1313	}
1314
1315	/**
1316	* Creates the test set for one fold of a cross-validation on
1317	* the dataset.
1318	*
1319	* @param numFolds the number of folds in the cross-validation. Must
1320	* be greater than 1.
1321	* @param numFold 0 for the first fold, 1 for the second, ...
1322	* @return the test set as a set of weighted instances
1323	* @throws IllegalArgumentException if the number of folds is less than 2
1324	* or greater than the number of instances.
1325	*/
1326	//@ requires 2 <= numFolds && numFolds < numInstances();
1327	//@ requires 0 <= numFold && numFold < numFolds;
1328	public Instances testCV(int numFolds, int numFold) {
1329
1330	int numInstForFold, first, offset;
1331	Instances test;
1332
1333	if (numFolds < 2) {
1334	throw new IllegalArgumentException("Number of folds must be at least 2!");
1335	}
1336	if (numFolds > numInstances()) {
1337	throw new IllegalArgumentException("Can't have more folds than instances!");
1338	}
1339	numInstForFold = numInstances() / numFolds;
1340	if (numFold < numInstances() % numFolds){
1341	numInstForFold++;
1342	offset = numFold;
1343	}else
1344	offset = numInstances() % numFolds;
1345	test = new Instances(this, numInstForFold);
1346	first = numFold * (numInstances() / numFolds) + offset;
1347	copyInstances(first, test, numInstForFold);
1348	return test;
1349	}
1350
1351	/**
1352	* Returns the dataset as a string in ARFF format. Strings
1353	* are quoted if they contain whitespace characters, or if they
1354	* are a question mark.
1355	*
1356	* @return the dataset in ARFF format as a string
1357	*/
1358	public String toString() {
1359
1360	StringBuffer text = new StringBuffer();
1361
1362	text.append(ARFF_RELATION).append(" ").
1363	append(Utils.quote(m_RelationName)).append("\n\n");
1364	for (int i = 0; i < numAttributes(); i++) {
1365	text.append(attribute(i)).append("\n");
1366	}
1367	text.append("\n").append(ARFF_DATA).append("\n");
1368
1369	text.append(stringWithoutHeader());
1370	return text.toString();
1371	}
1372
1373	/**
1374	* Returns the instances in the dataset as a string in ARFF format. Strings
1375	* are quoted if they contain whitespace characters, or if they
1376	* are a question mark.
1377	*
1378	* @return the dataset in ARFF format as a string
1379	*/
1380	protected String stringWithoutHeader() {
1381
1382	StringBuffer text = new StringBuffer();
1383
1384	for (int i = 0; i < numInstances(); i++) {
1385	text.append(instance(i));
1386	if (i < numInstances() - 1) {
1387	text.append('\n');
1388	}
1389	}
1390	return text.toString();
1391	}
1392
1393	/**
1394	* Creates the training set for one fold of a cross-validation
1395	* on the dataset.
1396	*
1397	* @param numFolds the number of folds in the cross-validation. Must
1398	* be greater than 1.
1399	* @param numFold 0 for the first fold, 1 for the second, ...
1400	* @return the training set
1401	* @throws IllegalArgumentException if the number of folds is less than 2
1402	* or greater than the number of instances.
1403	*/
1404	//@ requires 2 <= numFolds && numFolds < numInstances();
1405	//@ requires 0 <= numFold && numFold < numFolds;
1406	public Instances trainCV(int numFolds, int numFold) {
1407
1408	int numInstForFold, first, offset;
1409	Instances train;
1410
1411	if (numFolds < 2) {
1412	throw new IllegalArgumentException("Number of folds must be at least 2!");
1413	}
1414	if (numFolds > numInstances()) {
1415	throw new IllegalArgumentException("Can't have more folds than instances!");
1416	}
1417	numInstForFold = numInstances() / numFolds;
1418	if (numFold < numInstances() % numFolds) {
1419	numInstForFold++;
1420	offset = numFold;
1421	}else
1422	offset = numInstances() % numFolds;
1423	train = new Instances(this, numInstances() - numInstForFold);
1424	first = numFold * (numInstances() / numFolds) + offset;
1425	copyInstances(0, train, first);
1426	copyInstances(first + numInstForFold, train,
1427	numInstances() - first - numInstForFold);
1428
1429	return train;
1430	}
1431
1432	/**
1433	* Creates the training set for one fold of a cross-validation
1434	* on the dataset. The data is subsequently randomized based
1435	* on the given random number generator.
1436	*
1437	* @param numFolds the number of folds in the cross-validation. Must
1438	* be greater than 1.
1439	* @param numFold 0 for the first fold, 1 for the second, ...
1440	* @param random the random number generator
1441	* @return the training set
1442	* @throws IllegalArgumentException if the number of folds is less than 2
1443	* or greater than the number of instances.
1444	*/
1445	//@ requires 2 <= numFolds && numFolds < numInstances();
1446	//@ requires 0 <= numFold && numFold < numFolds;
1447	public Instances trainCV(int numFolds, int numFold, Random random) {
1448
1449	Instances train = trainCV(numFolds, numFold);
1450	train.randomize(random);
1451	return train;
1452	}
1453
1454	/**
1455	* Computes the variance for a numeric attribute.
1456	*
1457	* @param attIndex the numeric attribute (index starts with 0)
1458	* @return the variance if the attribute is numeric
1459	* @throws IllegalArgumentException if the attribute is not numeric
1460	*/
1461	public /@pure@/ double variance(int attIndex) {
1462
1463	double sum = 0, sumSquared = 0, sumOfWeights = 0;
1464
1465	if (!attribute(attIndex).isNumeric()) {
1466	throw new IllegalArgumentException("Can't compute variance because attribute is " +
1467	"not numeric!");
1468	}
1469	for (int i = 0; i < numInstances(); i++) {
1470	if (!instance(i).isMissing(attIndex)) {
1471	sum += instance(i).weight() *
1472	instance(i).value(attIndex);
1473	sumSquared += instance(i).weight() *
1474	instance(i).value(attIndex) *
1475	instance(i).value(attIndex);
1476	sumOfWeights += instance(i).weight();
1477	}
1478	}
1479	if (sumOfWeights <= 1) {
1480	return 0;
1481	}
1482	double result = (sumSquared - (sum * sum / sumOfWeights)) /
1483	(sumOfWeights - 1);
1484
1485	// We don't like negative variance
1486	if (result < 0) {
1487	return 0;
1488	} else {
1489	return result;
1490	}
1491	}
1492
1493	/**
1494	* Computes the variance for a numeric attribute.
1495	*
1496	* @param att the numeric attribute
1497	* @return the variance if the attribute is numeric
1498	* @throws IllegalArgumentException if the attribute is not numeric
1499	*/
1500	public /@pure@/ double variance(Attribute att) {
1501
1502	return variance(att.index());
1503	}
1504
1505	/**
1506	* Calculates summary statistics on the values that appear in this
1507	* set of instances for a specified attribute.
1508	*
1509	* @param index the index of the attribute to summarize (index starts with 0)
1510	* @return an AttributeStats object with it's fields calculated.
1511	*/
1512	//@ requires 0 <= index && index < numAttributes();
1513	public AttributeStats attributeStats(int index) {
1514
1515	AttributeStats result = new AttributeStats();
1516	if (attribute(index).isNominal()) {
1517	result.nominalCounts = new int [attribute(index).numValues()];
1518	result.nominalWeights = new double[attribute(index).numValues()];
1519	}
1520	if (attribute(index).isNumeric()) {
1521	result.numericStats = new weka.experiment.Stats();
1522	}
1523	result.totalCount = numInstances();
1524
1525	double [] attVals = attributeToDoubleArray(index);
1526	int [] sorted = Utils.sort(attVals);
1527	int currentCount = 0;
1528	double currentWeight = 0;
1529	double prev = Double.NaN;
1530	for (int j = 0; j < numInstances(); j++) {
1531	Instance current = instance(sorted[j]);
1532	if (current.isMissing(index)) {
1533	result.missingCount = numInstances() - j;
1534	break;
1535	}
1536	if (current.value(index) == prev) {
1537	currentCount++;
1538	currentWeight += current.weight();
1539	} else {
1540	result.addDistinct(prev, currentCount, currentWeight);
1541	currentCount = 1;
1542	currentWeight = current.weight();
1543	prev = current.value(index);
1544	}
1545	}
1546	result.addDistinct(prev, currentCount, currentWeight);
1547	result.distinctCount--; // So we don't count "missing" as a value
1548	return result;
1549	}
1550
1551	/**
1552	* Gets the value of all instances in this dataset for a particular
1553	* attribute. Useful in conjunction with Utils.sort to allow iterating
1554	* through the dataset in sorted order for some attribute.
1555	*
1556	* @param index the index of the attribute.
1557	* @return an array containing the value of the desired attribute for
1558	* each instance in the dataset.
1559	*/
1560	//@ requires 0 <= index && index < numAttributes();
1561	public /@pure@/ double [] attributeToDoubleArray(int index) {
1562
1563	double [] result = new double[numInstances()];
1564	for (int i = 0; i < result.length; i++) {
1565	result[i] = instance(i).value(index);
1566	}
1567	return result;
1568	}
1569
1570	/**
1571	* Generates a string summarizing the set of instances. Gives a breakdown
1572	* for each attribute indicating the number of missing/discrete/unique
1573	* values and other information.
1574	*
1575	* @return a string summarizing the dataset
1576	*/
1577	public String toSummaryString() {
1578
1579	StringBuffer result = new StringBuffer();
1580	result.append("Relation Name: ").append(relationName()).append('\n');
1581	result.append("Num Instances: ").append(numInstances()).append('\n');
1582	result.append("Num Attributes: ").append(numAttributes()).append('\n');
1583	result.append('\n');
1584
1585	result.append(Utils.padLeft("", 5)).append(Utils.padRight("Name", 25));
1586	result.append(Utils.padLeft("Type", 5)).append(Utils.padLeft("Nom", 5));
1587	result.append(Utils.padLeft("Int", 5)).append(Utils.padLeft("Real", 5));
1588	result.append(Utils.padLeft("Missing", 12));
1589	result.append(Utils.padLeft("Unique", 12));
1590	result.append(Utils.padLeft("Dist", 6)).append('\n');
1591	for (int i = 0; i < numAttributes(); i++) {
1592	Attribute a = attribute(i);
1593	AttributeStats as = attributeStats(i);
1594	result.append(Utils.padLeft("" + (i + 1), 4)).append(' ');
1595	result.append(Utils.padRight(a.name(), 25)).append(' ');
1596	long percent;
1597	switch (a.type()) {
1598	case Attribute.NOMINAL:
1599	result.append(Utils.padLeft("Nom", 4)).append(' ');
1600	percent = Math.round(100.0 * as.intCount / as.totalCount);
1601	result.append(Utils.padLeft("" + percent, 3)).append("% ");
1602	result.append(Utils.padLeft("" + 0, 3)).append("% ");
1603	percent = Math.round(100.0 * as.realCount / as.totalCount);
1604	result.append(Utils.padLeft("" + percent, 3)).append("% ");
1605	break;
1606	case Attribute.NUMERIC:
1607	result.append(Utils.padLeft("Num", 4)).append(' ');
1608	result.append(Utils.padLeft("" + 0, 3)).append("% ");
1609	percent = Math.round(100.0 * as.intCount / as.totalCount);
1610	result.append(Utils.padLeft("" + percent, 3)).append("% ");
1611	percent = Math.round(100.0 * as.realCount / as.totalCount);
1612	result.append(Utils.padLeft("" + percent, 3)).append("% ");
1613	break;
1614	case Attribute.DATE:
1615	result.append(Utils.padLeft("Dat", 4)).append(' ');
1616	result.append(Utils.padLeft("" + 0, 3)).append("% ");
1617	percent = Math.round(100.0 * as.intCount / as.totalCount);
1618	result.append(Utils.padLeft("" + percent, 3)).append("% ");
1619	percent = Math.round(100.0 * as.realCount / as.totalCount);
1620	result.append(Utils.padLeft("" + percent, 3)).append("% ");
1621	break;
1622	case Attribute.STRING:
1623	result.append(Utils.padLeft("Str", 4)).append(' ');
1624	percent = Math.round(100.0 * as.intCount / as.totalCount);
1625	result.append(Utils.padLeft("" + percent, 3)).append("% ");
1626	result.append(Utils.padLeft("" + 0, 3)).append("% ");
1627	percent = Math.round(100.0 * as.realCount / as.totalCount);
1628	result.append(Utils.padLeft("" + percent, 3)).append("% ");
1629	break;
1630	case Attribute.RELATIONAL:
1631	result.append(Utils.padLeft("Rel", 4)).append(' ');
1632	percent = Math.round(100.0 * as.intCount / as.totalCount);
1633	result.append(Utils.padLeft("" + percent, 3)).append("% ");
1634	result.append(Utils.padLeft("" + 0, 3)).append("% ");
1635	percent = Math.round(100.0 * as.realCount / as.totalCount);
1636	result.append(Utils.padLeft("" + percent, 3)).append("% ");
1637	break;
1638	default:
1639	result.append(Utils.padLeft("???", 4)).append(' ');
1640	result.append(Utils.padLeft("" + 0, 3)).append("% ");
1641	percent = Math.round(100.0 * as.intCount / as.totalCount);
1642	result.append(Utils.padLeft("" + percent, 3)).append("% ");
1643	percent = Math.round(100.0 * as.realCount / as.totalCount);
1644	result.append(Utils.padLeft("" + percent, 3)).append("% ");
1645	break;
1646	}
1647	result.append(Utils.padLeft("" + as.missingCount, 5)).append(" /");
1648	percent = Math.round(100.0 * as.missingCount / as.totalCount);
1649	result.append(Utils.padLeft("" + percent, 3)).append("% ");
1650	result.append(Utils.padLeft("" + as.uniqueCount, 5)).append(" /");
1651	percent = Math.round(100.0 * as.uniqueCount / as.totalCount);
1652	result.append(Utils.padLeft("" + percent, 3)).append("% ");
1653	result.append(Utils.padLeft("" + as.distinctCount, 5)).append(' ');
1654	result.append('\n');
1655	}
1656	return result.toString();
1657	}
1658
1659	/**
1660	* Copies instances from one set to the end of another
1661	* one.
1662	*
1663	* @param from the position of the first instance to be copied
1664	* @param dest the destination for the instances
1665	* @param num the number of instances to be copied
1666	*/
1667	//@ requires 0 <= from && from <= numInstances() - num;
1668	//@ requires 0 <= num;
1669	protected void copyInstances(int from, /@non_null@/ Instances dest, int num) {
1670
1671	for (int i = 0; i < num; i++) {
1672	dest.add(instance(from + i));
1673	}
1674	}
1675
1676	/**
1677	* Replaces the attribute information by a clone of
1678	* itself.
1679	*/
1680	protected void freshAttributeInfo() {
1681
1682	ArrayList<Attribute> newList = new ArrayList<Attribute>(m_Attributes.size());
1683	for (Attribute att : m_Attributes) {
1684	newList.add((Attribute)att.copy());
1685	}
1686	m_Attributes = newList;
1687	}
1688
1689	/**
1690	* Returns string including all instances, their weights and
1691	* their indices in the original dataset.
1692	*
1693	* @return description of instance and its weight as a string
1694	*/
1695	protected /@pure@/ String instancesAndWeights(){
1696
1697	StringBuffer text = new StringBuffer();
1698
1699	for (int i = 0; i < numInstances(); i++) {
1700	text.append(instance(i) + " " + instance(i).weight());
1701	if (i < numInstances() - 1) {
1702	text.append("\n");
1703	}
1704	}
1705	return text.toString();
1706	}
1707
1708	/**
1709	* Partitions the instances around a pivot. Used by quicksort and
1710	* kthSmallestValue.
1711	*
1712	* @param attIndex the attribute's index (index starts with 0)
1713	* @param l the first index of the subset (index starts with 0)
1714	* @param r the last index of the subset (index starts with 0)
1715	*
1716	* @return the index of the middle element
1717	*/
1718	//@ requires 0 <= attIndex && attIndex < numAttributes();
1719	//@ requires 0 <= left && left <= right && right < numInstances();
1720	protected int partition(int attIndex, int l, int r) {
1721
1722	double pivot = instance((l + r) / 2).value(attIndex);
1723
1724	while (l < r) {
1725	while ((instance(l).value(attIndex) < pivot) && (l < r)) {
1726	l++;
1727	}
1728	while ((instance(r).value(attIndex) > pivot) && (l < r)) {
1729	r--;
1730	}
1731	if (l < r) {
1732	swap(l, r);
1733	l++;
1734	r--;
1735	}
1736	}
1737	if ((l == r) && (instance(r).value(attIndex) > pivot)) {
1738	r--;
1739	}
1740
1741	return r;
1742	}
1743
1744	/**
1745	* Implements quicksort according to Manber's "Introduction to
1746	* Algorithms".
1747	*
1748	* @param attIndex the attribute's index (index starts with 0)
1749	* @param left the first index of the subset to be sorted (index starts with 0)
1750	* @param right the last index of the subset to be sorted (index starts with 0)
1751	*/
1752	//@ requires 0 <= attIndex && attIndex < numAttributes();
1753	//@ requires 0 <= first && first <= right && right < numInstances();
1754	protected void quickSort(int attIndex, int left, int right) {
1755
1756	if (left < right) {
1757	int middle = partition(attIndex, left, right);
1758	quickSort(attIndex, left, middle);
1759	quickSort(attIndex, middle + 1, right);
1760	}
1761	}
1762
1763	/**
1764	* Implements computation of the kth-smallest element according
1765	* to Manber's "Introduction to Algorithms".
1766	*
1767	* @param attIndex the attribute's index (index starts with 0)
1768	* @param left the first index of the subset (index starts with 0)
1769	* @param right the last index of the subset (index starts with 0)
1770	* @param k the value of k
1771	*
1772	* @return the index of the kth-smallest element
1773	*/
1774	//@ requires 0 <= attIndex && attIndex < numAttributes();
1775	//@ requires 0 <= first && first <= right && right < numInstances();
1776	protected int select(int attIndex, int left, int right, int k) {
1777
1778	if (left == right) {
1779	return left;
1780	} else {
1781	int middle = partition(attIndex, left, right);
1782	if ((middle - left + 1) >= k) {
1783	return select(attIndex, left, middle, k);
1784	} else {
1785	return select(attIndex, middle + 1, right, k - (middle - left + 1));
1786	}
1787	}
1788	}
1789
1790	/**
1791	* Help function needed for stratification of set.
1792	*
1793	* @param numFolds the number of folds for the stratification
1794	*/
1795	protected void stratStep (int numFolds){
1796
1797	ArrayList<Instance> newVec = new ArrayList<Instance>(m_Instances.size());
1798	int start = 0, j;
1799
1800	// create stratified batch
1801	while (newVec.size() < numInstances()) {
1802	j = start;
1803	while (j < numInstances()) {
1804	newVec.add(instance(j));
1805	j = j + numFolds;
1806	}
1807	start++;
1808	}
1809	m_Instances = newVec;
1810	}
1811
1812	/**
1813	* Swaps two instances in the set.
1814	*
1815	* @param i the first instance's index (index starts with 0)
1816	* @param j the second instance's index (index starts with 0)
1817	*/
1818	//@ requires 0 <= i && i < numInstances();
1819	//@ requires 0 <= j && j < numInstances();
1820	public void swap(int i, int j){
1821
1822	Instance in = m_Instances.get(i);
1823	m_Instances.set(i, m_Instances.get(j));
1824	m_Instances.set(j, in);
1825	}
1826
1827	/**
1828	* Merges two sets of Instances together. The resulting set will have
1829	* all the attributes of the first set plus all the attributes of the
1830	* second set. The number of instances in both sets must be the same.
1831	*
1832	* @param first the first set of Instances
1833	* @param second the second set of Instances
1834	* @return the merged set of Instances
1835	* @throws IllegalArgumentException if the datasets are not the same size
1836	*/
1837	public static Instances mergeInstances(Instances first, Instances second) {
1838
1839	if (first.numInstances() != second.numInstances()) {
1840	throw new IllegalArgumentException("Instance sets must be of the same size");
1841	}
1842
1843	// Create the vector of merged attributes
1844	ArrayList<Attribute> newAttributes = new ArrayList<Attribute>();
1845	for (int i = 0; i < first.numAttributes(); i++) {
1846	newAttributes.add(first.attribute(i));
1847	}
1848	for (int i = 0; i < second.numAttributes(); i++) {
1849	newAttributes.add(second.attribute(i));
1850	}
1851
1852	// Create the set of Instances
1853	Instances merged = new Instances(first.relationName() + '_'
1854	+ second.relationName(),
1855	newAttributes,
1856	first.numInstances());
1857	// Merge each instance
1858	for (int i = 0; i < first.numInstances(); i++) {
1859	merged.add(first.instance(i).mergeInstance(second.instance(i)));
1860	}
1861	return merged;
1862	}
1863
1864	/**
1865	* Method for testing this class.
1866	*
1867	* @param argv should contain one element: the name of an ARFF file
1868	*/
1869	//@ requires argv != null;
1870	//@ requires argv.length == 1;
1871	//@ requires argv[0] != null;
1872	public static void test(String [] argv) {
1873
1874	Instances instances, secondInstances, train, test, empty;
1875	Random random = new Random(2);
1876	Reader reader;
1877	int start, num;
1878	ArrayList<Attribute> testAtts;
1879	ArrayList<String> testVals;
1880	int i,j;
1881
1882	try{
1883	if (argv.length > 1) {
1884	throw (new Exception("Usage: Instances [<filename>]"));
1885	}
1886
1887	// Creating set of instances from scratch
1888	testVals = new ArrayList<String>(2);
1889	testVals.add("first_value");
1890	testVals.add("second_value");
1891	testAtts = new ArrayList<Attribute>(2);
1892	testAtts.add(new Attribute("nominal_attribute", testVals));
1893	testAtts.add(new Attribute("numeric_attribute"));
1894	instances = new Instances("test_set", testAtts, 10);
1895	instances.add(new DenseInstance(instances.numAttributes()));
1896	instances.add(new DenseInstance(instances.numAttributes()));
1897	instances.add(new DenseInstance(instances.numAttributes()));
1898	instances.setClassIndex(0);
1899	System.out.println("\nSet of instances created from scratch:\n");
1900	System.out.println(instances);
1901
1902	if (argv.length == 1) {
1903	String filename = argv[0];
1904	reader = new FileReader(filename);
1905
1906	// Read first five instances and print them
1907	System.out.println("\nFirst five instances from file:\n");
1908	instances = new Instances(reader, 1);
1909	instances.setClassIndex(instances.numAttributes() - 1);
1910	i = 0;
1911	while ((i < 5) && (instances.readInstance(reader))) {
1912	i++;
1913	}
1914	System.out.println(instances);
1915
1916	// Read all the instances in the file
1917	reader = new FileReader(filename);
1918	instances = new Instances(reader);
1919
1920	// Make the last attribute be the class
1921	instances.setClassIndex(instances.numAttributes() - 1);
1922
1923	// Print header and instances.
1924	System.out.println("\nDataset:\n");
1925	System.out.println(instances);
1926	System.out.println("\nClass index: "+instances.classIndex());
1927	}
1928
1929	// Test basic methods based on class index.
1930	System.out.println("\nClass name: "+instances.classAttribute().name());
1931	System.out.println("\nClass index: "+instances.classIndex());
1932	System.out.println("\nClass is nominal: " +
1933	instances.classAttribute().isNominal());
1934	System.out.println("\nClass is numeric: " +
1935	instances.classAttribute().isNumeric());
1936	System.out.println("\nClasses:\n");
1937	for (i = 0; i < instances.numClasses(); i++) {
1938	System.out.println(instances.classAttribute().value(i));
1939	}
1940	System.out.println("\nClass values and labels of instances:\n");
1941	for (i = 0; i < instances.numInstances(); i++) {
1942	Instance inst = instances.instance(i);
1943	System.out.print(inst.classValue() + "\t");
1944	System.out.print(inst.toString(inst.classIndex()));
1945	if (instances.instance(i).classIsMissing()) {
1946	System.out.println("\tis missing");
1947	} else {
1948	System.out.println();
1949	}
1950	}
1951
1952	// Create random weights.
1953	System.out.println("\nCreating random weights for instances.");
1954	for (i = 0; i < instances.numInstances(); i++) {
1955	instances.instance(i).setWeight(random.nextDouble());
1956	}
1957
1958	// Print all instances and their weights (and the sum of weights).
1959	System.out.println("\nInstances and their weights:\n");
1960	System.out.println(instances.instancesAndWeights());
1961	System.out.print("\nSum of weights: ");
1962	System.out.println(instances.sumOfWeights());
1963
1964	// Insert an attribute
1965	secondInstances = new Instances(instances);
1966	Attribute testAtt = new Attribute("Inserted");
1967	secondInstances.insertAttributeAt(testAtt, 0);
1968	System.out.println("\nSet with inserted attribute:\n");
1969	System.out.println(secondInstances);
1970	System.out.println("\nClass name: "
1971	+ secondInstances.classAttribute().name());
1972
1973	// Delete the attribute
1974	secondInstances.deleteAttributeAt(0);
1975	System.out.println("\nSet with attribute deleted:\n");
1976	System.out.println(secondInstances);
1977	System.out.println("\nClass name: "
1978	+ secondInstances.classAttribute().name());
1979
1980	// Test if headers are equal
1981	System.out.println("\nHeaders equal: "+
1982	instances.equalHeaders(secondInstances) + "\n");
1983
1984	// Print data in internal format.
1985	System.out.println("\nData (internal values):\n");
1986	for (i = 0; i < instances.numInstances(); i++) {
1987	for (j = 0; j < instances.numAttributes(); j++) {
1988	if (instances.instance(i).isMissing(j)) {
1989	System.out.print("? ");
1990	} else {
1991	System.out.print(instances.instance(i).value(j) + " ");
1992	}
1993	}
1994	System.out.println();
1995	}
1996
1997	// Just print header
1998	System.out.println("\nEmpty dataset:\n");
1999	empty = new Instances(instances, 0);
2000	System.out.println(empty);
2001	System.out.println("\nClass name: "+empty.classAttribute().name());
2002
2003	// Create copy and rename an attribute and a value (if possible)
2004	if (empty.classAttribute().isNominal()) {
2005	Instances copy = new Instances(empty, 0);
2006	copy.renameAttribute(copy.classAttribute(), "new_name");
2007	copy.renameAttributeValue(copy.classAttribute(),
2008	copy.classAttribute().value(0),
2009	"new_val_name");
2010	System.out.println("\nDataset with names changed:\n" + copy);
2011	System.out.println("\nOriginal dataset:\n" + empty);
2012	}
2013
2014	// Create and prints subset of instances.
2015	start = instances.numInstances() / 4;
2016	num = instances.numInstances() / 2;
2017	System.out.print("\nSubset of dataset: ");
2018	System.out.println(num + " instances from " + (start + 1)
2019	+ ". instance");
2020	secondInstances = new Instances(instances, start, num);
2021	System.out.println("\nClass name: "
2022	+ secondInstances.classAttribute().name());
2023
2024	// Print all instances and their weights (and the sum of weights).
2025	System.out.println("\nInstances and their weights:\n");
2026	System.out.println(secondInstances.instancesAndWeights());
2027	System.out.print("\nSum of weights: ");
2028	System.out.println(secondInstances.sumOfWeights());
2029
2030	// Create and print training and test sets for 3-fold
2031	// cross-validation.
2032	System.out.println("\nTrain and test folds for 3-fold CV:");
2033	if (instances.classAttribute().isNominal()) {
2034	instances.stratify(3);
2035	}
2036	for (j = 0; j < 3; j++) {
2037	train = instances.trainCV(3,j, new Random(1));
2038	test = instances.testCV(3,j);
2039
2040	// Print all instances and their weights (and the sum of weights).
2041	System.out.println("\nTrain: ");
2042	System.out.println("\nInstances and their weights:\n");
2043	System.out.println(train.instancesAndWeights());
2044	System.out.print("\nSum of weights: ");
2045	System.out.println(train.sumOfWeights());
2046	System.out.println("\nClass name: "+train.classAttribute().name());
2047	System.out.println("\nTest: ");
2048	System.out.println("\nInstances and their weights:\n");
2049	System.out.println(test.instancesAndWeights());
2050	System.out.print("\nSum of weights: ");
2051	System.out.println(test.sumOfWeights());
2052	System.out.println("\nClass name: "+test.classAttribute().name());
2053	}
2054
2055	// Randomize instances and print them.
2056	System.out.println("\nRandomized dataset:");
2057	instances.randomize(random);
2058
2059	// Print all instances and their weights (and the sum of weights).
2060	System.out.println("\nInstances and their weights:\n");
2061	System.out.println(instances.instancesAndWeights());
2062	System.out.print("\nSum of weights: ");
2063	System.out.println(instances.sumOfWeights());
2064
2065	// Sort instances according to first attribute and
2066	// print them.
2067	System.out.print("\nInstances sorted according to first attribute:\n ");
2068	instances.sort(0);
2069
2070	// Print all instances and their weights (and the sum of weights).
2071	System.out.println("\nInstances and their weights:\n");
2072	System.out.println(instances.instancesAndWeights());
2073	System.out.print("\nSum of weights: ");
2074	System.out.println(instances.sumOfWeights());
2075	} catch (Exception e) {
2076	e.printStackTrace();
2077	}
2078	}
2079
2080	/**
2081	* Main method for this class. The following calls are possible:
2082	* <ul>
2083	* <li>
2084	* <code>weka.core.Instances</code> help<br/>
2085	* prints a short list of possible commands.
2086	* </li>
2087	* <li>
2088	* <code>weka.core.Instances</code> <filename><br/>
2089	* prints a summary of a set of instances.
2090	* </li>
2091	* <li>
2092	* <code>weka.core.Instances</code> merge <filename1> <filename2><br/>
2093	* merges the two datasets (must have same number of instances) and
2094	* outputs the results on stdout.
2095	* </li>
2096	* <li>
2097	* <code>weka.core.Instances</code> append <filename1> <filename2><br/>
2098	* appends the second dataset to the first one (must have same headers) and
2099	* outputs the results on stdout.
2100	* </li>
2101	* <li>
2102	* <code>weka.core.Instances</code> headers <filename1> <filename2><br/>
2103	* Compares the headers of the two datasets and prints whether they match
2104	* or not.
2105	* </li>
2106	* <li>
2107	* <code>weka.core.Instances</code> randomize <seed> <filename><br/>
2108	* randomizes the dataset with the given seed and outputs the result on stdout.
2109	* </li>
2110	* </ul>
2111	*
2112	* @param args the commandline parameters
2113	*/
2114	public static void main(String[] args) {
2115
2116	try {
2117	Instances i;
2118	// read from stdin and print statistics
2119	if (args.length == 0) {
2120	DataSource source = new DataSource(System.in);
2121	i = source.getDataSet();
2122	System.out.println(i.toSummaryString());
2123	}
2124	// read file and print statistics
2125	else if ((args.length == 1) && (!args[0].equals("-h")) && (!args[0].equals("help"))) {
2126	DataSource source = new DataSource(args[0]);
2127	i = source.getDataSet();
2128	System.out.println(i.toSummaryString());
2129	}
2130	// read two files, merge them and print result to stdout
2131	else if ((args.length == 3) && (args[0].toLowerCase().equals("merge"))) {
2132	DataSource source1 = new DataSource(args[1]);
2133	DataSource source2 = new DataSource(args[2]);
2134	i = Instances.mergeInstances(source1.getDataSet(), source2.getDataSet());
2135	System.out.println(i);
2136	}
2137	// read two files, append them and print result to stdout
2138	else if ((args.length == 3) && (args[0].toLowerCase().equals("append"))) {
2139	DataSource source1 = new DataSource(args[1]);
2140	DataSource source2 = new DataSource(args[2]);
2141	String msg = source1.getStructure().equalHeadersMsg(source2.getStructure());
2142	if (msg != null)
2143	throw new Exception("The two datasets have different headers:\n" + msg);
2144	Instances structure = source1.getStructure();
2145	System.out.println(source1.getStructure());
2146	while (source1.hasMoreElements(structure))
2147	System.out.println(source1.nextElement(structure));
2148	structure = source2.getStructure();
2149	while (source2.hasMoreElements(structure))
2150	System.out.println(source2.nextElement(structure));
2151	}
2152	// read two files and compare their headers
2153	else if ((args.length == 3) && (args[0].toLowerCase().equals("headers"))) {
2154	DataSource source1 = new DataSource(args[1]);
2155	DataSource source2 = new DataSource(args[2]);
2156	String msg = source1.getStructure().equalHeadersMsg(source2.getStructure());
2157	if (msg == null)
2158	System.out.println("Headers match");
2159	else
2160	System.out.println("Headers don't match:\n" + msg);
2161	}
2162	// read file and seed value, randomize data and print result to stdout
2163	else if ((args.length == 3) && (args[0].toLowerCase().equals("randomize"))) {
2164	DataSource source = new DataSource(args[2]);
2165	i = source.getDataSet();
2166	i.randomize(new Random(Integer.parseInt(args[1])));
2167	System.out.println(i);
2168	}
2169	// wrong parameters or help
2170	else {
2171	System.err.println(
2172	"\nUsage:\n"
2173	// help
2174	+ "\tweka.core.Instances help\n"
2175	+ "\t\tPrints this help\n"
2176	// stats
2177	+ "\tweka.core.Instances <filename>\n"
2178	+ "\t\tOutputs dataset statistics\n"
2179	// merge
2180	+ "\tweka.core.Instances merge <filename1> <filename2>\n"
2181	+ "\t\tMerges the datasets (must have same number of rows).\n"
2182	+ "\t\tGenerated dataset gets output on stdout.\n"
2183	// append
2184	+ "\tweka.core.Instances append <filename1> <filename2>\n"
2185	+ "\t\tAppends the second dataset to the first (must have same number of attributes).\n"
2186	+ "\t\tGenerated dataset gets output on stdout.\n"
2187	// headers
2188	+ "\tweka.core.Instances headers <filename1> <filename2>\n"
2189	+ "\t\tCompares the structure of the two datasets and outputs whether they\n"
2190	+ "\t\tdiffer or not.\n"
2191	// randomize
2192	+ "\tweka.core.Instances randomize <seed> <filename>\n"
2193	+ "\t\tRandomizes the dataset and outputs it on stdout.\n"
2194	);
2195	}
2196	}
2197	catch (Exception ex) {
2198	ex.printStackTrace();
2199	System.err.println(ex.getMessage());
2200	}
2201	}
2202
2203	/**
2204	* Returns the revision string.
2205	*
2206	* @return the revision
2207	*/
2208	public String getRevision() {
2209	return RevisionUtils.extract("$Revision: 5987 $");
2210	}
2211	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: