/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * Instance.java
 * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.core;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Enumeration;

29 | /** |
---|
30 | * Interface representing an instance. All values (numeric, date, |
---|
31 | * nominal, string or relational) are internally stored as |
---|
32 | * floating-point numbers in the original concrete class |
---|
33 | * implementations (now called DenseInstance.java and |
---|
34 | * SparseInstance.java), and the methods in this interface reflect |
---|
35 | * this. If an attribute is nominal (or a string or relational), the |
---|
36 | * stored value is the index of the corresponding nominal (or string |
---|
37 | * or relational) value in the attribute's definition. We have chosen |
---|
38 | * this approach in favor of a more elegant object-oriented approach |
---|
39 | * because it is much faster. <p> |
---|
40 | * |
---|
41 | * Typical usage (code from the main() method of this class): <p> |
---|
42 | * |
---|
43 | * <code> |
---|
44 | * ... <br> |
---|
45 | * |
---|
46 | * // Create empty instance with three attribute values <br> |
---|
47 | * Instance inst = new DenseInstance(3); <br><br> |
---|
48 | * |
---|
49 | * // Set instance's values for the attributes "length", "weight", and "position"<br> |
---|
50 | * inst.setValue(length, 5.3); <br> |
---|
51 | * inst.setValue(weight, 300); <br> |
---|
52 | * inst.setValue(position, "first"); <br><br> |
---|
53 | * |
---|
54 | * // Set instance's dataset to be the dataset "race" <br> |
---|
55 | * inst.setDataset(race); <br><br> |
---|
56 | * |
---|
57 | * // Print the instance <br> |
---|
58 | * System.out.println("The instance: " + inst); <br> |
---|
59 | * |
---|
60 | * ... <br> |
---|
61 | * </code><p> |
---|
62 | * |
---|
63 | * All methods that change an instance's attribute values must be |
---|
64 | * safe, ie. a change of an instance's attribute values must not |
---|
65 | * affect any other instances. |
---|
66 | * |
---|
67 | * @author Eibe Frank (eibe@cs.waikato.ac.nz) |
---|
68 | * @version $Revision: 5987 $ |
---|
69 | */ |
---|
70 | public interface Instance extends Copyable { |
---|
71 | |
---|
72 | /** |
---|
73 | * Returns the attribute with the given index. |
---|
74 | * |
---|
75 | * @param index the attribute's index |
---|
76 | * @return the attribute at the given position |
---|
77 | * @throws UnassignedDatasetException if instance doesn't have access to a |
---|
78 | * dataset |
---|
79 | */ |
---|
80 | public Attribute attribute(int index); |
---|
81 | |
---|
82 | /** |
---|
83 | * Returns the attribute with the given index in the sparse representation. |
---|
84 | * Same as attribute(int) for a DenseInstance. |
---|
85 | * |
---|
86 | * @param indexOfIndex the index of the attribute's index |
---|
87 | * @return the attribute at the given position |
---|
88 | * @throws UnassignedDatasetException if instance doesn't have access to a |
---|
89 | * dataset |
---|
90 | */ |
---|
91 | public Attribute attributeSparse(int indexOfIndex); |
---|
92 | |
---|
93 | /** |
---|
94 | * Returns class attribute. |
---|
95 | * |
---|
96 | * @return the class attribute |
---|
97 | * @throws UnassignedDatasetException if the class is not set or the |
---|
98 | * instance doesn't have access to a dataset |
---|
99 | */ |
---|
100 | public Attribute classAttribute(); |
---|
101 | |
---|
102 | /** |
---|
103 | * Returns the class attribute's index. |
---|
104 | * |
---|
105 | * @return the class index as an integer |
---|
106 | * @throws UnassignedDatasetException if instance doesn't have access to a dataset |
---|
107 | */ |
---|
108 | public int classIndex(); |
---|
109 | |
---|
110 | /** |
---|
111 | * Tests if an instance's class is missing. |
---|
112 | * |
---|
113 | * @return true if the instance's class is missing |
---|
114 | * @throws UnassignedClassException if the class is not set or the instance doesn't |
---|
115 | * have access to a dataset |
---|
116 | */ |
---|
117 | public boolean classIsMissing(); |
---|
118 | |
---|
119 | /** |
---|
120 | * Returns an instance's class value as a floating-point number. |
---|
121 | * |
---|
122 | * @return the corresponding value as a double (If the |
---|
123 | * corresponding attribute is nominal (or a string) then it returns the |
---|
124 | * value's index as a double). |
---|
125 | * @throws UnassignedClassException if the class is not set or the instance doesn't |
---|
126 | * have access to a dataset |
---|
127 | */ |
---|
128 | public double classValue(); |
---|
129 | |
---|
130 | /** |
---|
131 | * Returns the dataset this instance has access to. (ie. obtains |
---|
132 | * information about attribute types from) Null if the instance |
---|
133 | * doesn't have access to a dataset. |
---|
134 | * |
---|
135 | * @return the dataset the instance has accesss to |
---|
136 | */ |
---|
137 | public Instances dataset(); |
---|
138 | |
---|
139 | /** |
---|
140 | * Deletes an attribute at the given position (0 to |
---|
141 | * numAttributes() - 1). Only succeeds if the instance does not |
---|
142 | * have access to any dataset because otherwise inconsistencies |
---|
143 | * could be introduced. |
---|
144 | * |
---|
145 | * @param position the attribute's position |
---|
146 | * @throws RuntimeException if the instance has access to a |
---|
147 | * dataset |
---|
148 | */ |
---|
149 | public void deleteAttributeAt(int position); |
---|
150 | |
---|
151 | /** |
---|
152 | * Returns an enumeration of all the attributes. |
---|
153 | * |
---|
154 | * @return enumeration of all the attributes |
---|
155 | * @throws UnassignedDatasetException if the instance doesn't |
---|
156 | * have access to a dataset |
---|
157 | */ |
---|
158 | public Enumeration enumerateAttributes(); |
---|
159 | |
---|
160 | /** |
---|
161 | * Tests if the headers of two instances are equivalent. |
---|
162 | * |
---|
163 | * @param inst another instance |
---|
164 | * @return true if the header of the given instance is |
---|
165 | * equivalent to this instance's header |
---|
166 | * @throws UnassignedDatasetException if instance doesn't have access to any |
---|
167 | * dataset |
---|
168 | */ |
---|
169 | public boolean equalHeaders(Instance inst); |
---|
170 | |
---|
171 | /** |
---|
172 | * Checks if the headers of two instances are equivalent. |
---|
173 | * If not, then returns a message why they differ. |
---|
174 | * |
---|
175 | * @param dataset another instance |
---|
176 | * @return null if the header of the given instance is equivalent |
---|
177 | * to this instance's header, otherwise a message with details on |
---|
178 | * why they differ |
---|
179 | */ |
---|
180 | public String equalHeadersMsg(Instance inst); |
---|
181 | |
---|
182 | /** |
---|
183 | * Tests whether an instance has a missing value. Skips the class attribute if set. |
---|
184 | * @return true if instance has a missing value. |
---|
185 | * @throws UnassignedDatasetException if instance doesn't have access to any |
---|
186 | * dataset |
---|
187 | */ |
---|
188 | public boolean hasMissingValue(); |
---|
189 | |
---|
190 | /** |
---|
191 | * Returns the index of the attribute stored at the given position in the sparse |
---|
192 | * representation. Identify function for an instance of type DenseInstance. |
---|
193 | * |
---|
194 | * @param position the position |
---|
195 | * @return the index of the attribute stored at the given position |
---|
196 | */ |
---|
197 | public int index(int position); |
---|
198 | |
---|
199 | /** |
---|
200 | * Inserts an attribute at the given position (0 to |
---|
201 | * numAttributes()). Only succeeds if the instance does not |
---|
202 | * have access to any dataset because otherwise inconsistencies |
---|
203 | * could be introduced. |
---|
204 | * |
---|
205 | * @param position the attribute's position |
---|
206 | * @throws RuntimeException if the instance has accesss to a |
---|
207 | * dataset |
---|
208 | * @throws IllegalArgumentException if the position is out of range |
---|
209 | */ |
---|
210 | public void insertAttributeAt(int position); |
---|
211 | |
---|
212 | /** |
---|
213 | * Tests if a specific value is "missing". |
---|
214 | * |
---|
215 | * @param attIndex the attribute's index |
---|
216 | * @return true if the value is "missing" |
---|
217 | */ |
---|
218 | public boolean isMissing(int attIndex); |
---|
219 | |
---|
220 | /** |
---|
221 | * Tests if a specific value is "missing" in the sparse |
---|
222 | * representation. Samse as isMissing(int) for a DenseInstance. |
---|
223 | * |
---|
224 | * @param indexOfIndex the index of the attribute's index |
---|
225 | * @return true if the value is "missing" |
---|
226 | */ |
---|
227 | public boolean isMissingSparse(int indexOfIndex); |
---|
228 | |
---|
229 | /** |
---|
230 | * Tests if a specific value is "missing". |
---|
231 | * The given attribute has to belong to a dataset. |
---|
232 | * |
---|
233 | * @param att the attribute |
---|
234 | * @return true if the value is "missing" |
---|
235 | */ |
---|
236 | public boolean isMissing(Attribute att); |
---|
237 | |
---|
238 | /** |
---|
239 | * Merges this instance with the given instance and returns |
---|
240 | * the result. Dataset is set to null. The returned instance |
---|
241 | * is of the same type as this instance. |
---|
242 | * |
---|
243 | * @param inst the instance to be merged with this one |
---|
244 | * @return the merged instances |
---|
245 | */ |
---|
246 | public Instance mergeInstance(Instance inst); |
---|
247 | |
---|
248 | /** |
---|
249 | * Returns the number of attributes. |
---|
250 | * |
---|
251 | * @return the number of attributes as an integer |
---|
252 | */ |
---|
253 | public int numAttributes(); |
---|
254 | |
---|
255 | /** |
---|
256 | * Returns the number of class labels. |
---|
257 | * |
---|
258 | * @return the number of class labels as an integer if the |
---|
259 | * class attribute is nominal, 1 otherwise. |
---|
260 | * @throws UnassignedDatasetException if instance doesn't have access to any |
---|
261 | * dataset |
---|
262 | */ |
---|
263 | public int numClasses(); |
---|
264 | |
---|
265 | /** |
---|
266 | * Returns the number of values present in a sparse representation. |
---|
267 | * |
---|
268 | * @return the number of values |
---|
269 | */ |
---|
270 | public int numValues(); |
---|
271 | |
---|
272 | /** |
---|
273 | * Replaces all missing values in the instance with the |
---|
274 | * values contained in the given array. A deep copy of |
---|
275 | * the vector of attribute values is performed before the |
---|
276 | * values are replaced. |
---|
277 | * |
---|
278 | * @param array containing the means and modes |
---|
279 | * @throws IllegalArgumentException if numbers of attributes are unequal |
---|
280 | */ |
---|
281 | public void replaceMissingValues(double[] array); |
---|
282 | |
---|
283 | /** |
---|
284 | * Sets the class value of an instance to be "missing". A deep copy of |
---|
285 | * the vector of attribute values is performed before the |
---|
286 | * value is set to be missing. |
---|
287 | * |
---|
288 | * @throws UnassignedClassException if the class is not set |
---|
289 | * @throws UnassignedDatasetException if the instance doesn't |
---|
290 | * have access to a dataset |
---|
291 | */ |
---|
292 | public void setClassMissing(); |
---|
293 | |
---|
294 | /** |
---|
295 | * Sets the class value of an instance to the given value (internal |
---|
296 | * floating-point format). A deep copy of the vector of attribute |
---|
297 | * values is performed before the value is set. |
---|
298 | * |
---|
299 | * @param value the new attribute value (If the corresponding |
---|
300 | * attribute is nominal (or a string) then this is the new value's |
---|
301 | * index as a double). |
---|
302 | * @throws UnassignedClassException if the class is not set |
---|
303 | * @throws UnaddignedDatasetException if the instance doesn't |
---|
304 | * have access to a dataset |
---|
305 | */ |
---|
306 | public void setClassValue(double value); |
---|
307 | |
---|
308 | /** |
---|
309 | * Sets the class value of an instance to the given value. A deep |
---|
310 | * copy of the vector of attribute values is performed before the |
---|
311 | * value is set. |
---|
312 | * |
---|
313 | * @param value the new class value (If the class |
---|
314 | * is a string attribute and the value can't be found, |
---|
315 | * the value is added to the attribute). |
---|
316 | * @throws UnassignedClassException if the class is not set |
---|
317 | * @throws UnassignedDatasetException if the dataset is not set |
---|
318 | * @throws IllegalArgumentException if the attribute is not |
---|
319 | * nominal or a string, or the value couldn't be found for a nominal |
---|
320 | * attribute |
---|
321 | */ |
---|
322 | public void setClassValue(String value); |
---|
323 | |
---|
324 | /** |
---|
325 | * Sets the reference to the dataset. Does not check if the instance |
---|
326 | * is compatible with the dataset. Note: the dataset does not know |
---|
327 | * about this instance. If the structure of the dataset's header |
---|
328 | * gets changed, this instance will not be adjusted automatically. |
---|
329 | * |
---|
330 | * @param instances the reference to the dataset |
---|
331 | */ |
---|
332 | public void setDataset(Instances instances); |
---|
333 | |
---|
334 | /** |
---|
335 | * Sets a specific value to be "missing". Performs a deep copy |
---|
336 | * of the vector of attribute values before the value is set to |
---|
337 | * be missing. |
---|
338 | * |
---|
339 | * @param attIndex the attribute's index |
---|
340 | */ |
---|
341 | public void setMissing(int attIndex); |
---|
342 | |
---|
343 | /** |
---|
344 | * Sets a specific value to be "missing". Performs a deep copy |
---|
345 | * of the vector of attribute values before the value is set to |
---|
346 | * be missing. The given attribute has to belong to a dataset. |
---|
347 | * |
---|
348 | * @param att the attribute |
---|
349 | */ |
---|
350 | public void setMissing(Attribute att); |
---|
351 | |
---|
352 | /** |
---|
353 | * Sets a specific value in the instance to the given value |
---|
354 | * (internal floating-point format). Performs a deep copy |
---|
355 | * of the vector of attribute values before the value is set. |
---|
356 | * |
---|
357 | * @param attIndex the attribute's index |
---|
358 | * @param value the new attribute value (If the corresponding |
---|
359 | * attribute is nominal (or a string) then this is the new value's |
---|
360 | * index as a double). |
---|
361 | */ |
---|
362 | public void setValue(int attIndex, double value); |
---|
363 | |
---|
364 | /** |
---|
365 | * Sets a specific value in the instance to the given value |
---|
366 | * (internal floating-point format), given an index into the sparse |
---|
367 | * representation. Performs a deep copy of the vector of attribute |
---|
368 | * values before the value is set. Same as setValue(int, double) |
---|
369 | * for a DenseInstance. |
---|
370 | * |
---|
371 | * @param indexOfIndex the index of the attribute's index |
---|
372 | * @param value the new attribute value (If the corresponding |
---|
373 | * attribute is nominal (or a string) then this is the new value's |
---|
374 | * index as a double). |
---|
375 | */ |
---|
376 | public void setValueSparse(int indexOfIndex, double value); |
---|
377 | |
---|
378 | /** |
---|
379 | * Sets a value of a nominal or string attribute to the given |
---|
380 | * value. Performs a deep copy of the vector of attribute values |
---|
381 | * before the value is set. |
---|
382 | * |
---|
383 | * @param attIndex the attribute's index |
---|
384 | * @param value the new attribute value (If the attribute |
---|
385 | * is a string attribute and the value can't be found, |
---|
386 | * the value is added to the attribute). |
---|
387 | * @throws UnassignedDatasetException if the dataset is not set |
---|
388 | * @throws IllegalArgumentException if the selected |
---|
389 | * attribute is not nominal or a string, or the supplied value couldn't |
---|
390 | * be found for a nominal attribute |
---|
391 | */ |
---|
392 | public void setValue(int attIndex, String value); |
---|
393 | |
---|
394 | /** |
---|
395 | * Sets a specific value in the instance to the given value |
---|
396 | * (internal floating-point format). Performs a deep copy of the |
---|
397 | * vector of attribute values before the value is set, so if you are |
---|
398 | * planning on calling setValue many times it may be faster to |
---|
399 | * create a new instance using toDoubleArray. The given attribute |
---|
400 | * has to belong to a dataset. |
---|
401 | * |
---|
402 | * @param att the attribute |
---|
403 | * @param value the new attribute value (If the corresponding |
---|
404 | * attribute is nominal (or a string) then this is the new value's |
---|
405 | * index as a double). |
---|
406 | */ |
---|
407 | public void setValue(Attribute att, double value); |
---|
408 | |
---|
409 | /** |
---|
410 | * Sets a value of an nominal or string attribute to the given |
---|
411 | * value. Performs a deep copy of the vector of attribute values |
---|
412 | * before the value is set, so if you are planning on calling setValue many |
---|
413 | * times it may be faster to create a new instance using toDoubleArray. |
---|
414 | * The given attribute has to belong to a dataset. |
---|
415 | * |
---|
416 | * @param att the attribute |
---|
417 | * @param value the new attribute value (If the attribute |
---|
418 | * is a string attribute and the value can't be found, |
---|
419 | * the value is added to the attribute). |
---|
420 | * @throws IllegalArgumentException if the the attribute is not |
---|
421 | * nominal or a string, or the value couldn't be found for a nominal |
---|
422 | * attribute |
---|
423 | */ |
---|
424 | public void setValue(Attribute att, String value); |
---|
425 | |
---|
426 | /** |
---|
427 | * Sets the weight of an instance. |
---|
428 | * |
---|
429 | * @param weight the weight |
---|
430 | */ |
---|
431 | public void setWeight(double weight); |
---|
432 | |
---|
433 | /** |
---|
434 | * Returns the relational value of a relational attribute. |
---|
435 | * |
---|
436 | * @param attIndex the attribute's index |
---|
437 | * @return the corresponding relation as an Instances object |
---|
438 | * @throws IllegalArgumentException if the attribute is not a |
---|
439 | * relation-valued attribute |
---|
440 | * @throws UnassignedDatasetException if the instance doesn't belong |
---|
441 | * to a dataset. |
---|
442 | */ |
---|
443 | public Instances relationalValue(int attIndex); |
---|
444 | |
---|
445 | |
---|
446 | /** |
---|
447 | * Returns the relational value of a relational attribute. |
---|
448 | * |
---|
449 | * @param att the attribute |
---|
450 | * @return the corresponding relation as an Instances object |
---|
451 | * @throws IllegalArgumentException if the attribute is not a |
---|
452 | * relation-valued attribute |
---|
453 | * @throws UnassignedDatasetException if the instance doesn't belong |
---|
454 | * to a dataset. |
---|
455 | */ |
---|
456 | public Instances relationalValue(Attribute att); |
---|
457 | |
---|
458 | /** |
---|
459 | * Returns the value of a nominal, string, date, or relational attribute |
---|
460 | * for the instance as a string. |
---|
461 | * |
---|
462 | * @param attIndex the attribute's index |
---|
463 | * @return the value as a string |
---|
464 | * @throws IllegalArgumentException if the attribute is not a nominal, |
---|
465 | * string, date, or relation-valued attribute. |
---|
466 | * @throws UnassignedDatasetException if the instance doesn't belong |
---|
467 | * to a dataset. |
---|
468 | */ |
---|
469 | public String stringValue(int attIndex); |
---|
470 | |
---|
471 | /** |
---|
472 | * Returns the value of a nominal, string, date, or relational attribute |
---|
473 | * for the instance as a string. |
---|
474 | * |
---|
475 | * @param att the attribute |
---|
476 | * @return the value as a string |
---|
477 | * @throws IllegalArgumentException if the attribute is not a nominal, |
---|
478 | * string, date, or relation-valued attribute. |
---|
479 | * @throws UnassignedDatasetException if the instance doesn't belong |
---|
480 | * to a dataset. |
---|
481 | */ |
---|
482 | public String stringValue(Attribute att); |
---|
483 | |
---|
484 | /** |
---|
485 | * Returns the values of each attribute as an array of doubles. |
---|
486 | * |
---|
487 | * @return an array containing all the instance attribute values |
---|
488 | */ |
---|
489 | public double[] toDoubleArray(); |
---|
490 | |
---|
491 | /** |
---|
492 | * Returns the description of one instance (without weight |
---|
493 | * appended). If the instance |
---|
494 | * doesn't have access to a dataset, it returns the internal |
---|
495 | * floating-point values. Quotes string |
---|
496 | * values that contain whitespace characters. |
---|
497 | * |
---|
498 | * This method is used by getRandomNumberGenerator() in |
---|
499 | * Instances.java in order to maintain backwards compatibility |
---|
500 | * with weka 3.4. |
---|
501 | * |
---|
502 | * @return the instance's description as a string |
---|
503 | */ |
---|
504 | public String toStringNoWeight(); |
---|
505 | |
---|
506 | /** |
---|
507 | * Returns the description of one value of the instance as a |
---|
508 | * string. If the instance doesn't have access to a dataset, it |
---|
509 | * returns the internal floating-point value. Quotes string |
---|
510 | * values that contain whitespace characters, or if they |
---|
511 | * are a question mark. |
---|
512 | * |
---|
513 | * @param attIndex the attribute's index |
---|
514 | * @return the value's description as a string |
---|
515 | */ |
---|
516 | public String toString(int attIndex); |
---|
517 | |
---|
518 | /** |
---|
519 | * Returns the description of one value of the instance as a |
---|
520 | * string. If the instance doesn't have access to a dataset it |
---|
521 | * returns the internal floating-point value. Quotes string |
---|
522 | * values that contain whitespace characters, or if they |
---|
523 | * are a question mark. |
---|
524 | * The given attribute has to belong to a dataset. |
---|
525 | * |
---|
526 | * @param att the attribute |
---|
527 | * @return the value's description as a string |
---|
528 | */ |
---|
529 | public String toString(Attribute att); |
---|
530 | |
---|
531 | /** |
---|
532 | * Returns an instance's attribute value in internal format. |
---|
533 | * |
---|
534 | * @param attIndex the attribute's index |
---|
535 | * @return the specified value as a double (If the corresponding |
---|
536 | * attribute is nominal (or a string) then it returns the value's index as a |
---|
537 | * double). |
---|
538 | */ |
---|
539 | public double value(int attIndex); |
---|
540 | |
---|
541 | /** |
---|
542 | * Returns an instance's attribute value in internal format, given |
---|
543 | * an index in the sparse representation. Same as value(int) for |
---|
544 | * a DenseInstance. |
---|
545 | * |
---|
546 | * @param indexOfIndex the index of the attribute's index |
---|
547 | * @return the specified value as a double (If the corresponding |
---|
548 | * attribute is nominal (or a string) then it returns the value's index as a |
---|
549 | * double). |
---|
550 | */ |
---|
551 | public double valueSparse(int indexOfIndex); |
---|
552 | |
---|
553 | /** |
---|
554 | * Returns an instance's attribute value in internal format. |
---|
555 | * The given attribute has to belong to a dataset. |
---|
556 | * |
---|
557 | * @param att the attribute |
---|
558 | * @return the specified value as a double (If the corresponding |
---|
559 | * attribute is nominal (or a string) then it returns the value's index as a |
---|
560 | * double). |
---|
561 | */ |
---|
562 | public double value(Attribute att); |
---|
563 | |
---|
564 | /** |
---|
565 | * Returns the instance's weight. |
---|
566 | * |
---|
567 | * @return the instance's weight as a double |
---|
568 | */ |
---|
569 | public double weight(); |
---|
570 | } |
---|