source: src/main/java/weka/core/AbstractInstance.java @ 8

Last change on this file since 8 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 23.3 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    AbstractInstance.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.core;
24
25import java.io.Serializable;
26import java.util.Enumeration;
27import java.util.ArrayList;
28
29/**
30 * Abstract class providing common functionality for the original
31 * instance implementations.
32 *
33 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
34 * @version $Revision: 5987 $
35 */
36public abstract class AbstractInstance
37  implements Instance, Serializable, RevisionHandler {
38 
39  /** for serialization */
40  static final long serialVersionUID = 1482635194499365155L;
41
42  /**
43   * The dataset the instance has access to.  Null if the instance
44   * doesn't have access to any dataset.  Only if an instance has
45   * access to a dataset, it knows about the actual attribute types. 
46   */
47  protected /*@spec_public@*/ Instances m_Dataset;
48
49  /** The instance's attribute values. */
50  protected /*@spec_public non_null@*/ double[] m_AttValues;
51
52  /** The instance's weight. */
53  protected double m_Weight;
54
55  /**
56   * Returns the attribute with the given index.
57   *
58   * @param index the attribute's index
59   * @return the attribute at the given position
60   * @throws UnassignedDatasetException if instance doesn't have access to a
61   * dataset
62   */ 
63  //@ requires m_Dataset != null;
64  public /*@pure@*/ Attribute attribute(int index) {
65   
66    if (m_Dataset == null) {
67      throw new UnassignedDatasetException("DenseInstance doesn't have access to a dataset!");
68    }
69    return m_Dataset.attribute(index);
70  }
71
72  /**
73   * Returns the attribute with the given index in the sparse representation.
74   *
75   * @param indexOfIndex the index of the attribute's index
76   * @return the attribute at the given position
77   * @throws UnassignedDatasetException if instance doesn't have access to a
78   * dataset
79   */ 
80  //@ requires m_Dataset != null;
81  public /*@pure@*/ Attribute attributeSparse(int indexOfIndex) {
82   
83    if (m_Dataset == null) {
84      throw new UnassignedDatasetException("DenseInstance doesn't have access to a dataset!");
85    }
86    return m_Dataset.attribute(index(indexOfIndex));
87  }
88
89  /**
90   * Returns class attribute.
91   *
92   * @return the class attribute
93   * @throws UnassignedDatasetException if the class is not set or the
94   * instance doesn't have access to a dataset
95   */
96  //@ requires m_Dataset != null;
97  public /*@pure@*/ Attribute classAttribute() {
98
99    if (m_Dataset == null) {
100      throw new UnassignedDatasetException("DenseInstance doesn't have access to a dataset!");
101    }
102    return m_Dataset.classAttribute();
103  }
104
105  /**
106   * Returns the class attribute's index.
107   *
108   * @return the class index as an integer
109   * @throws UnassignedDatasetException if instance doesn't have access to a dataset
110   */
111  //@ requires m_Dataset != null;
112  //@ ensures  \result == m_Dataset.classIndex();
113  public /*@pure@*/ int classIndex() {
114   
115    if (m_Dataset == null) {
116      throw new UnassignedDatasetException("DenseInstance doesn't have access to a dataset!");
117    }
118    return m_Dataset.classIndex();
119  }
120
121  /**
122   * Tests if an instance's class is missing.
123   *
124   * @return true if the instance's class is missing
125   * @throws UnassignedClassException if the class is not set or the instance doesn't
126   * have access to a dataset
127   */
128  //@ requires classIndex() >= 0;
129  public /*@pure@*/ boolean classIsMissing() {
130
131    if (classIndex() < 0) {
132      throw new UnassignedClassException("Class is not set!");
133    }
134    return isMissing(classIndex());
135  }
136
137  /**
138   * Returns an instance's class value in internal format. (ie. as a
139   * floating-point number)
140   *
141   * @return the corresponding value as a double (If the
142   * corresponding attribute is nominal (or a string) then it returns the
143   * value's index as a double).
144   * @throws UnassignedClassException if the class is not set or the instance doesn't
145   * have access to a dataset
146   */
147  //@ requires classIndex() >= 0;
148  public /*@pure@*/ double classValue() {
149   
150    if (classIndex() < 0) {
151      throw new UnassignedClassException("Class is not set!");
152    }
153    return value(classIndex());
154  }
155
156  /**
157   * Returns the dataset this instance has access to. (ie. obtains
158   * information about attribute types from) Null if the instance
159   * doesn't have access to a dataset.
160   *
161   * @return the dataset the instance has accesss to
162   */
163  //@ ensures \result == m_Dataset;
164  public /*@pure@*/ Instances dataset() {
165
166    return m_Dataset;
167  }
168
169  /**
170   * Deletes an attribute at the given position (0 to
171   * numAttributes() - 1). Only succeeds if the instance does not
172   * have access to any dataset because otherwise inconsistencies
173   * could be introduced.
174   *
175   * @param position the attribute's position
176   * @throws RuntimeException if the instance has access to a
177   * dataset
178   */
179  //@ requires m_Dataset != null;
180  public void deleteAttributeAt(int position) {
181
182    if (m_Dataset != null) {
183      throw new RuntimeException("DenseInstance has access to a dataset!");
184    }
185    forceDeleteAttributeAt(position);
186  }
187
188  /**
189   * Returns an enumeration of all the attributes.
190   *
191   * @return enumeration of all the attributes
192   * @throws UnassignedDatasetException if the instance doesn't
193   * have access to a dataset
194   */
195  //@ requires m_Dataset != null;
196  public /*@pure@*/ Enumeration enumerateAttributes() {
197
198    if (m_Dataset == null) {
199      throw new UnassignedDatasetException("DenseInstance doesn't have access to a dataset!");
200    }
201    return m_Dataset.enumerateAttributes();
202  }
203
204  /**
205   * Tests if the headers of two instances are equivalent.
206   *
207   * @param inst another instance
208   * @return true if the header of the given instance is
209   * equivalent to this instance's header
210   * @throws UnassignedDatasetException if instance doesn't have access to any
211   * dataset
212   */
213  //@ requires m_Dataset != null;
214  public /*@pure@*/ boolean equalHeaders(Instance inst) {
215
216    if (m_Dataset == null) {
217      throw new UnassignedDatasetException("DenseInstance doesn't have access to a dataset!");
218    }
219    return m_Dataset.equalHeaders(inst.dataset());
220  }
221
222  /**
223   * Checks if the headers of two instances are equivalent.
224   * If not, then returns a message why they differ.
225   *
226   * @param dataset     another instance
227   * @return            null if the header of the given instance is equivalent
228   *                    to this instance's header, otherwise a message with details on
229   *                    why they differ
230   */
231  public String equalHeadersMsg(Instance inst) {
232    if (m_Dataset == null)
233      throw new UnassignedDatasetException("DenseInstance doesn't have access to a dataset!");
234
235    return m_Dataset.equalHeadersMsg(inst.dataset());
236  }
237
238  /**
239   * Tests whether an instance has a missing value. Skips the class attribute if set.
240   * @return true if instance has a missing value.
241   * @throws UnassignedDatasetException if instance doesn't have access to any
242   * dataset
243   */
244  //@ requires m_Dataset != null;
245  public /*@pure@*/ boolean hasMissingValue() {
246   
247    if (m_Dataset == null) {
248      throw new UnassignedDatasetException("DenseInstance doesn't have access to a dataset!");
249    }
250    for (int i = 0; i < numValues(); i++) {
251      if (index(i) != classIndex()) {
252        if (isMissingSparse(i)) {
253          return true;
254        }
255      }
256    }
257    return false;
258  }
259
260  /**
261   * Inserts an attribute at the given position (0 to
262   * numAttributes()). Only succeeds if the instance does not
263   * have access to any dataset because otherwise inconsistencies
264   * could be introduced.
265   *
266   * @param position the attribute's position
267   * @throws RuntimeException if the instance has accesss to a
268   * dataset
269   * @throws IllegalArgumentException if the position is out of range
270   */
271  //@ requires m_Dataset == null;
272  //@ requires 0 <= position && position <= numAttributes();
273  public void insertAttributeAt(int position) {
274
275    if (m_Dataset != null) {
276      throw new RuntimeException("DenseInstance has accesss to a dataset!");
277    }
278    if ((position < 0) ||
279        (position > numAttributes())) {
280      throw new IllegalArgumentException("Can't insert attribute: index out "+
281                                         "of range");
282    }
283    forceInsertAttributeAt(position);
284  }
285
286  /**
287   * Tests if a specific value is "missing".
288   *
289   * @param attIndex the attribute's index
290   * @return true if the value is "missing"
291   */
292  public /*@pure@*/ boolean isMissing(int attIndex) {
293
294    if (Utils.isMissingValue(value(attIndex))) {
295      return true;
296    }
297    return false;
298  }
299
300  /**
301   * Tests if a specific value is "missing", given
302   * an index in the sparse representation.
303   *
304   * @param indexOfIndex the index of the attribute's index
305   * @return true if the value is "missing"
306   */
307  public /*@pure@*/ boolean isMissingSparse(int indexOfIndex) {
308
309    if (Utils.isMissingValue(valueSparse(indexOfIndex))) {
310      return true;
311    }
312    return false;
313  }
314
315  /**
316   * Tests if a specific value is "missing".
317   * The given attribute has to belong to a dataset.
318   *
319   * @param att the attribute
320   * @return true if the value is "missing"
321   */
322  public /*@pure@*/ boolean isMissing(Attribute att) {
323
324    return isMissing(att.index());
325  }
326
327  /**
328   * Returns the number of class labels.
329   *
330   * @return the number of class labels as an integer if the
331   * class attribute is nominal, 1 otherwise.
332   * @throws UnassignedDatasetException if instance doesn't have access to any
333   * dataset
334   */
335  //@ requires m_Dataset != null;
336  public /*@pure@*/ int numClasses() {
337   
338    if (m_Dataset == null) {
339      throw new UnassignedDatasetException("DenseInstance doesn't have access to a dataset!");
340    }
341    return m_Dataset.numClasses();
342  }
343
344  /**
345   * Sets the class value of an instance to be "missing". A deep copy of
346   * the vector of attribute values is performed before the
347   * value is set to be missing.
348   *
349   * @throws UnassignedClassException if the class is not set
350   * @throws UnassignedDatasetException if the instance doesn't
351   * have access to a dataset
352   */
353  //@ requires classIndex() >= 0;
354  public void setClassMissing() {
355
356    if (classIndex() < 0) {
357      throw new UnassignedClassException("Class is not set!");
358    }
359    setMissing(classIndex());
360  }
361
362  /**
363   * Sets the class value of an instance to the given value (internal
364   * floating-point format).  A deep copy of the vector of attribute
365   * values is performed before the value is set.
366   *
367   * @param value the new attribute value (If the corresponding
368   * attribute is nominal (or a string) then this is the new value's
369   * index as a double). 
370   * @throws UnassignedClassException if the class is not set
371   * @throws UnaddignedDatasetException if the instance doesn't
372   * have access to a dataset
373   */
374  //@ requires classIndex() >= 0;
375  public void setClassValue(double value) {
376
377    if (classIndex() < 0) {
378      throw new UnassignedClassException("Class is not set!");
379    }
380    setValue(classIndex(), value);
381  }
382
383  /**
384   * Sets the class value of an instance to the given value. A deep
385   * copy of the vector of attribute values is performed before the
386   * value is set.
387   *
388   * @param value the new class value (If the class
389   * is a string attribute and the value can't be found,
390   * the value is added to the attribute).
391   * @throws UnassignedClassException if the class is not set
392   * @throws UnassignedDatasetException if the dataset is not set
393   * @throws IllegalArgumentException if the attribute is not
394   * nominal or a string, or the value couldn't be found for a nominal
395   * attribute
396   */
397  //@ requires classIndex() >= 0;
398  public final void setClassValue(String value) {
399
400    if (classIndex() < 0) {
401      throw new UnassignedClassException("Class is not set!");
402    }
403    setValue(classIndex(), value);
404  }
405
406  /**
407   * Sets the reference to the dataset. Does not check if the instance
408   * is compatible with the dataset. Note: the dataset does not know
409   * about this instance. If the structure of the dataset's header
410   * gets changed, this instance will not be adjusted automatically.
411   *
412   * @param instances the reference to the dataset
413   */
414  public final void setDataset(Instances instances) {
415   
416    m_Dataset = instances;
417  }
418
419  /**
420   * Sets a specific value to be "missing". Performs a deep copy
421   * of the vector of attribute values before the value is set to
422   * be missing.
423   *
424   * @param attIndex the attribute's index
425   */
426  public final void setMissing(int attIndex) {
427
428    setValue(attIndex, Utils.missingValue());
429  }
430
431  /**
432   * Sets a specific value to be "missing". Performs a deep copy
433   * of the vector of attribute values before the value is set to
434   * be missing. The given attribute has to belong to a dataset.
435   *
436   * @param att the attribute
437   */
438  public final void setMissing(Attribute att) {
439
440    setMissing(att.index());
441  }
442
443  /**
444   * Sets a value of a nominal or string attribute to the given
445   * value. Performs a deep copy of the vector of attribute values
446   * before the value is set.
447   *
448   * @param attIndex the attribute's index
449   * @param value the new attribute value (If the attribute
450   * is a string attribute and the value can't be found,
451   * the value is added to the attribute).
452   * @throws UnassignedDatasetException if the dataset is not set
453   * @throws IllegalArgumentException if the selected
454   * attribute is not nominal or a string, or the supplied value couldn't
455   * be found for a nominal attribute
456   */
457  //@ requires m_Dataset != null;
458  public final void setValue(int attIndex, String value) {
459   
460    int valIndex;
461
462    if (m_Dataset == null) {
463      throw new UnassignedDatasetException("DenseInstance doesn't have access to a dataset!");
464    }
465    if (!attribute(attIndex).isNominal() &&
466        !attribute(attIndex).isString()) {
467      throw new IllegalArgumentException("Attribute neither nominal nor string!");
468    }
469    valIndex = attribute(attIndex).indexOfValue(value);
470    if (valIndex == -1) {
471      if (attribute(attIndex).isNominal()) {
472        throw new IllegalArgumentException("Value not defined for given nominal attribute!");
473      } else {
474        attribute(attIndex).forceAddValue(value);
475        valIndex = attribute(attIndex).indexOfValue(value);
476      }
477    }
478    setValue(attIndex, (double)valIndex); 
479  }
480
481  /**
482   * Sets a specific value in the instance to the given value
483   * (internal floating-point format). Performs a deep copy of the
484   * vector of attribute values before the value is set, so if you are
485   * planning on calling setValue many times it may be faster to
486   * create a new instance using toDoubleArray.  The given attribute
487   * has to belong to a dataset.
488   *
489   * @param att the attribute
490   * @param value the new attribute value (If the corresponding
491   * attribute is nominal (or a string) then this is the new value's
492   * index as a double). 
493   */
494  public final void setValue(Attribute att, double value) {
495
496    setValue(att.index(), value);
497  }
498
499  /**
500   * Sets a value of an nominal or string attribute to the given
501   * value. Performs a deep copy of the vector of attribute values
502   * before the value is set, so if you are planning on calling setValue many
503   * times it may be faster to create a new instance using toDoubleArray.
504   * The given attribute has to belong to a dataset.
505   *
506   * @param att the attribute
507   * @param value the new attribute value (If the attribute
508   * is a string attribute and the value can't be found,
509   * the value is added to the attribute).
510   * @throws IllegalArgumentException if the the attribute is not
511   * nominal or a string, or the value couldn't be found for a nominal
512   * attribute
513   */
514  public final void setValue(Attribute att, String value) {
515
516    if (!att.isNominal() &&
517        !att.isString()) {
518      throw new IllegalArgumentException("Attribute neither nominal nor string!");
519    }
520    int valIndex = att.indexOfValue(value);
521    if (valIndex == -1) {
522      if (att.isNominal()) {
523        throw new IllegalArgumentException("Value not defined for given nominal attribute!");
524      } else {
525        att.forceAddValue(value);
526        valIndex = att.indexOfValue(value);
527      }
528    }
529    setValue(att.index(), (double)valIndex);
530  }
531 
532  /**
533   * Sets the weight of an instance.
534   *
535   * @param weight the weight
536   */
537  public final void setWeight(double weight) {
538
539    m_Weight = weight;
540  }
541
542  /**
543   * Returns the relational value of a relational attribute.
544   *
545   * @param attIndex the attribute's index
546   * @return the corresponding relation as an Instances object
547   * @throws IllegalArgumentException if the attribute is not a
548   * relation-valued attribute
549   * @throws UnassignedDatasetException if the instance doesn't belong
550   * to a dataset.
551   */
552  //@ requires m_Dataset != null;
553  public final /*@pure@*/ Instances relationalValue(int attIndex) {
554
555    if (m_Dataset == null) {
556      throw new UnassignedDatasetException("DenseInstance doesn't have access to a dataset!");
557    } 
558    return relationalValue(m_Dataset.attribute(attIndex));
559  }
560
561
562  /**
563   * Returns the relational value of a relational attribute.
564   *
565   * @param att the attribute
566   * @return the corresponding relation as an Instances object
567   * @throws IllegalArgumentException if the attribute is not a
568   * relation-valued attribute
569   * @throws UnassignedDatasetException if the instance doesn't belong
570   * to a dataset.
571   */
572  public final /*@pure@*/ Instances relationalValue(Attribute att) {
573
574    int attIndex = att.index();
575    if (att.isRelationValued()) {
576      return att.relation((int) value(attIndex));
577    } else {
578      throw new IllegalArgumentException("Attribute isn't relation-valued!");
579    }
580  }
581
582  /**
583   * Returns the value of a nominal, string, date, or relational attribute
584   * for the instance as a string.
585   *
586   * @param attIndex the attribute's index
587   * @return the value as a string
588   * @throws IllegalArgumentException if the attribute is not a nominal,
589   * string, date, or relation-valued attribute.
590   * @throws UnassignedDatasetException if the instance doesn't belong
591   * to a dataset.
592   */
593  //@ requires m_Dataset != null;
594  public final /*@pure@*/ String stringValue(int attIndex) {
595
596    if (m_Dataset == null) {
597      throw new UnassignedDatasetException("DenseInstance doesn't have access to a dataset!");
598    } 
599    return stringValue(m_Dataset.attribute(attIndex));
600  }
601
602
603  /**
604   * Returns the value of a nominal, string, date, or relational attribute
605   * for the instance as a string.
606   *
607   * @param att the attribute
608   * @return the value as a string
609   * @throws IllegalArgumentException if the attribute is not a nominal,
610   * string, date, or relation-valued attribute.
611   * @throws UnassignedDatasetException if the instance doesn't belong
612   * to a dataset.
613   */
614  public final /*@pure@*/ String stringValue(Attribute att) {
615
616    int attIndex = att.index();
617    switch (att.type()) {
618    case Attribute.NOMINAL:
619    case Attribute.STRING:
620      return att.value((int) value(attIndex));
621    case Attribute.DATE:
622      return att.formatDate(value(attIndex));
623    case Attribute.RELATIONAL:
624      return att.relation((int) value(attIndex)).stringWithoutHeader();
625    default:
626      throw new IllegalArgumentException("Attribute isn't nominal, string or date!");
627    }
628  }
629
630  /**
631   * Returns the description of one instance. If the instance
632   * doesn't have access to a dataset, it returns the internal
633   * floating-point values. Quotes string
634   * values that contain whitespace characters.
635   *
636   * @return the instance's description as a string
637   */
638  public String toString() {
639
640    StringBuffer text = new StringBuffer(toStringNoWeight());
641
642    if (m_Weight != 1.0) {
643      text.append(",{" + Utils.doubleToString(m_Weight, 6) + "}");
644    }
645
646    return text.toString();
647  }
648
649  /**
650   * Returns the description of one value of the instance as a
651   * string. If the instance doesn't have access to a dataset, it
652   * returns the internal floating-point value. Quotes string
653   * values that contain whitespace characters, or if they
654   * are a question mark.
655   *
656   * @param attIndex the attribute's index
657   * @return the value's description as a string
658   */
659  public final /*@pure@*/ String toString(int attIndex) {
660
661   StringBuffer text = new StringBuffer();
662   
663   if (isMissing(attIndex)) {
664     text.append("?");
665   } else {
666     if (m_Dataset == null) {
667       text.append(Utils.doubleToString(value(attIndex),6));
668     } else {
669       switch (m_Dataset.attribute(attIndex).type()) {
670       case Attribute.NOMINAL:
671       case Attribute.STRING:
672       case Attribute.DATE:
673       case Attribute.RELATIONAL:
674         text.append(Utils.quote(stringValue(attIndex)));
675         break;
676       case Attribute.NUMERIC:
677         text.append(Utils.doubleToString(value(attIndex),6));
678         break;
679       default:
680         throw new IllegalStateException("Unknown attribute type");
681       }
682     }
683   }
684   return text.toString();
685  }
686
687  /**
688   * Returns the description of one value of the instance as a
689   * string. If the instance doesn't have access to a dataset it
690   * returns the internal floating-point value. Quotes string
691   * values that contain whitespace characters, or if they
692   * are a question mark.
693   * The given attribute has to belong to a dataset.
694   *
695   * @param att the attribute
696   * @return the value's description as a string
697   */
698  public final String toString(Attribute att) {
699   
700   return toString(att.index());
701  }
702
703  /**
704   * Returns an instance's attribute value in internal format.
705   * The given attribute has to belong to a dataset.
706   *
707   * @param att the attribute
708   * @return the specified value as a double (If the corresponding
709   * attribute is nominal (or a string) then it returns the value's index as a
710   * double).
711   */
712  public /*@pure@*/ double value(Attribute att) {
713
714    return value(att.index());
715  }
716
717  /**
718   * Returns an instance's attribute value in internal format, given
719   * an index in the sparse representation.
720   *
721   * @param indexOfIndex the index of the attribute's index
722   * @return the specified value as a double (If the corresponding
723   * attribute is nominal (or a string) then it returns the value's index as a
724   * double).
725   */
726  public /*@pure@*/ double valueSparse(int indexOfIndex) {
727
728    return m_AttValues[indexOfIndex];
729  } 
730
731  /**
732   * Returns the instance's weight.
733   *
734   * @return the instance's weight as a double
735   */
736  public final /*@pure@*/ double weight() {
737
738    return m_Weight;
739  }
740 
741  /**
742   * Returns the revision string.
743   *
744   * @return            the revision
745   */
746  public String getRevision() {
747    return RevisionUtils.extract("$Revision: 5987 $");
748  }
749
750  /**
751   * Deletes an attribute at the given position (0 to
752   * numAttributes() - 1).
753   *
754   * @param position the attribute's position
755   */
756  protected abstract void forceDeleteAttributeAt(int position);
757
758  /**
759   * Inserts an attribute at the given position
760   * (0 to numAttributes()) and sets its value to be missing.
761   *
762   * @param position the attribute's position
763   */
764  protected abstract void forceInsertAttributeAt(int position);
765}
Note: See TracBrowser for help on using the repository browser.