source: src/main/java/weka/core/BinarySparseInstance.java @ 27

Last change on this file since 27 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 19.6 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    BinarySparseInstance.java
19 *    Copyright (C) 2002 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.core;
24
25import java.util.Enumeration;
26import java.util.ArrayList;
27
28/**
29 * Class for storing a binary-data-only instance as a sparse vector. A
30 * sparse instance only requires storage for those attribute values
31 * that are non-zero.  Since the objective is to reduce storage
32 * requirements for datasets with large numbers of default values,
33 * this also includes nominal attributes -- the first nominal value
34 * (i.e. that which has index 0) will not require explicit storage, so
35 * rearrange your nominal attribute value orderings if
36 * necessary. Missing values are not supported, and will be treated as
37 * 1 (true).
38 *
39 * @version $Revision: 5987 $
40 */
41public class BinarySparseInstance
42  extends SparseInstance {
43
44  /** for serialization */
45  private static final long serialVersionUID = -5297388762342528737L;
46
47  /**
48   * Constructor that generates a sparse instance from the given
49   * instance. Reference to the dataset is set to null.
50   * (ie. the instance doesn't have access to information about the
51   * attribute types)
52   *
53   * @param instance the instance from which the attribute values
54   * and the weight are to be copied
55   */
56  public BinarySparseInstance(Instance instance) {
57   
58    m_Weight = instance.weight();
59    m_Dataset = null;
60    m_NumAttributes = instance.numAttributes();
61    if (instance instanceof SparseInstance) {
62      m_AttValues = null;
63      m_Indices = ((SparseInstance)instance).m_Indices;
64    } else {
65      int[] tempIndices = new int[instance.numAttributes()];
66      int vals = 0;
67      for (int i = 0; i < instance.numAttributes(); i++) {
68        if (instance.value(i) != 0) {
69          tempIndices[vals] = i;
70          vals++;
71        }
72      }
73      m_AttValues = null;
74      m_Indices = new int[vals];
75      System.arraycopy(tempIndices, 0, m_Indices, 0, vals);
76    }
77  }
78 
79  /**
80   * Constructor that copies the info from the given instance.
81   * Reference to the dataset is set to null.
82   * (ie. the instance doesn't have access to information about the
83   * attribute types)
84   *
85   * @param instance the instance from which the attribute
86   * info is to be copied
87   */
88  public BinarySparseInstance(SparseInstance instance) {
89   
90    m_AttValues = null;
91    m_Indices = instance.m_Indices;
92    m_Weight = instance.m_Weight;
93    m_NumAttributes = instance.m_NumAttributes;
94    m_Dataset = null;
95  }
96
97  /**
98   * Constructor that generates a sparse instance from the given
99   * parameters. Reference to the dataset is set to null.
100   * (ie. the instance doesn't have access to information about the
101   * attribute types)
102   *
103   * @param weight the instance's weight
104   * @param attValues a vector of attribute values
105   */
106  public BinarySparseInstance(double weight, double[] attValues) {
107   
108    m_Weight = weight;
109    m_Dataset = null;
110    m_NumAttributes = attValues.length;
111    int[] tempIndices = new int[m_NumAttributes];
112    int vals = 0;
113    for (int i = 0; i < m_NumAttributes; i++) {
114      if (attValues[i] != 0) {
115        tempIndices[vals] = i;
116        vals++;
117      }
118    }
119    m_AttValues = null;
120    m_Indices = new int[vals];
121    System.arraycopy(tempIndices, 0, m_Indices, 0, vals);
122  }
123 
124  /**
125   * Constructor that inititalizes instance variable with given
126   * values. Reference to the dataset is set to null. (ie. the instance
127   * doesn't have access to information about the attribute types)
128   *
129   * @param weight the instance's weight
130   * @param indices the indices of the given values in the full vector
131   * @param maxNumValues the maximium number of values that can be stored
132   */
133  public BinarySparseInstance(double weight,
134                              int[] indices, int maxNumValues) {
135   
136    m_AttValues = null;
137    m_Indices = indices;
138    m_Weight = weight;
139    m_NumAttributes = maxNumValues;
140    m_Dataset = null;
141  }
142
143  /**
144   * Constructor of an instance that sets weight to one, all values to
145   * 1, and the reference to the dataset to null. (ie. the instance
146   * doesn't have access to information about the attribute types)
147   *
148   * @param numAttributes the size of the instance
149   */
150  public BinarySparseInstance(int numAttributes) {
151   
152    m_AttValues = null;
153    m_NumAttributes = numAttributes;
154    m_Indices = new int[numAttributes];
155    for (int i = 0; i < m_Indices.length; i++) {
156      m_Indices[i] = i;
157    }
158    m_Weight = 1;
159    m_Dataset = null;
160  }
161
162  /**
163   * Produces a shallow copy of this instance. The copy doesn't have
164   * access to a dataset.
165   *
166   * @return the shallow copy
167   */
168  public Object copy() {
169
170    return new BinarySparseInstance(this);
171  }
172
173  /**
174   * Merges this instance with the given instance and returns
175   * the result. Dataset is set to null.
176   *
177   * @param inst the instance to be merged with this one
178   * @return the merged instances
179   */
180  public Instance mergeInstance(Instance inst) {
181
182    int [] indices = new int [numValues() + inst.numValues()];
183
184    int m = 0;
185    for (int j = 0; j < numValues(); j++) {
186      indices[m++] = index(j);
187    }
188    for (int j = 0; j < inst.numValues(); j++) {
189      if (inst.valueSparse(j) != 0) {
190        indices[m++] = numAttributes() + inst.index(j);
191      }
192    }
193
194    if (m != indices.length) {
195      // Need to truncate
196      int [] newInd = new int [m];
197      System.arraycopy(indices, 0, newInd, 0, m);
198      indices = newInd;
199    }
200    return new BinarySparseInstance(1.0, indices, numAttributes() +
201                                    inst.numAttributes());
202  }
203
204  /**
205   * Does nothing, since we don't support missing values.
206   *
207   * @param array containing the means and modes
208   */
209  public void replaceMissingValues(double[] array) {
210         
211    // Does nothing, since we don't store missing values.
212  }
213
214  /**
215   * Sets a specific value in the instance to the given value
216   * (internal floating-point format). Performs a deep copy
217   * of the vector of attribute values before the value is set.
218   *
219   * @param attIndex the attribute's index
220   * @param value the new attribute value (If the corresponding
221   * attribute is nominal (or a string) then this is the new value's
222   * index as a double). 
223   */
224  public void setValue(int attIndex, double value) {
225
226    int index = locateIndex(attIndex);
227   
228    if ((index >= 0) && (m_Indices[index] == attIndex)) {
229      if (value == 0) {
230        int[] tempIndices = new int[m_Indices.length - 1];
231        System.arraycopy(m_Indices, 0, tempIndices, 0, index);
232        System.arraycopy(m_Indices, index + 1, tempIndices, index, 
233                         m_Indices.length - index - 1);
234        m_Indices = tempIndices;
235      }
236    } else {
237      if (value != 0) {
238        int[] tempIndices = new int[m_Indices.length + 1];
239        System.arraycopy(m_Indices, 0, tempIndices, 0, index + 1);
240        tempIndices[index + 1] = attIndex;
241        System.arraycopy(m_Indices, index + 1, tempIndices, index + 2, 
242                         m_Indices.length - index - 1);
243        m_Indices = tempIndices;
244      }
245    }
246  }
247
248  /**
249   * Sets a specific value in the instance to the given value
250   * (internal floating-point format). Performs a deep copy
251   * of the vector of attribute values before the value is set.
252   *
253   * @param indexOfIndex the index of the attribute's index
254   * @param value the new attribute value (If the corresponding
255   * attribute is nominal (or a string) then this is the new value's
256   * index as a double). 
257   */
258  public void setValueSparse(int indexOfIndex, double value) {
259
260    if (value == 0) {
261      int[] tempIndices = new int[m_Indices.length - 1];
262      System.arraycopy(m_Indices, 0, tempIndices, 0, indexOfIndex);
263      System.arraycopy(m_Indices, indexOfIndex + 1, tempIndices, indexOfIndex, 
264                       m_Indices.length - indexOfIndex - 1);
265      m_Indices = tempIndices;
266    }
267  }
268
269  /**
270   * Returns the values of each attribute as an array of doubles.
271   *
272   * @return an array containing all the instance attribute values
273   */
274  public double[] toDoubleArray() {
275
276    double[] newValues = new double[m_NumAttributes];
277    for (int i = 0; i < m_Indices.length; i++) {
278      newValues[m_Indices[i]] = 1.0;
279    }
280    return newValues;
281  }
282
283  /**
284   * Returns the description of one instance in sparse format.
285   * If the instance doesn't have access to a dataset, it returns the
286   * internal floating-point values. Quotes string values that contain
287   * whitespace characters.
288   *
289   * @return the instance's description as a string
290   */
291  public String toString() {
292
293    StringBuffer text = new StringBuffer();
294   
295    text.append('{');
296    for (int i = 0; i < m_Indices.length; i++) {
297      if (i > 0) {
298        text.append(",");
299      }
300      if (m_Dataset == null) {
301        text.append(m_Indices[i] + " 1");
302      } else {
303        if (m_Dataset.attribute(m_Indices[i]).isNominal() || 
304            m_Dataset.attribute(m_Indices[i]).isString()) {
305          text.append(m_Indices[i] + " " +
306                      Utils.quote(m_Dataset.attribute(m_Indices[i]).
307                                  value(1)));
308        } else {
309          text.append(m_Indices[i] + " 1");
310        }
311      }
312    }
313    text.append('}');
314    if (m_Weight != 1.0) {
315      text.append(",{" + Utils.doubleToString(m_Weight, 6) + "}");
316    }
317    return text.toString();
318  }
319
320  /**
321   * Returns an instance's attribute value in internal format.
322   *
323   * @param attIndex the attribute's index
324   * @return the specified value as a double (If the corresponding
325   * attribute is nominal (or a string) then it returns the value's index as a
326   * double).
327   */
328  public double value(int attIndex) {
329
330    int index = locateIndex(attIndex);
331    if ((index >= 0) && (m_Indices[index] == attIndex)) {
332      return 1.0;
333    } else {
334      return 0.0;
335    }
336  } 
337
338  /**
339   * Returns an instance's attribute value in internal format.
340   * Does exactly the same thing as value() if applied to an Instance.
341   *
342   * @param indexOfIndex the index of the attribute's index
343   * @return the specified value as a double (If the corresponding
344   * attribute is nominal (or a string) then it returns the value's index as a
345   * double).
346   */
347  public final double valueSparse(int indexOfIndex) {
348
349    int index = m_Indices[indexOfIndex]; // Throws if out of bounds
350    return 1;
351  } 
352
353  /**
354   * Deletes an attribute at the given position (0 to
355   * numAttributes() - 1).
356   *
357   * @param position the attribute's position
358   */
359  protected void forceDeleteAttributeAt(int position) {
360
361    int index = locateIndex(position);
362
363    m_NumAttributes--;
364    if ((index >= 0) && (m_Indices[index] == position)) {
365      int[] tempIndices = new int[m_Indices.length - 1];
366      System.arraycopy(m_Indices, 0, tempIndices, 0, index);
367      for (int i = index; i < m_Indices.length - 1; i++) {
368        tempIndices[i] = m_Indices[i + 1] - 1;
369      }
370      m_Indices = tempIndices;
371    } else {
372      int[] tempIndices = new int[m_Indices.length];
373      System.arraycopy(m_Indices, 0, tempIndices, 0, index + 1);
374      for (int i = index + 1; i < m_Indices.length - 1; i++) {
375        tempIndices[i] = m_Indices[i] - 1;
376      }
377      m_Indices = tempIndices;
378    }
379  }
380
381  /**
382   * Inserts an attribute at the given position
383   * (0 to numAttributes()) and sets its value to 1.
384   *
385   * @param position the attribute's position
386   */
387  protected void forceInsertAttributeAt(int position)  {
388
389    int index = locateIndex(position);
390
391    m_NumAttributes++;
392    if ((index >= 0) && (m_Indices[index] == position)) {
393      int[] tempIndices = new int[m_Indices.length + 1];
394      System.arraycopy(m_Indices, 0, tempIndices, 0, index);
395      tempIndices[index] = position;
396      for (int i = index; i < m_Indices.length; i++) {
397        tempIndices[i + 1] = m_Indices[i] + 1;
398      }
399      m_Indices = tempIndices;
400    } else {
401      int[] tempIndices = new int[m_Indices.length + 1];
402      System.arraycopy(m_Indices, 0, tempIndices, 0, index + 1);
403      tempIndices[index + 1] = position;
404      for (int i = index + 1; i < m_Indices.length; i++) {
405        tempIndices[i + 1] = m_Indices[i] + 1;
406      }
407      m_Indices = tempIndices;
408    }
409  }
410
411  /**
412   * Main method for testing this class.
413   *
414   * @param options     the command line options - ignored
415   */
416  public static void main(String[] options) {
417
418    try {
419
420      // Create numeric attributes "length" and "weight"
421      Attribute length = new Attribute("length");
422      Attribute weight = new Attribute("weight");
423     
424      // Create vector to hold nominal values "first", "second", "third"
425      ArrayList<String> my_nominal_values = new ArrayList<String>(3); 
426      my_nominal_values.add("first"); 
427      my_nominal_values.add("second"); 
428     
429      // Create nominal attribute "position"
430      Attribute position = new Attribute("position", my_nominal_values);
431     
432      // Create vector of the above attributes
433      ArrayList<Attribute> attributes = new ArrayList<Attribute>(3);
434      attributes.add(length);
435      attributes.add(weight);
436      attributes.add(position);
437     
438      // Create the empty dataset "race" with above attributes
439      Instances race = new Instances("race", attributes, 0);
440     
441      // Make position the class attribute
442      race.setClassIndex(position.index());
443     
444      // Create empty instance with three attribute values
445      BinarySparseInstance inst = new BinarySparseInstance(3);
446     
447      // Set instance's values for the attributes "length", "weight", and "position"
448      inst.setValue(length, 5.3);
449      inst.setValue(weight, 300);
450      inst.setValue(position, "first");
451     
452      // Set instance's dataset to be the dataset "race"
453      inst.setDataset(race);
454     
455      // Print the instance
456      System.out.println("The instance: " + inst);
457     
458      // Print the first attribute
459      System.out.println("First attribute: " + inst.attribute(0));
460     
461      // Print the class attribute
462      System.out.println("Class attribute: " + inst.classAttribute());
463     
464      // Print the class index
465      System.out.println("Class index: " + inst.classIndex());
466     
467      // Say if class is missing
468      System.out.println("Class is missing: " + inst.classIsMissing());
469     
470      // Print the instance's class value in internal format
471      System.out.println("Class value (internal format): " + inst.classValue());
472     
473      // Print a shallow copy of this instance
474      SparseInstance copy = (SparseInstance) inst.copy();
475      System.out.println("Shallow copy: " + copy);
476     
477      // Set dataset for shallow copy
478      copy.setDataset(inst.dataset());
479      System.out.println("Shallow copy with dataset set: " + copy);
480
481      // Print out all values in internal format
482      System.out.print("All stored values in internal format: ");
483      for (int i = 0; i < inst.numValues(); i++) {
484        if (i > 0) {
485          System.out.print(",");
486        }
487        System.out.print(inst.valueSparse(i));
488      }
489      System.out.println();
490
491      // Set all values to zero
492      System.out.print("All values set to zero: ");
493      while (inst.numValues() > 0) {
494        inst.setValueSparse(0, 0);
495      }
496      for (int i = 0; i < inst.numValues(); i++) {
497        if (i > 0) {
498          System.out.print(",");
499        }
500        System.out.print(inst.valueSparse(i));
501      }
502      System.out.println();
503
504      // Set all values to one
505      System.out.print("All values set to one: ");
506      for (int i = 0; i < inst.numAttributes(); i++) {
507        inst.setValue(i, 1);
508      }
509      for (int i = 0; i < inst.numValues(); i++) {
510        if (i > 0) {
511          System.out.print(",");
512        }
513        System.out.print(inst.valueSparse(i));
514      }
515      System.out.println();
516
517      // Unset dataset for copy, delete first attribute, and insert it again
518      copy.setDataset(null);
519      copy.deleteAttributeAt(0);
520      copy.insertAttributeAt(0);
521      copy.setDataset(inst.dataset());
522      System.out.println("Copy with first attribute deleted and inserted: " + copy); 
523
524      // Same for second attribute
525      copy.setDataset(null);
526      copy.deleteAttributeAt(1);
527      copy.insertAttributeAt(1);
528      copy.setDataset(inst.dataset());
529      System.out.println("Copy with second attribute deleted and inserted: " + copy); 
530
531      // Same for last attribute
532      copy.setDataset(null);
533      copy.deleteAttributeAt(2);
534      copy.insertAttributeAt(2);
535      copy.setDataset(inst.dataset());
536      System.out.println("Copy with third attribute deleted and inserted: " + copy); 
537     
538      // Enumerate attributes (leaving out the class attribute)
539      System.out.println("Enumerating attributes (leaving out class):");
540      Enumeration enu = inst.enumerateAttributes();
541      while (enu.hasMoreElements()) {
542        Attribute att = (Attribute) enu.nextElement();
543        System.out.println(att);
544      }
545     
546      // Headers are equivalent?
547      System.out.println("Header of original and copy equivalent: " +
548                         inst.equalHeaders(copy));
549
550      // Test for missing values
551      System.out.println("Length of copy missing: " + copy.isMissing(length));
552      System.out.println("Weight of copy missing: " + copy.isMissing(weight.index()));
553      System.out.println("Length of copy missing: " + 
554                         Utils.isMissingValue(copy.value(length)));
555
556      // Prints number of attributes and classes
557      System.out.println("Number of attributes: " + copy.numAttributes());
558      System.out.println("Number of classes: " + copy.numClasses());
559
560      // Replace missing values
561      double[] meansAndModes = {2, 3, 0};
562      copy.replaceMissingValues(meansAndModes);
563      System.out.println("Copy with missing value replaced: " + copy);
564
565      // Setting and getting values and weights
566      copy.setClassMissing();
567      System.out.println("Copy with missing class: " + copy);
568      copy.setClassValue(0);
569      System.out.println("Copy with class value set to first value: " + copy);
570      copy.setClassValue("second");
571      System.out.println("Copy with class value set to \"second\": " + copy);
572      copy.setMissing(1);
573      System.out.println("Copy with second attribute set to be missing: " + copy);
574      copy.setMissing(length);
575      System.out.println("Copy with length set to be missing: " + copy);
576      copy.setValue(0, 0);
577      System.out.println("Copy with first attribute set to 0: " + copy);
578      copy.setValue(weight, 1);
579      System.out.println("Copy with weight attribute set to 1: " + copy);
580      copy.setValue(position, "second");
581      System.out.println("Copy with position set to \"second\": " + copy);
582      copy.setValue(2, "first");
583      System.out.println("Copy with last attribute set to \"first\": " + copy);
584      System.out.println("Current weight of instance copy: " + copy.weight());
585      copy.setWeight(2);
586      System.out.println("Current weight of instance copy (set to 2): " + copy.weight());
587      System.out.println("Last value of copy: " + copy.toString(2));
588      System.out.println("Value of position for copy: " + copy.toString(position));
589      System.out.println("Last value of copy (internal format): " + copy.value(2));
590      System.out.println("Value of position for copy (internal format): " + 
591                         copy.value(position));
592    } catch (Exception e) {
593      e.printStackTrace();
594    }
595  }
596 
597  /**
598   * Returns the revision string.
599   *
600   * @return            the revision
601   */
602  public String getRevision() {
603    return RevisionUtils.extract("$Revision: 5987 $");
604  }
605}
Note: See TracBrowser for help on using the repository browser.