source: src/main/java/weka/core/AttributeStats.java @ 20

Last change on this file since 20 was 4, checked in by gnappo, 14 years ago

Import di weka.

File size: 4.9 KB
Line 
1/*
2 *    This program is free software; you can redistribute it and/or modify
3 *    it under the terms of the GNU General Public License as published by
4 *    the Free Software Foundation; either version 2 of the License, or
5 *    (at your option) any later version.
6 *
7 *    This program is distributed in the hope that it will be useful,
8 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
9 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 *    GNU General Public License for more details.
11 *
12 *    You should have received a copy of the GNU General Public License
13 *    along with this program; if not, write to the Free Software
14 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15 */
16
17/*
18 *    AttributeStats.java
19 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
20 *
21 */
22
23package weka.core;
24
25import java.io.Serializable;
26
27/**
28 * A utility class that contains summary information on
29 * the values that appear in a dataset for a particular attribute.
30 *
31 * @author <a href="mailto:len@reeltwo.com">Len Trigg</a>
32 * @version $Revision: 5296 $
33 */
34public class AttributeStats
35  implements Serializable, RevisionHandler {
36
37  /** for serialization */
38  private static final long serialVersionUID = 4434688832743939380L;
39 
40  /** The number of int-like values */
41  public int intCount = 0;
42 
43  /** The number of real-like values (i.e. have a fractional part) */
44  public int realCount = 0;
45 
46  /** The number of missing values */
47  public int missingCount = 0;
48 
49  /** The number of distinct values */
50  public int distinctCount = 0;
51 
52  /** The number of values that only appear once */
53  public int uniqueCount = 0;
54 
55  /** The total number of values (i.e. number of instances) */
56  public int totalCount = 0;
57 
58  /** Stats on numeric value distributions */
59  // perhaps Stats should be moved from weka.experiment to weka.core
60  public weka.experiment.Stats numericStats;
61 
62  /** Counts of each nominal value */
63  public int [] nominalCounts;
64 
65  /** Weight mass for each nominal value */
66  public double[] nominalWeights;
67   
68  /**
69   * Updates the counters for one more observed distinct value.
70   *
71   * @param value the value that has just been seen
72   * @param count the number of times the value appeared
73   * @param weight the weight mass of the value
74   */
75  protected void addDistinct(double value, int count, double weight) {
76   
77    if (count > 0) {
78      if (count == 1) {
79        uniqueCount++;
80        }
81      if (Utils.eq(value, (double)((int)value))) {
82        intCount += count;
83      } else {
84        realCount += count;
85      }
86      if (nominalCounts != null) {
87        nominalCounts[(int)value] = count;
88        nominalWeights[(int)value] = weight;
89      }
90      if (numericStats != null) {
91          //numericStats.add(value, count);
92          numericStats.add(value, weight);
93          numericStats.calculateDerived();
94      }
95    }
96    distinctCount++;
97  }
98
99  /**
100   * Returns a human readable representation of this AttributeStats instance.
101   *
102   * @return a String represtinging these AttributeStats.
103   */
104  public String toString() {
105
106    StringBuffer sb = new StringBuffer();
107    sb.append(Utils.padLeft("Type", 4)).append(Utils.padLeft("Nom", 5));
108    sb.append(Utils.padLeft("Int", 5)).append(Utils.padLeft("Real", 5));
109    sb.append(Utils.padLeft("Missing", 12));
110    sb.append(Utils.padLeft("Unique", 12));
111    sb.append(Utils.padLeft("Dist", 6));
112    if (nominalCounts != null) {
113      sb.append(' ');
114      for (int i = 0; i < nominalCounts.length; i++) {
115        sb.append(Utils.padLeft("C[" + i + "]", 5));
116      }
117    }
118    sb.append('\n');
119
120    long percent;
121    percent = Math.round(100.0 * intCount / totalCount);
122    if (nominalCounts != null) {
123      sb.append(Utils.padLeft("Nom", 4)).append(' ');
124      sb.append(Utils.padLeft("" + percent, 3)).append("% ");
125      sb.append(Utils.padLeft("" + 0, 3)).append("% ");
126    } else {
127      sb.append(Utils.padLeft("Num", 4)).append(' ');
128      sb.append(Utils.padLeft("" + 0, 3)).append("% ");
129      sb.append(Utils.padLeft("" + percent, 3)).append("% ");
130    }
131    percent = Math.round(100.0 * realCount / totalCount);
132    sb.append(Utils.padLeft("" + percent, 3)).append("% ");
133    sb.append(Utils.padLeft("" + missingCount, 5)).append(" /");
134    percent = Math.round(100.0 * missingCount / totalCount);
135    sb.append(Utils.padLeft("" + percent, 3)).append("% ");
136    sb.append(Utils.padLeft("" + uniqueCount, 5)).append(" /");
137    percent = Math.round(100.0 * uniqueCount / totalCount);
138    sb.append(Utils.padLeft("" + percent, 3)).append("% ");
139    sb.append(Utils.padLeft("" + distinctCount, 5)).append(' ');
140    if (nominalCounts != null) {
141      for (int i = 0; i < nominalCounts.length; i++) {
142        sb.append(Utils.padLeft("" + nominalCounts[i], 5));
143      }
144    }
145    sb.append('\n');
146    return sb.toString();
147  }
148 
149  /**
150   * Returns the revision string.
151   *
152   * @return            the revision
153   */
154  public String getRevision() {
155    return RevisionUtils.extract("$Revision: 5296 $");
156  }
157}
Note: See TracBrowser for help on using the repository browser.