/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
 * DTNB.java
* Copyright (C) 2008 University of Waikato, Hamilton, New Zealand
*
*/
package weka.classifiers.rules;

import weka.attributeSelection.ASEvaluation;
import weka.attributeSelection.ASSearch;
import weka.attributeSelection.SubsetEvaluator;
import weka.classifiers.bayes.NaiveBayes;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.TechnicalInformation;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Vector;
/**
*
* Class for building and using a decision table/naive Bayes hybrid classifier. At each point in the search, the algorithm evaluates the merit of dividing the attributes into two disjoint subsets: one for the decision table, the other for naive Bayes. Initially, all attributes are modelled by the decision table; a forward selection search is then used, where at each step the selected attributes are modelled by naive Bayes and the remainder by the decision table. At each step, the algorithm also considers dropping an attribute entirely from the model.
*
* For more information, see:
*
* Mark Hall, Eibe Frank: Combining Naive Bayes and Decision Tables. In: Proceedings of the 21st Florida Artificial Intelligence Society Conference (FLAIRS), ???-???, 2008.
*
* <pre>
* &#64;inproceedings{Hall2008,
*    author = {Mark Hall and Eibe Frank},
*    booktitle = {Proceedings of the 21st Florida Artificial Intelligence Society Conference (FLAIRS)},
*    pages = {???-???},
*    publisher = {AAAI press},
*    title = {Combining Naive Bayes and Decision Tables},
*    year = {2008}
* }
* </pre>
* <p/>
*
* Valid options are: <p/>
*
* <pre> -X &lt;number of folds&gt;
*  Use cross validation to evaluate features.
*  Use number of folds = 1 for leave one out CV.
*  (Default = leave one out CV)</pre>
*
* <pre> -E &lt;acc | rmse | mae | auc&gt;
*  Performance evaluation measure to use for selecting attributes.
*  (Default = accuracy for discrete class and rmse for numeric class)</pre>
*
* <pre> -I
*  Use nearest neighbour instead of global table majority.</pre>
*
* <pre> -R
*  Display decision table rules.
* </pre>
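*
* As a quick orientation, the following is a minimal usage sketch. The dataset
* file name is hypothetical, and loading via ConverterUtils.DataSource is
* ordinary Weka API rather than anything specific to this class:
* <pre>
* Instances data = new weka.core.converters.ConverterUtils.DataSource("data.arff").getDataSet();
* data.setClassIndex(data.numAttributes() - 1); // assume the last attribute is the class
* DTNB dtnb = new DTNB();                       // hybrid decision table / naive Bayes
* dtnb.buildClassifier(data);                   // inherited from DecisionTable
* System.out.println(dtnb);
* </pre>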
*
* @author Mark Hall (mhall{[at]}pentaho{[dot]}org)
* @author Eibe Frank (eibe{[at]}cs{[dot]}waikato{[dot]}ac{[dot]}nz)
*
* @version $Revision: 1.4 $
*/
public class DTNB extends DecisionTable {

  /** The naive Bayes half of the hybrid */
  protected NaiveBayes m_NB;

  /** The features used by naive Bayes */
  private int [] m_nbFeatures;

  /** Percentage of the total number of features used by the decision table */
  private double m_percentUsedByDT;

  /** Percentage of the features that were dropped entirely */
  private double m_percentDeleted;

  static final long serialVersionUID = 2999557077765701326L;

  /**
   * Returns a string describing the classifier
   *
   * @return a description suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {

    return "Class for building and using a decision table/naive Bayes hybrid classifier. At each point "
      + "in the search, the algorithm evaluates the merit of dividing the attributes into two disjoint "
      + "subsets: one for the decision table, the other for naive Bayes. Initially, all attributes are "
      + "modelled by the decision table; a forward selection search is then used, where at each step "
      + "the selected attributes are modelled by naive Bayes and the remainder by the decision table. "
      + "At each step, the algorithm also considers dropping an attribute entirely from the model.\n\n"
      + "For more information, see: \n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;

    result = new TechnicalInformation(Type.INPROCEEDINGS);
    result.setValue(Field.AUTHOR, "Mark Hall and Eibe Frank");
    result.setValue(Field.TITLE, "Combining Naive Bayes and Decision Tables");
    result.setValue(Field.BOOKTITLE, "Proceedings of the 21st Florida Artificial Intelligence "
      + "Society Conference (FLAIRS)");
    result.setValue(Field.YEAR, "2008");
    result.setValue(Field.PAGES, "???-???");
    result.setValue(Field.PUBLISHER, "AAAI press");

    return result;
  }

  /**
   * Calculates the accuracy on a test fold for internal cross validation
   * of feature sets
   *
   * @param fold set of instances to be "left out" and classified
   * @param fs currently selected feature set
   * @return the accuracy for the fold
   * @throws Exception if something goes wrong
   */
  double evaluateFoldCV(Instances fold, int [] fs) throws Exception {

    int i;
    int ruleCount = 0;
    int numFold = fold.numInstances();
    int numCl = m_theInstances.classAttribute().numValues();
    double [][] class_distribs = new double [numFold][numCl];
    double [] instA = new double [fs.length];
    double [] normDist;
    DecisionTableHashKey thekey;
    double acc = 0.0;
    int classI = m_theInstances.classIndex();
    Instance inst;

    if (m_classIsNominal) {
      normDist = new double [numCl];
    } else {
      normDist = new double [2];
    }

    // first *remove* instances
    for (i = 0; i < numFold; i++) {
  /**
   * Parses the options for this object. <p/>
   *
   * Valid options are: <p/>
   *
   * <pre> -X &lt;number of folds&gt;
   *  Use cross validation to evaluate features.
   *  Use number of folds = 1 for leave one out CV.
   *  (Default = leave one out CV)</pre>
   *
   * <pre> -E &lt;acc | rmse | mae | auc&gt;
   *  Performance evaluation measure to use for selecting attributes.
   *  (Default = accuracy for discrete class and rmse for numeric class)</pre>
   *
   * <pre> -I
   *  Use nearest neighbour instead of global table majority.</pre>
   *
   * <pre> -R
   *  Display decision table rules.
   * </pre>
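   *
   * For illustration, the options can also be set programmatically. This
   * sketch assumes nothing beyond the flags documented above and the standard
   * weka.core.Utils.splitOptions helper:
   * <pre>
   * DTNB dtnb = new DTNB();
   * dtnb.setOptions(weka.core.Utils.splitOptions("-X 1 -E rmse -R"));
   * </pre>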
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    String optionString;

    resetOptions();

    optionString = Utils.getOption('X', options);
    if (optionString.length() != 0) {
      setCrossVal(Integer.parseInt(optionString));
    }

    m_useIBk = Utils.getFlag('I', options);
    m_displayRules = Utils.getFlag('R', options);

    optionString = Utils.getOption('E', options);
    if (optionString.length() != 0) {
      if (optionString.equals("acc")) {
        setEvaluationMeasure(new SelectedTag(EVAL_ACCURACY, TAGS_EVALUATION));
      } else if (optionString.equals("rmse")) {
        setEvaluationMeasure(new SelectedTag(EVAL_RMSE, TAGS_EVALUATION));
      } else if (optionString.equals("mae")) {
        setEvaluationMeasure(new SelectedTag(EVAL_MAE, TAGS_EVALUATION));
      } else if (optionString.equals("auc")) {
        setEvaluationMeasure(new SelectedTag(EVAL_AUC, TAGS_EVALUATION));
      } else {
        throw new IllegalArgumentException("Invalid evaluation measure");
      }
    }
  }

  /**
   * Gets the current settings of the classifier.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String [] getOptions() {

    String [] options = new String [9];
    int current = 0;

    options[current++] = "-X";
    options[current++] = "" + getCrossVal();

    if (m_evaluationMeasure != EVAL_DEFAULT) {
      options[current++] = "-E";
      switch (m_evaluationMeasure) {
      case EVAL_ACCURACY:
        options[current++] = "acc";
        break;
      case EVAL_RMSE:
        options[current++] = "rmse";
        break;
      case EVAL_MAE:
        options[current++] = "mae";
        break;
      case EVAL_AUC:
        options[current++] = "auc";
        break;
      }
    }

    if (m_useIBk) {
      options[current++] = "-I";
    }
    if (m_displayRules) {
      options[current++] = "-R";
    }

    while (current < options.length) {
      options[current++] = "";
    }

    return options;
  }

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.4 $");
  }

  /**
   * Main method for testing this class.
   *
   * @param argv the command-line options
   */
  public static void main(String [] argv) {
    runClassifier(new DTNB(), argv);
  }
}