/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    RemoteBoundaryVisualizerSubTask.java
 *    Copyright (C) 2003 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.gui.boundaryvisualizer;

import weka.classifiers.Classifier;
import weka.classifiers.AbstractClassifier;
import weka.core.Instance;
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.Utils;
import weka.experiment.Task;
import weka.experiment.TaskStatusInfo;

import java.util.Random;

/**
 * Class that encapsulates a sub task for distributed boundary
 * visualization. Produces probability distributions for each pixel
 * in one row of the visualization.
 *
 * @author Mark Hall
 * @version $Revision: 5987 $
 * @since 1.0
 * @see Task
 */
public class RemoteBoundaryVisualizerSubTask implements Task {

  // status information for this sub task
  private TaskStatusInfo m_status = new TaskStatusInfo();

  // the result of this sub task
  private RemoteResult m_result;

  // which row are we doing
  private int m_rowNumber;

  // width and height of the visualization
  private int m_panelHeight;
  private int m_panelWidth;

  // the classifier to use
  private Classifier m_classifier;

  // the kernel density estimator
  private DataGenerator m_dataGenerator;

  // the training data
  private Instances m_trainingData;

  // attributes for visualizing on (fixed dimensions)
  private int m_xAttribute;
  private int m_yAttribute;

  // pixel width and height in terms of attribute values
  private double m_pixHeight;
  private double m_pixWidth;

  // min, max of these attributes
  private double m_minX;
  private double m_minY;
  private double m_maxX;
  private double m_maxY;

  // number of samples to take from each region in the fixed dimensions
  private int m_numOfSamplesPerRegion = 2;

  // number of samples per kernel = base ^ (# non-fixed dimensions)
  private int m_numOfSamplesPerGenerator;
  private double m_samplesBase = 2.0;

  // A random number generator
  private Random m_random;

  private double [] m_weightingAttsValues;
  private boolean [] m_attsToWeightOn;
  private double [] m_vals;
  private double [] m_dist;
  private Instance m_predInst;

  /**
   * Set the row number for this sub task
   *
   * @param rn the row number
   */
  public void setRowNumber(int rn) {
    m_rowNumber = rn;
  }

  /**
   * Set the width of the visualization
   *
   * @param pw the width
   */
  public void setPanelWidth(int pw) {
    m_panelWidth = pw;
  }

  /**
   * Set the height of the visualization
   *
   * @param ph the height
   */
  public void setPanelHeight(int ph) {
    m_panelHeight = ph;
  }

  /**
   * Set the height of a pixel
   *
   * @param ph the height of a pixel
   */
  public void setPixHeight(double ph) {
    m_pixHeight = ph;
  }

  /**
   * Set the width of a pixel
   *
   * @param pw the width of a pixel
   */
  public void setPixWidth(double pw) {
    m_pixWidth = pw;
  }

  /**
   * Set the classifier to use
   *
   * @param dc the classifier
   */
  public void setClassifier(Classifier dc) {
    m_classifier = dc;
  }

  /**
   * Set the density estimator to use
   *
   * @param dg the density estimator
   */
  public void setDataGenerator(DataGenerator dg) {
    m_dataGenerator = dg;
  }

  /**
   * Set the training data
   *
   * @param i the training data
   */
  public void setInstances(Instances i) {
    m_trainingData = i;
  }

  /**
   * Set the minimum and maximum values of the x axis fixed dimension
   *
   * @param minx a double value
   * @param maxx a double value
   */
  public void setMinMaxX(double minx, double maxx) {
    m_minX = minx;
    m_maxX = maxx;
  }

  /**
   * Set the minimum and maximum values of the y axis fixed dimension
   *
   * @param miny a double value
   * @param maxy a double value
   */
  public void setMinMaxY(double miny, double maxy) {
    m_minY = miny;
    m_maxY = maxy;
  }

  /**
   * Set the x axis fixed dimension
   *
   * @param xatt an int value
   */
  public void setXAttribute(int xatt) {
    m_xAttribute = xatt;
  }

  /**
   * Set the y axis fixed dimension
   *
   * @param yatt an int value
   */
  public void setYAttribute(int yatt) {
    m_yAttribute = yatt;
  }

  /**
   * Set the number of points to uniformly sample from a region (fixed
   * dimensions).
   *
   * @param num an int value
   */
  public void setNumSamplesPerRegion(int num) {
    m_numOfSamplesPerRegion = num;
  }

  /**
   * Set the base for computing the number of samples to obtain from each
   * generator. number of samples = base ^ (# non fixed dimensions)
   *
   * @param ksb a double value
   */
  public void setGeneratorSamplesBase(double ksb) {
    m_samplesBase = ksb;
  }

  /**
   * Perform the sub task
   */
  public void execute() {
    m_random = new Random(m_rowNumber * 11);
    m_dataGenerator.setSeed(m_rowNumber * 11);
    m_result = new RemoteResult(m_rowNumber, m_panelWidth);
    m_status.setTaskResult(m_result);
    m_status.setExecutionStatus(TaskStatusInfo.PROCESSING);

    try {
      if (m_trainingData == null) {
        throw new Exception("No training data set "
                            + "(RemoteBoundaryVisualizerSubTask)");
      }
      if (m_classifier == null) {
        throw new Exception("No classifier set "
                            + "(RemoteBoundaryVisualizerSubTask)");
      }
      if (m_dataGenerator == null) {
        throw new Exception("No data generator set "
                            + "(RemoteBoundaryVisualizerSubTask)");
      }
      if (m_trainingData.attribute(m_xAttribute).isNominal() ||
          m_trainingData.attribute(m_yAttribute).isNominal()) {
        throw new Exception("Visualization dimensions must be numeric "
                            + "(RemoteBoundaryVisualizerSubTask)");
      }

      // number of samples to draw per kernel:
      // base ^ (# non-fixed dimensions, excluding x, y and the class)
      m_numOfSamplesPerGenerator =
        (int)Math.pow(m_samplesBase, m_trainingData.numAttributes() - 3);

      m_attsToWeightOn = new boolean[m_trainingData.numAttributes()];
      m_attsToWeightOn[m_xAttribute] = true;
      m_attsToWeightOn[m_yAttribute] = true;

      // generate samples
      m_weightingAttsValues = new double [m_attsToWeightOn.length];
      m_vals = new double[m_trainingData.numAttributes()];
      m_predInst = new DenseInstance(1.0, m_vals);
      m_predInst.setDataset(m_trainingData);

      System.err.println("Executing row number " + m_rowNumber);
      for (int j = 0; j < m_panelWidth; j++) {
        double [] preds = calculateRegionProbs(j, m_rowNumber);
        m_result.setLocationProbs(j, preds);
        m_result.setPercentCompleted(
          (int)(100 * ((double)j / (double)m_panelWidth)));
      }
    } catch (Exception ex) {
      m_status.setExecutionStatus(TaskStatusInfo.FAILED);
      m_status.setStatusMessage("Row " + m_rowNumber + " failed.");
      System.err.print(ex);
      return;
    }

    // finished
    m_status.setExecutionStatus(TaskStatusInfo.FINISHED);
    m_status.setStatusMessage("Row " + m_rowNumber
                              + " completed successfully.");
  }

  private double [] calculateRegionProbs(int j, int i) throws Exception {
    double [] sumOfProbsForRegion =
      new double [m_trainingData.classAttribute().numValues()];

    for (int u = 0; u < m_numOfSamplesPerRegion; u++) {
      double [] sumOfProbsForLocation =
        new double [m_trainingData.classAttribute().numValues()];

      m_weightingAttsValues[m_xAttribute] = getRandomX(j);
      m_weightingAttsValues[m_yAttribute] = getRandomY(m_panelHeight - i - 1);

      m_dataGenerator.setWeightingValues(m_weightingAttsValues);

      double [] weights = m_dataGenerator.getWeights();
      double sumOfWeights = Utils.sum(weights);
      int [] indices = Utils.sort(weights);

      // Prune 1% of weight mass
      int [] newIndices = new int[indices.length];
      double sumSoFar = 0;
      double criticalMass = 0.99 * sumOfWeights;
      int index = weights.length - 1;
      int counter = 0;
      for (int z = weights.length - 1; z >= 0; z--) {
        newIndices[index--] = indices[z];
        sumSoFar += weights[indices[z]];
        counter++;
        if (sumSoFar > criticalMass) {
          break;
        }
      }
      indices = new int[counter];
      System.arraycopy(newIndices, index + 1, indices, 0, counter);

      for (int z = 0; z < m_numOfSamplesPerGenerator; z++) {
        m_dataGenerator.setWeightingValues(m_weightingAttsValues);
        double [][] values = m_dataGenerator.generateInstances(indices);

        for (int q = 0; q < values.length; q++) {
          if (values[q] != null) {
            System.arraycopy(values[q], 0, m_vals, 0, m_vals.length);
            m_vals[m_xAttribute] = m_weightingAttsValues[m_xAttribute];
            m_vals[m_yAttribute] = m_weightingAttsValues[m_yAttribute];

            // classify the instance
            m_dist = m_classifier.distributionForInstance(m_predInst);

            for (int k = 0; k < sumOfProbsForLocation.length; k++) {
              sumOfProbsForLocation[k] += (m_dist[k] * weights[q]);
            }
          }
        }
      }

      for (int k = 0; k < sumOfProbsForRegion.length; k++) {
        sumOfProbsForRegion[k] += (sumOfProbsForLocation[k] * sumOfWeights);
      }
    }

    // average
    Utils.normalize(sumOfProbsForRegion);

    // cache
    double [] tempDist = new double[sumOfProbsForRegion.length];
    System.arraycopy(sumOfProbsForRegion, 0, tempDist, 0,
                     sumOfProbsForRegion.length);

    return tempDist;
  }

  /**
   * Return a random x attribute value contained within
   * the pix'th horizontal pixel
   *
   * @param pix the horizontal pixel number
   * @return a value in attribute space
   */
  private double getRandomX(int pix) {
    double minPix = m_minX + (pix * m_pixWidth);
    return minPix + m_random.nextDouble() * m_pixWidth;
  }

  /**
   * Return a random y attribute value contained within
   * the pix'th vertical pixel
   *
   * @param pix the vertical pixel number
   * @return a value in attribute space
   */
  private double getRandomY(int pix) {
    double minPix = m_minY + (pix * m_pixHeight);
    return minPix + m_random.nextDouble() * m_pixHeight;
  }

  /**
   * Return status information for this sub task
   *
   * @return a TaskStatusInfo value
   */
  public TaskStatusInfo getTaskStatus() {
    return m_status;
  }
}
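/*
 * Illustrative usage sketch, not part of the original class: shows how a
 * driver might configure one sub task for a single row of the plot and run
 * it locally (a distributed driver would instead serialize the task to a
 * RemoteEngine). The KDDataGenerator configuration via
 * setWeightingDimensions()/buildGenerator() mirrors how BoundaryPanel builds
 * its density estimator and is an assumption here; the file name, attribute
 * indices, panel size and sampling parameters are hypothetical.
 */
class RemoteBoundaryVisualizerSubTaskExample {

  public static void main(String [] args) throws Exception {
    // load training data and use the last attribute as the class
    Instances train = new Instances(
      new java.io.BufferedReader(new java.io.FileReader("iris.arff")));
    train.setClassIndex(train.numAttributes() - 1);

    int xAtt = 0;          // fixed (visualization) dimensions
    int yAtt = 1;
    int panelWidth = 100;  // one row of a 100 x 100 pixel plot
    int panelHeight = 100;

    // min/max of the fixed dimensions, computed directly from the data
    double minX = Double.MAX_VALUE, maxX = -Double.MAX_VALUE;
    double minY = Double.MAX_VALUE, maxY = -Double.MAX_VALUE;
    for (int i = 0; i < train.numInstances(); i++) {
      minX = Math.min(minX, train.instance(i).value(xAtt));
      maxX = Math.max(maxX, train.instance(i).value(xAtt));
      minY = Math.min(minY, train.instance(i).value(yAtt));
      maxY = Math.max(maxY, train.instance(i).value(yAtt));
    }

    // classifier and kernel density estimator
    Classifier classifier = new weka.classifiers.trees.J48();
    classifier.buildClassifier(train);

    KDDataGenerator generator = new KDDataGenerator();
    boolean [] weightingDims = new boolean[train.numAttributes()];
    weightingDims[xAtt] = true;
    weightingDims[yAtt] = true;
    generator.setWeightingDimensions(weightingDims);
    generator.buildGenerator(train);

    // configure the sub task for row 0
    RemoteBoundaryVisualizerSubTask task = new RemoteBoundaryVisualizerSubTask();
    task.setRowNumber(0);
    task.setPanelWidth(panelWidth);
    task.setPanelHeight(panelHeight);
    task.setPixWidth((maxX - minX) / panelWidth);
    task.setPixHeight((maxY - minY) / panelHeight);
    task.setMinMaxX(minX, maxX);
    task.setMinMaxY(minY, maxY);
    task.setXAttribute(xAtt);
    task.setYAttribute(yAtt);
    task.setClassifier(classifier);
    task.setDataGenerator(generator);
    task.setInstances(train);
    task.setNumSamplesPerRegion(2);
    task.setGeneratorSamplesBase(2.0);

    // run locally and inspect the status/result
    task.execute();
    TaskStatusInfo status = task.getTaskStatus();
    System.err.println(status.getStatusMessage());

    // rowResult now holds the class probability distribution for each of the
    // 100 pixels in row 0
    RemoteResult rowResult = (RemoteResult) status.getTaskResult();
  }
}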