| 1 | /* | 
|---|
| 2 |  * This program is free software; you can redistribute it and/or modify | 
|---|
| 3 |  * it under the terms of the GNU General Public License as published by | 
|---|
| 4 |  * the Free Software Foundation; either version 2 of the License, or | 
|---|
| 5 |  * (at your option) any later version. | 
|---|
| 6 |  *  | 
|---|
| 7 |  * This program is distributed in the hope that it will be useful, | 
|---|
| 8 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 9 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 10 |  * GNU General Public License for more details. | 
|---|
| 11 |  *  | 
|---|
| 12 |  * You should have received a copy of the GNU General Public License | 
|---|
| 13 |  * along with this program; if not, write to the Free Software | 
|---|
| 14 |  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 
|---|
| 15 |  */ | 
|---|
| 16 |  | 
|---|
| 17 | /* | 
|---|
| 18 |  * TabuSearch.java | 
|---|
| 19 |  * Copyright (C) 2004 University of Waikato, Hamilton, New Zealand | 
|---|
| 20 |  *  | 
|---|
| 21 |  */ | 
|---|
| 22 |   | 
|---|
| 23 | package weka.classifiers.bayes.net.search.global; | 
|---|
| 24 |  | 
|---|
| 25 | import weka.classifiers.bayes.BayesNet; | 
|---|
| 26 | import weka.core.Instances; | 
|---|
| 27 | import weka.core.Option; | 
|---|
| 28 | import weka.core.RevisionUtils; | 
|---|
| 29 | import weka.core.TechnicalInformation; | 
|---|
| 30 | import weka.core.TechnicalInformation.Type; | 
|---|
| 31 | import weka.core.TechnicalInformation.Field; | 
|---|
| 32 | import weka.core.TechnicalInformationHandler; | 
|---|
| 33 | import weka.core.Utils; | 
|---|
| 34 |  | 
|---|
| 35 | import java.util.Enumeration; | 
|---|
| 36 | import java.util.Vector; | 
|---|
| 37 |  | 
|---|
| 38 | /**  | 
|---|
| 39 |  <!-- globalinfo-start --> | 
|---|
| 40 |  * This Bayes Network learning algorithm uses tabu search for finding a well scoring Bayes network structure. Tabu search is hill climbing till an optimum is reached. The following step is the least worst possible step. The last X steps are kept in a list and none of the steps in this so called tabu list is considered in taking the next step. The best network found in this traversal is returned.<br/> | 
|---|
| 41 |  * <br/> | 
|---|
| 42 |  * For more information see:<br/> | 
|---|
| 43 |  * <br/> | 
|---|
| 44 |  * R.R. Bouckaert (1995). Bayesian Belief Networks: from Construction to Inference. Utrecht, Netherlands. | 
|---|
| 45 |  * <p/> | 
|---|
| 46 |  <!-- globalinfo-end --> | 
|---|
| 47 |  *  | 
|---|
| 48 |  <!-- technical-bibtex-start --> | 
|---|
| 49 |  * BibTeX: | 
|---|
| 50 |  * <pre> | 
|---|
| 51 |  * @phdthesis{Bouckaert1995, | 
|---|
| 52 |  *    address = {Utrecht, Netherlands}, | 
|---|
| 53 |  *    author = {R.R. Bouckaert}, | 
|---|
| 54 |  *    institution = {University of Utrecht}, | 
|---|
| 55 |  *    title = {Bayesian Belief Networks: from Construction to Inference}, | 
|---|
| 56 |  *    year = {1995} | 
|---|
| 57 |  * } | 
|---|
| 58 |  * </pre> | 
|---|
| 59 |  * <p/> | 
|---|
| 60 |  <!-- technical-bibtex-end --> | 
|---|
| 61 |  *  | 
|---|
| 62 |  <!-- options-start --> | 
|---|
| 63 |  * Valid options are: <p/> | 
|---|
| 64 |  *  | 
|---|
| 65 |  * <pre> -L <integer> | 
|---|
| 66 |  *  Tabu list length</pre> | 
|---|
| 67 |  *  | 
|---|
| 68 |  * <pre> -U <integer> | 
|---|
| 69 |  *  Number of runs</pre> | 
|---|
| 70 |  *  | 
|---|
| 71 |  * <pre> -P <nr of parents> | 
|---|
| 72 |  *  Maximum number of parents</pre> | 
|---|
| 73 |  *  | 
|---|
| 74 |  * <pre> -R | 
|---|
| 75 |  *  Use arc reversal operation. | 
|---|
| 76 |  *  (default false)</pre> | 
|---|
| 77 |  *  | 
|---|
| 78 |  * <pre> -P <nr of parents> | 
|---|
| 79 |  *  Maximum number of parents</pre> | 
|---|
| 80 |  *  | 
|---|
| 81 |  * <pre> -R | 
|---|
| 82 |  *  Use arc reversal operation. | 
|---|
| 83 |  *  (default false)</pre> | 
|---|
| 84 |  *  | 
|---|
| 85 |  * <pre> -N | 
|---|
| 86 |  *  Initial structure is empty (instead of Naive Bayes)</pre> | 
|---|
| 87 |  *  | 
|---|
| 88 |  * <pre> -mbc | 
|---|
| 89 |  *  Applies a Markov Blanket correction to the network structure,  | 
|---|
| 90 |  *  after a network structure is learned. This ensures that all  | 
|---|
| 91 |  *  nodes in the network are part of the Markov blanket of the  | 
|---|
| 92 |  *  classifier node.</pre> | 
|---|
| 93 |  *  | 
|---|
| 94 |  * <pre> -S [LOO-CV|k-Fold-CV|Cumulative-CV] | 
|---|
| 95 |  *  Score type (LOO-CV,k-Fold-CV,Cumulative-CV)</pre> | 
|---|
| 96 |  *  | 
|---|
| 97 |  * <pre> -Q | 
|---|
| 98 |  *  Use probabilistic or 0/1 scoring. | 
|---|
| 99 |  *  (default probabilistic scoring)</pre> | 
|---|
| 100 |  *  | 
|---|
| 101 |  <!-- options-end --> | 
|---|
| 102 |  * | 
|---|
| 103 |  * @author Remco Bouckaert (rrb@xm.co.nz) | 
|---|
| 104 |  * @version $Revision: 1.5 $ | 
|---|
| 105 |  */ | 
|---|
| 106 | public class TabuSearch  | 
|---|
| 107 |     extends HillClimber | 
|---|
| 108 |     implements TechnicalInformationHandler { | 
|---|
| 109 |  | 
|---|
| 110 |     /** for serialization */ | 
|---|
| 111 |     static final long serialVersionUID = 1176705618756672292L; | 
|---|
| 112 |    | 
|---|
| 113 |     /** number of runs **/ | 
|---|
| 114 |     int m_nRuns = 10; | 
|---|
| 115 |                  | 
|---|
| 116 |         /** size of tabu list **/ | 
|---|
| 117 |         int m_nTabuList = 5; | 
|---|
| 118 |  | 
|---|
| 119 |         /** the actual tabu list **/ | 
|---|
| 120 |         Operation[] m_oTabuList = null; | 
|---|
| 121 |  | 
|---|
| 122 |         /** | 
|---|
| 123 |          * Returns an instance of a TechnicalInformation object, containing  | 
|---|
| 124 |          * detailed information about the technical background of this class, | 
|---|
| 125 |          * e.g., paper reference or book this class is based on. | 
|---|
| 126 |          *  | 
|---|
| 127 |          * @return the technical information about this class | 
|---|
| 128 |          */ | 
|---|
| 129 |         public TechnicalInformation getTechnicalInformation() { | 
|---|
| 130 |           TechnicalInformation  result; | 
|---|
| 131 |            | 
|---|
| 132 |           result = new TechnicalInformation(Type.PHDTHESIS); | 
|---|
| 133 |           result.setValue(Field.AUTHOR, "R.R. Bouckaert"); | 
|---|
| 134 |           result.setValue(Field.YEAR, "1995"); | 
|---|
| 135 |           result.setValue(Field.TITLE, "Bayesian Belief Networks: from Construction to Inference"); | 
|---|
| 136 |           result.setValue(Field.INSTITUTION, "University of Utrecht"); | 
|---|
| 137 |           result.setValue(Field.ADDRESS, "Utrecht, Netherlands"); | 
|---|
| 138 |            | 
|---|
| 139 |           return result; | 
|---|
| 140 |         } | 
|---|
| 141 |  | 
|---|
| 142 |         /** | 
|---|
| 143 |          * search determines the network structure/graph of the network | 
|---|
| 144 |          * with the Tabu search algorithm. | 
|---|
| 145 |          *  | 
|---|
| 146 |          * @param bayesNet the network to use | 
|---|
| 147 |          * @param instances the instances to use | 
|---|
| 148 |          * @throws Exception if something goes wrong | 
|---|
| 149 |          */ | 
|---|
| 150 |         protected void search(BayesNet bayesNet, Instances instances) throws Exception { | 
|---|
| 151 |         m_oTabuList = new Operation[m_nTabuList]; | 
|---|
| 152 |         int iCurrentTabuList = 0; | 
|---|
| 153 |  | 
|---|
| 154 |                 // keeps track of score pf best structure found so far  | 
|---|
| 155 |                 double fBestScore;       | 
|---|
| 156 |                 double fCurrentScore = calcScore(bayesNet); | 
|---|
| 157 |  | 
|---|
| 158 |                 // keeps track of best structure found so far  | 
|---|
| 159 |                 BayesNet bestBayesNet; | 
|---|
| 160 |  | 
|---|
| 161 |                 // initialize bestBayesNet | 
|---|
| 162 |                 fBestScore = fCurrentScore; | 
|---|
| 163 |                 bestBayesNet = new BayesNet(); | 
|---|
| 164 |                 bestBayesNet.m_Instances = instances; | 
|---|
| 165 |                 bestBayesNet.initStructure(); | 
|---|
| 166 |                 copyParentSets(bestBayesNet, bayesNet); | 
|---|
| 167 |                  | 
|---|
| 168 |                  | 
|---|
| 169 |         // go do the search         | 
|---|
| 170 |         for (int iRun = 0; iRun < m_nRuns; iRun++) { | 
|---|
| 171 |             Operation oOperation = getOptimalOperation(bayesNet, instances); | 
|---|
| 172 |                         performOperation(bayesNet, instances, oOperation); | 
|---|
| 173 |             // sanity check | 
|---|
| 174 |             if (oOperation  == null) { | 
|---|
| 175 |                                 throw new Exception("Panic: could not find any step to make. Tabu list too long?"); | 
|---|
| 176 |             } | 
|---|
| 177 |             // update tabu list | 
|---|
| 178 |             m_oTabuList[iCurrentTabuList] = oOperation; | 
|---|
| 179 |             iCurrentTabuList = (iCurrentTabuList + 1) % m_nTabuList; | 
|---|
| 180 |  | 
|---|
| 181 |                         fCurrentScore += oOperation.m_fScore; | 
|---|
| 182 |                         // keep track of best network seen so far | 
|---|
| 183 |                         if (fCurrentScore > fBestScore) { | 
|---|
| 184 |                                 fBestScore = fCurrentScore; | 
|---|
| 185 |                                 copyParentSets(bestBayesNet, bayesNet); | 
|---|
| 186 |                         } | 
|---|
| 187 |  | 
|---|
| 188 |                         if (bayesNet.getDebug()) { | 
|---|
| 189 |                                 printTabuList(); | 
|---|
| 190 |                         } | 
|---|
| 191 |         } | 
|---|
| 192 |          | 
|---|
| 193 |         // restore current network to best network | 
|---|
| 194 |                 copyParentSets(bayesNet, bestBayesNet); | 
|---|
| 195 |                  | 
|---|
| 196 |                 // free up memory | 
|---|
| 197 |                 bestBayesNet = null; | 
|---|
| 198 |     } // search | 
|---|
| 199 |  | 
|---|
| 200 |  | 
|---|
| 201 |         /** copyParentSets copies parent sets of source to dest BayesNet | 
|---|
| 202 |          * @param dest destination network | 
|---|
| 203 |          * @param source source network | 
|---|
| 204 |          */ | 
|---|
| 205 |         void copyParentSets(BayesNet dest, BayesNet source) { | 
|---|
| 206 |                 int nNodes = source.getNrOfNodes(); | 
|---|
| 207 |                 // clear parent set first | 
|---|
| 208 |                 for (int iNode = 0; iNode < nNodes; iNode++) { | 
|---|
| 209 |                         dest.getParentSet(iNode).copy(source.getParentSet(iNode)); | 
|---|
| 210 |                 }                | 
|---|
| 211 |         } // CopyParentSets | 
|---|
| 212 |  | 
|---|
| 213 |         /** check whether the operation is not in the tabu list | 
|---|
| 214 |          * @param oOperation operation to be checked | 
|---|
| 215 |          * @return true if operation is not in the tabu list | 
|---|
| 216 |          */ | 
|---|
| 217 |         boolean isNotTabu(Operation oOperation) { | 
|---|
| 218 |                 for (int iTabu = 0; iTabu < m_nTabuList; iTabu++) { | 
|---|
| 219 |                         if (oOperation.equals(m_oTabuList[iTabu])) { | 
|---|
| 220 |                                         return false; | 
|---|
| 221 |                                 } | 
|---|
| 222 |                 } | 
|---|
| 223 |                 return true; | 
|---|
| 224 |         } // isNotTabu | 
|---|
| 225 |  | 
|---|
| 226 |         /** print tabu list for debugging purposes. | 
|---|
| 227 |          */ | 
|---|
| 228 |         void printTabuList() { | 
|---|
| 229 |                 for (int i = 0; i < m_nTabuList; i++) { | 
|---|
| 230 |                         Operation o = m_oTabuList[i]; | 
|---|
| 231 |                         if (o != null) { | 
|---|
| 232 |                                 if (o.m_nOperation == 0) {System.out.print(" +(");} else {System.out.print(" -(");} | 
|---|
| 233 |                                 System.out.print(o.m_nTail + "->" + o.m_nHead + ")"); | 
|---|
| 234 |                         } | 
|---|
| 235 |                 } | 
|---|
| 236 |                 System.out.println(); | 
|---|
| 237 |         } // printTabuList | 
|---|
| 238 |  | 
|---|
| 239 |     /** | 
|---|
| 240 |     * @return number of runs | 
|---|
| 241 |     */ | 
|---|
| 242 |     public int getRuns() { | 
|---|
| 243 |         return m_nRuns; | 
|---|
| 244 |     } // getRuns | 
|---|
| 245 |  | 
|---|
| 246 |     /** | 
|---|
| 247 |      * Sets the number of runs | 
|---|
| 248 |      * @param nRuns The number of runs to set | 
|---|
| 249 |      */ | 
|---|
| 250 |     public void setRuns(int nRuns) { | 
|---|
| 251 |         m_nRuns = nRuns; | 
|---|
| 252 |     } // setRuns | 
|---|
| 253 |  | 
|---|
| 254 |     /** | 
|---|
| 255 |      * @return the Tabu List length | 
|---|
| 256 |      */ | 
|---|
| 257 |     public int getTabuList() { | 
|---|
| 258 |         return m_nTabuList; | 
|---|
| 259 |     } // getTabuList | 
|---|
| 260 |  | 
|---|
| 261 |     /** | 
|---|
| 262 |      * Sets the Tabu List length. | 
|---|
| 263 |      * @param nTabuList The nTabuList to set | 
|---|
| 264 |      */ | 
|---|
| 265 |     public void setTabuList(int nTabuList) { | 
|---|
| 266 |         m_nTabuList = nTabuList; | 
|---|
| 267 |     } // setTabuList | 
|---|
| 268 |  | 
|---|
| 269 |         /** | 
|---|
| 270 |          * Returns an enumeration describing the available options. | 
|---|
| 271 |          * | 
|---|
| 272 |          * @return an enumeration of all the available options. | 
|---|
| 273 |          */ | 
|---|
| 274 |         public Enumeration listOptions() { | 
|---|
| 275 |                 Vector newVector = new Vector(4); | 
|---|
| 276 |  | 
|---|
| 277 |                 newVector.addElement(new Option("\tTabu list length", "L", 1, "-L <integer>")); | 
|---|
| 278 |                 newVector.addElement(new Option("\tNumber of runs", "U", 1, "-U <integer>")); | 
|---|
| 279 |                 newVector.addElement(new Option("\tMaximum number of parents", "P", 1, "-P <nr of parents>")); | 
|---|
| 280 |                 newVector.addElement(new Option("\tUse arc reversal operation.\n\t(default false)", "R", 0, "-R")); | 
|---|
| 281 |  | 
|---|
| 282 |                 Enumeration enu = super.listOptions(); | 
|---|
| 283 |                 while (enu.hasMoreElements()) { | 
|---|
| 284 |                         newVector.addElement(enu.nextElement()); | 
|---|
| 285 |                 } | 
|---|
| 286 |                 return newVector.elements(); | 
|---|
| 287 |         } // listOptions | 
|---|
| 288 |  | 
|---|
| 289 |         /** | 
|---|
| 290 |          * Parses a given list of options. <p/> | 
|---|
| 291 |          * | 
|---|
| 292 |          <!-- options-start --> | 
|---|
| 293 |          * Valid options are: <p/> | 
|---|
| 294 |          *  | 
|---|
| 295 |          * <pre> -L <integer> | 
|---|
| 296 |          *  Tabu list length</pre> | 
|---|
| 297 |          *  | 
|---|
| 298 |          * <pre> -U <integer> | 
|---|
| 299 |          *  Number of runs</pre> | 
|---|
| 300 |          *  | 
|---|
| 301 |          * <pre> -P <nr of parents> | 
|---|
| 302 |          *  Maximum number of parents</pre> | 
|---|
| 303 |          *  | 
|---|
| 304 |          * <pre> -R | 
|---|
| 305 |          *  Use arc reversal operation. | 
|---|
| 306 |          *  (default false)</pre> | 
|---|
| 307 |          *  | 
|---|
| 308 |          * <pre> -P <nr of parents> | 
|---|
| 309 |          *  Maximum number of parents</pre> | 
|---|
| 310 |          *  | 
|---|
| 311 |          * <pre> -R | 
|---|
| 312 |          *  Use arc reversal operation. | 
|---|
| 313 |          *  (default false)</pre> | 
|---|
| 314 |          *  | 
|---|
| 315 |          * <pre> -N | 
|---|
| 316 |          *  Initial structure is empty (instead of Naive Bayes)</pre> | 
|---|
| 317 |          *  | 
|---|
| 318 |          * <pre> -mbc | 
|---|
| 319 |          *  Applies a Markov Blanket correction to the network structure,  | 
|---|
| 320 |          *  after a network structure is learned. This ensures that all  | 
|---|
| 321 |          *  nodes in the network are part of the Markov blanket of the  | 
|---|
| 322 |          *  classifier node.</pre> | 
|---|
| 323 |          *  | 
|---|
| 324 |          * <pre> -S [LOO-CV|k-Fold-CV|Cumulative-CV] | 
|---|
| 325 |          *  Score type (LOO-CV,k-Fold-CV,Cumulative-CV)</pre> | 
|---|
| 326 |          *  | 
|---|
| 327 |          * <pre> -Q | 
|---|
| 328 |          *  Use probabilistic or 0/1 scoring. | 
|---|
| 329 |          *  (default probabilistic scoring)</pre> | 
|---|
| 330 |          *  | 
|---|
| 331 |          <!-- options-end --> | 
|---|
| 332 |          * | 
|---|
| 333 |          * @param options the list of options as an array of strings | 
|---|
| 334 |          * @throws Exception if an option is not supported | 
|---|
| 335 |          */ | 
|---|
| 336 |         public void setOptions(String[] options) throws Exception { | 
|---|
| 337 |                 String sTabuList = Utils.getOption('L', options); | 
|---|
| 338 |                 if (sTabuList.length() != 0) { | 
|---|
| 339 |                         setTabuList(Integer.parseInt(sTabuList)); | 
|---|
| 340 |                 } | 
|---|
| 341 |                 String sRuns = Utils.getOption('U', options); | 
|---|
| 342 |                 if (sRuns.length() != 0) { | 
|---|
| 343 |                         setRuns(Integer.parseInt(sRuns)); | 
|---|
| 344 |                 } | 
|---|
| 345 |                  | 
|---|
| 346 |                 super.setOptions(options); | 
|---|
| 347 |         } // setOptions | 
|---|
| 348 |  | 
|---|
| 349 |         /** | 
|---|
| 350 |          * Gets the current settings of the search algorithm. | 
|---|
| 351 |          * | 
|---|
| 352 |          * @return an array of strings suitable for passing to setOptions | 
|---|
| 353 |          */ | 
|---|
| 354 |         public String[] getOptions() { | 
|---|
| 355 |                 String[] superOptions = super.getOptions(); | 
|---|
| 356 |                 String[] options = new String[7 + superOptions.length]; | 
|---|
| 357 |                 int current = 0; | 
|---|
| 358 |                  | 
|---|
| 359 |                 options[current++] = "-L"; | 
|---|
| 360 |                 options[current++] = "" + getTabuList(); | 
|---|
| 361 |  | 
|---|
| 362 |                 options[current++] = "-U"; | 
|---|
| 363 |                 options[current++] = "" + getRuns(); | 
|---|
| 364 |  | 
|---|
| 365 |                 // insert options from parent class | 
|---|
| 366 |                 for (int iOption = 0; iOption < superOptions.length; iOption++) { | 
|---|
| 367 |                         options[current++] = superOptions[iOption]; | 
|---|
| 368 |                 } | 
|---|
| 369 |  | 
|---|
| 370 |                 // Fill up rest with empty strings, not nulls! | 
|---|
| 371 |                 while (current < options.length) { | 
|---|
| 372 |                         options[current++] = ""; | 
|---|
| 373 |                 } | 
|---|
| 374 |                 return options; | 
|---|
| 375 |         } // getOptions | 
|---|
| 376 |  | 
|---|
| 377 |         /** | 
|---|
| 378 |          * This will return a string describing the classifier. | 
|---|
| 379 |          * @return The string. | 
|---|
| 380 |          */ | 
|---|
| 381 |         public String globalInfo() { | 
|---|
| 382 |                 return "This Bayes Network learning algorithm uses tabu search for finding a well scoring " + | 
|---|
| 383 |                 "Bayes network structure. Tabu search is hill climbing till an optimum is reached. The " + | 
|---|
| 384 |                 "following step is the least worst possible step. The last X steps are kept in a list and " + | 
|---|
| 385 |                 "none of the steps in this so called tabu list is considered in taking the next step. " + | 
|---|
| 386 |                 "The best network found in this traversal is returned.\n\n" | 
|---|
| 387 |                 + "For more information see:\n\n" | 
|---|
| 388 |                 + getTechnicalInformation().toString(); | 
|---|
| 389 |         } // globalInfo | 
|---|
| 390 |          | 
|---|
| 391 |         /** | 
|---|
| 392 |          * @return a string to describe the Runs option. | 
|---|
| 393 |          */ | 
|---|
| 394 |         public String runsTipText() { | 
|---|
| 395 |           return "Sets the number of steps to be performed."; | 
|---|
| 396 |         } // runsTipText | 
|---|
| 397 |  | 
|---|
| 398 |         /** | 
|---|
| 399 |          * @return a string to describe the TabuList option. | 
|---|
| 400 |          */ | 
|---|
| 401 |         public String tabuListTipText() { | 
|---|
| 402 |           return "Sets the length of the tabu list."; | 
|---|
| 403 |         } // tabuListTipText | 
|---|
| 404 |  | 
|---|
| 405 |         /** | 
|---|
| 406 |          * Returns the revision string. | 
|---|
| 407 |          *  | 
|---|
| 408 |          * @return              the revision | 
|---|
| 409 |          */ | 
|---|
| 410 |         public String getRevision() { | 
|---|
| 411 |           return RevisionUtils.extract("$Revision: 1.5 $"); | 
|---|
| 412 |         } | 
|---|
| 413 |  | 
|---|
| 414 | } // TabuSearch | 
|---|