/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * DataGenerator.java
 * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
 *
 */
22 | |
---|
23 | package weka.datagenerators; |
---|
24 | |
---|
25 | import weka.core.Instance; |
---|
26 | import weka.core.Instances; |
---|
27 | import weka.core.Option; |
---|
28 | import weka.core.OptionHandler; |
---|
29 | import weka.core.Randomizable; |
---|
30 | import weka.core.RevisionHandler; |
---|
31 | import weka.core.Utils; |
---|
32 | |
---|
33 | import java.io.FileOutputStream; |
---|
34 | import java.io.PrintWriter; |
---|
35 | import java.io.Serializable; |
---|
36 | import java.io.StringWriter; |
---|
37 | import java.util.Enumeration; |
---|
38 | import java.util.HashSet; |
---|
39 | import java.util.Hashtable; |
---|
40 | import java.util.Random; |
---|
41 | import java.util.Vector; |
---|
42 | |
---|
43 | /** |
---|
44 | * Abstract superclass for data generators that generate data for |
---|
45 | * classifiers and clusterers. |
---|
46 | * |
---|
47 | * @author FracPete (fracpete at waikato dot ac dot nz) |
---|
48 | * @version $Revision: 1.8 $ |
---|
49 | */ |
---|
50 | public abstract class DataGenerator |
---|
51 | implements OptionHandler, Randomizable, Serializable, RevisionHandler { |
---|
52 | |
---|
53 | /** for serialization */ |
---|
54 | private static final long serialVersionUID = -3698585946221802578L; |
---|
55 | |
---|
56 | /** Debugging mode */ |
---|
57 | protected boolean m_Debug = false; |
---|
58 | |
---|
59 | /** The format for the generated dataset */ |
---|
60 | protected Instances m_DatasetFormat = null; |
---|
61 | |
---|
62 | /** Relation name the dataset should have */ |
---|
63 | protected String m_RelationName = ""; |
---|
64 | |
---|
65 | /** Number of instances that should be produced into the dataset |
---|
66 | * this number is by default m_NumExamples, |
---|
67 | * but can be reset by the generator |
---|
68 | */ |
---|
69 | protected int m_NumExamplesAct; |
---|
70 | |
---|
71 | /** default output (is printed to stdout after generation) */ |
---|
72 | protected transient StringWriter m_DefaultOutput = new StringWriter(); |
---|
73 | |
---|
74 | /** PrintWriter for outputting the generated data */ |
---|
75 | protected transient PrintWriter m_Output = new PrintWriter(m_DefaultOutput); |
---|
76 | |
---|
77 | /** random number generator seed*/ |
---|
78 | protected int m_Seed; |
---|
79 | |
---|
80 | /** random number generator*/ |
---|
81 | protected Random m_Random = null; |
---|
82 | |
---|
83 | /** flag, that indicates whether the relationname is currently assembled */ |
---|
84 | protected boolean m_CreatingRelationName = false; |
---|
85 | |
---|
86 | /** a black list for options not to be listed (for derived generators) |
---|
87 | * in the makeOptionString method |
---|
88 | * @see #makeOptionString(DataGenerator) */ |
---|
89 | protected static HashSet m_OptionBlacklist; |
---|
90 | static { |
---|
91 | m_OptionBlacklist = new HashSet(); |
---|
92 | } |
---|
93 | |
---|
94 | /** |
---|
95 | * initializes with default settings. <br/> |
---|
96 | * Note: default values are set via a default<name> method. These |
---|
97 | * default methods are also used in the listOptions method and in the |
---|
98 | * setOptions method. Why? Derived generators can override the return value |
---|
99 | * of these default methods, to avoid exceptions. |
---|
100 | */ |
---|
101 | public DataGenerator() { |
---|
102 | clearBlacklist(); |
---|
103 | |
---|
104 | setNumExamplesAct(defaultNumExamplesAct()); |
---|
105 | setSeed(defaultSeed()); |
---|
106 | } |
---|
107 | |
---|
108 | /** |
---|
109 | * creates a vector out of the enumeration from the listOptions of the |
---|
110 | * super class. Only a "convenience" method. |
---|
111 | * @param enm the Enumeration to dump into a vector |
---|
112 | * @return the elements of the enumeration in a vector |
---|
113 | */ |
---|
114 | protected Vector enumToVector(Enumeration enm) { |
---|
115 | Vector result; |
---|
116 | |
---|
117 | result = new Vector(); |
---|
118 | |
---|
119 | while (enm.hasMoreElements()) |
---|
120 | result.add(enm.nextElement()); |
---|
121 | |
---|
122 | return result; |
---|
123 | } |
---|
124 | |
---|
125 | /** |
---|
126 | * Returns an enumeration describing the available options. |
---|
127 | * |
---|
128 | * @return an enumeration of all the available options |
---|
129 | */ |
---|
130 | public Enumeration listOptions() { |
---|
131 | Vector result; |
---|
132 | |
---|
133 | result = new Vector(); |
---|
134 | |
---|
135 | result.addElement(new Option( |
---|
136 | "\tPrints this help.", |
---|
137 | "h", 1, "-h")); |
---|
138 | |
---|
139 | result.addElement(new Option( |
---|
140 | "\tThe name of the output file, otherwise the generated data is\n" |
---|
141 | + "\tprinted to stdout.", |
---|
142 | "o", 1, "-o <file>")); |
---|
143 | |
---|
144 | result.addElement(new Option( |
---|
145 | "\tThe name of the relation.", |
---|
146 | "r", 1, "-r <name>")); |
---|
147 | |
---|
148 | result.addElement(new Option( |
---|
149 | "\tWhether to print debug informations.", |
---|
150 | "d", 0, "-d")); |
---|
151 | |
---|
152 | result.addElement(new Option( |
---|
153 | "\tThe seed for random function (default " |
---|
154 | + defaultSeed() + ")", |
---|
155 | "S", 1, "-S")); |
---|
156 | |
---|
157 | return result.elements(); |
---|
158 | } |
---|
159 | |
---|
160 | /** |
---|
161 | * Parses a list of options for this object. <p/> |
---|
162 | * |
---|
163 | * For list of valid options see class description. <p/> |
---|
164 | * |
---|
165 | * @param options the list of options as an array of strings |
---|
166 | * @throws Exception if an option is not supported |
---|
167 | */ |
---|
168 | public void setOptions(String[] options) throws Exception { |
---|
169 | String tmpStr; |
---|
170 | |
---|
171 | // remove unwanted options |
---|
172 | options = removeBlacklist(options); |
---|
173 | |
---|
174 | tmpStr = Utils.getOption('r', options); |
---|
175 | if (tmpStr.length() != 0) |
---|
176 | setRelationName(Utils.unquote(tmpStr)); |
---|
177 | else |
---|
178 | setRelationName(""); |
---|
179 | |
---|
180 | tmpStr = Utils.getOption('o', options); |
---|
181 | if (tmpStr.length() != 0) |
---|
182 | setOutput(new PrintWriter(new FileOutputStream(tmpStr))); |
---|
183 | else if (getOutput() == null) |
---|
184 | throw new Exception("No Output defined!"); |
---|
185 | |
---|
186 | setDebug(Utils.getFlag('d', options)); |
---|
187 | |
---|
188 | tmpStr = Utils.getOption('S', options); |
---|
189 | if (tmpStr.length() != 0) |
---|
190 | setSeed(Integer.parseInt(tmpStr)); |
---|
191 | else |
---|
192 | setSeed(defaultSeed()); |
---|
193 | } |
---|
194 | |
---|
195 | /** |
---|
196 | * Gets the current settings of the datagenerator RDG1. Removing of |
---|
197 | * blacklisted options has to be done in the derived class, that defines |
---|
198 | * the blacklist-entry. |
---|
199 | * |
---|
200 | * @return an array of strings suitable for passing to setOptions |
---|
201 | * @see #removeBlacklist(String[]) |
---|
202 | */ |
---|
203 | public String[] getOptions() { |
---|
204 | Vector result; |
---|
205 | |
---|
206 | result = new Vector(); |
---|
207 | |
---|
208 | // to avoid endless loop |
---|
209 | if (!m_CreatingRelationName) { |
---|
210 | result.add("-r"); |
---|
211 | result.add(Utils.quote(getRelationNameToUse())); |
---|
212 | } |
---|
213 | |
---|
214 | if (getDebug()) |
---|
215 | result.add("-d"); |
---|
216 | |
---|
217 | result.add("-S"); |
---|
218 | result.add("" + getSeed()); |
---|
219 | |
---|
220 | return (String[]) result.toArray(new String[result.size()]); |
---|
221 | } |
---|
222 | |
---|
223 | /** |
---|
224 | * Initializes the format for the dataset produced. |
---|
225 | * Must be called before the generateExample or generateExamples |
---|
226 | * methods are used. Also sets a default relation name in case |
---|
227 | * the current relation name is empty. |
---|
228 | * |
---|
229 | * @return the format for the dataset |
---|
230 | * @throws Exception if the generating of the format failed |
---|
231 | * @see #defaultRelationName() |
---|
232 | */ |
---|
233 | public Instances defineDataFormat() throws Exception { |
---|
234 | if (getRelationName().length() == 0) |
---|
235 | setRelationName(defaultRelationName()); |
---|
236 | |
---|
237 | return m_DatasetFormat; |
---|
238 | } |
---|
239 | |
---|
240 | /** |
---|
241 | * Generates one example of the dataset. |
---|
242 | * |
---|
243 | * @return the generated example |
---|
244 | * @throws Exception if the format of the dataset is not yet defined |
---|
245 | * @throws Exception if the generator only works with generateExamples |
---|
246 | * which means in non single mode |
---|
247 | */ |
---|
248 | public abstract Instance generateExample() throws Exception; |
---|
249 | |
---|
250 | /** |
---|
251 | * Generates all examples of the dataset. |
---|
252 | * |
---|
253 | * @return the generated dataset |
---|
254 | * @throws Exception if the format of the dataset is not yet defined |
---|
255 | * @throws Exception if the generator only works with generateExample, |
---|
256 | * which means in single mode |
---|
257 | */ |
---|
258 | public abstract Instances generateExamples() throws Exception; |
---|
259 | |
---|
260 | /** |
---|
261 | * Generates a comment string that documentates the data generator. |
---|
262 | * By default this string is added at the beginning of the produced output |
---|
263 | * as ARFF file type, next after the options. |
---|
264 | * |
---|
265 | * @return string contains info about the generated rules |
---|
266 | * @throws Exception if the generating of the documentation fails |
---|
267 | */ |
---|
268 | public abstract String generateStart () throws Exception; |
---|
269 | |
---|
270 | /** |
---|
271 | * Generates a comment string that documentates the data generator. |
---|
272 | * By default this string is added at the end of the produced output |
---|
273 | * as ARFF file type. |
---|
274 | * |
---|
275 | * @return string contains info about the generated rules |
---|
276 | * @throws Exception if the generating of the documentation fails |
---|
277 | */ |
---|
278 | public abstract String generateFinished () throws Exception; |
---|
279 | |
---|
280 | /** |
---|
281 | * Return if single mode is set for the given data generator |
---|
282 | * mode depends on option setting and or generator type. |
---|
283 | * |
---|
284 | * @return single mode flag |
---|
285 | * @throws Exception if mode is not set yet |
---|
286 | */ |
---|
287 | public abstract boolean getSingleModeFlag () throws Exception; |
---|
288 | |
---|
289 | /** |
---|
290 | * Sets the debug flag. |
---|
291 | * @param debug the new debug flag |
---|
292 | */ |
---|
293 | public void setDebug(boolean debug) { |
---|
294 | m_Debug = debug; |
---|
295 | } |
---|
296 | |
---|
297 | /** |
---|
298 | * Gets the debug flag. |
---|
299 | * @return the debug flag |
---|
300 | */ |
---|
301 | public boolean getDebug() { |
---|
302 | return m_Debug; |
---|
303 | } |
---|
304 | |
---|
305 | /** |
---|
306 | * Returns the tip text for this property |
---|
307 | * |
---|
308 | * @return tip text for this property suitable for |
---|
309 | * displaying in the explorer/experimenter gui |
---|
310 | */ |
---|
311 | public String debugTipText() { |
---|
312 | return "Whether the generator is run in debug mode or not."; |
---|
313 | } |
---|
314 | |
---|
315 | /** |
---|
316 | * Sets the relation name the dataset should have. |
---|
317 | * @param relationName the new relation name |
---|
318 | */ |
---|
319 | public void setRelationName(String relationName) { |
---|
320 | m_RelationName = relationName; |
---|
321 | } |
---|
322 | |
---|
323 | /** |
---|
324 | * returns a relation name based on the options |
---|
325 | * |
---|
326 | * @return a relation name based on the options |
---|
327 | */ |
---|
328 | protected String defaultRelationName() { |
---|
329 | StringBuffer result; |
---|
330 | String[] options; |
---|
331 | String option; |
---|
332 | int i; |
---|
333 | |
---|
334 | m_CreatingRelationName = true; |
---|
335 | |
---|
336 | result = new StringBuffer(this.getClass().getName()); |
---|
337 | |
---|
338 | options = getOptions(); |
---|
339 | for (i = 0; i < options.length; i++) { |
---|
340 | option = options[i].trim(); |
---|
341 | if (i > 0) |
---|
342 | result.append("_"); |
---|
343 | result.append(option.replaceAll(" ", "_")); |
---|
344 | } |
---|
345 | |
---|
346 | m_CreatingRelationName = false; |
---|
347 | |
---|
348 | return result.toString(); |
---|
349 | } |
---|
350 | |
---|
351 | /** |
---|
352 | * returns the relation name to use, i.e., in case the currently set |
---|
353 | * relation name is empty, a generic one is returned. Must be used in |
---|
354 | * defineDataFormat() |
---|
355 | * @return the relation name |
---|
356 | * @see #defaultRelationName() |
---|
357 | * @see #defineDataFormat() |
---|
358 | */ |
---|
359 | protected String getRelationNameToUse() { |
---|
360 | String result; |
---|
361 | |
---|
362 | result = getRelationName(); |
---|
363 | if (result.length() == 0) |
---|
364 | result = defaultRelationName(); |
---|
365 | |
---|
366 | return result; |
---|
367 | } |
---|
368 | |
---|
369 | /** |
---|
370 | * Gets the relation name the dataset should have. |
---|
371 | * @return the relation name the dataset should have |
---|
372 | */ |
---|
373 | public String getRelationName() { |
---|
374 | return m_RelationName; |
---|
375 | } |
---|
376 | |
---|
377 | /** |
---|
378 | * Returns the tip text for this property |
---|
379 | * |
---|
380 | * @return tip text for this property suitable for |
---|
381 | * displaying in the explorer/experimenter gui |
---|
382 | */ |
---|
383 | public String relationNameTipText() { |
---|
384 | return "The relation name of the generated data (if empty, a generic one will be supplied)."; |
---|
385 | } |
---|
386 | |
---|
387 | /** |
---|
388 | * returns the default number of actual examples |
---|
389 | * |
---|
390 | * @return the default number of actual examples |
---|
391 | */ |
---|
392 | protected int defaultNumExamplesAct() { |
---|
393 | return 0; |
---|
394 | } |
---|
395 | |
---|
396 | /** |
---|
397 | * Sets the number of examples the dataset should have. |
---|
398 | * @param numExamplesAct the new number of examples |
---|
399 | */ |
---|
400 | protected void setNumExamplesAct(int numExamplesAct) { |
---|
401 | m_NumExamplesAct = numExamplesAct; |
---|
402 | } |
---|
403 | |
---|
404 | /** |
---|
405 | * Gets the number of examples the dataset should have. |
---|
406 | * @return the number of examples the dataset should have |
---|
407 | */ |
---|
408 | public int getNumExamplesAct() { |
---|
409 | return m_NumExamplesAct; |
---|
410 | } |
---|
411 | |
---|
412 | /** |
---|
413 | * Returns the tip text for this property |
---|
414 | * |
---|
415 | * @return tip text for this property suitable for |
---|
416 | * displaying in the explorer/experimenter gui |
---|
417 | */ |
---|
418 | protected String numExamplesActTipText() { |
---|
419 | return "The actual number of examples to generate."; |
---|
420 | } |
---|
421 | |
---|
422 | /** |
---|
423 | * Sets the print writer. |
---|
424 | * @param newOutput the new print writer |
---|
425 | */ |
---|
426 | public void setOutput(PrintWriter newOutput) { |
---|
427 | m_Output = newOutput; |
---|
428 | m_DefaultOutput = null; |
---|
429 | } |
---|
430 | |
---|
431 | /** |
---|
432 | * Gets the print writer. |
---|
433 | * @return print writer object |
---|
434 | */ |
---|
435 | public PrintWriter getOutput() { |
---|
436 | return m_Output; |
---|
437 | } |
---|
438 | |
---|
439 | /** |
---|
440 | * Gets the string writer, which is used for outputting to stdout. |
---|
441 | * A workaround for the problem of closing stdout when closing the |
---|
442 | * associated Printwriter. |
---|
443 | * @return print string writer object |
---|
444 | */ |
---|
445 | public StringWriter defaultOutput() { |
---|
446 | return m_DefaultOutput; |
---|
447 | } |
---|
448 | |
---|
449 | /** |
---|
450 | * Returns the tip text for this property |
---|
451 | * |
---|
452 | * @return tip text for this property suitable for |
---|
453 | * displaying in the explorer/experimenter gui |
---|
454 | */ |
---|
455 | public String outputTipText() { |
---|
456 | return "The output writer to use for printing the generated data."; |
---|
457 | } |
---|
458 | |
---|
459 | /** |
---|
460 | * Sets the format of the dataset that is to be generated. |
---|
461 | * @param newFormat the new dataset format of the dataset |
---|
462 | */ |
---|
463 | public void setDatasetFormat(Instances newFormat) { |
---|
464 | m_DatasetFormat = new Instances(newFormat, 0); |
---|
465 | } |
---|
466 | |
---|
467 | /** |
---|
468 | * Gets the format of the dataset that is to be generated. |
---|
469 | * @return the dataset format of the dataset |
---|
470 | */ |
---|
471 | public Instances getDatasetFormat() { |
---|
472 | if (m_DatasetFormat != null) |
---|
473 | return new Instances(m_DatasetFormat, 0); |
---|
474 | else |
---|
475 | return null; |
---|
476 | } |
---|
477 | |
---|
478 | /** |
---|
479 | * Returns the tip text for this property |
---|
480 | * |
---|
481 | * @return tip text for this property suitable for |
---|
482 | * displaying in the explorer/experimenter gui |
---|
483 | */ |
---|
484 | public String formatTipText() { |
---|
485 | return "The data format to use."; |
---|
486 | } |
---|
487 | |
---|
488 | /** |
---|
489 | * returns the default seed |
---|
490 | * |
---|
491 | * @return the default seed |
---|
492 | */ |
---|
493 | protected int defaultSeed() { |
---|
494 | return 1; |
---|
495 | } |
---|
496 | |
---|
497 | /** |
---|
498 | * Gets the random number seed. |
---|
499 | * |
---|
500 | * @return the random number seed. |
---|
501 | */ |
---|
502 | public int getSeed() { |
---|
503 | return m_Seed; |
---|
504 | } |
---|
505 | |
---|
506 | /** |
---|
507 | * Sets the random number seed. |
---|
508 | * |
---|
509 | * @param newSeed the new random number seed. |
---|
510 | */ |
---|
511 | public void setSeed(int newSeed) { |
---|
512 | m_Seed = newSeed; |
---|
513 | m_Random = new Random(newSeed); |
---|
514 | } |
---|
515 | |
---|
516 | /** |
---|
517 | * Returns the tip text for this property |
---|
518 | * |
---|
519 | * @return tip text for this property suitable for |
---|
520 | * displaying in the explorer/experimenter gui |
---|
521 | */ |
---|
522 | public String seedTipText() { |
---|
523 | return "The seed value for the random number generator."; |
---|
524 | } |
---|
525 | |
---|
526 | /** |
---|
527 | * Gets the random generator. |
---|
528 | * |
---|
529 | * @return the random generator |
---|
530 | */ |
---|
531 | public Random getRandom() { |
---|
532 | if (m_Random == null) |
---|
533 | m_Random = new Random (getSeed()); |
---|
534 | |
---|
535 | return m_Random; |
---|
536 | } |
---|
537 | |
---|
538 | /** |
---|
539 | * Sets the random generator. |
---|
540 | * |
---|
541 | * @param newRandom is the random generator. |
---|
542 | */ |
---|
543 | public void setRandom(Random newRandom) { |
---|
544 | m_Random = newRandom; |
---|
545 | } |
---|
546 | |
---|
547 | /** |
---|
548 | * Returns the tip text for this property |
---|
549 | * |
---|
550 | * @return tip text for this property suitable for |
---|
551 | * displaying in the explorer/experimenter gui |
---|
552 | */ |
---|
553 | public String randomTipText() { |
---|
554 | return "The random number generator to use."; |
---|
555 | } |
---|
556 | |
---|
557 | /** |
---|
558 | * Returns a string representing the dataset in the instance queue. |
---|
559 | * @return the string representing the output data format |
---|
560 | */ |
---|
561 | protected String toStringFormat() { |
---|
562 | if (m_DatasetFormat == null) |
---|
563 | return ""; |
---|
564 | return |
---|
565 | m_DatasetFormat.toString(); |
---|
566 | } |
---|
567 | |
---|
568 | /** |
---|
569 | * removes all entries from the options blacklist |
---|
570 | */ |
---|
571 | protected static void clearBlacklist() { |
---|
572 | m_OptionBlacklist.clear(); |
---|
573 | } |
---|
574 | |
---|
575 | /** |
---|
576 | * adds the given option, e.g., for "-V" use "V", to the blacklist of options |
---|
577 | * that are not to be output via the makeOptionString method |
---|
578 | * @param option the option to exclude from listing |
---|
579 | * @see #makeOptionString(DataGenerator) |
---|
580 | */ |
---|
581 | protected static void addToBlacklist(String option) { |
---|
582 | m_OptionBlacklist.add(option); |
---|
583 | } |
---|
584 | |
---|
585 | /** |
---|
586 | * checks, whether the given option is in the blacklist of options not to |
---|
587 | * be output by makeOptionString |
---|
588 | * @param option the option to check |
---|
589 | * @return true if the option is on the blacklist |
---|
590 | * @see #makeOptionString(DataGenerator) |
---|
591 | */ |
---|
592 | protected static boolean isOnBlacklist(String option) { |
---|
593 | return m_OptionBlacklist.contains(option); |
---|
594 | } |
---|
595 | |
---|
596 | /** |
---|
597 | * removes all the options from the options array that are blacklisted |
---|
598 | * |
---|
599 | * @param options the options to remove from the blacklist |
---|
600 | * @return the processed options array |
---|
601 | */ |
---|
602 | protected String[] removeBlacklist(String[] options) { |
---|
603 | Enumeration enm; |
---|
604 | Hashtable pool; |
---|
605 | Option option; |
---|
606 | |
---|
607 | // retrieve options that are on blacklist |
---|
608 | enm = listOptions(); |
---|
609 | pool = new Hashtable(); |
---|
610 | while (enm.hasMoreElements()) { |
---|
611 | option = (Option) enm.nextElement(); |
---|
612 | if (isOnBlacklist(option.name())) |
---|
613 | pool.put(option.name(), option); |
---|
614 | } |
---|
615 | |
---|
616 | // remove options |
---|
617 | enm = pool.keys(); |
---|
618 | while (enm.hasMoreElements()) { |
---|
619 | option = (Option) pool.get(enm.nextElement()); |
---|
620 | try { |
---|
621 | if (option.numArguments() == 0) |
---|
622 | Utils.getFlag(option.name(), options); |
---|
623 | else |
---|
624 | Utils.getOption(option.name(), options); |
---|
625 | } |
---|
626 | catch (Exception e) { |
---|
627 | e.printStackTrace(); |
---|
628 | } |
---|
629 | } |
---|
630 | |
---|
631 | return options; |
---|
632 | } |
---|
633 | |
---|
634 | /** |
---|
635 | * returns all the options in a string |
---|
636 | * |
---|
637 | * @param generator the DataGenerator to return all the options for |
---|
638 | * @return the assembled option string |
---|
639 | */ |
---|
640 | protected static String makeOptionString(DataGenerator generator) { |
---|
641 | StringBuffer result; |
---|
642 | Enumeration enm; |
---|
643 | Option option; |
---|
644 | |
---|
645 | result = new StringBuffer(); |
---|
646 | result.append("\nData Generator options:\n\n"); |
---|
647 | |
---|
648 | enm = generator.listOptions(); |
---|
649 | while (enm.hasMoreElements()) { |
---|
650 | option = (Option) enm.nextElement(); |
---|
651 | // skip option if on blacklist |
---|
652 | if (isOnBlacklist(option.name())) |
---|
653 | continue; |
---|
654 | result.append(option.synopsis() + "\n" + option.description() + "\n"); |
---|
655 | } |
---|
656 | |
---|
657 | return result.toString(); |
---|
658 | } |
---|
659 | |
---|
660 | /** |
---|
661 | * Calls the data generator. |
---|
662 | * |
---|
663 | * @param generator one of the data generators |
---|
664 | * @param options options of the data generator |
---|
665 | * @throws Exception if there was an error in the option list |
---|
666 | */ |
---|
667 | public static void makeData(DataGenerator generator, String[] options) |
---|
668 | throws Exception { |
---|
669 | |
---|
670 | boolean printhelp; |
---|
671 | Vector unknown; |
---|
672 | int i; |
---|
673 | |
---|
674 | // help? |
---|
675 | printhelp = (Utils.getFlag('h', options)); |
---|
676 | |
---|
677 | // read options |
---|
678 | if (!printhelp) { |
---|
679 | try { |
---|
680 | options = generator.removeBlacklist(options); |
---|
681 | generator.setOptions(options); |
---|
682 | |
---|
683 | // check for left-over options, but don't raise exception |
---|
684 | unknown = new Vector(); |
---|
685 | for (i = 0; i < options.length; i++) { |
---|
686 | if (options[i].length() != 0) |
---|
687 | unknown.add(options[i]); |
---|
688 | } |
---|
689 | if (unknown.size() > 0) { |
---|
690 | System.out.print("Unknown options:"); |
---|
691 | for (i = 0; i < unknown.size(); i++) |
---|
692 | System.out.print(" " + unknown.get(i)); |
---|
693 | System.out.println(); |
---|
694 | } |
---|
695 | } |
---|
696 | catch (Exception e) { |
---|
697 | e.printStackTrace(); |
---|
698 | printhelp = true; |
---|
699 | } |
---|
700 | } |
---|
701 | |
---|
702 | if (printhelp) { |
---|
703 | System.out.println(makeOptionString(generator)); |
---|
704 | return; |
---|
705 | } |
---|
706 | |
---|
707 | // define dataset format |
---|
708 | // computes actual number of examples to be produced |
---|
709 | generator.setDatasetFormat(generator.defineDataFormat()); |
---|
710 | |
---|
711 | // get print writer |
---|
712 | PrintWriter output = generator.getOutput(); |
---|
713 | |
---|
714 | // output of options |
---|
715 | output.println("%"); |
---|
716 | output.println("% Commandline"); |
---|
717 | output.println("%"); |
---|
718 | output.println("% " + generator.getClass().getName() + " " |
---|
719 | + Utils.joinOptions(generator.getOptions())); |
---|
720 | output.println("%"); |
---|
721 | |
---|
722 | // comment at beginning of ARFF File |
---|
723 | String commentAtStart = generator.generateStart(); |
---|
724 | |
---|
725 | if (commentAtStart.length() > 0) { |
---|
726 | output.println("%"); |
---|
727 | output.println("% Prologue"); |
---|
728 | output.println("%"); |
---|
729 | output.println(commentAtStart.trim()); |
---|
730 | output.println("%"); |
---|
731 | } |
---|
732 | |
---|
733 | // ask data generator which mode |
---|
734 | boolean singleMode = generator.getSingleModeFlag(); |
---|
735 | |
---|
736 | // start data producer |
---|
737 | if (singleMode) { |
---|
738 | // output of dataset header |
---|
739 | output.println(generator.toStringFormat()); |
---|
740 | for (i = 0; i < generator.getNumExamplesAct(); i++) { |
---|
741 | // over all examples to be produced |
---|
742 | Instance inst = generator.generateExample(); |
---|
743 | output.println(inst); |
---|
744 | } |
---|
745 | } |
---|
746 | else { // generator produces all instances at once |
---|
747 | Instances dataset = generator.generateExamples(); |
---|
748 | // output of dataset |
---|
749 | output.println(dataset); |
---|
750 | } |
---|
751 | // comment at end of ARFF File |
---|
752 | String commentAtEnd = generator.generateFinished(); |
---|
753 | |
---|
754 | if (commentAtEnd.length() > 0) { |
---|
755 | output.println("%"); |
---|
756 | output.println("% Epilogue"); |
---|
757 | output.println("%"); |
---|
758 | output.println(commentAtEnd.trim()); |
---|
759 | output.println("%"); |
---|
760 | } |
---|
761 | |
---|
762 | output.flush(); |
---|
763 | output.close(); |
---|
764 | |
---|
765 | // print result to stdout? |
---|
766 | if (generator.defaultOutput() != null) |
---|
767 | System.out.println(generator.defaultOutput().toString()); |
---|
768 | } |
---|
769 | |
---|
770 | /** |
---|
771 | * runs the datagenerator instance with the given options. |
---|
772 | * |
---|
773 | * @param datagenerator the datagenerator to run |
---|
774 | * @param options the commandline options |
---|
775 | */ |
---|
776 | protected static void runDataGenerator(DataGenerator datagenerator, String[] options) { |
---|
777 | try { |
---|
778 | DataGenerator.makeData(datagenerator, options); |
---|
779 | } |
---|
780 | catch (Exception e) { |
---|
781 | if ( (e.getMessage() != null) |
---|
782 | && (e.getMessage().indexOf("Data Generator options") == -1) ) |
---|
783 | e.printStackTrace(); |
---|
784 | else |
---|
785 | System.err.println(e.getMessage()); |
---|
786 | } |
---|
787 | } |
---|
788 | } |
---|