001 /**
002 * Copyright (C) 2007-2011, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program. If not, see <http://www.gnu.org/licenses/>.
018 */
019
020 package org.dllearner.algorithms.gp;
021
022 import java.text.DecimalFormat;
023 import java.util.Arrays;
024 import java.util.Collection;
025 import java.util.Comparator;
026 import java.util.LinkedList;
027 import java.util.Random;
028 import java.util.Set;
029 import java.util.Map.Entry;
030
031 import org.dllearner.algorithms.hybridgp.Psi;
032 import org.dllearner.core.AbstractCELA;
033 import org.dllearner.core.AbstractLearningProblem;
034 import org.dllearner.core.AbstractReasonerComponent;
035 import org.dllearner.core.configurators.GPConfigurator;
036 import org.dllearner.core.options.BooleanConfigOption;
037 import org.dllearner.core.options.ConfigEntry;
038 import org.dllearner.core.options.ConfigOption;
039 import org.dllearner.core.options.DoubleConfigOption;
040 import org.dllearner.core.options.IntegerConfigOption;
041 import org.dllearner.core.options.InvalidConfigOptionValueException;
042 import org.dllearner.core.options.StringConfigOption;
043 import org.dllearner.core.owl.Description;
044 import org.dllearner.core.owl.Thing;
045 import org.dllearner.learningproblems.EvaluatedDescriptionPosNeg;
046 import org.dllearner.learningproblems.PosNegLP;
047 import org.dllearner.learningproblems.ScorePosNeg;
048 import org.dllearner.utilities.Helper;
049
050 /**
051 * This class implements the genetic programming (GP) algorithm and provides
052 * methods to configure and run it.
053 *
054 * @author Jens Lehmann
055 *
056 */
057 public class GP extends AbstractCELA {
058
059 private GPConfigurator configurator;
060 @Override
061 public GPConfigurator getConfigurator(){
062 return configurator;
063 }
064
065
066 // NumberFormat f;
067 DecimalFormat df = new DecimalFormat("0.00");
068
069 public enum AlgorithmType {
070 /**
071 * This is a type of algorithm, where the offspring completely replaces the
072 * previous generation.
073 */
074 GENERATIONAL,
075 /**
076 * In this type of algorithm offspring is produced by a number of indivuals.
077 * The offspring then replaces the weakest individuals of the previous
078 * generation.
079 */
080 STEADY_STATE};
081
082 public enum SelectionType {
083 /**
084 * Rank selection is a selection type, where the probability of being
085 * selected depends only on the order of the fitness of all individuals.
086 */
087 RANK_SELECTION,
088 /**
089 * FPS (Fitness Proportionate Selection) is a selection type, where the
090 * probability of being selected is proportional to the fitness of an
091 * individual.
092 */
093 FPS,
094 TOURNAMENT_SELECTION};
095
096 // configuration options
097 private SelectionType selectionType = SelectionType.RANK_SELECTION;
098 private int tournamentSize = 3;
099 private boolean elitism = true;
100 private AlgorithmType algorithmType = AlgorithmType.STEADY_STATE;
101 private double mutationProbability = 0.03d;
102 private double crossoverProbability = 0.95d;
103 private double hillClimbingProbability = 0.0d;
104 private double refinementProbability = 0.0;
105 private int numberOfIndividuals = 100;
106 private int numberOfSelectedIndividuals = 96;
107 private boolean useFixedNumberOfGenerations = false;
108 private int generations = 20;
109 private int postConvergenceGenerations = 50;
110 private boolean adc = false;
111 private int initMinDepth = 4;
112 private int initMaxDepth = 6;
113 private int maxConceptLength = 75;
114 // private boolean useMultiStructures = true;
115
116 private Program[] individuals;
117
118 private Program fittestIndividual;
119
120 public int fittestIndividualGeneration;
121
122 private Comparator<Program> fitnessComparator;
123
124 private static Random rand = new Random();
125
126 private long startTime;
127
128 private ScorePosNeg bestScore;
129 private Description bestConcept;
130
131 // private GeneticRefinementOperator psi;
132 private Psi psi;
133
134
135 /**
136 * Creates an algorithm object. By default a steady state algorithm with
137 * rank selection is used. This operates
138 * on a population of 1000 individuals with a probability of crossover of
139 * 1.0 and a probability of mutation of 0.01.
140 *
141 */
142 public GP(PosNegLP learningProblem, AbstractReasonerComponent rs) {
143 super(learningProblem, rs);
144 this.configurator = new GPConfigurator(this);
145 }
146
147 public static String getName() {
148 return "genetic programming learning algorithm";
149 }
150
151 public static Collection<Class<? extends AbstractLearningProblem>> supportedLearningProblems() {
152 Collection<Class<? extends AbstractLearningProblem>> problems = new LinkedList<Class<? extends AbstractLearningProblem>>();
153 problems.add(PosNegLP.class);
154 return problems;
155 }
156
157 public static Collection<ConfigOption<?>> createConfigOptions() {
158 Collection<ConfigOption<?>> options = new LinkedList<ConfigOption<?>>();
159 StringConfigOption selectionType = new StringConfigOption("selectionType", "selection type", "rankSelection");
160 selectionType.setAllowedValues(new String[] {"rankSelection", "fps", "tournament"});
161 options.add(selectionType);
162 IntegerConfigOption tournamentSize = new IntegerConfigOption("tournamentSize", "tournament size (applies only to tournament selection)", 3);
163 tournamentSize.setLowerLimit(2);
164 tournamentSize.setUpperLimit(20);
165 options.add(tournamentSize);
166 options.add(new BooleanConfigOption("elitism", "specifies whether to use elitism in selection", true));
167 StringConfigOption algorithmType = new StringConfigOption("algorithmType", "algorithm type", "steadyState");
168 algorithmType.setAllowedValues(new String[] {"generational", "steadyState"});
169 options.add(algorithmType);
170 DoubleConfigOption mutationProb = new DoubleConfigOption("mutationProbability", "mutation probability", 0.03);
171 mutationProb.setLowerLimit(0.0);
172 mutationProb.setUpperLimit(1.0);
173 options.add(mutationProb);
174 DoubleConfigOption crossoverProb = new DoubleConfigOption("crossoverProbability", "crossover probability", 0.95);
175 crossoverProb.setLowerLimit(0.0);
176 crossoverProb.setUpperLimit(1.0);
177 options.add(crossoverProb);
178 DoubleConfigOption hillClimbingProb = new DoubleConfigOption("hillClimbingProbability", "hill climbing probability", 0.0);
179 hillClimbingProb.setLowerLimit(0.0);
180 hillClimbingProb.setUpperLimit(1.0);
181 options.add(hillClimbingProb);
182 DoubleConfigOption refinementProb = new DoubleConfigOption("refinementProbability", "refinement operator probability (values higher than 0 turn this into a hybrid GP algorithm - see publication)", 0.00);
183 refinementProb.setLowerLimit(0.0);
184 refinementProb.setUpperLimit(1.0);
185 options.add(refinementProb);
186 IntegerConfigOption numberOfInd = new IntegerConfigOption("numberOfIndividuals", "number of individuals", 100);
187 numberOfInd.setLowerLimit(1);
188 options.add(numberOfInd);
189 IntegerConfigOption numberOfSelInd = new IntegerConfigOption("numberOfSelectedIndividuals", "number of selected individuals", 92);
190 numberOfSelInd.setLowerLimit(1);
191 options.add(numberOfSelInd);
192 options.add(new BooleanConfigOption("useFixedNumberOfGenerations", "specifies whether to use a fixed number of generations", false));
193 IntegerConfigOption gen = new IntegerConfigOption("generations", "number of generations (only valid if a fixed number of generations is used)", 20);
194 gen.setLowerLimit(1);
195 options.add(gen);
196 IntegerConfigOption post = new IntegerConfigOption("postConvergenceGenerations", "number of generations after which to stop if no improvement wrt. the best solution has been achieved", 50);
197 post.setLowerLimit(1);
198 options.add(post);
199 options.add(new BooleanConfigOption("adc", "whether to use automatically defined concept (this invents new helper concepts, but enlarges the search space", false));
200 IntegerConfigOption initMin = new IntegerConfigOption("initMinDepth", "minimum depth to use when creating the initial population", 4);
201 initMin.setLowerLimit(1);
202 options.add(initMin);
203 IntegerConfigOption initMax = new IntegerConfigOption("initMaxDepth", "maximum depth to use when creating the initial population", 6);
204 initMax.setLowerLimit(1);
205 options.add(initMax);
206 IntegerConfigOption max = new IntegerConfigOption("maxConceptLength", "maximum concept length (higher length means lowest possible fitness)", 75);
207 max.setLowerLimit(1);
208 options.add(max);
209 // options.add(new BooleanConfigOption("useMultiStructures", "specifies whether to use e.g. (a AND b AND c) instead of ((A and b) and c) - there is no apparent reason to set this to false", true));
210 return options;
211 }
212
213 /* (non-Javadoc)
214 * @see org.dllearner.core.Component#applyConfigEntry(org.dllearner.core.ConfigEntry)
215 */
216 @Override
217 public <T> void applyConfigEntry(ConfigEntry<T> entry) throws InvalidConfigOptionValueException {
218 String name = entry.getOptionName();
219 if(name.equals("selectionType")) {
220 String value = (String) entry.getValue();
221 if(value.equals("fps"))
222 selectionType = SelectionType.FPS;
223 else if(value.equals("tournament"))
224 selectionType = SelectionType.TOURNAMENT_SELECTION;
225 else
226 selectionType = SelectionType.RANK_SELECTION;
227 } else if(name.equals("tournamentSize")) {
228 tournamentSize = (Integer) entry.getValue();
229 } else if(name.equals("elitism")) {
230 elitism = (Boolean) entry.getValue();
231 } else if(name.equals("algorithmType")) {
232 String value = (String) entry.getValue();
233 if(value.equals("generational"))
234 algorithmType = AlgorithmType.GENERATIONAL;
235 else
236 algorithmType = AlgorithmType.STEADY_STATE;
237 } else if(name.equals("mutationProbability")) {
238 mutationProbability = (Double) entry.getValue();
239 } else if(name.equals("crossoverProbability")) {
240 crossoverProbability = (Double) entry.getValue();
241 } else if(name.equals("refinementProbability")) {
242 refinementProbability = (Double) entry.getValue();
243 } else if(name.equals("hillClimbingProbability")) {
244 hillClimbingProbability = (Double) entry.getValue();
245 } else if(name.equals("numberOfIndividuals")) {
246 numberOfIndividuals = (Integer) entry.getValue();
247 } else if(name.equals("numberOfSelectedIndividuals")) {
248 numberOfSelectedIndividuals = (Integer) entry.getValue();
249 } else if(name.equals("useFixedNumberOfGenerations")) {
250 useFixedNumberOfGenerations = (Boolean) entry.getValue();
251 } else if(name.equals("generations")) {
252 generations = (Integer) entry.getValue();
253 } else if(name.equals("postConvergenceGenerations")) {
254 postConvergenceGenerations = (Integer) entry.getValue();
255 } else if(name.equals("adc")) {
256 adc = (Boolean) entry.getValue();
257 } else if(name.equals("initMinDepth")) {
258 initMinDepth = (Integer) entry.getValue();
259 } else if(name.equals("initMaxDepth")) {
260 initMaxDepth = (Integer) entry.getValue();
261 } else if(name.equals("maxConceptLength")) {
262 maxConceptLength = (Integer) entry.getValue();
263 }
264
265 }
266
267 /* (non-Javadoc)
268 * @see org.dllearner.core.Component#init()
269 */
270 @Override
271 public void init() {
272 // reasoner.prepareSubsumptionHierarchy();
273 // reasoner.prepareRoleHierarchy();
274 }
275
276 @Override
277 public void start() {
278 // falls refinement-Wahrscheinlichkeit gröÃer 0, dann erzeuge psi
279 psi = new Psi((PosNegLP)learningProblem, reasoner);
280
281 System.out.println();
282 System.out.println("Starting Genetic Programming Learner");
283 System.out.println();
284
285 System.out.println("Settings:");
286 System.out.println("algorithm type: " + algorithmType);
287 System.out.print("selection type: " + selectionType);
288 if(elitism)
289 System.out.println(" (elitism activated)");
290 else
291 System.out.println();
292 System.out.println("number of individuals: " + numberOfIndividuals);
293 if(algorithmType == AlgorithmType.STEADY_STATE)
294 System.out.println("number of selected individuals: " + numberOfSelectedIndividuals);
295 System.out.println("probability of crossover: " + df.format(crossoverProbability*100) + "%");
296 System.out.println("probability of mutation: " + df.format(mutationProbability*100) + "%");
297 System.out.println("probability of hill climbing: " + df.format(hillClimbingProbability*100) + "%");
298 System.out.println("probability of refinement: " + df.format(refinementProbability*100) + "%");
299 System.out.println("number of post convergence generations: " + postConvergenceGenerations);
300 System.out.println();
301
302 // represents the individuals in the current run
303 individuals = new Program[numberOfIndividuals];
304
305 if (algorithmType == AlgorithmType.GENERATIONAL) {
306 if (elitism) {
307 numberOfSelectedIndividuals = numberOfIndividuals - 1;
308 // ensure that number of selected individuals is even
309 if (numberOfSelectedIndividuals % 2 == 1)
310 error("Number of Individuals must be odd when elitism "
311 + "is used in a generational algorithm");
312
313 } else {
314 numberOfSelectedIndividuals = numberOfIndividuals;
315 if (numberOfSelectedIndividuals % 2 == 1)
316 error("Number of Individuals must be even when elitism "
317 + "is not used in a generational algorithm");
318 }
319 }
320
321 int numberOfNewIndividuals;
322 if (elitism)
323 numberOfNewIndividuals = numberOfSelectedIndividuals + 1;
324 else
325 numberOfNewIndividuals = numberOfSelectedIndividuals;
326
327 // perform some simple checks to see if configured values make sense
328 if (numberOfIndividuals < 2)
329 error("Number of individuals must be at least 2.");
330 // if (numberOfIndividuals % 8 != 0)
331 // error("The number of individuals must be divisible by 8.");
332 if (numberOfSelectedIndividuals % 2 == 1)
333 error("The number of selected individuals must be even.");
334 if ((numberOfSelectedIndividuals < 2)
335 || (numberOfSelectedIndividuals > numberOfIndividuals))
336 error("Number of selected individuals should be between 2 and "
337 + numberOfIndividuals);
338
339 // a comparator for comparing two programs according to their fitness
340 fitnessComparator = new Comparator<Program>() {
341 public int compare(Program p1, Program p2) {
342 double diff = p1.getFitness() - p2.getFitness();
343 if (diff > 0)
344 return 1;
345 else if (diff < 0)
346 return -1;
347 else
348 return 0;
349 }
350 };
351
352 startTime = System.nanoTime();
353
354 // System.out.println("If Java does not allocate enough memory on your system\n" +
355 // "use \"java -Xmx128m Start\" to start the program. This allocates\n" +
356 // "a maximum of 128 MB of memory. 64 MB are sufficient so it is usually\n" +
357 // "not necessary to do this.\n");
358
359 createIndividuals();
360
361 // keep track of the fittest individual
362 // Arrays.sort(individuals, fitnessComparator);
363 fittestIndividual = getFittestIndividual(); //individuals[0];
364 fittestIndividualGeneration = 0;
365
366 // create a population of individuals and print initial statistics
367 System.out.println("Initial Population:");
368 printStatistics(fittestIndividual);
369
370 // variables used in the inner loop of the algorithm
371 int[] selectedIndividuals = new int[numberOfSelectedIndividuals];
372 Program[] newIndividuals = new Program[numberOfNewIndividuals];
373 Program[] tmp = new Program[2];
374
375 // long startTime = System.currentTimeMillis();
376
377
378 int generation = 0;
379
380 // MAIN LOOP
381 do {
382
383 // sort individuals if necessary
384 if (selectionType == SelectionType.RANK_SELECTION
385 || algorithmType == AlgorithmType.STEADY_STATE)
386 Arrays.sort(individuals, fitnessComparator);
387
388 boolean showIndividuals = false;
389
390 // Individuen ausgeben
391 if(showIndividuals) {
392 System.out.println("GENERATION " + generation);
393 for(Program p : individuals) {
394 // TODO: rausfinden warum es Inkonsistenzen bei hybrid GP gibt
395 // => liegt daran, dass DIG- und KAON2Reasoner bei getAtomicConcepts()
396 // nicht klonen, sondern die Referenzen zurückgeben; dadurch können
397 // die parent Links falsch sein; für hybrid GP nicht schlimm, aber für
398 // gemischte Varianten schon
399 // => alleine dort zu klonen reicht aber nicht, es müsste stattdessen
400 // bei der Baumerzeugung geklont werden
401 // GPUtilities.checkProgram(p);
402 // if(generation % 5 == 0)
403 System.out.println(p.getFitness() + " " + p.getTree());
404 // System.out.println(" ADC " + p.getAdc());
405 }
406 System.out.println("<===>");
407 }
408
409 // apply the configured selection algorithm
410 selectedIndividuals = selectIndividuals(generation);
411
412 // produce offspring
413 for (int i = 0; i < numberOfSelectedIndividuals; i++) {
414 double rand = Math.random();
415
416 double crossoverBoundary = crossoverProbability;
417 double mutationBoundary = crossoverBoundary + mutationProbability;
418 double hillClimbingBoundary = mutationBoundary + hillClimbingProbability;
419 double refinementBoundary = hillClimbingBoundary + refinementProbability;
420
421 // wenn nur noch ein Individual zur Auswahl ist, dann
422 // darf Crossover nicht genommen werden (falls es ausgewaehlt wird,
423 // dann wird stattdessen reproduction genommen)
424 if (rand < crossoverBoundary && i+1 != numberOfSelectedIndividuals) {
425 // crossover
426 tmp = GPUtilities.crossover(learningProblem,
427 individuals[selectedIndividuals[i]],
428 individuals[selectedIndividuals[i + 1]]);
429
430 //System.out.println(tmp[0].getTree());
431
432 newIndividuals[i] = tmp[0];
433 newIndividuals[i + 1] = tmp[1];
434
435 // Incrementing i here is a significant code change! (2007/08/31)
436 // This is done, because crossover uses two individuals as input and
437 // outputs two. Without the increment that second produced child is
438 // overwritten, which caused a significant loss in performance.
439 i++;
440 // mutation
441 } else if(rand >= crossoverBoundary && rand < mutationBoundary) {
442 newIndividuals[i] = GPUtilities.mutation(learningProblem, reasoner, individuals[selectedIndividuals[i]]);
443 // hill climbing
444 } else if(rand >= mutationBoundary && rand < hillClimbingBoundary) {
445 // System.out.println("hill climbing");
446 newIndividuals[i] = GPUtilities.hillClimbing(learningProblem, reasoner, individuals[selectedIndividuals[i]]);
447 // refinement operator
448 } else if(rand >= hillClimbingBoundary && rand < refinementBoundary) {
449 newIndividuals[i] = psi.applyOperator(individuals[selectedIndividuals[i]]);
450 // reproduction
451 } else {
452 newIndividuals[i] = individuals[selectedIndividuals[i]];
453 }
454
455 /* alter Code
456 else {
457 // reproduction
458 newIndividuals[i] = individuals[selectedIndividuals[i]];
459 newIndividuals[i + 1] = individuals[selectedIndividuals[i + 1]];
460 }
461
462 // mutate individuals
463 if (Math.random() < mutationProbability) {
464 // System.out.println(newIndividuals[i].getFitness() + " " + newIndividuals[i].getTree());
465 newIndividuals[i] = Utilities.mutation(newIndividuals[i]);
466 // System.out.println(newIndividuals[i].getFitness() + " " + newIndividuals[i].getTree());
467 // System.out.println("====");
468 }
469
470 if (Math.random() < mutationProbability)
471 newIndividuals[i+1] = Utilities.mutation(newIndividuals[i + 1]);
472 */
473 }
474
475 // update fittest individual
476 Program chr = getFittestIndividual();
477 if (chr.getFitness() > fittestIndividual.getFitness()) {
478 fittestIndividual = chr;
479 fittestIndividualGeneration = generation;
480 }
481 // update fittest individual
482 /*
483 Program chr2 = getFittestValidIndividual();
484 if (chr2.getFitness() > fittestValidIndividual.getFitness()) {
485 fittestValidIndividual = chr2;
486 fittestValidIndividualGeneration = generation;
487 }
488 */
489
490 // if elitism is used, the fittest individual is copied over to the
491 // new generation automatically
492 if (elitism)
493 newIndividuals[numberOfNewIndividuals - 1] = fittestIndividual;
494
495 // the steady state algorithm replaces the weakest individuals by
496 // the new ones (note that the individuals are ordered by fitness,
497 // so the new individuals can just be copied over at the start of
498 // the array)
499 if (algorithmType == AlgorithmType.STEADY_STATE)
500 System.arraycopy(newIndividuals, 0, individuals, 0,
501 numberOfNewIndividuals);
502 // the generational algorithm replaces the whole generation
503 else
504 System.arraycopy(newIndividuals, 0, individuals, 0,
505 numberOfIndividuals);
506
507 if (generation % 5 == 0) {
508 System.out.println("Generation " + generation);
509 printStatistics(fittestIndividual);
510 }
511
512 // alle Individuen auf maximale Konzeptlänge überprüfen um mögliche
513 // Speicherprobleme zu verhindern
514 for(int i=0; i<numberOfIndividuals; i++) {
515 if(individuals[i].getTree().getLength()>maxConceptLength) {
516 System.out.println("Warning: GP produced concept longer then " + maxConceptLength + ". Replacing it with TOP.");
517 individuals[i] = GPUtilities.createProgram(learningProblem, new Thing());
518 }
519 }
520
521 generation++;
522 } while ( (useFixedNumberOfGenerations && generation < generations)
523 || (!useFixedNumberOfGenerations && (generation - fittestIndividualGeneration < postConvergenceGenerations)));
524
525 // fittestIndividual.optimize();
526
527 long endTime = System.nanoTime(); // .currentTimeMillis();
528 // R�ckgabewert des Algorithmus speichern
529 bestScore = fittestIndividual.getScore();
530 bestConcept = fittestIndividual.getTree();
531
532 // nachschauen, ob ev. noch bessere Konzepte im Psi-Cache sind
533 boolean betterValueFoundInPsiCache = false;
534 double bestValue = bestScore.getScoreValue();
535
536 if(refinementProbability > 0) {
537 // das Problem ist hier, dass die gecachte Score nicht unbedingt
538 // der echten Score entsprechen muss, d.h. hier muss die
539 // Konzeptlänge mit einberechnet werden => deswegen werden
540 // neue Score-Objekte generiert
541 Set<Entry<Description,ScorePosNeg>> entrySet = psi.evalCache.entrySet();
542 for(Entry<Description,ScorePosNeg> entry : entrySet) {
543 ScorePosNeg tmpScore = entry.getValue();
544 Description c = entry.getKey();
545 tmpScore = tmpScore.getModifiedLengthScore(c.getLength());
546 double tmpScoreValue = tmpScore.getScoreValue();
547 if(tmpScoreValue>bestValue) {
548 bestValue = tmpScoreValue;
549 betterValueFoundInPsiCache = true;
550 bestScore = tmpScore;
551 bestConcept = c;
552 }
553
554 }
555 }
556
557 System.out.println("final report");
558 System.out.println("============");
559 // System.out.println("fittest individual: " + fittestIndividual.getTree());
560 System.out.println("generations: " + generation);
561 System.out.println("fittest individual found after "
562 + fittestIndividualGeneration + " generations");
563 System.out.println("runtime in ms: " + Helper.prettyPrintNanoSeconds(endTime - startTime));
564 System.out.println("fitness evaluations: "
565 + GPUtilities.fitnessEvaluations);
566 if(refinementProbability > 0) {
567 System.out.println("operator applications: " + psi.getNrOfRequests() + " psi, " + GPUtilities.crossover + " crossover, " +
568 GPUtilities.mutation + " mutation, " + GPUtilities.hillClimbing + " hillClimbing");
569 }
570
571 //System.out.println("invalid-to-valid transformations by FROG optimization: "
572 // + Program.treesOptimized);
573 System.out.println();
574 printStatistics(fittestIndividual);
575 System.out.println(fittestIndividual.getScore());
576 if(betterValueFoundInPsiCache) {
577 System.out.println("Found better solution in Psi-Cache:");
578 System.out.println(bestConcept);
579 int misClassifications = bestScore.getNotCoveredPositives().size()+
580 bestScore.getCoveredNegatives().size();
581 System.out.println("misclassifications: " + misClassifications + ", length " + bestConcept.getLength());
582 }
583
584 /*
585 Collection<Concept> test = ReasonerComponent.retrievals;
586 // for(Concept c : )
587
588 test.removeAll(psi.evalCache.keySet());
589 for(Concept c : test)
590 System.out.println(c);
591 */
592 }
593
594 // Anmerkung: beim Lernen von DLs ist es besonders wichtig, dass schon bei der
595 // Initialisierung gute Teilbaeume herauskommen, d.h. nicht notwendigerweise Teilbaeume
596 // der gewuenschten Loesung, aber Baeume die solche Teilbaeume enthalten und auÃerdem
597 // Fehlklassifikationen vermeiden
598 private void createIndividuals() {
599 // trees are created by the ramped half and half method with a tree depth
600 // between 6 and 9
601
602 // Lookuptable generieren
603 double[] functionValues = new double[initMaxDepth-initMinDepth+1];
604 double functionSum = 0;
605 double functionValue;
606 for(int i=initMinDepth; i<=initMaxDepth; i++) {
607 functionValue = initFunction(i);
608 functionSum += functionValue;
609 functionValues[i-initMinDepth] = functionSum;
610 }
611
612 // function-based-half-and-half
613 for(int i = 0; i< numberOfIndividuals; i++) {
614 boolean grow = (Math.random()>0.5);
615
616 int depth = getLookupTablePosition(functionValues,rand.nextDouble()*functionSum) + initMinDepth;
617 // int depth = rand.nextInt(initMaxDepth-initMinDepth)+initMinDepth;
618
619 if(grow)
620 individuals[i] = GPUtilities.createGrowRandomProgram(learningProblem, reasoner, depth, adc);
621 else
622 individuals[i] = GPUtilities.createFullRandomProgram(learningProblem, reasoner, depth, adc);
623 }
624
625 /*
626 for(int i = 0; i< numberOfIndividuals; i++) {
627 double nr = Math.random();
628 if(Math.random()>0.5) {
629 if(nr<1/4d)
630 individuals[i]= GPUtilities.createFullRandomProgram(learningProblem, 2);
631 else if(nr<2/4d)
632 individuals[i]= GPUtilities.createFullRandomProgram(learningProblem, 3);
633 else if(nr<3/4d)
634 individuals[i]= GPUtilities.createFullRandomProgram(learningProblem, 4);
635 else
636 individuals[i]= GPUtilities.createFullRandomProgram(learningProblem, 5);
637 } else {
638 if(nr<1/4d)
639 individuals[i]= GPUtilities.createGrowRandomProgram(learningProblem,3);
640 else if(nr<2/4d)
641 individuals[i]= GPUtilities.createGrowRandomProgram(learningProblem,4);
642 else if(nr<3/4d)
643 individuals[i]= GPUtilities.createGrowRandomProgram(learningProblem,5);
644 else
645 individuals[i]= GPUtilities.createGrowRandomProgram(learningProblem,6);
646 }
647 }
648 */
649
650
651 /*
652 for (int i = 0; i < numberOfIndividuals / 2; i = i + 4) {
653 individuals[i] = Utilities.createFullRandomProgram(2);
654 individuals[i + 1] = Utilities.createFullRandomProgram(3);
655 individuals[i + 2] = Utilities.createFullRandomProgram(4);
656 individuals[i + 3] = Utilities.createFullRandomProgram(5);
657 }
658
659 for (int i = numberOfIndividuals / 2; i < numberOfIndividuals; i = i + 4) {
660 individuals[i] = Utilities.createGrowRandomProgram(3);
661 individuals[i + 1] = Utilities.createGrowRandomProgram(4);
662 individuals[i + 2] = Utilities.createGrowRandomProgram(5);
663 individuals[i + 3] = Utilities.createGrowRandomProgram(6);
664 }
665 */
666 }
667
668 private double initFunction(int x) {
669 // Funktion f(x)=x
670 return 1;
671 }
672
673 private int getIndividualsForRankSelection(int generation) {
674 // return a value how many of the best individuals will actually
675 // be used by rank selection (in the standard version this is
676 // numberOfIndividuals)
677 /*
678 double factor = 0.5*(Math.tanh(0.125*(generation-30))+1);
679 if(factor*generation<20)
680 return 50;
681 else
682 return (int)Math.round(factor*generation);
683 */
684 return numberOfSelectedIndividuals;
685 }
686
687 // es wird einfach nur ein Array von Positionen zur�ckgegeben, d.h. es
688 // werden keine Individuen kopiert etc.
689 // TODO: man muss �berlegen, ob es Vorteile hat hier direkt Programme zu
690 // kopieren => dann m�sste man bei Crossover und Mutation nicht aufpassen,
691 // dass man keine Programmb�ume modifiziert
692 private int[] selectIndividuals(int generation) {
693 int[] positions = new int[numberOfSelectedIndividuals];
694 if (selectionType == SelectionType.FPS) {
695
696 // select individuals according to FPS
697 double[] lookupTable = getFitnessLookupTable(false);
698 double fitnessSum = lookupTable[numberOfIndividuals - 1];
699 double rand;
700
701 for (int i = 0; i < numberOfSelectedIndividuals; i++) {
702 // create a random number between 0 and fitness sum
703 rand = Math.random() * fitnessSum;
704 positions[i] = getLookupTablePosition(lookupTable, rand);
705 }
706 } else if (selectionType == SelectionType.RANK_SELECTION) {
707 int individualsForRankSelection = getIndividualsForRankSelection(generation);
708 double[] lookupTable = new double[individualsForRankSelection];
709 double sum = 0;
710 double rand, val;
711 // create a lookup-table
712 for (int i = 0; i < individualsForRankSelection; i++) {
713 // you can use any function here
714 // fittestes Individuum 10mal wahrscheinlicher als schlechtestes
715 // Individuum (relativ schwache Selektion für MLDM-Paper, da
716 // im hybrid GP immer stetig Richtung Lösung gegangen wird, also
717 // es sich oft auch lohnt weniger fitte Individuen zu wählen)
718 val = i + individualsForRankSelection / 9; // 8
719 // einfachste rank funktion
720 // val = i;
721 sum += val;
722 lookupTable[i] = sum;
723 }
724
725 for (int i = 0; i < numberOfSelectedIndividuals; i++) {
726 // create a random number between 0 and sum
727 rand = Math.random() * sum;
728 positions[i] = numberOfIndividuals - individualsForRankSelection
729 + getLookupTablePosition(lookupTable, rand);
730 }
731 } else if (selectionType == SelectionType.TOURNAMENT_SELECTION) {
732 for(int i = 0; i < numberOfSelectedIndividuals; i++) {
733 // choose the appropriate number of individuals randomly
734 int[] randomIndividuals = new int[tournamentSize];
735 for(int j=0; j<tournamentSize; j++) {
736 randomIndividuals[j] = rand.nextInt(numberOfIndividuals);
737 }
738
739 double fitness = individuals[randomIndividuals[0]].getFitness();
740 int pos = 0;
741 for (int j = 1; j < tournamentSize; j++) {
742 if (individuals[randomIndividuals[j]].getFitness() > fitness) {
743 fitness = individuals[randomIndividuals[j]].getFitness();
744 pos = j;
745 }
746 }
747 positions[i] = pos;
748 }
749 }
750
751 return positions;
752 }
753
754 private double[] getFitnessLookupTable(boolean allowNegativeFitness) {
755 // we sum up the fitness for all individuals and store the sums
756 // such that we can later easily determine, which individual we
757 // have chosen (the last entry in the lookup table is the sum
758 // of the fitness of all individuals)
759 double[] lookupTable = new double[numberOfIndividuals];
760 double sum = 0, fitness;
761
762 // sum up the fitness for all individuals
763 for (int i = 0; i < numberOfIndividuals; i++) {
764 fitness = individuals[i].getFitness();
765 if(!allowNegativeFitness && fitness<0)
766 error("Negative fitness value " + fitness + " in FPS!");
767 sum += fitness;
768 lookupTable[i] = sum;
769 }
770
771 return lookupTable;
772 }
773
774 private int getLookupTablePosition(double[] lookupTable, double rand) {
775 // look up the position of the random number in the table
776 // for (int i = 0; i < numberOfIndividuals; i++) {
777 for (int i = 0; i < lookupTable.length; i++) {
778 if (rand <= lookupTable[i])
779 return i;
780 }
781 // if no position was found, an error has occured
782 throw new Error();
783 }
784
785 private double getFitnessSum() {
786 double sum = 0;
787
788 // sum up the fitness for all individuals
789 for (int i = 0; i < numberOfIndividuals; i++) {
790 sum += individuals[i].getFitness();
791 }
792
793 return sum;
794 }
795
796 private Program getFittestIndividual() {
797 double fitness = individuals[0].getFitness();
798 int pos = 0;
799 for (int i = 1; i < numberOfIndividuals; i++) {
800 if (individuals[i].getFitness() > fitness) {
801 fitness = individuals[i].getFitness();
802 pos = i;
803 }
804 }
805 return individuals[pos];
806 }
807
808 /*
809 private Program getFittestValidIndividual() {
810 double fitness = 0;
811 // individuals[0].getFitness();
812 int pos = 0;
813 for (int i = 0; i < numberOfIndividuals; i++) {
814 if (individuals[i].getFitness() > fitness
815 && individuals[i].isValidSolution()) {
816 fitness = individuals[i].getFitness();
817 pos = i;
818 }
819 }
820 return individuals[pos];
821 }
822 */
823
824 private void printStatistics(Program fittestIndividual) {
825 // output statistics: best individual, average fitness, etc.
826 double averageFitness = getFitnessSum() / numberOfIndividuals;
827 Description n = fittestIndividual.getTree();
828
829 int misClassifications = fittestIndividual.getScore().getNotCoveredPositives().size()
830 + fittestIndividual.getScore().getCoveredNegatives().size();
831
832 System.out.println("average fitness: " + averageFitness);
833 System.out.println("highest fitness: " + fittestIndividual.getFitness() + " ["+misClassifications+" misclassifcations, length "+n.getLength()+"]");
834
835 // durchschnittliche Konzeptlänge berechnen (wahrscheinlich zeitaufwändig,
836 // also nicht standardmäÃig machen)
837 int conceptLengthSum = 0;
838 for(Program p : individuals)
839 conceptLengthSum += p.getTree().getLength();
840 double conceptLengthAverage = conceptLengthSum/(double)individuals.length;
841 System.out.println("average concept length: " + df.format(conceptLengthAverage));
842 // long algorithmTime = System.nanoTime() - Main.getAlgorithmStartTime();
843 long algorithmTime = System.nanoTime() - startTime;
844 System.out.println("overall algorithm runtime: " + Helper.prettyPrintNanoSeconds(algorithmTime));
845 // System.out.println("instance checks: " + learningProblem.getReasonerComponent().getNrOfInstanceChecks());
846 // System.out.println("fitness evals: " + Program.fitnessEvaluations);
847 // System.out.println("nr. of individuals: " + individuals.length + " (" + numberOfSelectedIndividuals + " selected)");
848
849
850 // für temporäre Performance-Werte
851 // double some = 100*(psi.someTime/(double)algorithmTime);
852 // System.out.println("some: " + df.format(some) + "%");
853
854 // System.out.println();
855 System.out.println("best definition found: " + n);
856 if(refinementProbability > 0) {
857 double cacheHitRate=0;
858 double pdCacheHitRate=0, puCacheHitRate=0;
859 if(psi.getNrOfRequests()>0) {
860 cacheHitRate = 100*(psi.getConceptCacheHits()/(double)psi.getNrOfRequests());
861 pdCacheHitRate = 100*(psi.getPdCacheHits()/(double)psi.getPdRequests());
862 puCacheHitRate = 100*(psi.getPuCacheHits()/(double)psi.getPuRequests());
863 }
864 System.out.println("Psi down cache: " + psi.getPdCache().size() + "; " + psi.getPdRequests() + " requests; hit rate " + df.format(pdCacheHitRate) + "%");
865 System.out.println("Psi up cache: " + psi.getPuCache().size() + "; " + psi.getPuRequests() + " requests; hit rate " + df.format(puCacheHitRate) + "%");
866 System.out.println("Psi cache: size " + psi.getCacheSize() + "; " + psi.getNrOfRequests() + " requests; hit rate " + df.format(cacheHitRate) + "%");
867
868 double psiTimePercent = 100*psi.getPsiApplicationTimeNs()/(double)algorithmTime;
869 double psiWOReasoningTimePercent = 100*(psi.getPsiApplicationTimeNs()-psi.getPsiReasoningTimeNs())/(double)algorithmTime;
870 // System.out.println(Helper.prettyPrintNanoSeconds(psi.getPsiApplicationTimeNs()));
871 // System.out.println(Helper.prettyPrintNanoSeconds(psi.getPsiReasoningTimeNs()));
872 System.out.println("Psi application time percentage: " + df.format(psiTimePercent) + "%; " + df.format(psiWOReasoningTimePercent) + "% excluding reasoning");
873 }
874
875 if(adc)
876 System.out.println("ADC: " + fittestIndividual.getAdc());
877 // TODO: hier muss noch eine Zusammenfassung rein, die sowohl f�r zweiwertiges als auch
878 // dreiwertiges Lernproblem funktioniert
879 // System.out.println("classified positive: " + fittestIndividual.getScore().getDefPosSet());
880 // System.out.println("classified negative: " + fittestIndividual.getScore().getDefNegSet());
881 System.out.println();
882 }
883
884 /**
885 * Sets the probability that crossover is performed. If crossover is not
886 * performed individuals are cloned, i.e. copied over to the next
887 * generation.
888 *
889 * @param crossoverProbability
890 * A number between 0 and 1.
891 */
892 //public void setCrossoverProbability(float crossoverProbability) {
893 // if (crossoverProbability >= 0.0 && crossoverProbability <= 1.0)
894 // this.crossoverProbability = crossoverProbability;
895 // else
896 // error("Probability for crossover must be a value between 0 and 1.");
897 //}
898
899 /**
900 * Sets the probability that an individual is mutated.
901 *
902 * @param mutationProbability
903 * A number between 0 and 1.
904 */
905 //public void setMutationProbability(float mutationProbability) {
906 // if (mutationProbability >= 0.0 && mutationProbability <= 1.0)
907 // this.mutationProbability = mutationProbability;
908 // else
909 // error("Probability for mutation must be a value between 0 and 1.");
910 //}
911
912 /**
913 * Switch for turning elitism on or off. Elitism means that the fittest
914 * individual is copied over to the next generation automatically and cannot
915 * be lost.
916 *
917 * @param elitism
918 * True if elitism should be actived, false otherwise.
919 */
920 //public void setElitism(boolean elitism) {
921 // this.elitism = elitism;
922 //}
923
924 /**
925 * This method controls how many individuals will actually be selected and
926 * transfered to the mating pool in a steady state algorithm (it is ignored
927 * in a generational algorithm).
928 *
929 * @param numberOfSelectedIndividuals
930 * Number of individuals, which will be selected.
931 */
932 //public void setNumberOfSelectedIndividuals(int numberOfSelectedIndividuals) {
933 // if (algorithmType == AlgorithmType.STEADY_STATE)
934 // this.numberOfSelectedIndividuals = numberOfSelectedIndividuals;
935 // else
936 // System.out.println("Warning: number of selected individuals "
937 // + "will be ignored in a generational algorithm");
938 //}
939
940 private void error(String errorMessage) {
941 // prints an error message and exits
942 System.out.println("Error: " + errorMessage);
943 System.out
944 .println("Please correct all errors and run the algorithm again.");
945 System.exit(0);
946 }
947
948 // @Override
949 public ScorePosNeg getSolutionScore() {
950 return bestScore;
951 }
952
953 @Override
954 public Description getCurrentlyBestDescription() {
955 return bestConcept;
956 }
957
958 @Override
959 public EvaluatedDescriptionPosNeg getCurrentlyBestEvaluatedDescription() {
960 // return fittestIndividual.getTree();
961 return new EvaluatedDescriptionPosNeg(bestConcept,bestScore);
962 }
963
964 @Override
965 public void stop() {
966 // TODO Auto-generated method stub
967
968 }
969
970 /* (non-Javadoc)
971 * @see org.dllearner.core.LearningAlgorithm#isRunning()
972 */
973 @Override
974 public boolean isRunning() {
975 // TODO Auto-generated method stub
976 return false;
977 }
978
979 /**
980 * This allows to set the number of individuals in the whole population. A
981 * higher value slows down the algorithm, but will in general improve
982 * accuracy of the results.
983 *
984 * @param numberOfIndividuals
985 * Number of individuals in the population.
986 */
987 //public void setNumberOfIndividuals(int numberOfIndividuals) {
988 // this.numberOfIndividuals = numberOfIndividuals;
989 //}
990
991 /**
992 * The algorithm converges, if it doesn't find a fitter individual after a
993 * number of generations. This number controls for how many generations the
994 * algorithm will try to find a fitter individual.
995 *
996 * @param postConvergenceGenerations
997 * A number specifying after how many generations without a new
998 * fittest individual the algorithm should stop.
999 */
1000 //public void setPostConvergenceGenerations(int postConvergenceGenerations) {
1001 // this.postConvergenceGenerations = postConvergenceGenerations;
1002 //}
1003
1004 /**
1005 * This method switches the type of selection, which is performed. It can be
1006 * either Fitness Proportionate Selection (FPS) or Rank Selection.
1007 *
1008 * @param selectionType
1009 * Either FPS or RANK_SELECTION.
1010 */
1011 //public void setSelectionType(SelectionType selectionType) {
1012 // this.selectionType = selectionType;
1013 //}
1014
1015 /**
1016 * This switch controls, which type of algorithm is used.
1017 *
1018 * @param algorithmType
1019 * STEADY_STATE or GENERATIONAL.
1020 */
1021 //public void setAlgorithmType(AlgorithmType algorithmType) {
1022 // this.algorithmType = algorithmType;
1023 //}
1024
1025 }