001    /**
002     * Copyright (C) 2007-2011, Jens Lehmann
003     *
004     * This file is part of DL-Learner.
005     *
006     * DL-Learner is free software; you can redistribute it and/or modify
007     * it under the terms of the GNU General Public License as published by
008     * the Free Software Foundation; either version 3 of the License, or
009     * (at your option) any later version.
010     *
011     * DL-Learner is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014     * GNU General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018     */
019    
020    package org.dllearner.algorithms.gp;
021    
022    import java.text.DecimalFormat;
023    import java.util.Arrays;
024    import java.util.Collection;
025    import java.util.Comparator;
026    import java.util.LinkedList;
027    import java.util.Random;
028    import java.util.Set;
029    import java.util.Map.Entry;
030    
031    import org.dllearner.algorithms.hybridgp.Psi;
032    import org.dllearner.core.AbstractCELA;
033    import org.dllearner.core.AbstractLearningProblem;
034    import org.dllearner.core.AbstractReasonerComponent;
035    import org.dllearner.core.configurators.GPConfigurator;
036    import org.dllearner.core.options.BooleanConfigOption;
037    import org.dllearner.core.options.ConfigEntry;
038    import org.dllearner.core.options.ConfigOption;
039    import org.dllearner.core.options.DoubleConfigOption;
040    import org.dllearner.core.options.IntegerConfigOption;
041    import org.dllearner.core.options.InvalidConfigOptionValueException;
042    import org.dllearner.core.options.StringConfigOption;
043    import org.dllearner.core.owl.Description;
044    import org.dllearner.core.owl.Thing;
045    import org.dllearner.learningproblems.EvaluatedDescriptionPosNeg;
046    import org.dllearner.learningproblems.PosNegLP;
047    import org.dllearner.learningproblems.ScorePosNeg;
048    import org.dllearner.utilities.Helper;
049    
050    /**
051     * This class implements the genetic programming (GP) algorithm and provides
052     * methods to configure and run it.
053     * 
054     * @author Jens Lehmann
055     * 
056     */
057    public class GP extends AbstractCELA {
058            
059            private GPConfigurator configurator;
060            @Override
061            public GPConfigurator getConfigurator(){
062                    return configurator;
063            }
064            
065            
066        // NumberFormat f;
067            DecimalFormat df = new DecimalFormat("0.00");
068     
069            public enum AlgorithmType {
070            /**
071             * This is a type of algorithm, where the offspring completely replaces the
072             * previous generation.
073             */     
074            GENERATIONAL,
075            /**
076             * In this type of algorithm offspring is produced by a number of indivuals.
077             * The offspring then replaces the weakest individuals of the previous
078             * generation.
079             */     
080            STEADY_STATE};     
081        
082        public enum SelectionType {
083            /**
084             * Rank selection is a selection type, where the probability of being
085             * selected depends only on the order of the fitness of all individuals.
086             */
087            RANK_SELECTION, 
088            /**
089             * FPS (Fitness Proportionate Selection) is a selection type, where the
090             * probability of being selected is proportional to the fitness of an
091             * individual.
092             */     
093            FPS,
094            TOURNAMENT_SELECTION};
095            
096        // configuration options
097        private SelectionType selectionType = SelectionType.RANK_SELECTION;
098            private int tournamentSize = 3;
099            private boolean elitism = true;
100            private AlgorithmType algorithmType = AlgorithmType.STEADY_STATE;
101            private double mutationProbability = 0.03d;
102            private double crossoverProbability = 0.95d;
103            private double hillClimbingProbability = 0.0d;
104            private double refinementProbability = 0.0;
105            private int numberOfIndividuals = 100;
106            private int numberOfSelectedIndividuals = 96;
107            private boolean useFixedNumberOfGenerations = false;
108            private int generations = 20;
109            private int postConvergenceGenerations = 50;
110            private boolean adc = false;
111            private int initMinDepth = 4;
112            private int initMaxDepth = 6;
113            private int maxConceptLength = 75;
114    //      private boolean useMultiStructures = true;      
115            
116        private Program[] individuals;
117        
118        private Program fittestIndividual;
119        
120        public int fittestIndividualGeneration;
121    
122        private Comparator<Program> fitnessComparator;
123        
124        private static Random rand = new Random();
125        
126        private long startTime;
127    
128        private ScorePosNeg bestScore;
129        private Description bestConcept;
130        
131        // private GeneticRefinementOperator psi;
132        private Psi psi;
133        
134        
135        /**
136         * Creates an algorithm object. By default a steady state algorithm with
137         * rank selection is used. This operates
138         * on a population of 1000 individuals with a probability of crossover of
139         * 1.0 and a probability of mutation of 0.01.
140         * 
141         */
142        public GP(PosNegLP learningProblem, AbstractReasonerComponent rs) {
143            super(learningProblem, rs);
144            this.configurator = new GPConfigurator(this);
145        }
146          
147            public static String getName() {
148                    return "genetic programming learning algorithm";
149            }           
150        
151            public static Collection<Class<? extends AbstractLearningProblem>> supportedLearningProblems() {
152                    Collection<Class<? extends AbstractLearningProblem>> problems = new LinkedList<Class<? extends AbstractLearningProblem>>();
153                    problems.add(PosNegLP.class);
154                    return problems;
155            }       
156            
157            public static Collection<ConfigOption<?>> createConfigOptions() {
158                    Collection<ConfigOption<?>> options = new LinkedList<ConfigOption<?>>();
159                    StringConfigOption selectionType = new StringConfigOption("selectionType", "selection type", "rankSelection");
160                    selectionType.setAllowedValues(new String[] {"rankSelection", "fps", "tournament"});
161                    options.add(selectionType);
162                    IntegerConfigOption tournamentSize = new IntegerConfigOption("tournamentSize", "tournament size (applies only to tournament selection)", 3);
163                    tournamentSize.setLowerLimit(2);
164                    tournamentSize.setUpperLimit(20);
165                    options.add(tournamentSize);
166                    options.add(new BooleanConfigOption("elitism", "specifies whether to use elitism in selection", true));
167                    StringConfigOption algorithmType = new StringConfigOption("algorithmType", "algorithm type", "steadyState");
168                    algorithmType.setAllowedValues(new String[] {"generational", "steadyState"});
169                    options.add(algorithmType);
170                    DoubleConfigOption mutationProb = new DoubleConfigOption("mutationProbability", "mutation probability", 0.03);
171                    mutationProb.setLowerLimit(0.0);
172                    mutationProb.setUpperLimit(1.0);
173                    options.add(mutationProb);
174                    DoubleConfigOption crossoverProb = new DoubleConfigOption("crossoverProbability", "crossover probability", 0.95);
175                    crossoverProb.setLowerLimit(0.0);
176                    crossoverProb.setUpperLimit(1.0);
177                    options.add(crossoverProb);
178                    DoubleConfigOption hillClimbingProb = new DoubleConfigOption("hillClimbingProbability", "hill climbing probability", 0.0);
179                    hillClimbingProb.setLowerLimit(0.0);
180                    hillClimbingProb.setUpperLimit(1.0);
181                    options.add(hillClimbingProb);
182                    DoubleConfigOption refinementProb = new DoubleConfigOption("refinementProbability", "refinement operator probability (values higher than 0 turn this into a hybrid GP algorithm - see publication)", 0.00);
183                    refinementProb.setLowerLimit(0.0);
184                    refinementProb.setUpperLimit(1.0);
185                    options.add(refinementProb);
186                    IntegerConfigOption numberOfInd = new IntegerConfigOption("numberOfIndividuals", "number of individuals", 100);
187                    numberOfInd.setLowerLimit(1);
188                    options.add(numberOfInd);
189                    IntegerConfigOption numberOfSelInd = new IntegerConfigOption("numberOfSelectedIndividuals", "number of selected individuals", 92);
190                    numberOfSelInd.setLowerLimit(1);
191                    options.add(numberOfSelInd);
192                    options.add(new BooleanConfigOption("useFixedNumberOfGenerations", "specifies whether to use a fixed number of generations", false));
193                    IntegerConfigOption gen = new IntegerConfigOption("generations", "number of generations (only valid if a fixed number of generations is used)", 20);
194                    gen.setLowerLimit(1);
195                    options.add(gen);
196                    IntegerConfigOption post = new IntegerConfigOption("postConvergenceGenerations", "number of generations after which to stop if no improvement wrt. the best solution has been achieved", 50);
197                    post.setLowerLimit(1);
198                    options.add(post);              
199                    options.add(new BooleanConfigOption("adc", "whether to use automatically defined concept (this invents new helper concepts, but enlarges the search space", false));
200                    IntegerConfigOption initMin = new IntegerConfigOption("initMinDepth", "minimum depth to use when creating the initial population", 4);
201                    initMin.setLowerLimit(1);
202                    options.add(initMin);
203                    IntegerConfigOption initMax = new IntegerConfigOption("initMaxDepth", "maximum depth to use when creating the initial population", 6);
204                    initMax.setLowerLimit(1);
205                    options.add(initMax);           
206                    IntegerConfigOption max = new IntegerConfigOption("maxConceptLength", "maximum concept length (higher length means lowest possible fitness)", 75);
207                    max.setLowerLimit(1);
208                    options.add(max);               
209    //              options.add(new BooleanConfigOption("useMultiStructures", "specifies whether to use e.g. (a AND b AND c) instead of ((A and b) and c) - there is no apparent reason to set this to false", true));
210                    return options;
211            }
212            
213            /* (non-Javadoc)
214             * @see org.dllearner.core.Component#applyConfigEntry(org.dllearner.core.ConfigEntry)
215             */
216            @Override
217            public <T> void applyConfigEntry(ConfigEntry<T> entry) throws InvalidConfigOptionValueException {
218                    String name = entry.getOptionName();
219                    if(name.equals("selectionType")) {
220                            String value = (String) entry.getValue();
221                            if(value.equals("fps"))
222                                    selectionType = SelectionType.FPS;
223                            else if(value.equals("tournament"))
224                                    selectionType = SelectionType.TOURNAMENT_SELECTION;
225                            else
226                                    selectionType = SelectionType.RANK_SELECTION;
227                    } else if(name.equals("tournamentSize")) {
228                            tournamentSize = (Integer) entry.getValue();
229                    } else if(name.equals("elitism")) {
230                            elitism = (Boolean) entry.getValue();
231                    } else if(name.equals("algorithmType")) {
232                            String value = (String) entry.getValue();
233                            if(value.equals("generational"))
234                                    algorithmType = AlgorithmType.GENERATIONAL;
235                            else 
236                                    algorithmType = AlgorithmType.STEADY_STATE;
237                    } else if(name.equals("mutationProbability")) {
238                            mutationProbability = (Double) entry.getValue();
239                    } else if(name.equals("crossoverProbability")) {
240                            crossoverProbability = (Double) entry.getValue();
241                    } else if(name.equals("refinementProbability")) {
242                            refinementProbability = (Double) entry.getValue();
243                    } else if(name.equals("hillClimbingProbability")) {
244                            hillClimbingProbability = (Double) entry.getValue();
245                    } else if(name.equals("numberOfIndividuals")) {
246                            numberOfIndividuals = (Integer) entry.getValue();
247                    } else if(name.equals("numberOfSelectedIndividuals")) {
248                            numberOfSelectedIndividuals = (Integer) entry.getValue();
249                    } else if(name.equals("useFixedNumberOfGenerations")) {
250                            useFixedNumberOfGenerations = (Boolean) entry.getValue();
251                    } else if(name.equals("generations")) {
252                            generations = (Integer) entry.getValue();
253                    } else if(name.equals("postConvergenceGenerations")) {
254                            postConvergenceGenerations = (Integer) entry.getValue();
255                    } else if(name.equals("adc")) {
256                            adc = (Boolean) entry.getValue();
257                    } else if(name.equals("initMinDepth")) {
258                            initMinDepth = (Integer) entry.getValue();
259                    } else if(name.equals("initMaxDepth")) {
260                            initMaxDepth = (Integer) entry.getValue();
261                    } else if(name.equals("maxConceptLength")) {
262                            maxConceptLength = (Integer) entry.getValue();
263                    }               
264                    
265            }
266    
267            /* (non-Javadoc)
268             * @see org.dllearner.core.Component#init()
269             */
270            @Override
271            public void init() {
272    //              reasoner.prepareSubsumptionHierarchy();
273    //              reasoner.prepareRoleHierarchy();                
274            }
275            
276            @Override
277        public void start() {       
278            // falls refinement-Wahrscheinlichkeit größer 0, dann erzeuge psi
279            psi = new Psi((PosNegLP)learningProblem, reasoner);
280            
281            System.out.println();
282            System.out.println("Starting Genetic Programming Learner");
283            System.out.println();
284            
285            System.out.println("Settings:");
286            System.out.println("algorithm type: " + algorithmType);
287            System.out.print("selection type: " + selectionType);
288            if(elitism)
289                    System.out.println(" (elitism activated)");
290            else
291                    System.out.println();
292            System.out.println("number of individuals: " + numberOfIndividuals);
293            if(algorithmType == AlgorithmType.STEADY_STATE)
294                    System.out.println("number of selected individuals: " + numberOfSelectedIndividuals);
295            System.out.println("probability of crossover: " + df.format(crossoverProbability*100) + "%");
296            System.out.println("probability of mutation: " + df.format(mutationProbability*100) + "%");     
297            System.out.println("probability of hill climbing: " + df.format(hillClimbingProbability*100) + "%");
298            System.out.println("probability of refinement: " + df.format(refinementProbability*100) + "%");         
299            System.out.println("number of post convergence generations: " + postConvergenceGenerations);
300            System.out.println();
301            
302            // represents the individuals in the current run
303            individuals = new Program[numberOfIndividuals];
304    
305            if (algorithmType == AlgorithmType.GENERATIONAL) {
306                if (elitism) {
307                    numberOfSelectedIndividuals = numberOfIndividuals - 1;
308                    // ensure that number of selected individuals is even
309                    if (numberOfSelectedIndividuals % 2 == 1)
310                        error("Number of Individuals must be odd when elitism "
311                                + "is used in a generational algorithm");
312    
313                } else {
314                    numberOfSelectedIndividuals = numberOfIndividuals;
315                    if (numberOfSelectedIndividuals % 2 == 1)
316                        error("Number of Individuals must be even when elitism "
317                                + "is not used in a generational algorithm");
318                }
319            }
320    
321            int numberOfNewIndividuals;
322            if (elitism)
323                numberOfNewIndividuals = numberOfSelectedIndividuals + 1;
324            else
325                numberOfNewIndividuals = numberOfSelectedIndividuals;
326    
327            // perform some simple checks to see if configured values make sense
328            if (numberOfIndividuals < 2)
329                error("Number of individuals must be at least 2.");
330            // if (numberOfIndividuals % 8 != 0)
331            //    error("The number of individuals must be divisible by 8.");        
332            if (numberOfSelectedIndividuals % 2 == 1)
333                error("The number of selected individuals must be even.");
334            if ((numberOfSelectedIndividuals < 2)
335                    || (numberOfSelectedIndividuals > numberOfIndividuals))
336                error("Number of selected individuals should be between 2 and "
337                        + numberOfIndividuals);
338    
339            // a comparator for comparing two programs according to their fitness
340            fitnessComparator = new Comparator<Program>() {
341                public int compare(Program p1, Program p2) {
342                    double diff = p1.getFitness() - p2.getFitness();
343                    if (diff > 0)
344                        return 1;
345                    else if (diff < 0)
346                        return -1;
347                    else
348                        return 0;
349                }
350            };
351    
352            startTime = System.nanoTime();        
353            
354            // System.out.println("If Java does not allocate enough memory on your system\n" +
355            //        "use \"java -Xmx128m Start\" to start the program. This allocates\n" +
356            //        "a maximum of 128 MB of memory. 64 MB are sufficient so it is usually\n" +
357            //        "not necessary to do this.\n");
358            
359            createIndividuals();
360    
361            // keep track of the fittest individual
362            // Arrays.sort(individuals, fitnessComparator);
363            fittestIndividual = getFittestIndividual(); //individuals[0];
364            fittestIndividualGeneration = 0;
365    
366            // create a population of individuals and print initial statistics
367            System.out.println("Initial Population:");
368            printStatistics(fittestIndividual);
369    
370            // variables used in the inner loop of the algorithm
371            int[] selectedIndividuals = new int[numberOfSelectedIndividuals];
372            Program[] newIndividuals = new Program[numberOfNewIndividuals];
373            Program[] tmp = new Program[2];
374    
375            // long startTime = System.currentTimeMillis();
376    
377    
378            int generation = 0;
379    
380            // MAIN LOOP
381            do {
382    
383                // sort individuals if necessary
384                if (selectionType == SelectionType.RANK_SELECTION
385                        || algorithmType == AlgorithmType.STEADY_STATE)
386                    Arrays.sort(individuals, fitnessComparator);          
387                
388                boolean showIndividuals = false;
389                
390                // Individuen ausgeben
391                if(showIndividuals) {
392                        System.out.println("GENERATION " + generation);
393                        for(Program p : individuals) {
394                            // TODO: rausfinden warum es Inkonsistenzen bei hybrid GP gibt
395                            // => liegt daran, dass DIG- und KAON2Reasoner bei getAtomicConcepts()
396                            // nicht klonen, sondern die Referenzen zurückgeben; dadurch können
397                            // die parent Links falsch sein; für hybrid GP nicht schlimm, aber für
398                            // gemischte Varianten schon
399                            // => alleine dort zu klonen reicht aber nicht, es müsste stattdessen
400                            // bei der Baumerzeugung geklont werden
401                            // GPUtilities.checkProgram(p);
402                            // if(generation % 5 == 0)
403                            System.out.println(p.getFitness() + " " + p.getTree());
404                            // System.out.println("      ADC " + p.getAdc());
405                        }
406                        System.out.println("<===>");
407                }
408                
409                // apply the configured selection algorithm
410                selectedIndividuals = selectIndividuals(generation);
411    
412                // produce offspring
413                for (int i = 0; i < numberOfSelectedIndividuals; i++) {
414                    double rand = Math.random();
415                    
416                    double crossoverBoundary = crossoverProbability;
417                    double mutationBoundary = crossoverBoundary + mutationProbability;
418                    double hillClimbingBoundary = mutationBoundary + hillClimbingProbability;
419                    double refinementBoundary = hillClimbingBoundary + refinementProbability;
420                    
421                    // wenn nur noch ein Individual zur Auswahl ist, dann
422                    // darf Crossover nicht genommen werden (falls es ausgewaehlt wird,
423                    // dann wird stattdessen reproduction genommen)
424                    if (rand < crossoverBoundary && i+1 != numberOfSelectedIndividuals) {
425                        // crossover
426                        tmp = GPUtilities.crossover(learningProblem,
427                                individuals[selectedIndividuals[i]],
428                                individuals[selectedIndividuals[i + 1]]);
429    
430                        //System.out.println(tmp[0].getTree());
431                        
432                        newIndividuals[i] = tmp[0];
433                        newIndividuals[i + 1] = tmp[1];
434                        
435                        // Incrementing i here is a significant code change! (2007/08/31)
436                        // This is done, because crossover uses two individuals as input and 
437                        // outputs two. Without the increment that second produced child is 
438                        // overwritten, which caused a significant loss in performance.
439                        i++;
440                    // mutation
441                    }  else if(rand >= crossoverBoundary && rand < mutationBoundary) {
442                            newIndividuals[i] = GPUtilities.mutation(learningProblem, reasoner, individuals[selectedIndividuals[i]]);
443                    // hill climbing
444                    } else if(rand >= mutationBoundary && rand < hillClimbingBoundary) {
445                            // System.out.println("hill climbing");
446                            newIndividuals[i] = GPUtilities.hillClimbing(learningProblem, reasoner, individuals[selectedIndividuals[i]]);
447                    // refinement operator
448                    } else if(rand >= hillClimbingBoundary && rand < refinementBoundary) {
449                            newIndividuals[i] = psi.applyOperator(individuals[selectedIndividuals[i]]);
450                    // reproduction
451                    } else {
452                            newIndividuals[i] = individuals[selectedIndividuals[i]];
453                    }
454                    
455                    /* alter Code
456                    else {
457                        // reproduction
458                        newIndividuals[i] = individuals[selectedIndividuals[i]];
459                        newIndividuals[i + 1] = individuals[selectedIndividuals[i + 1]];
460                    }
461    
462                    // mutate individuals
463                    if (Math.random() < mutationProbability) {
464                            // System.out.println(newIndividuals[i].getFitness() + " " +  newIndividuals[i].getTree());
465                        newIndividuals[i] = Utilities.mutation(newIndividuals[i]);
466                        // System.out.println(newIndividuals[i].getFitness() + " " +  newIndividuals[i].getTree());
467                        // System.out.println("====");
468                    }
469    
470                    if (Math.random() < mutationProbability)
471                        newIndividuals[i+1] = Utilities.mutation(newIndividuals[i + 1]);
472                    */
473                }
474    
475                // update fittest individual
476                Program chr = getFittestIndividual();
477                if (chr.getFitness() > fittestIndividual.getFitness()) {
478                    fittestIndividual = chr;
479                    fittestIndividualGeneration = generation;
480                }
481                // update fittest individual
482                /*
483                Program chr2 = getFittestValidIndividual();
484                if (chr2.getFitness() > fittestValidIndividual.getFitness()) {
485                    fittestValidIndividual = chr2;
486                    fittestValidIndividualGeneration = generation;
487                }
488                */
489    
490                // if elitism is used, the fittest individual is copied over to the
491                // new generation automatically
492                if (elitism)
493                    newIndividuals[numberOfNewIndividuals - 1] = fittestIndividual; 
494                
495                // the steady state algorithm replaces the weakest individuals by
496                // the new ones (note that the individuals are ordered by fitness,
497                // so the new individuals can just be copied over at the start of
498                // the array)
499                if (algorithmType == AlgorithmType.STEADY_STATE)
500                    System.arraycopy(newIndividuals, 0, individuals, 0,
501                            numberOfNewIndividuals);
502                // the generational algorithm replaces the whole generation
503                else
504                    System.arraycopy(newIndividuals, 0, individuals, 0,
505                            numberOfIndividuals);
506                
507                if (generation % 5 == 0) {
508                    System.out.println("Generation " + generation);
509                    printStatistics(fittestIndividual);
510                }
511                
512                // alle Individuen auf maximale Konzeptlänge überprüfen um mögliche
513                // Speicherprobleme zu verhindern
514                for(int i=0; i<numberOfIndividuals; i++) {
515                    if(individuals[i].getTree().getLength()>maxConceptLength) {
516                            System.out.println("Warning: GP produced concept longer then " + maxConceptLength + ". Replacing it with TOP.");
517                            individuals[i] = GPUtilities.createProgram(learningProblem, new Thing());
518                    }                       
519                }
520                
521                generation++;
522            } while ( (useFixedNumberOfGenerations && generation < generations)
523                    || (!useFixedNumberOfGenerations && (generation - fittestIndividualGeneration < postConvergenceGenerations)));
524    
525            // fittestIndividual.optimize();        
526            
527            long endTime = System.nanoTime(); // .currentTimeMillis();
528            // R�ckgabewert des Algorithmus speichern
529            bestScore = fittestIndividual.getScore();
530            bestConcept = fittestIndividual.getTree();
531    
532            // nachschauen, ob ev. noch bessere Konzepte im Psi-Cache sind
533            boolean betterValueFoundInPsiCache = false;
534            double bestValue = bestScore.getScoreValue();
535            
536            if(refinementProbability > 0) {
537                    // das Problem ist hier, dass die gecachte Score nicht unbedingt
538                    // der echten Score entsprechen muss, d.h. hier muss die
539                    // Konzeptlänge mit einberechnet werden => deswegen werden
540                    // neue Score-Objekte generiert
541                    Set<Entry<Description,ScorePosNeg>> entrySet = psi.evalCache.entrySet();
542                    for(Entry<Description,ScorePosNeg> entry : entrySet) {
543                            ScorePosNeg tmpScore = entry.getValue();
544                            Description c = entry.getKey();
545                            tmpScore = tmpScore.getModifiedLengthScore(c.getLength());
546                            double tmpScoreValue = tmpScore.getScoreValue();
547                            if(tmpScoreValue>bestValue) {
548                                    bestValue = tmpScoreValue;
549                                    betterValueFoundInPsiCache = true;
550                                    bestScore = tmpScore;
551                                    bestConcept = c;
552                            }
553                                    
554                    }
555            }
556            
557            System.out.println("final report");
558            System.out.println("============");
559            // System.out.println("fittest individual: " + fittestIndividual.getTree());
560            System.out.println("generations: " + generation);
561            System.out.println("fittest individual found after "
562                    + fittestIndividualGeneration + " generations");
563            System.out.println("runtime in ms: " + Helper.prettyPrintNanoSeconds(endTime - startTime));
564            System.out.println("fitness evaluations: "
565                    + GPUtilities.fitnessEvaluations);
566            if(refinementProbability > 0) {
567                    System.out.println("operator applications: " + psi.getNrOfRequests() + " psi, " + GPUtilities.crossover + " crossover, " +
568                                    GPUtilities.mutation + " mutation, " + GPUtilities.hillClimbing + " hillClimbing");
569            }
570            
571            //System.out.println("invalid-to-valid transformations by FROG optimization: "
572            //        + Program.treesOptimized); 
573            System.out.println();
574            printStatistics(fittestIndividual);
575            System.out.println(fittestIndividual.getScore());
576            if(betterValueFoundInPsiCache) {
577                    System.out.println("Found better solution in Psi-Cache:");
578                    System.out.println(bestConcept);
579                    int misClassifications = bestScore.getNotCoveredPositives().size()+
580                    bestScore.getCoveredNegatives().size();
581                    System.out.println("misclassifications: " + misClassifications + ", length " + bestConcept.getLength());
582            }
583              
584            /*
585            Collection<Concept> test = ReasonerComponent.retrievals;
586    //         for(Concept c : )
587            
588            test.removeAll(psi.evalCache.keySet());
589            for(Concept c : test)
590                    System.out.println(c);
591            */
592        }
593    
594        // Anmerkung: beim Lernen von DLs ist es besonders wichtig, dass schon bei der
595        // Initialisierung gute Teilbaeume herauskommen, d.h. nicht notwendigerweise Teilbaeume
596        // der gewuenschten Loesung, aber Baeume die solche Teilbaeume enthalten und außerdem
597        // Fehlklassifikationen vermeiden
598        private void createIndividuals() {
599            // trees are created by the ramped half and half method with a tree depth
600            // between 6 and 9
601            
602            // Lookuptable generieren
603            double[] functionValues = new double[initMaxDepth-initMinDepth+1];
604            double functionSum = 0;
605            double functionValue;
606            for(int i=initMinDepth; i<=initMaxDepth; i++) {
607                    functionValue = initFunction(i);
608                    functionSum += functionValue;
609                    functionValues[i-initMinDepth] = functionSum;
610            }
611            
612            // function-based-half-and-half
613            for(int i = 0; i< numberOfIndividuals; i++) {
614                    boolean grow = (Math.random()>0.5);
615                    
616                    int depth = getLookupTablePosition(functionValues,rand.nextDouble()*functionSum) + initMinDepth;
617                    // int depth = rand.nextInt(initMaxDepth-initMinDepth)+initMinDepth;
618                    
619                    if(grow)
620                            individuals[i] = GPUtilities.createGrowRandomProgram(learningProblem, reasoner, depth, adc);
621                    else
622                            individuals[i] = GPUtilities.createFullRandomProgram(learningProblem, reasoner, depth, adc);            
623            }       
624            
625            /*
626            for(int i = 0; i< numberOfIndividuals; i++) {
627                    double nr = Math.random();
628                    if(Math.random()>0.5) {
629                            if(nr<1/4d)
630                                    individuals[i]= GPUtilities.createFullRandomProgram(learningProblem, 2);
631                            else if(nr<2/4d)
632                                    individuals[i]= GPUtilities.createFullRandomProgram(learningProblem, 3);
633                            else if(nr<3/4d)
634                                    individuals[i]= GPUtilities.createFullRandomProgram(learningProblem, 4);
635                            else
636                                    individuals[i]= GPUtilities.createFullRandomProgram(learningProblem, 5);
637                    } else {
638                            if(nr<1/4d)
639                                    individuals[i]= GPUtilities.createGrowRandomProgram(learningProblem,3);
640                            else if(nr<2/4d)
641                                    individuals[i]= GPUtilities.createGrowRandomProgram(learningProblem,4);
642                            else if(nr<3/4d)
643                                    individuals[i]= GPUtilities.createGrowRandomProgram(learningProblem,5);
644                            else
645                                    individuals[i]= GPUtilities.createGrowRandomProgram(learningProblem,6);                 
646                    }               
647            }
648            */
649            
650            
651            /*
652            for (int i = 0; i < numberOfIndividuals / 2; i = i + 4) {
653                individuals[i] = Utilities.createFullRandomProgram(2);
654                individuals[i + 1] = Utilities.createFullRandomProgram(3);
655                individuals[i + 2] = Utilities.createFullRandomProgram(4);
656                individuals[i + 3] = Utilities.createFullRandomProgram(5);
657            }
658    
659            for (int i = numberOfIndividuals / 2; i < numberOfIndividuals; i = i + 4) {
660                individuals[i] = Utilities.createGrowRandomProgram(3);
661                individuals[i + 1] = Utilities.createGrowRandomProgram(4);
662                individuals[i + 2] = Utilities.createGrowRandomProgram(5);
663                individuals[i + 3] = Utilities.createGrowRandomProgram(6);
664            }
665            */
666        }
667        
668        private double initFunction(int x) {
669            // Funktion f(x)=x
670            return 1;
671        }
672    
673        private int getIndividualsForRankSelection(int generation) {
674            // return a value how many of the best individuals will actually
675            // be used by rank selection (in the standard version this is
676            // numberOfIndividuals)
677            /*
678            double factor = 0.5*(Math.tanh(0.125*(generation-30))+1);
679            if(factor*generation<20)
680                return 50;
681            else
682                return (int)Math.round(factor*generation);
683            */
684            return numberOfSelectedIndividuals;
685        }
686    
687        // es wird einfach nur ein Array von Positionen zur�ckgegeben, d.h. es
688        // werden keine Individuen kopiert etc.
689        // TODO: man muss �berlegen, ob es Vorteile hat hier direkt Programme zu
690        // kopieren => dann m�sste man bei Crossover und Mutation nicht aufpassen,
691        // dass man keine Programmb�ume modifiziert
692        private int[] selectIndividuals(int generation) {
693            int[] positions = new int[numberOfSelectedIndividuals];
694            if (selectionType == SelectionType.FPS) {
695    
696                // select individuals according to FPS
697                double[] lookupTable = getFitnessLookupTable(false);
698                double fitnessSum = lookupTable[numberOfIndividuals - 1];
699                double rand;
700    
701                for (int i = 0; i < numberOfSelectedIndividuals; i++) {
702                    // create a random number between 0 and fitness sum
703                    rand = Math.random() * fitnessSum;
704                    positions[i] = getLookupTablePosition(lookupTable, rand);
705                }
706            } else if (selectionType == SelectionType.RANK_SELECTION) {
707                int individualsForRankSelection = getIndividualsForRankSelection(generation);
708                double[] lookupTable = new double[individualsForRankSelection];
709                double sum = 0;
710                double rand, val;
711                // create a lookup-table
712                for (int i = 0; i < individualsForRankSelection; i++) {
713                    // you can use any function here
714                    // fittestes Individuum 10mal wahrscheinlicher als schlechtestes
715                    // Individuum (relativ schwache Selektion für MLDM-Paper, da
716                    // im hybrid GP immer stetig Richtung Lösung gegangen wird, also
717                    // es sich oft auch lohnt weniger fitte Individuen zu wählen)
718                    val = i + individualsForRankSelection / 9; // 8
719                    // einfachste rank funktion
720                    // val = i;
721                    sum += val;
722                    lookupTable[i] = sum;
723                }
724    
725                for (int i = 0; i < numberOfSelectedIndividuals; i++) {
726                    // create a random number between 0 and sum
727                    rand = Math.random() * sum;
728                    positions[i] = numberOfIndividuals - individualsForRankSelection
729                            + getLookupTablePosition(lookupTable, rand);
730                }
731            } else if (selectionType == SelectionType.TOURNAMENT_SELECTION) {
732                    for(int i = 0; i < numberOfSelectedIndividuals; i++) {
733                    // choose the appropriate number of individuals randomly
734                    int[] randomIndividuals = new int[tournamentSize];
735                    for(int j=0; j<tournamentSize; j++) {
736                            randomIndividuals[j] = rand.nextInt(numberOfIndividuals);
737                    }
738                    
739                    double fitness = individuals[randomIndividuals[0]].getFitness();
740                    int pos = 0;
741                    for (int j = 1; j < tournamentSize; j++) {
742                        if (individuals[randomIndividuals[j]].getFitness() > fitness) {
743                            fitness = individuals[randomIndividuals[j]].getFitness();
744                            pos = j;
745                        }
746                    }               
747                    positions[i] = pos;
748                    }
749            }
750    
751            return positions;
752        }
753    
754        private double[] getFitnessLookupTable(boolean allowNegativeFitness) {
755            // we sum up the fitness for all individuals and store the sums
756            // such that we can later easily determine, which individual we
757            // have chosen (the last entry in the lookup table is the sum
758            // of the fitness of all individuals)
759            double[] lookupTable = new double[numberOfIndividuals];
760            double sum = 0, fitness;
761    
762            // sum up the fitness for all individuals
763            for (int i = 0; i < numberOfIndividuals; i++) {
764                    fitness = individuals[i].getFitness();
765                    if(!allowNegativeFitness && fitness<0)
766                            error("Negative fitness value " + fitness + " in FPS!");
767                sum += fitness;
768                lookupTable[i] = sum;
769            }
770    
771            return lookupTable;
772        }
773    
774        private int getLookupTablePosition(double[] lookupTable, double rand) {
775            // look up the position of the random number in the table
776            // for (int i = 0; i < numberOfIndividuals; i++) {
777            for (int i = 0; i < lookupTable.length; i++) {
778                if (rand <= lookupTable[i])
779                    return i;
780            }
781            // if no position was found, an error has occured
782            throw new Error();
783        }
784    
785        private double getFitnessSum() {
786            double sum = 0;
787    
788            // sum up the fitness for all individuals
789            for (int i = 0; i < numberOfIndividuals; i++) {
790                sum += individuals[i].getFitness();
791            }
792    
793            return sum;
794        }
795    
796        private Program getFittestIndividual() {
797            double fitness = individuals[0].getFitness();
798            int pos = 0;
799            for (int i = 1; i < numberOfIndividuals; i++) {
800                if (individuals[i].getFitness() > fitness) {
801                    fitness = individuals[i].getFitness();
802                    pos = i;
803                }
804            }
805            return individuals[pos];
806        }
807        
808        /*
809        private Program getFittestValidIndividual() {
810            double fitness = 0;
811            // individuals[0].getFitness();
812            int pos = 0;
813            for (int i = 0; i < numberOfIndividuals; i++) {
814                if (individuals[i].getFitness() > fitness
815                        && individuals[i].isValidSolution()) {
816                    fitness = individuals[i].getFitness();
817                    pos = i;
818                }
819            }
820            return individuals[pos];
821        }
822        */
823        
824        private void printStatistics(Program fittestIndividual) {
825            // output statistics: best individual, average fitness, etc.
826            double averageFitness = getFitnessSum() / numberOfIndividuals;
827            Description n = fittestIndividual.getTree();
828    
829            int misClassifications = fittestIndividual.getScore().getNotCoveredPositives().size()
830            + fittestIndividual.getScore().getCoveredNegatives().size();
831            
832            System.out.println("average fitness: " + averageFitness);
833            System.out.println("highest fitness: " + fittestIndividual.getFitness() + " ["+misClassifications+" misclassifcations, length "+n.getLength()+"]");
834            
835            // durchschnittliche Konzeptlänge berechnen (wahrscheinlich zeitaufwändig,
836            // also nicht standardmäßig machen)
837            int conceptLengthSum = 0;
838            for(Program p : individuals)
839                    conceptLengthSum += p.getTree().getLength();
840            double conceptLengthAverage = conceptLengthSum/(double)individuals.length;
841            System.out.println("average concept length: " + df.format(conceptLengthAverage));
842            // long algorithmTime = System.nanoTime() - Main.getAlgorithmStartTime();
843            long algorithmTime = System.nanoTime() - startTime;
844            System.out.println("overall algorithm runtime: " + Helper.prettyPrintNanoSeconds(algorithmTime));
845            // System.out.println("instance checks: " + learningProblem.getReasonerComponent().getNrOfInstanceChecks());
846            // System.out.println("fitness evals: " + Program.fitnessEvaluations);
847            // System.out.println("nr. of individuals: " + individuals.length + " (" + numberOfSelectedIndividuals + " selected)");
848            
849            
850            // für temporäre Performance-Werte
851            // double some = 100*(psi.someTime/(double)algorithmTime);
852            // System.out.println("some: " + df.format(some) + "%");
853            
854            // System.out.println();
855            System.out.println("best definition found: " + n);
856            if(refinementProbability > 0) {
857                    double cacheHitRate=0;
858                    double pdCacheHitRate=0, puCacheHitRate=0;
859                    if(psi.getNrOfRequests()>0) {
860                            cacheHitRate = 100*(psi.getConceptCacheHits()/(double)psi.getNrOfRequests());
861                            pdCacheHitRate = 100*(psi.getPdCacheHits()/(double)psi.getPdRequests());
862                            puCacheHitRate = 100*(psi.getPuCacheHits()/(double)psi.getPuRequests());
863                    }
864                    System.out.println("Psi down cache: " + psi.getPdCache().size() + "; " + psi.getPdRequests() + " requests; hit rate " + df.format(pdCacheHitRate) + "%");
865                    System.out.println("Psi up cache: " + psi.getPuCache().size() + "; " + psi.getPuRequests() + " requests; hit rate " + df.format(puCacheHitRate) + "%");
866                    System.out.println("Psi cache: size " + psi.getCacheSize() + "; " + psi.getNrOfRequests() + " requests; hit rate " + df.format(cacheHitRate) + "%");
867                    
868                    double psiTimePercent = 100*psi.getPsiApplicationTimeNs()/(double)algorithmTime;
869                    double psiWOReasoningTimePercent = 100*(psi.getPsiApplicationTimeNs()-psi.getPsiReasoningTimeNs())/(double)algorithmTime;
870                    // System.out.println(Helper.prettyPrintNanoSeconds(psi.getPsiApplicationTimeNs()));
871                    // System.out.println(Helper.prettyPrintNanoSeconds(psi.getPsiReasoningTimeNs()));
872                    System.out.println("Psi application time percentage: " + df.format(psiTimePercent) + "%; " + df.format(psiWOReasoningTimePercent) + "% excluding reasoning");
873            }
874                    
875            if(adc)
876                    System.out.println("ADC: " + fittestIndividual.getAdc());
877            // TODO: hier muss noch eine Zusammenfassung rein, die sowohl f�r zweiwertiges als auch
878            // dreiwertiges Lernproblem funktioniert
879            // System.out.println("classified positive: " + fittestIndividual.getScore().getDefPosSet());
880            // System.out.println("classified negative: " + fittestIndividual.getScore().getDefNegSet());
881            System.out.println();
882        }
883    
884        /**
885         * Sets the probability that crossover is performed. If crossover is not
886         * performed individuals are cloned, i.e. copied over to the next
887         * generation.
888         * 
889         * @param crossoverProbability
890         *            A number between 0 and 1.
891         */
892        //public void setCrossoverProbability(float crossoverProbability) {
893        //    if (crossoverProbability >= 0.0 && crossoverProbability <= 1.0)
894        //        this.crossoverProbability = crossoverProbability;
895        //    else
896        //        error("Probability for crossover must be a value between 0 and 1.");
897        //}
898    
899        /**
900         * Sets the probability that an individual is mutated.
901         * 
902         * @param mutationProbability
903         *            A number between 0 and 1.
904         */
905        //public void setMutationProbability(float mutationProbability) {
906        //    if (mutationProbability >= 0.0 && mutationProbability <= 1.0)
907        //        this.mutationProbability = mutationProbability;
908        //    else
909        //        error("Probability for mutation must be a value between 0 and 1.");
910        //}
911    
912        /**
913         * Switch for turning elitism on or off. Elitism means that the fittest
914         * individual is copied over to the next generation automatically and cannot
915         * be lost.
916         * 
917         * @param elitism
918         *            True if elitism should be actived, false otherwise.
919         */
920        //public void setElitism(boolean elitism) {
921        //    this.elitism = elitism;
922        //}
923    
924        /**
925         * This method controls how many individuals will actually be selected and
926         * transfered to the mating pool in a steady state algorithm (it is ignored
927         * in a generational algorithm).
928         * 
929         * @param numberOfSelectedIndividuals
930         *            Number of individuals, which will be selected.
931         */
932        //public void setNumberOfSelectedIndividuals(int numberOfSelectedIndividuals) {
933        //    if (algorithmType == AlgorithmType.STEADY_STATE)
934        //        this.numberOfSelectedIndividuals = numberOfSelectedIndividuals;
935        //    else
936        //        System.out.println("Warning: number of selected individuals "
937        //                + "will be ignored in a generational algorithm");
938        //}
939    
940        private void error(String errorMessage) {
941            // prints an error message and exits
942            System.out.println("Error: " + errorMessage);
943            System.out
944                    .println("Please correct all errors and run the algorithm again.");
945            System.exit(0);
946        }
947    
948    //    @Override
949            public ScorePosNeg getSolutionScore() {
950                    return bestScore;
951            }
952    
953            @Override
954            public Description getCurrentlyBestDescription() {
955                    return bestConcept;
956            }    
957        
958            @Override
959            public EvaluatedDescriptionPosNeg getCurrentlyBestEvaluatedDescription() {
960                    // return fittestIndividual.getTree();
961                    return new EvaluatedDescriptionPosNeg(bestConcept,bestScore);
962            }
963    
964            @Override
965            public void stop() {
966                    // TODO Auto-generated method stub
967                    
968            }
969    
970            /* (non-Javadoc)
971             * @see org.dllearner.core.LearningAlgorithm#isRunning()
972             */
973            @Override
974            public boolean isRunning() {
975                    // TODO Auto-generated method stub
976                    return false;
977            }
978    
979        /**
980         * This allows to set the number of individuals in the whole population. A
981         * higher value slows down the algorithm, but will in general improve
982         * accuracy of the results.
983         * 
984         * @param numberOfIndividuals
985         *            Number of individuals in the population.
986         */
987        //public void setNumberOfIndividuals(int numberOfIndividuals) {
988        //    this.numberOfIndividuals = numberOfIndividuals;
989        //}
990    
991        /**
992         * The algorithm converges, if it doesn't find a fitter individual after a
993         * number of generations. This number controls for how many generations the
994         * algorithm will try to find a fitter individual.
995         * 
996         * @param postConvergenceGenerations
997         *            A number specifying after how many generations without a new
998         *            fittest individual the algorithm should stop.
999         */
1000        //public void setPostConvergenceGenerations(int postConvergenceGenerations) {
1001        //    this.postConvergenceGenerations = postConvergenceGenerations;
1002        //}
1003    
1004        /**
1005         * This method switches the type of selection, which is performed. It can be
1006         * either Fitness Proportionate Selection (FPS) or Rank Selection.
1007         * 
1008         * @param selectionType
1009         *            Either FPS or RANK_SELECTION.
1010         */
1011        //public void setSelectionType(SelectionType selectionType) {
1012        //    this.selectionType = selectionType;
1013        //}
1014    
1015        /**
1016         * This switch controls, which type of algorithm is used.
1017         * 
1018         * @param algorithmType
1019         *            STEADY_STATE or GENERATIONAL.
1020         */
1021        //public void setAlgorithmType(AlgorithmType algorithmType) {
1022        //    this.algorithmType = algorithmType;
1023        //}
1024            
1025    }