001    /**
002     * Copyright (C) 2007-2011, Jens Lehmann
003     *
004     * This file is part of DL-Learner.
005     *
006     * DL-Learner is free software; you can redistribute it and/or modify
007     * it under the terms of the GNU General Public License as published by
008     * the Free Software Foundation; either version 3 of the License, or
009     * (at your option) any later version.
010     *
011     * DL-Learner is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014     * GNU General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018     */
019    
020    package org.dllearner.algorithms.celoe;
021    
022    import java.io.File;
023    import java.text.DecimalFormat;
024    import java.util.Collection;
025    import java.util.Iterator;
026    import java.util.LinkedList;
027    import java.util.List;
028    import java.util.Map;
029    import java.util.Set;
030    import java.util.SortedSet;
031    import java.util.TreeSet;
032    
033    import org.apache.log4j.Logger;
034    import org.dllearner.core.ComponentAnn;
035    import org.dllearner.core.ComponentInitException;
036    import org.dllearner.core.EvaluatedDescription;
037    import org.dllearner.core.AbstractCELA;
038    import org.dllearner.core.AbstractLearningProblem;
039    import org.dllearner.core.AbstractReasonerComponent;
040    import org.dllearner.core.configurators.CELOEConfigurator;
041    import org.dllearner.core.options.BooleanConfigOption;
042    import org.dllearner.core.options.CommonConfigMappings;
043    import org.dllearner.core.options.CommonConfigOptions;
044    import org.dllearner.core.options.ConfigOption;
045    import org.dllearner.core.options.DoubleConfigOption;
046    import org.dllearner.core.options.StringConfigOption;
047    import org.dllearner.core.owl.ClassHierarchy;
048    import org.dllearner.core.owl.Description;
049    import org.dllearner.core.owl.Individual;
050    import org.dllearner.core.owl.Intersection;
051    import org.dllearner.core.owl.NamedClass;
052    import org.dllearner.core.owl.Restriction;
053    import org.dllearner.core.owl.Thing;
054    import org.dllearner.learningproblems.ClassLearningProblem;
055    import org.dllearner.learningproblems.PosNegLP;
056    import org.dllearner.learningproblems.PosNegLPStandard;
057    import org.dllearner.learningproblems.PosOnlyLP;
058    import org.dllearner.refinementoperators.OperatorInverter;
059    import org.dllearner.refinementoperators.RefinementOperator;
060    import org.dllearner.refinementoperators.RhoDRDown;
061    import org.dllearner.utilities.Files;
062    import org.dllearner.utilities.Helper;
063    import org.dllearner.utilities.owl.ConceptComparator;
064    import org.dllearner.utilities.owl.ConceptTransformation;
065    import org.dllearner.utilities.owl.DescriptionMinimizer;
066    import org.dllearner.utilities.owl.EvaluatedDescriptionSet;
067    import org.dllearner.utilities.owl.PropertyContext;
068    
069    import com.jamonapi.Monitor;
070    import com.jamonapi.MonitorFactory;
071    
072    /**
073     * The CELOE (Class Expression Learner for Ontology Engineering) algorithm.
074     * It adapts and extends the standard supervised learning algorithm for the
075     * ontology engineering use case. 
076     * 
077     * @author Jens Lehmann
078     *
079     */
080    @ComponentAnn(name="CELOE", shortName="celoe", version=1.0, description="CELOE is an adapted and extended version of the OCEL algorithm applied for the ontology engineering use case. See http://jens-lehmann.org/files/2011/celoe.pdf for reference.")
081    public class CELOE extends AbstractCELA {
082    
083            private static Logger logger = Logger.getLogger(CELOE.class);
084            private CELOEConfigurator configurator;
085            
086            private boolean isRunning = false;
087            private boolean stop = false;   
088            
089    //      private OEHeuristicStable heuristicStable = new OEHeuristicStable();
090    //      private OEHeuristicRuntime heuristicRuntime = new OEHeuristicRuntime();
091            
092            private RefinementOperator operator;
093            private DescriptionMinimizer minimizer;
094            
095            // all nodes in the search tree (used for selecting most promising node)
096            private TreeSet<OENode> nodes;
097            private OEHeuristicRuntime heuristic; // = new OEHeuristicRuntime();
098            // root of search tree
099            private OENode startNode;
100            // the class with which we start the refinement process
101            private Description startClass;
102            
103            // all descriptions in the search tree plus those which were too weak (for fast redundancy check)
104            private TreeSet<Description> descriptions;
105            
106            private EvaluatedDescriptionSet bestEvaluatedDescriptions;
107            
108            // if true, then each solution is evaluated exactly instead of approximately
109            // private boolean exactBestDescriptionEvaluation = false;
110            private boolean singleSuggestionMode;
111            private Description bestDescription;
112            private double bestAccuracy = Double.MIN_VALUE;
113            
114            private NamedClass classToDescribe;
115            // examples are either 1.) instances of the class to describe 2.) positive examples
116            // 3.) union of pos.+neg. examples depending on the learning problem at hand
117            private Set<Individual> examples;
118            
119            // CELOE was originally created for learning classes in ontologies, but also
120            // works for other learning problem types
121            private boolean isClassLearningProblem;
122            private boolean isEquivalenceProblem;
123            
124            private long nanoStartTime;
125            
126            // important parameters
127            private double noise;
128            private double maxDepth;
129            private boolean filterFollowsFromKB;    
130            
131            // less important parameters
132            // forces that one solution cannot be subexpression of another expression; this option is useful to get diversity
133            // but it can also suppress quite useful expressions
134            private boolean forceMutualDifference = false;
135            
136            // utility variables
137            private String baseURI;
138            private Map<String, String> prefixes;
139            private DecimalFormat dfPercent = new DecimalFormat("0.00%");
140            private ConceptComparator descriptionComparator = new ConceptComparator();
141            
142            // statistical variables
143            private int expressionTests = 0;
144            private int minHorizExp = 0;
145            private int maxHorizExp = 0;
146            
147            @Override
148            public CELOEConfigurator getConfigurator() {
149                    return configurator;
150            }
151            
152            public CELOE(AbstractLearningProblem problem, AbstractReasonerComponent reasoner) {
153                    super(problem, reasoner);
154                    configurator = new CELOEConfigurator(this);
155            }
156    
157            public static Collection<Class<? extends AbstractLearningProblem>> supportedLearningProblems() {
158                    Collection<Class<? extends AbstractLearningProblem>> problems = new LinkedList<Class<? extends AbstractLearningProblem>>();
159                    problems.add(AbstractLearningProblem.class);
160                    return problems;
161            }       
162            
163            public static Collection<ConfigOption<?>> createConfigOptions() {
164                    Collection<ConfigOption<?>> options = new LinkedList<ConfigOption<?>>();
165                    options.add(CommonConfigOptions.useAllConstructor());
166                    options.add(CommonConfigOptions.useExistsConstructor());
167                    options.add(CommonConfigOptions.useHasValueConstructor());
168                    options.add(CommonConfigOptions.useDataHasValueConstructor());
169                    options.add(CommonConfigOptions.valueFreqencyThreshold());
170                    options.add(CommonConfigOptions.useCardinalityRestrictions());
171                    options.add(CommonConfigOptions.cardinalityLimit());
172                    // by default, we do not use negation (should be configurable in GUI)
173                    options.add(CommonConfigOptions.useNegation(false));
174                    options.add(CommonConfigOptions.useBooleanDatatypes());
175                    options.add(CommonConfigOptions.useDoubleDatatypes());
176                    options.add(CommonConfigOptions.maxExecutionTimeInSeconds(10));
177                    options.add(CommonConfigOptions.getNoisePercentage());
178                    options.add(CommonConfigOptions.getTerminateOnNoiseReached(false));
179                    options.add(CommonConfigOptions.getMaxDepth(7));
180                    options.add(CommonConfigOptions.maxNrOfResults(10));
181                    options.add(CommonConfigOptions.maxClassDescriptionTests());
182                    options.add(new BooleanConfigOption("singleSuggestionMode", "Use this if you are interested in only one suggestion and your learning problem has many (more than 1000) examples.", false));
183                    options.add(CommonConfigOptions.getInstanceBasedDisjoints());
184                    options.add(new BooleanConfigOption("filterDescriptionsFollowingFromKB", "If true, then the results will not contain suggestions, which already follow logically from the knowledge base. Be careful, since this requires a potentially expensive consistency check for candidate solutions.", false));
185                    options.add(new BooleanConfigOption("reuseExistingDescription", "If true, the algorithm tries to find a good starting point close to an existing definition/super class of the given class in the knowledge base.", false));
186                    options.add(new BooleanConfigOption("writeSearchTree", "specifies whether to write a search tree", false));
187                    options.add(new StringConfigOption("searchTreeFile","file to use for the search tree", "log/searchTree.txt"));
188                    options.add(new BooleanConfigOption("replaceSearchTree","specifies whether to replace the search tree in the log file after each run or append the new search tree", false));
189                    options.add(new DoubleConfigOption("expansionPenaltyFactor","heuristic penalty per syntactic construct used (lower = finds more complex expression, but might miss simple ones)", 0.1));
190                    options.add(CommonConfigOptions.allowedConcepts());
191                    options.add(CommonConfigOptions.ignoredConcepts());
192                    return options;
193            }
194            
195            public static String getName() {
196                    return "CELOE";
197            }
198            
199            @Override
200            public void init() throws ComponentInitException {
201                    
202                    // compute used concepts/roles from allowed/ignored
203                    // concepts/roles
204                    Set<NamedClass> usedConcepts;
205                    Set<NamedClass> allowedConcepts = configurator.getAllowedConcepts()==null ? null : CommonConfigMappings.getAtomicConceptSet(configurator.getAllowedConcepts());
206                    Set<NamedClass> ignoredConcepts = configurator.getIgnoredConcepts()==null ? null : CommonConfigMappings.getAtomicConceptSet(configurator.getIgnoredConcepts());
207                    if(allowedConcepts != null) {
208                            // sanity check to control if no non-existing concepts are in the list
209                            Helper.checkConcepts(reasoner, allowedConcepts);
210                            usedConcepts = allowedConcepts;
211                    } else if(ignoredConcepts != null) {
212                            usedConcepts = Helper.computeConceptsUsingIgnoreList(reasoner, ignoredConcepts);
213                    } else {
214                            usedConcepts = Helper.computeConcepts(reasoner);
215                    }
216                    
217                    // copy class hierarchy and modify it such that each class is only
218                    // reachable via a single path
219    //              ClassHierarchy classHierarchy = reasoner.getClassHierarchy().clone();
220                    ClassHierarchy classHierarchy = reasoner.getClassHierarchy().cloneAndRestrict(usedConcepts);
221                    classHierarchy.thinOutSubsumptionHierarchy();
222                    
223                    heuristic = new OEHeuristicRuntime(configurator);
224                    
225                    minimizer = new DescriptionMinimizer(reasoner);
226                    
227                    startClass = Thing.instance;
228                    
229                    singleSuggestionMode = configurator.getSingleSuggestionMode();
230                    
231                    // create refinement operator
232                    operator = new RhoDRDown(reasoner, classHierarchy, startClass, configurator);
233                    baseURI = reasoner.getBaseURI();
234                    prefixes = reasoner.getPrefixes();              
235                    if(configurator.getWriteSearchTree()) {
236                            File f = new File(configurator.getSearchTreeFile());
237    //                      System.out.println(f.getAbsolutePath());
238                            Files.clearFile(f);
239                    }
240                    
241                    bestEvaluatedDescriptions = new EvaluatedDescriptionSet(configurator.getMaxNrOfResults());
242                    
243                    isClassLearningProblem = (learningProblem instanceof ClassLearningProblem);
244                    
245                    // we put important parameters in class variables
246                    noise = configurator.getNoisePercentage()/100d;
247    //              System.out.println("noise " + noise);
248                    maxDepth = configurator.getMaxDepth();
249                    // (filterFollowsFromKB is automatically set to false if the problem
250                    // is not a class learning problem
251                    filterFollowsFromKB = configurator.getFilterDescriptionsFollowingFromKB()
252                      && isClassLearningProblem;
253                    
254                    // actions specific to ontology engineering
255                    if(isClassLearningProblem) {
256                            ClassLearningProblem problem = (ClassLearningProblem) learningProblem;
257                            classToDescribe = problem.getClassToDescribe();
258                            isEquivalenceProblem = problem.isEquivalenceProblem();
259                            
260                            examples = reasoner.getIndividuals(classToDescribe);
261                            
262                            // start class: intersection of super classes for definitions (since it needs to
263                            // capture all instances), but owl:Thing for learning subclasses (since it is
264                            // superfluous to add super classes in this case)
265                            if(isEquivalenceProblem) {
266                                    Set<Description> existingDefinitions = reasoner.getAssertedDefinitions(classToDescribe);
267                                    if(configurator.getReuseExistingDescription() && (existingDefinitions.size() > 0)) {
268                                            // the existing definition is reused, which in the simplest case means to
269                                            // use it as a start class or, if it is already too specific, generalise it
270                                            
271                                            // pick the longest existing definition as candidate
272                                            Description existingDefinition = null;
273                                            int highestLength = 0;
274                                            for(Description exDef : existingDefinitions) {
275                                                    if(exDef.getLength() > highestLength) {
276                                                            existingDefinition = exDef;
277                                                            highestLength = exDef.getLength();
278                                                    }
279                                            }
280                                            
281                                            LinkedList<Description> startClassCandidates = new LinkedList<Description>();
282                                            startClassCandidates.add(existingDefinition);
283                                            ((RhoDRDown)operator).setDropDisjuncts(true);
284                                            RefinementOperator upwardOperator = new OperatorInverter(operator);
285                                            
286                                            // use upward refinement until we find an appropriate start class
287                                            boolean startClassFound = false;
288                                            Description candidate;
289                                            do {
290                                                    candidate = startClassCandidates.pollFirst();
291                                                    if(((ClassLearningProblem)learningProblem).getRecall(candidate)<1.0) {
292                                                            // add upward refinements to list
293                                                            Set<Description> refinements = upwardOperator.refine(candidate, candidate.getLength());
294    //                                                      System.out.println("ref: " + refinements);
295                                                            LinkedList<Description> refinementList = new LinkedList<Description>(refinements);
296    //                                                      Collections.reverse(refinementList);
297    //                                                      System.out.println("list: " + refinementList);
298                                                            startClassCandidates.addAll(refinementList);
299    //                                                      System.out.println("candidates: " + startClassCandidates);
300                                                    } else {
301                                                            startClassFound = true;
302                                                    }
303                                            } while(!startClassFound);
304                                            startClass = candidate;
305                                            
306                                            if(startClass.equals(existingDefinition)) {
307                                                    logger.info("Reusing existing description " + startClass.toManchesterSyntaxString(baseURI, prefixes) + " as start class for learning algorithm.");
308                                            } else {
309                                                    logger.info("Generalised existing description " + existingDefinition.toManchesterSyntaxString(baseURI, prefixes) + " to " + startClass.toManchesterSyntaxString(baseURI, prefixes) + ", which is used as start class for the learning algorithm.");
310                                            }
311                                            
312    //                                      System.out.println("start class: " + startClass);
313    //                                      System.out.println("existing def: " + existingDefinition);
314    //                                      System.out.println(reasoner.getIndividuals(existingDefinition));
315                                            
316                                            ((RhoDRDown)operator).setDropDisjuncts(false);
317                                            
318                                    } else {
319                                            Set<Description> superClasses = reasoner.getClassHierarchy().getSuperClasses(classToDescribe);
320                                            if(superClasses.size() > 1) {
321                                                    startClass = new Intersection(new LinkedList<Description>(superClasses));
322                                            } else if(superClasses.size() == 1){
323                                                    startClass = (Description) superClasses.toArray()[0];
324                                            } else {
325                                                    startClass = Thing.instance;
326                                                    logger.warn(classToDescribe + " is equivalent to owl:Thing. Usually, it is not " +
327                                                                    "sensible to learn a description in this case.");
328                                            }                                       
329                                    }
330                            }                               
331                    } else if(learningProblem instanceof PosOnlyLP) {
332                            examples = ((PosOnlyLP)learningProblem).getPositiveExamples();
333                    } else if(learningProblem instanceof PosNegLP) {
334                            examples = Helper.union(((PosNegLP)learningProblem).getPositiveExamples(),((PosNegLP)learningProblem).getNegativeExamples());
335                    }
336            }
337    
338            @Override
339            public Description getCurrentlyBestDescription() {
340                    EvaluatedDescription ed = getCurrentlyBestEvaluatedDescription();
341                    return ed == null ? null : ed.getDescription();
342            }
343    
344            @Override
345            public List<Description> getCurrentlyBestDescriptions() {
346                    return bestEvaluatedDescriptions.toDescriptionList();
347            }
348            
349            @Override
350            public EvaluatedDescription getCurrentlyBestEvaluatedDescription() {
351                    return bestEvaluatedDescriptions.getBest();
352            }       
353            
354            @Override
355            public TreeSet<? extends EvaluatedDescription> getCurrentlyBestEvaluatedDescriptions() {
356                    return bestEvaluatedDescriptions.getSet();
357            }       
358            
359            public double getCurrentlyBestAccuracy() {
360                    return bestEvaluatedDescriptions.getBest().getAccuracy();
361            }
362            
363            @Override
364            public void start() {
365    //              System.out.println(configurator.getMaxExecutionTimeInSeconds());
366                    
367                    stop = false;
368                    isRunning = true;
369                    reset();
370                    nanoStartTime = System.nanoTime();
371                    
372                    // highest accuracy so far
373                    double highestAccuracy = 0.0;
374                    OENode nextNode;
375    
376                    addNode(startClass, null);
377                    
378                    int loop = 0;
379                    while (!terminationCriteriaSatisfied()) {
380    //                      System.out.println("loop " + loop);
381                            
382                            if(!singleSuggestionMode && bestEvaluatedDescriptions.getBestAccuracy() > highestAccuracy) {
383                                    highestAccuracy = bestEvaluatedDescriptions.getBestAccuracy();
384                                    logger.info("more accurate (" + dfPercent.format(highestAccuracy) + ") class expression found: " + descriptionToString(bestEvaluatedDescriptions.getBest().getDescription()));  
385                            }
386    
387                            // chose best node according to heuristics
388                            nextNode = getNextNodeToExpand();
389                            int horizExp = nextNode.getHorizontalExpansion();
390                            
391                            // apply operator
392                            Monitor mon = MonitorFactory.start("refineNode");
393                            TreeSet<Description> refinements = refineNode(nextNode);
394                            mon.stop();
395                                    
396    //                      System.out.println("next node: " + nextNode);
397    //                      for(Description refinement : refinements) {
398    //                              System.out.println("refinement: " + refinement);
399    //                      }
400                            
401                            while(refinements.size() != 0) {
402                                    // pick element from set
403                                    Description refinement = refinements.pollFirst();
404                                    int length = refinement.getLength();
405                                    
406                                    // we ignore all refinements with lower length and too high depth
407                                    // (this also avoids duplicate node children)
408                                    if(length > horizExp && refinement.getDepth() <= maxDepth) {
409                                            
410    //                                      System.out.println("potentially adding " + refinement + " to search tree as child of " + nextNode + " " + new Date());
411                                            Monitor mon2 = MonitorFactory.start("addNode");
412                                            addNode(refinement, nextNode);
413                                            mon2.stop();
414                                            // adding nodes is potentially computationally expensive, so we have
415                                            // to check whether max time is exceeded        
416                                            if(terminationCriteriaSatisfied()) {
417                                                    break;
418                                            }
419    //                                      System.out.println("addNode finished" + " " + new Date());
420                                    }
421                    
422    //                              System.out.println("  refinement queue length: " + refinements.size());
423                            }
424                            
425                            updateMinMaxHorizExp(nextNode);
426                            
427                            // writing the search tree (if configured)
428                            if (configurator.getWriteSearchTree()) {
429                                    String treeString = "best node: " + bestEvaluatedDescriptions.getBest() + "\n";
430                                    if (refinements.size() > 1) {
431                                            treeString += "all expanded nodes:\n";
432                                            for (Description n : refinements) {
433                                                    treeString += "   " + n + "\n";
434                                            }
435                                    }
436                                    treeString += startNode.toTreeString(baseURI);
437                                    treeString += "\n";
438    
439                                    if (configurator.getReplaceSearchTree())
440                                            Files.createFile(new File(configurator.getSearchTreeFile()), treeString);
441                                    else
442                                            Files.appendFile(new File(configurator.getSearchTreeFile()), treeString);
443                            }
444                            
445    //                      System.out.println(loop);
446                            loop++;
447                    }
448                    
449                    if (stop) {
450                            logger.info("Algorithm stopped ("+expressionTests+" descriptions tested). " + nodes.size() + " nodes in the search tree.\n");
451                    } else {
452                            logger.info("Algorithm terminated successfully ("+expressionTests+" descriptions tested). "  + nodes.size() + " nodes in the search tree.\n");
453                    }
454    
455                    if(singleSuggestionMode) {
456                            bestEvaluatedDescriptions.add(bestDescription, bestAccuracy, learningProblem);
457                    }               
458                    
459                    // print solution(s)
460                    logger.info("solutions:\n" + getSolutionString());
461                    
462    //              System.out.println(startNode.toTreeString(baseURI));
463                    
464                    isRunning = false;
465    //              System.out.println("isRunning: " + isRunning);
466            }
467    
468            private OENode getNextNodeToExpand() {
469                    // we expand the best node of those, which have not achieved 100% accuracy
470                    // already and have a horizontal expansion equal to their length
471                    // (rationale: further extension is likely to add irrelevant syntactical constructs)
472                    Iterator<OENode> it = nodes.descendingIterator();
473                    while(it.hasNext()) {
474                            OENode node = it.next();
475                            if(node.getAccuracy() < 1.0 || node.getHorizontalExpansion() < node.getDescription().getLength()) {
476                                    return node;
477                            }
478                    }
479                    
480                    // this should practically never be called, since for any reasonable learning
481                    // task, we will always have at least one node with less than 100% accuracy
482                    return nodes.last();
483            }
484            
485            // expand node horizontically
486            private TreeSet<Description> refineNode(OENode node) {
487                    // we have to remove and add the node since its heuristic evaluation changes through the expansion
488                    // (you *must not* include any criteria in the heuristic which are modified outside of this method,
489                    // otherwise you may see rarely occurring but critical false ordering in the nodes set)
490                    nodes.remove(node);
491    //              System.out.println("refining: " + node);
492                    int horizExp = node.getHorizontalExpansion();
493                    TreeSet<Description> refinements = (TreeSet<Description>) operator.refine(node.getDescription(), horizExp+1);
494                    node.incHorizontalExpansion();
495                    node.setRefinementCount(refinements.size());
496                    nodes.add(node);
497                    return refinements;
498            }
499            
500            // add node to search tree if it is not too weak
501            // returns true if node was added and false otherwise
502            private boolean addNode(Description description, OENode parentNode) {
503                    
504    //              System.out.println(description);
505                    
506                    // redundancy check (return if redundant)
507                    boolean nonRedundant = descriptions.add(description);
508                    if(!nonRedundant) {
509                            return false;
510                    }
511                    
512                    // check whether the description is allowed
513                    if(!isDescriptionAllowed(description, parentNode)) {
514                            return false;
515                    }
516                    
517    //              System.out.println("Test " + new Date());
518                    // quality of description (return if too weak)
519                    double accuracy = learningProblem.getAccuracyOrTooWeak(description, noise);
520                    // issue a warning if accuracy is not between 0 and 1 or -1 (too weak)
521                    if(accuracy > 1.0 || (accuracy < 0.0 && accuracy != -1)) {
522                            logger.warn("Invalid accuracy value " + accuracy + " for description " + description + ". This could be caused by a bug in the heuristic measure and should be reported to the DL-Learner bug tracker.");
523                            System.exit(0);
524                    }
525                    
526    //              System.out.println("Test2 " + new Date());
527                    expressionTests++;
528    //              System.out.println("acc: " + accuracy);
529    //              System.out.println(description + " " + accuracy);
530                    if(accuracy == -1) {
531                            return false;
532                    }
533                    
534                    OENode node = new OENode(parentNode, description, accuracy);
535                            
536                    // link to parent (unless start node)
537                    if(parentNode == null) {
538                            startNode = node;
539                    } else {
540                            parentNode.addChild(node);
541                    }
542            
543                    nodes.add(node);
544    //              System.out.println("Test3 " + new Date());
545                    
546                    // in some cases (e.g. mutation) fully evaluating even a single description is too expensive
547                    // due to the high number of examples -- so we just stick to the approximate accuracy
548                    if(singleSuggestionMode) {
549                            if(accuracy > bestAccuracy) {
550                                    bestAccuracy = accuracy;
551                                    bestDescription = description;
552                                    logger.info("more accurate (" + dfPercent.format(bestAccuracy) + ") class expression found: " + descriptionToString(bestDescription)); // + getTemporaryString(bestDescription)); 
553                            }
554                            return true;
555                    } 
556                    
557    //              System.out.println("description " + description + " accuracy " + accuracy);
558                    
559                    // maybe add to best descriptions (method keeps set size fixed);
560                    // we need to make sure that this does not get called more often than
561                    // necessary since rewriting is expensive
562                    boolean isCandidate = !bestEvaluatedDescriptions.isFull();
563                    if(!isCandidate) {
564                            EvaluatedDescription worst = bestEvaluatedDescriptions.getWorst();
565                            double accThreshold = worst.getAccuracy();
566                            isCandidate = 
567                                    (accuracy > accThreshold ||
568                                    (accuracy >= accThreshold && description.getLength() < worst.getDescriptionLength()));
569                    }
570                    
571    //              System.out.println(isCandidate);
572                    
573    //              System.out.println("Test4 " + new Date());
574                    if(isCandidate) {
575                            
576                            Description niceDescription = rewriteNode(node);
577                            ConceptTransformation.transformToOrderedForm(niceDescription, descriptionComparator);
578    //                      Description niceDescription = node.getDescription();
579                            
580                            // another test: none of the other suggested descriptions should be 
581                            // a subdescription of this one unless accuracy is different
582                            // => comment: on the one hand, this appears to be too strict, because once A is a solution then everything containing
583                            // A is not a candidate; on the other hand this suppresses many meaningless extensions of A
584                            boolean shorterDescriptionExists = false;
585                            if(forceMutualDifference) {
586                                    for(EvaluatedDescription ed : bestEvaluatedDescriptions.getSet()) {
587                                            if(Math.abs(ed.getAccuracy()-accuracy) <= 0.00001 && ConceptTransformation.isSubdescription(niceDescription, ed.getDescription())) {
588    //                                              System.out.println("shorter: " + ed.getDescription());
589                                                    shorterDescriptionExists = true;
590                                                    break;
591                                            }
592                                    }                               
593                            }
594                            
595    //                      System.out.println("shorter description? " + shorterDescriptionExists + " nice: " + niceDescription);
596                            
597                            if(!shorterDescriptionExists) {
598                                    if(!filterFollowsFromKB || !((ClassLearningProblem)learningProblem).followsFromKB(niceDescription)) {
599    //                                      System.out.println("Test2");
600                                            bestEvaluatedDescriptions.add(niceDescription, accuracy, learningProblem);
601    //                                      System.out.println("acc: " + accuracy);
602    //                                      System.out.println(bestEvaluatedDescriptions);
603                                    }
604                            }
605                                                    
606    //                      System.out.println(bestEvaluatedDescriptions.getSet().size());
607                    }
608                    
609    //              System.out.println("Test5 " + new Date());
610    //              System.out.println("best evaluated descriptions size: " + bestEvaluatedDescriptions.size() + " worst: " + bestEvaluatedDescriptions.getWorst());
611                    return true;
612            }       
613            
614            // checks whether the description is allowed
615            private boolean isDescriptionAllowed(Description description, OENode parentNode) {
616                    if(isClassLearningProblem) {
617                            if(isEquivalenceProblem) {
618                                    // the class to learn must not appear on the outermost property level
619                                    if(occursOnFirstLevel(description, classToDescribe)) {
620                                            return false;
621                                    }
622                            } else {
623                                    // none of the superclasses of the class to learn must appear on the
624                                    // outermost property level
625                                    TreeSet<Description> toTest = new TreeSet<Description>(descriptionComparator);
626                                    toTest.add(classToDescribe);
627                                    while(!toTest.isEmpty()) {
628                                            Description d = toTest.pollFirst();
629                                            if(occursOnFirstLevel(description, d)) {
630                                                    return false;
631                                            }
632                                            toTest.addAll(reasoner.getClassHierarchy().getSuperClasses(d));
633                                    }
634                            }                       
635                    }
636                    
637                    // perform forall sanity tests
638                    if(parentNode != null && ConceptTransformation.getForallOccurences(description) > ConceptTransformation.getForallOccurences(parentNode.getDescription())) {
639                            // we have an additional \forall construct, so we now fetch the contexts
640                            // in which it occurs
641                            SortedSet<PropertyContext> contexts = ConceptTransformation.getForallContexts(description);
642                            SortedSet<PropertyContext> parentContexts = ConceptTransformation.getForallContexts(parentNode.getDescription());
643                            contexts.removeAll(parentContexts);
644    //                      System.out.println("parent description: " + parentNode.getDescription());
645    //                      System.out.println("description: " + description);
646    //                      System.out.println("contexts: " + contexts);
647                            // we now have to perform sanity checks: if \forall is used, then there
648                            // should be at least on class instance which has a filler at the given context
649                            for(PropertyContext context : contexts) {
650                                    // transform [r,s] to \exists r.\exists s.\top
651                                    Description existentialContext = context.toExistentialContext();
652                                    boolean fillerFound = false;
653                                    for(Individual instance : examples) {
654                                            if(reasoner.hasType(existentialContext, instance)) {
655    //                                              System.out.println(instance + "  " + existentialContext);
656                                                    fillerFound = true;
657                                                    break;
658                                            }
659                                    }
660                                    // if we do not find a filler, this means that putting \forall at
661                                    // that position is not meaningful
662                                    if(!fillerFound) {
663                                            return false;
664                                    }
665                            }
666                    }               
667                    
668                    // we do not want to have negations of sibling classes on the outermost level
669                    // (they are expressed more naturally by saying that the siblings are disjoint,
670                    // so it is reasonable not to include them in solutions)
671    //              Set<Description> siblingClasses = reasoner.getClassHierarchy().getSiblingClasses(classToDescribe);
672    //              for now, we just disable negation
673                    
674                    return true;
675            }
676            
677            // determine whether a named class occurs on the outermost level, i.e. property depth 0
678            // (it can still be at higher depth, e.g. if intersections are nested in unions)
679            private boolean occursOnFirstLevel(Description description, Description clazz) {
680                    if(description instanceof NamedClass) {
681                            if(description.equals(clazz)) {
682                                    return true;
683                            }
684                    } 
685                    
686                    if(description instanceof Restriction) {
687                            return false;
688                    }
689                    
690                    for(Description child : description.getChildren()) {
691                            if(occursOnFirstLevel(child, clazz)) {
692                                    return true;
693                            }
694                    }
695                    
696                    return false;
697            }
698            
699            // check whether the node is a potential solution candidate
700            private Description rewriteNode(OENode node) {
701                    Description description = node.getDescription();
702                    // minimize description (expensive!) - also performes some human friendly rewrites
703                    Description niceDescription = minimizer.minimizeClone(description);
704                    // replace \exists r.\top with \exists r.range(r) which is easier to read for humans
705                    ConceptTransformation.replaceRange(niceDescription, reasoner);
706                    return niceDescription;
707            }
708            
709            private boolean terminationCriteriaSatisfied() {
710                    return 
711                    stop || 
712                    (configurator.getMaxClassDescriptionTests() != 0 && (expressionTests >= configurator.getMaxClassDescriptionTests())) ||
713                    (configurator.getMaxExecutionTimeInSeconds() != 0 && ((System.nanoTime() - nanoStartTime) >= (configurator.getMaxExecutionTimeInSeconds()*1000000000l))) ||
714                    (configurator.getTerminateOnNoiseReached() && (100*getCurrentlyBestAccuracy()>100-configurator.getNoisePercentage()));
715            }
716            
717            private void reset() {
718                    // set all values back to their default values (used for running
719                    // the algorithm more than once)
720                    nodes = new TreeSet<OENode>(heuristic);
721                    descriptions = new TreeSet<Description>(new ConceptComparator());
722                    bestEvaluatedDescriptions.getSet().clear();
723                    expressionTests = 0;
724            }
725            
726            @Override
727            public boolean isRunning() {
728                    return isRunning;
729            }       
730            
731            @Override
732            public void stop() {
733                    stop = true;
734            }
735    
736            public OENode getSearchTreeRoot() {
737                    return startNode;
738            }
739            
740            // central function for printing description
741            private String descriptionToString(Description description) {
742                    return description.toManchesterSyntaxString(baseURI, prefixes);
743            }
744            
745            @SuppressWarnings("unused")
746            private String bestDescriptionToString() {
747                    EvaluatedDescription best = bestEvaluatedDescriptions.getBest();
748                    return best.getDescription().toManchesterSyntaxString(baseURI, prefixes) + " (accuracy: " + dfPercent.format(best.getAccuracy()) + ")";
749            }       
750            
751            private String getSolutionString() {
752                    int current = 1;
753                    String str = "";
754                    for(EvaluatedDescription ed : bestEvaluatedDescriptions.getSet().descendingSet()) {
755                            // temporary code
756                            if(learningProblem instanceof PosNegLPStandard) {
757                                    str += current + ": " + descriptionToString(ed.getDescription()) + " (pred. acc.: " + dfPercent.format(((PosNegLPStandard)learningProblem).getPredAccuracyOrTooWeakExact(ed.getDescription(),1)) + ", F-measure: "+ dfPercent.format(((PosNegLPStandard)learningProblem).getFMeasureOrTooWeakExact(ed.getDescription(),1)) + ")\n";
758                            } else {
759                                    str += current + ": " + descriptionToString(ed.getDescription()) + " " + dfPercent.format(ed.getAccuracy()) + "\n";
760    //                              System.out.println(ed);
761                            }
762                            current++;
763                    }
764                    return str;
765            }
766    
767    //      private String getTemporaryString(Description description) {
768    //              return descriptionToString(description) + " (pred. acc.: " + dfPercent.format(((PosNegLPStandard)learningProblem).getPredAccuracyOrTooWeakExact(description,1)) + ", F-measure: "+ dfPercent.format(((PosNegLPStandard)learningProblem).getFMeasureOrTooWeakExact(description,1)) + ")";
769    //      }
770            
771            private void updateMinMaxHorizExp(OENode node) {
772                    int newHorizExp = node.getHorizontalExpansion();
773                    
774                    // update maximum value
775                    maxHorizExp = Math.max(maxHorizExp, newHorizExp);
776                    
777                    // we just expanded a node with minimum horizontal expansion;
778                    // we need to check whether it was the last one
779                    if(minHorizExp == newHorizExp - 1) {
780                            
781                            // the best accuracy that a node can achieve 
782                            double scoreThreshold = heuristic.getNodeScore(node) + 1 - node.getAccuracy();
783                            
784                            for(OENode n : nodes.descendingSet()) {
785                                    if(n != node) {
786                                            if(n.getHorizontalExpansion() == minHorizExp) {
787                                                    // we can stop instantly when another node with min. 
788                                                    return;
789                                            }
790                                            if(heuristic.getNodeScore(n) < scoreThreshold) {
791                                                    // we can stop traversing nodes when their score is too low
792                                                    break;
793                                            }
794                                    }
795                            }
796                            
797                            // inc. minimum since we found no other node which also has min. horiz. exp.
798                            minHorizExp++;
799                            
800    //                      System.out.println("minimum horizontal expansion is now " + minHorizExp);
801                    }
802            }
803            
804            public int getMaximumHorizontalExpansion() {
805                    return maxHorizExp;
806            }
807    
808            public int getMinimumHorizontalExpansion() {
809                    return minHorizExp;
810            }
811            
812            /**
813             * @return the expressionTests
814             */
815            public int getClassExpressionTests() {
816                    return expressionTests;
817            }       
818            
819    }