001    /**
002     * Copyright (C) 2007-2009, Jens Lehmann
003     *
004     * This file is part of DL-Learner.
005     * 
006     * DL-Learner is free software; you can redistribute it and/or modify
007     * it under the terms of the GNU General Public License as published by
008     * the Free Software Foundation; either version 3 of the License, or
009     * (at your option) any later version.
010     *
011     * DL-Learner is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014     * GNU General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018     *
019     */
020    package org.dllearner.algorithms.refinement2;
021    
022    import java.io.File;
023    import java.text.DecimalFormat;
024    import java.util.HashSet;
025    import java.util.Iterator;
026    import java.util.LinkedList;
027    import java.util.List;
028    import java.util.Map;
029    import java.util.NavigableSet;
030    import java.util.Set;
031    import java.util.SortedSet;
032    import java.util.TreeSet;
033    import java.util.concurrent.ConcurrentSkipListSet;
034    
035    import org.apache.log4j.Logger;
036    import org.dllearner.core.LearningProblem;
037    import org.dllearner.core.ReasonerComponent;
038    import org.dllearner.core.configurators.ROLComponent2Configurator;
039    import org.dllearner.core.owl.Description;
040    import org.dllearner.core.owl.Individual;
041    import org.dllearner.core.owl.Intersection;
042    import org.dllearner.core.owl.Thing;
043    import org.dllearner.core.owl.Union;
044    import org.dllearner.learningproblems.EvaluatedDescriptionPosNeg;
045    import org.dllearner.learningproblems.PosNegLP;
046    import org.dllearner.learningproblems.ScorePosNeg;
047    import org.dllearner.refinementoperators.RefinementOperator;
048    import org.dllearner.refinementoperators.RhoDRDown;
049    import org.dllearner.utilities.Files;
050    import org.dllearner.utilities.Helper;
051    import org.dllearner.utilities.JamonMonitorLogger;
052    import org.dllearner.utilities.owl.ConceptComparator;
053    import org.dllearner.utilities.owl.ConceptTransformation;
054    import org.dllearner.utilities.owl.EvaluatedDescriptionPosNegComparator;
055    
056    import com.jamonapi.Monitor;
057    
058    /**
059     * Implements the 2nd version of the refinement operator based learning approach.
060     * 
061     * @author Jens Lehmann
062     * 
063     */
064    public class ROLearner2 {
065    
066            private static Logger logger = Logger.getLogger(ROLearner2.class);
067            private ROLComponent2Configurator configurator;
068    
069            // basic setup: learning problem and reasoning service
070            private ReasonerComponent rs;
071            // often the learning problems needn't be accessed directly; instead
072            // use the example sets below and the posonly variable
073            private PosNegLP learningProblem;
074            private Description startDescription;
075            private int nrOfExamples;
076            private int nrOfPositiveExamples;
077            private Set<Individual> positiveExamples;
078            private int nrOfNegativeExamples;
079            private Set<Individual> negativeExamples;
080    
081            // noise regulates how many positives can be misclassified and when the
082            // algorithm terminates
083            private double noise = 0.0;
084            private int allowedMisclassifications = 0;
085    
086            // positive only learning options:
087            // if no negatives are given, then one possible strategy is to find a very
088            // special concept still entailing all positive examples;
089            // this is realised by changing the termination criterion: a concept is a
090            // solution if it has been expanded x times (x is
091            // configurable) but no more special concept is found (all are either
092            // equivalent or too weak)
093            private int maxPosOnlyExpansion;
094    
095            // search tree options
096            private boolean writeSearchTree;
097            private File searchTreeFile;
098            private boolean replaceSearchTree = false;
099    
100            // constructs to improve performance
101            private boolean useTooWeakList = true;
102            private boolean useOverlyGeneralList = true;
103            private boolean useShortConceptConstruction = true;
104    
105            // extended Options
106            private long maxExecutionTimeInSeconds;
107            private boolean maxExecutionTimeAlreadyReached = false;
108            private long minExecutionTimeInSeconds = 0;
109            private boolean minExecutionTimeAlreadyReached = false;
110            private int guaranteeXgoodDescriptions = 1;
111            private boolean guaranteeXgoodAlreadyReached = false;
112            private int maxClassDescriptionTests;
113    
114            // if set to false we do not test properness; this may seem wrong
115            // but the disadvantage of properness testing are additional reasoner
116            // queries and a search bias towards ALL r.something because
117            // ALL r.TOP is improper and automatically expanded further
118            private boolean usePropernessChecks = false;
119    
120            // tree traversal means to run through the most promising concepts
121            // and connect them in an intersection to find a solution
122            // (this is called irregularly e.g. every 100 seconds)
123            private boolean useTreeTraversal = false;
124    
125            // if this variable is set to true, then the refinement operator
126            // is applied until all concept of equal length have been found
127            // e.g. TOP -> A1 -> A2 -> A3 is found in one loop; the disadvantage
128            // are potentially more method calls, but the advantage is that 
129            // the algorithm is better in locating relevant concept in the 
130            // subsumption hierarchy (otherwise, if the most general concept
131            // is not promising, it may never get expanded)
132            private boolean forceRefinementLengthIncrease;
133            
134            // candidate reduction: using this mechanism we can simulate
135            // the divide&conquer approach in many ILP programs using a
136            // clause by clause search; after a period of time the candidate
137            // set is reduced to focus CPU time on the most promising concepts
138            private boolean useCandidateReduction = true;
139            private int candidatePostReductionSize = 30;
140    
141            // setting to true gracefully stops the algorithm
142            private boolean stop = false;
143            private boolean isRunning = false;
144    
145            // node from which algorithm has started
146            private ExampleBasedNode startNode;
147    
148            // solution protocol
149            private List<ExampleBasedNode> solutions = new LinkedList<ExampleBasedNode>();
150    
151            // used refinement operator and heuristic (exchangeable)
152            private RhoDRDown operator;
153            // private RefinementOperator operator;
154            // private ExampleBasedHeuristic heuristic;
155    
156            // specifies whether to compute and log benchmark information
157            private boolean computeBenchmarkInformation = false;
158    
159            // comparator used to maintain a stable ordering of nodes, i.e.
160            // an ordering which does not change during the run of the algorithm
161            private NodeComparatorStable nodeComparatorStable = new NodeComparatorStable();
162            // stable candidate set; it has no functional part in the algorithm,
163            // but is a list of the currently best concepts found;
164            // it is very important to use a concurrent set here as other threads will
165            // access it (usual iterating is likely to throw a ConcurrentModificationException)
166            private NavigableSet<ExampleBasedNode> candidatesStable = new ConcurrentSkipListSet<ExampleBasedNode>(
167                            nodeComparatorStable);
168            // evaluated descriptions
169    //      private EvaluatedDescriptionSet evaluatedDescriptions = new EvaluatedDescriptionSet(LearningAlgorithm.MAX_NR_OF_RESULTS);
170    
171            // comparator used to create ordered sets of concepts
172            private ConceptComparator conceptComparator = new ConceptComparator();
173            // comparator for evaluated descriptions
174            private EvaluatedDescriptionPosNegComparator edComparator = new EvaluatedDescriptionPosNegComparator();
175    
176            // utility variables
177            private DecimalFormat df = new DecimalFormat();
178    
179            // candidates for refinement (used for directly accessing
180            // nodes in the search tree)
181            private TreeSet<ExampleBasedNode> candidates;
182    
183            // new nodes found during a run of the algorithm
184            private List<ExampleBasedNode> newCandidates = new LinkedList<ExampleBasedNode>();
185    
186            // all concepts which have been evaluated as being proper refinements
187            private SortedSet<Description> properRefinements = new TreeSet<Description>(conceptComparator);
188    
189            // blacklists
190            private SortedSet<Description> tooWeakList = new TreeSet<Description>(conceptComparator);
191            private SortedSet<Description> overlyGeneralList = new TreeSet<Description>(conceptComparator);
192    
193            // set of expanded nodes (TODO: better explanation)
194            TreeSet<ExampleBasedNode> expandedNodes = new TreeSet<ExampleBasedNode>(nodeComparatorStable);
195    
196            // statistic variables
197            private int maxRecDepth = 0;
198            private int maxNrOfRefinements = 0;
199            private int maxNrOfChildren = 0;
200            private int redundantConcepts = 0;
201            private int propernessTestsReasoner = 0;
202            private int propernessTestsAvoidedByShortConceptConstruction = 0;
203            private int propernessTestsAvoidedByTooWeakList = 0;
204            private int conceptTestsTooWeakList = 0;
205            private int conceptTestsOverlyGeneralList = 0;
206            private int conceptTestsReasoner = 0;
207    
208            // time variables
209            private long runtime;
210            private long algorithmStartTime;
211            private long propernessCalcTimeNs = 0;
212            private long propernessCalcReasoningTimeNs = 0;
213            private long childConceptsDeletionTimeNs = 0;
214            private long refinementCalcTimeNs = 0;
215            private long redundancyCheckTimeNs = 0;
216            private long evaluateSetCreationTimeNs = 0;
217            private long improperConceptsRemovalTimeNs = 0;
218    
219            // prefixes
220            private String baseURI;
221            private Map<String, String> prefixes;
222    
223            public ROLearner2(
224                            ROLComponent2Configurator configurator,
225                            LearningProblem learningProblem,
226                            ReasonerComponent rs,
227                            RefinementOperator operator,
228                            ExampleBasedHeuristic heuristic,
229                            Description startDescription,
230                            // Set<AtomicConcept> allowedConcepts,
231                            // Set<AtomicRole> allowedRoles,
232                            double noise, boolean writeSearchTree, boolean replaceSearchTree, File searchTreeFile,
233                            boolean useTooWeakList, boolean useOverlyGeneralList,
234                            boolean useShortConceptConstruction, boolean usePropernessChecks,
235                            int maxPosOnlyExpansion, int maxExecutionTimeInSeconds, int minExecutionTimeInSeconds,
236                            int guaranteeXgoodDescriptions, int maxClassDescriptionTests, boolean forceRefinementLengthIncrease) {
237    
238                    
239                            PosNegLP lp = (PosNegLP) learningProblem;
240                            this.learningProblem = lp;
241                            positiveExamples = lp.getPositiveExamples();
242                            negativeExamples = lp.getNegativeExamples();
243                            nrOfPositiveExamples = positiveExamples.size();
244                            nrOfNegativeExamples = negativeExamples.size();
245    
246                            // System.out.println(nrOfPositiveExamples);
247                            // System.out.println(nrOfNegativeExamples);
248                            // System.exit(0);
249    
250                    this.configurator = configurator;
251                    nrOfExamples = nrOfPositiveExamples + nrOfNegativeExamples;
252                    this.rs = rs;
253                    this.operator = (RhoDRDown) operator;
254                    this.startDescription = startDescription;
255                    // initialise candidate set with heuristic as ordering
256                    candidates = new TreeSet<ExampleBasedNode>(heuristic);
257                    this.noise = noise;
258                    this.writeSearchTree = writeSearchTree;
259                    this.replaceSearchTree = replaceSearchTree;
260                    this.searchTreeFile = searchTreeFile;
261                    this.useTooWeakList = useTooWeakList;
262                    this.useOverlyGeneralList = useOverlyGeneralList;
263                    this.useShortConceptConstruction = useShortConceptConstruction;
264                    this.usePropernessChecks = usePropernessChecks;
265                    baseURI = rs.getBaseURI();
266                    prefixes = rs.getPrefixes();
267                    this.maxPosOnlyExpansion = maxPosOnlyExpansion;
268                    this.maxExecutionTimeInSeconds = maxExecutionTimeInSeconds;
269                    this.minExecutionTimeInSeconds = minExecutionTimeInSeconds;
270                    this.guaranteeXgoodDescriptions = guaranteeXgoodDescriptions;
271                    this.maxClassDescriptionTests = maxClassDescriptionTests;
272                    this.forceRefinementLengthIncrease = forceRefinementLengthIncrease;
273    
274                    // logger.setLevel(Level.DEBUG);
275            }
276    
277            public void start() {
278                    stop = false;
279                    isRunning = true;
280                    runtime = System.currentTimeMillis();
281                    
282                    // reset values (algorithms may be started several times)
283                    candidates.clear();
284                    candidatesStable.clear();
285                    newCandidates.clear();
286                    solutions.clear();
287                    maxExecutionTimeAlreadyReached = false;
288                    minExecutionTimeAlreadyReached = false;
289                    guaranteeXgoodAlreadyReached = false;           
290                    propernessTestsReasoner = 0;
291                    propernessTestsAvoidedByShortConceptConstruction = 0;
292                    propernessTestsAvoidedByTooWeakList = 0;
293                    conceptTestsTooWeakList = 0;
294                    conceptTestsOverlyGeneralList = 0;
295                    propernessCalcTimeNs = 0;
296                    propernessCalcReasoningTimeNs = 0;
297                    childConceptsDeletionTimeNs = 0;
298                    refinementCalcTimeNs = 0;
299                    redundancyCheckTimeNs = 0;
300                    evaluateSetCreationTimeNs = 0;
301                    improperConceptsRemovalTimeNs = 0;
302                    
303                    Monitor totalLearningTime = JamonMonitorLogger.getTimeMonitor(ROLComponent2.class, "totalLearningTime")
304                                    .start();
305                    // TODO: write a JUnit test for this problem (long-lasting or infinite
306                    // loops because
307                    // redundant children of a node are called recursively when a node is
308                    // extended
309                    // twice)
310                    /*
311                     * // String conceptStr =
312                     * "(\"http://dl-learner.org/carcinogenesis#Compound\" AND (>= 2
313                     * \"http://dl-learner.org/carcinogenesis#hasStructure\".\"http://dl-learner.org/carcinogenesis#Ar_halide\"
314                     * OR ((\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS
315                     * TRUE) AND >= 5 \"http://dl-learner.org/carcinogenesis#hasBond\".
316                     * TOP)))"; // String conceptStr =
317                     * "(\"http://dl-learner.org/carcinogenesis#Compound\" AND
318                     * ((\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS TRUE)
319                     * AND (\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS
320                     * TRUE)))"; String conceptStr =
321                     * "(\"http://dl-learner.org/carcinogenesis#Compound\" AND (>= 3
322                     * \"http://dl-learner.org/carcinogenesis#hasStructure\".\"http://dl-learner.org/carcinogenesis#Halide\"
323                     * OR ((\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS
324                     * TRUE) AND ALL
325                     * \"http://dl-learner.org/carcinogenesis#hasAtom\".TOP)))"; String
326                     * conceptStr2 = "(\"http://dl-learner.org/carcinogenesis#Compound\" AND
327                     * (>= 4
328                     * \"http://dl-learner.org/carcinogenesis#hasStructure\".\"http://dl-learner.org/carcinogenesis#Halide\"
329                     * OR ((\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS
330                     * TRUE) AND ALL
331                     * \"http://dl-learner.org/carcinogenesis#hasAtom\".TOP)))"; try {
332                     * NamedClass struc = new
333                     * NamedClass("http://dl-learner.org/carcinogenesis#Compound");
334                     * Description d = KBParser.parseConcept(conceptStr); Description d2 =
335                     * KBParser.parseConcept(conceptStr2); // SortedSet<Description> ds =
336                     * (SortedSet<Description>) operator.refine(d,15,null,struc); //
337                     * System.out.println(ds);
338                     *  // System.out.println(RhoDRDown.checkIntersection((Intersection)d));
339                     * 
340                     * 
341                     * Set<Individual> coveredNegatives = rs.instanceCheck(d,
342                     * learningProblem.getNegativeExamples()); Set<Individual>
343                     * coveredPositives = rs.instanceCheck(d,
344                     * learningProblem.getPositiveExamples()); ExampleBasedNode ebn = new
345                     * ExampleBasedNode(d); ebn.setCoveredExamples(coveredPositives,
346                     * coveredNegatives);
347                     * 
348                     * properRefinements.add(d2); extendNodeProper(ebn,13);
349                     * extendNodeProper(ebn,14); for(Description refinement:
350                     * ebn.getChildConcepts()) System.out.println("refinement: " +
351                     * refinement);
352                     *  // Individual i = new
353                     * Individual("http://dl-learner.org/carcinogenesis#d101"); //
354                     * for(Individual i : learningProblem.getPositiveExamples()) //
355                     * rs.instanceCheck(ds.last(), i);
356                     * 
357                     * System.out.println("finished"); } catch (ParseException e) { // TODO
358                     * Auto-generated catch block e.printStackTrace(); } System.exit(0);
359                     */
360    
361                    // calculate quality threshold required for a solution
362                    allowedMisclassifications = (int) Math.round(noise * nrOfExamples);
363    
364                    // start search with start class
365                    if (startDescription == null) {
366                            startNode = new ExampleBasedNode(configurator, Thing.instance);
367                            startNode.setCoveredExamples(positiveExamples, negativeExamples);
368                    } else {
369                            startNode = new ExampleBasedNode(configurator, startDescription);
370                            Set<Individual> coveredNegatives = rs.hasType(startDescription, negativeExamples);
371                            Set<Individual> coveredPositives = rs.hasType(startDescription, positiveExamples);
372                            startNode.setCoveredExamples(coveredPositives, coveredNegatives);
373                    }
374    
375                    candidates.add(startNode);
376                    candidatesStable.add(startNode);
377    
378                    ExampleBasedNode bestNode = startNode;
379                    ExampleBasedNode bestNodeStable = startNode;
380    
381                    logger.info("starting top down refinement with: " + startNode.getConcept().toManchesterSyntaxString(baseURI, prefixes) + " (" + df.format(100*startNode.getAccuracy(nrOfPositiveExamples, nrOfNegativeExamples)) + "% accuracy)");
382                    
383                    int loop = 0;
384    
385                    algorithmStartTime = System.nanoTime();
386                    long lastPrintTime = 0;
387                    long lastTreeTraversalTime = System.nanoTime();
388                    long lastReductionTime = System.nanoTime();
389                    // try a traversal after x seconds
390                    long traversalInterval = 300l * 1000000000l;
391                    long reductionInterval = 300l * 1000000000l;
392                    long currentTime;
393                    
394                    while (!isTerminationCriteriaReached()) {
395                    //while ((!solutionFound || !configurator.getTerminateOnNoiseReached() ) && !stop) {
396    
397                            // print statistics at most once a second
398                            currentTime = System.nanoTime();
399                            if (currentTime - lastPrintTime > 3000000000l) {
400                                    printStatistics(false);
401                                    lastPrintTime = currentTime;
402                                    logger.debug("--- loop " + loop + " started ---");
403    //                              logger.info("used memory: " + (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/1024);
404    //                              logger.info("test: " + ExampleBasedNode.exampleMemoryCounter/1024);
405                            }
406    
407                            // traverse the current search tree to find a solution
408                            if (useTreeTraversal && (currentTime - lastTreeTraversalTime > traversalInterval)) {
409                                    traverseTree();
410                                    lastTreeTraversalTime = System.nanoTime();
411                            }
412    
413                            // reduce candidates to focus on promising concepts
414                            if (useCandidateReduction && (currentTime - lastReductionTime > reductionInterval)) {
415                                    reduceCandidates();
416                                    lastReductionTime = System.nanoTime();
417                                    // Logger.getRootLogger().setLevel(Level.TRACE);
418                            }
419    
420                            // System.out.println("next expanded: " +
421                            // candidates.last().getShortDescription(nrOfPositiveExamples,
422                            // nrOfNegativeExamples, baseURI));
423    
424                            // we record when a more accurate node is found and log it
425                            if (bestNodeStable.getCovPosMinusCovNeg() < candidatesStable.last()
426                                            .getCovPosMinusCovNeg()) {
427                                    String acc = new DecimalFormat( ".00%" ).format((candidatesStable.last().getAccuracy(nrOfPositiveExamples, nrOfNegativeExamples)));
428                                    // no handling needed, it will just look ugly in the output
429                                    logger.info("more accurate ("+acc+") class expression found: " + candidatesStable.last().getConcept().toManchesterSyntaxString(baseURI, prefixes));
430                                    bestNodeStable = candidatesStable.last();
431                            }
432    
433                            // chose best node according to heuristics
434                            bestNode = candidates.last();
435                            // extend best node
436                            newCandidates.clear();
437                            // best node is removed temporarily, because extending it can
438                            // change its evaluation
439                            candidates.remove(bestNode);
440                            extendNodeProper(bestNode, bestNode.getHorizontalExpansion() + 1);
441                            candidates.add(bestNode);
442                            // newCandidates has been filled during node expansion
443                            candidates.addAll(newCandidates);
444                            candidatesStable.addAll(newCandidates);
445    
446                            // System.out.println("done");
447    
448                            if (writeSearchTree) {
449                                    // String treeString = "";
450                                    String treeString = "best node: " + bestNode + "\n";
451                                    if (expandedNodes.size() > 1) {
452                                            treeString += "all expanded nodes:\n";
453                                            for (ExampleBasedNode n : expandedNodes) {
454                                                    treeString += "   " + n + "\n";
455                                            }
456                                    }
457                                    expandedNodes.clear();
458                                    treeString += startNode.getTreeString(nrOfPositiveExamples, nrOfNegativeExamples,
459                                                    baseURI);
460                                    treeString += "\n";
461    
462                                    if (replaceSearchTree)
463                                            Files.createFile(searchTreeFile, treeString);
464                                    else
465                                            Files.appendFile(searchTreeFile, treeString);
466                            }
467    
468                            // special situation for positive only learning: the expanded node
469                            // can become a solution (see explanations
470                            // for maxPosOnlyExpansion above)
471                            
472                            // DEPRECATED CODE
473                            if (false
474                                            && bestNode.isPosOnlyCandidate()
475                                            && (bestNode.getHorizontalExpansion() - bestNode.getConcept().getLength() >= maxPosOnlyExpansion)) {
476    
477                                    boolean solution = checkSubtreePosOnly(bestNode);
478    
479                                    if (solution) {
480                                            solutions.add(bestNode);
481                                            ExampleBasedNode bestChild = bestNode.getChildren().last();
482                                            System.out.println("solution: " + bestNode.getConcept());
483                                            System.out.println("maxPosOnlyExpansion: " + maxPosOnlyExpansion);
484                                            System.out.println("best child of this node: " + bestChild);
485                                            if(bestNode.getChildConcepts().size()<100) {
486                                                    System.out.println(bestNode.getChildConcepts());
487                                            }
488                                            System.out.println("TODO: needs to be integrated with other stopping criteria");
489                                            System.out
490                                                            .println("You tried to use this algorithm for positive only learning, which is not recommended (yet).");
491                                            // System.out.println("Exiting.");
492                                            // System.exit(0);
493                                    } else {
494                                            // tag as non-candidate so we do not need to search again
495                                            bestNode.setPosOnlyCandidate(false);
496                                    }
497    
498                            }
499    
500                            // Anzahl Schleifendurchläufe
501                            loop++;
502                    }// end while
503    
504                    if (solutions.size()>0) {
505                    //if (solutionFound) {
506                            int solutionLimit = 20;
507                            // we do not need to print the best node if we display the top 20 solutions below anyway
508    //                      logger.info("best node "
509    //                                      + candidatesStable.last().getShortDescription(nrOfPositiveExamples,
510    //                                                      nrOfNegativeExamples, baseURI));
511                            logger.info("solutions (at most " + solutionLimit + " are shown):");
512                            int show = 1;
513                            String manchester = "MANCHESTER:\n";
514                            String kbSyntax = "KBSyntax:\n";
515                            for (ExampleBasedNode c : solutions) {
516                                    logger.info(show + ": " + c.getConcept().toManchesterSyntaxString(baseURI, prefixes) + " (accuracy " + df.format(100*c.getAccuracy(nrOfPositiveExamples, nrOfNegativeExamples)) + "%, length " + c.getConcept().getLength()
517                                                    + ", depth " + c.getConcept().getDepth() + ")");
518    //                              manchester += show + ": " + c.toManchesterSyntaxString(baseURI, prefixes) + "\n";
519                                    kbSyntax += show + ": " + c.getConcept().toKBSyntaxString() + "\n";
520                                    if (show >= solutionLimit) {
521                                            break;
522                                    }
523                                    show++;
524                            }
525                            logger.debug(manchester);
526                            logger.debug(kbSyntax);
527                    }
528    
529                    logger.debug("size of candidate set: " + candidates.size());
530                    boolean showOrderedSolutions = false;
531                    printBestSolutions(20, showOrderedSolutions);
532    
533                    printStatistics(true);
534    
535                    int conceptTests = conceptTestsReasoner + conceptTestsTooWeakList + conceptTestsOverlyGeneralList;
536                    if (stop) {
537                            logger.info("Algorithm stopped ("+conceptTests+" descriptions tested).\n");
538                    } else {
539                            logger.info("Algorithm terminated succesfully ("+conceptTests+" descriptions tested).\n");
540                    }               
541    
542                    totalLearningTime.stop();
543                    isRunning = false;
544            }
545    
546            // we apply the operator recursively until all proper refinements up
547            // to the maxmimum length are reached
548            private void extendNodeProper(ExampleBasedNode node, int maxLength) {
549                    long propCalcNsStart = System.nanoTime();
550    
551                    if (writeSearchTree)
552                            expandedNodes.add(node);
553    
554                    if (node.getChildren().size() > maxNrOfChildren)
555                            maxNrOfChildren = node.getChildren().size();
556    
557                    extendNodeProper(node, node.getConcept(), maxLength, 0);
558                    node.setHorizontalExpansion(maxLength);
559    
560                    propernessCalcTimeNs += (System.nanoTime() - propCalcNsStart);
561            }
562    
563            // for all refinements of concept up to max length, we check whether they
564            // are properr
565            // and call the method recursively if not
566            // recDepth is used only for statistics
567            private void extendNodeProper(ExampleBasedNode node, Description concept, int maxLength,
568                            int recDepth) {
569    
570    //              System.out.println("node: " + node);
571    //              System.out.println("concept: " + concept);
572                    
573                    // do not execute methods if algorithm has been stopped (this means that
574                    // the algorithm
575                    // will terminate without further reasoning queries)
576                    if (stop)
577                            return;
578    
579                    if (recDepth > maxRecDepth)
580                            maxRecDepth = recDepth;
581    
582                    // compute refinements => we must not delete refinements with low
583                    // horizontal expansion,
584                    // because they are used in recursive calls of this method later on
585                    long refinementCalcTimeNsStart = System.nanoTime();
586                    Set<Description> refinements = operator.refine(concept, maxLength, null);
587                    refinementCalcTimeNs += System.nanoTime() - refinementCalcTimeNsStart;
588    
589                    if (refinements.size() > maxNrOfRefinements)
590                            maxNrOfRefinements = refinements.size();
591    
592                    long childConceptsDeletionTimeNsStart = System.nanoTime();
593                    // entferne aus den refinements alle Konzepte, die bereits Kinder des
594                    // Knotens sind
595                    // for(Node n : node.getChildren()) {
596                    // refinements.remove(n.getConcept());
597                    // }
598    
599                    // das ist viel schneller, allerdings bekommt man ein anderes candidate
600                    // set(??)
601                    refinements.removeAll(node.getChildConcepts());
602    
603                    childConceptsDeletionTimeNs += System.nanoTime() - childConceptsDeletionTimeNsStart;
604    
605                    // if(refinements.size()<30) {
606                    // // System.out.println("refinements: " + refinements);
607                    // for(Description refinement: refinements)
608                    // System.out.println("refinement: " + refinement);
609                    // }
610    
611                    long evaluateSetCreationTimeNsStart = System.nanoTime();
612    
613                    // alle Konzepte, die länger als horizontal expansion sind, müssen
614                    // ausgewertet
615                    // werden
616                    TreeSet<Description> toEvaluateConcepts = new TreeSet<Description>(conceptComparator);
617                    Iterator<Description> it = refinements.iterator();
618                    // for(Concept refinement : refinements) {
619                    while (it.hasNext()) {
620    
621                            Description refinement = it.next();
622                            if (refinement.getLength() > node.getHorizontalExpansion()) {
623                                    // sagt aus, ob festgestellt wurde, ob refinement proper ist
624                                    // (sagt nicht aus, dass das refinement proper ist!)
625                                    boolean propernessDetected = false;
626    
627                                    // 1. short concept construction
628                                    if (useShortConceptConstruction) {
629                                            // kurzes Konzept konstruieren
630                                            Description shortConcept = ConceptTransformation.getShortConcept(refinement,
631                                                            conceptComparator);
632                                            int n = conceptComparator.compare(shortConcept, concept);
633    
634                                            // Konzepte sind gleich also Refinement improper
635                                            if (n == 0) {
636                                                    propernessTestsAvoidedByShortConceptConstruction++;
637                                                    propernessDetected = true;
638    
639    //                                               System.out.println("refinement " + refinement + 
640    //                                                               " can be shortened");
641    //                                               System.exit(0);
642                                            }
643                                    }
644    
645                                    // 2. too weak test
646                                    if (!propernessDetected && useTooWeakList) {
647                                            if (refinement instanceof Intersection) {
648                                                    boolean tooWeakElement = containsTooWeakElement((Intersection) refinement);
649                                                    if (tooWeakElement) {
650                                                            propernessTestsAvoidedByTooWeakList++;
651                                                            conceptTestsTooWeakList++;
652                                                            propernessDetected = true;
653                                                            // tooWeakList.add(refinement);
654    
655                                                            // Knoten wird direkt erzeugt (es ist buganfällig
656                                                            // zwei Plätze
657                                                            // zu haben, an denen Knoten erzeugt werden, aber es
658                                                            // erscheint
659                                                            // hier am sinnvollsten)
660                                                            properRefinements.add(refinement);
661                                                            tooWeakList.add(refinement);
662    
663                                                            ExampleBasedNode newNode = new ExampleBasedNode(configurator, refinement);
664                                                            newNode.setHorizontalExpansion(refinement.getLength() - 1);
665                                                            newNode.setTooWeak(true);
666                                                            newNode
667                                                                            .setQualityEvaluationMethod(ExampleBasedNode.QualityEvaluationMethod.TOO_WEAK_LIST);
668                                                            node.addChild(newNode);
669    
670                                                            // Refinement muss gelöscht werden, da es proper ist
671                                                            it.remove();
672                                                    }
673                                            }
674                                    }
675    
676                                    // properness konnte nicht vorher ermittelt werden
677                                    if (!propernessDetected) {
678                                            toEvaluateConcepts.add(refinement);
679                                            // if(!res) {
680                                            // System.out.println("already in: " + refinement);
681                                            // Comparator comp = toEvaluateConcepts.comparator();
682                                            // for(Description d : toEvaluateConcepts) {
683                                            // if(comp.compare(d,refinement)==0)
684                                            // System.out.println("see: " + d);
685                                            // }
686                                            // }
687                                    }
688    
689                            }
690    
691                            // System.out.println("handled " + refinement + " length: " +
692                            // refinement.getLength() + " (new size: " +
693                            // toEvaluateConcepts.size() + ")");
694    
695                    }
696                    evaluateSetCreationTimeNs += System.nanoTime() - evaluateSetCreationTimeNsStart;
697    
698                    // System.out.println("intermediate 1 " +
699                    // node.getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples,
700                    // baseURI));
701    
702                    // System.out.println(toEvaluateConcepts.size());
703    
704                    Set<Description> improperConcepts = null;
705                    if (toEvaluateConcepts.size() > 0) {
706                            // Test aller Konzepte auf properness (mit DIG in nur einer Anfrage)
707                            if (usePropernessChecks) {
708                                    long propCalcReasoningStart = System.nanoTime();
709                                    improperConcepts = rs.isSuperClassOf(toEvaluateConcepts, concept);
710                                    propernessTestsReasoner += toEvaluateConcepts.size();
711                                    // boolean isProper =
712                                    // !learningProblem.getReasonerComponent().subsumes(refinement,
713                                    // concept);
714                                    propernessCalcReasoningTimeNs += System.nanoTime() - propCalcReasoningStart;
715                            }
716                    }
717    
718                    // if(toEvaluateConcepts.size()<10)
719                    // System.out.println("to evaluate: " + toEvaluateConcepts);
720                    // else
721                    // System.out.println("to evaluate: more than 10");
722    
723                    long improperConceptsRemovalTimeNsStart = System.nanoTime();
724                    // die improper Konzepte werden von den auszuwertenden gelöscht, d.h.
725                    // alle proper concepts bleiben übrig (einfache Umbenennung)
726                    if (improperConcepts != null)
727                            toEvaluateConcepts.removeAll(improperConcepts);
728                    Set<Description> properConcepts = toEvaluateConcepts;
729                    // alle proper concepts von refinements löschen
730                    refinements.removeAll(properConcepts);
731                    improperConceptsRemovalTimeNs += System.nanoTime() - improperConceptsRemovalTimeNsStart;
732    
733                    // if(refinements.size()<10)
734                    // System.out.println("refinements: " + refinements);
735                    // else
736                    // System.out.println("refinements: more than 10");
737                    //              
738                    // System.out.println("improper concepts: " + improperConcepts);
739    
740                    for (Description refinement : properConcepts) {
741                            long redundancyCheckTimeNsStart = System.nanoTime();
742                            boolean nonRedundant = properRefinements.add(refinement);
743                            redundancyCheckTimeNs += System.nanoTime() - redundancyCheckTimeNsStart;
744    
745                            if (!nonRedundant)
746                                    redundantConcepts++;
747    
748                            // es wird nur ein neuer Knoten erzeugt, falls das Konzept nicht
749                            // schon existiert
750                            if (nonRedundant) {
751    
752                                    // newly created node
753                                    ExampleBasedNode newNode = new ExampleBasedNode(configurator, refinement);
754                                    // die -1 ist wichtig, da sonst keine gleich langen Refinements
755                                    // für den neuen Knoten erlaubt wären z.B. person => male
756                                    newNode.setHorizontalExpansion(refinement.getLength() - 1);
757    
758                                    boolean qualityKnown = false;
759                                    int quality = -2;
760    
761                                    // overly general list verwenden
762                                    if (useOverlyGeneralList && refinement instanceof Union) {
763                                            if (containsOverlyGeneralElement((Union) refinement)) {
764                                                    conceptTestsOverlyGeneralList++;
765                                                    // quality = getNumberOfNegatives();
766                                                    quality = nrOfNegativeExamples;
767                                                    qualityKnown = true;
768                                                    newNode
769                                                                    .setQualityEvaluationMethod(ExampleBasedNode.QualityEvaluationMethod.OVERLY_GENERAL_LIST);
770                                                    newNode.setCoveredExamples(positiveExamples, negativeExamples);
771                                            }
772    
773                                    }
774    
775                                    // Qualität des Knotens auswerten
776                                    if (!qualityKnown) {
777                                            long propCalcReasoningStart2 = System.nanoTime();
778                                            conceptTestsReasoner++;
779    
780                                            // quality = coveredNegativesOrTooWeak(refinement);
781    
782                                            // determine individuals which have not been covered yet
783                                            // (more efficient than full retrieval)
784                                            Set<Individual> coveredPositives = node.getCoveredPositives();
785                                            Set<Individual> newlyCoveredPositives = new HashSet<Individual>();
786    
787                                            // calculate how many pos. examples are not covered by the
788                                            // parent node of the refinement
789                                            int misclassifiedPositives = nrOfPositiveExamples - coveredPositives.size();
790    
791                                            // iterate through all covered examples (examples which are
792                                            // not
793                                            // covered do not need to be tested, because they remain
794                                            // uncovered);
795                                            // DIG will be slow if we send each reasoner request
796                                            // individually
797                                            // (however if we send everything in one request, too many
798                                            // instance checks
799                                            // are performed => rely on fast instance checker)
800                                            for (Individual i : coveredPositives) {
801                                                    // TODO: move code to a separate function
802                                                    if (quality != -1) {
803                                                            boolean covered = rs.hasType(refinement, i);
804                                                            if (!covered)
805                                                                    misclassifiedPositives++;
806                                                            else
807                                                                    newlyCoveredPositives.add(i);
808    
809                                                            if (misclassifiedPositives > allowedMisclassifications)
810                                                                    quality = -1;
811    
812                                                    }
813                                            }
814    
815                                            Set<Individual> newlyCoveredNegatives = null;
816                                            if (quality != -1) {
817                                                    Set<Individual> coveredNegatives = node.getCoveredNegatives();
818                                                    newlyCoveredNegatives = new HashSet<Individual>();
819    
820                                                    for (Individual i : coveredNegatives) {
821                                                            boolean covered = rs.hasType(refinement, i);
822                                                            if (covered)
823                                                                    newlyCoveredNegatives.add(i);
824                                                    }
825                                            }
826    
827                                            propernessCalcReasoningTimeNs += System.nanoTime() - propCalcReasoningStart2;
828                                            newNode
829                                                            .setQualityEvaluationMethod(ExampleBasedNode.QualityEvaluationMethod.REASONER);
830                                            if (quality != -1) {
831                                                    // quality is the number of misclassifications (if it is
832                                                    // not too weak)
833                                                    quality = (nrOfPositiveExamples - newlyCoveredPositives.size())
834                                                                    + newlyCoveredNegatives.size();
835                                                    newNode.setCoveredExamples(newlyCoveredPositives, newlyCoveredNegatives);
836                                            }
837    
838                                    }
839    
840                                    if (quality == -1) {
841                                            newNode.setTooWeak(true);
842                                            // Blacklist für too weak concepts
843                                            tooWeakList.add(refinement);
844                                    } else {
845                                            // Lösung gefunden
846                                            if (quality >= 0 && quality <= allowedMisclassifications) {
847                                                    solutions.add(newNode);
848                                            }
849    
850                                            newCandidates.add(newNode);
851    
852                                            // we need to make sure that all positives are covered
853                                            // before adding something to the overly general list
854                                            if ((newNode.getCoveredPositives().size() == nrOfPositiveExamples)
855                                                            && quality == nrOfNegativeExamples)
856                                                    overlyGeneralList.add(refinement);
857    
858                                    }
859    
860                                    // System.out.println(newNode.getConcept() + " " + quality);
861                                    node.addChild(newNode);
862                                    
863                                    // it is often useful to continue expanding until a longer node is
864                                    // reached (to replace atomic concepts with more specific ones)
865                                    if(forceRefinementLengthIncrease && !newNode.isTooWeak()) {
866                                            // extend node again if its concept has the same length 
867                                            if(node.getConcept().getLength() == newNode.getConcept().getLength()) {
868                                                    extendNodeProper(newNode, refinement, maxLength, recDepth + 1);
869                                            }
870                                    }                               
871                                    
872                            }
873                    }
874    
875                    // es sind jetzt noch alle Konzepte übrig, die improper refinements sind
876                    // auf jedem dieser Konzepte wird die Funktion erneut aufgerufen, da
877                    // sich
878                    // proper refinements ergeben könnten
879                    for (Description refinement : refinements) {
880                            // for(int i=0; i<=recDepth; i++)
881                            // System.out.print(" ");
882                            // System.out.println("call: " + refinement + " [maxLength " +
883                            // maxLength + ", rec depth " + recDepth + "]");
884    
885                            // check for redundancy (otherwise we may run into very
886                            // time-intensive loops,
887                            // see planned JUnit test case $x)
888    
889                            long redundancyCheckTimeNsStart = System.nanoTime();
890                            boolean redundant = properRefinements.contains(refinement);
891                            redundancyCheckTimeNs += System.nanoTime() - redundancyCheckTimeNsStart;
892    
893                            if (!redundant) {
894    //                              System.out.println("node " + node);
895    //                              System.out.println("refinement " + refinement);
896                                    
897                                    extendNodeProper(node, refinement, maxLength, recDepth + 1);
898                            }
899                            
900                            // for(int i=0; i<=recDepth; i++)
901                            // System.out.print(" ");
902                            // System.out.println("finished: " + refinement + " [maxLength " +
903                            // maxLength + "]");
904                            // System.exit(0);
905                    }
906            }
907    
908            private void printStatistics(boolean finalStats) {
909                    // TODO: viele Tests haben ergeben, dass man nie 100% mit der
910                    // Zeitmessung abdecken
911                    // kann (zum einen weil Stringausgabe verzögert erfolgt und zum anderen
912                    // weil
913                    // Funktionsaufrufe, garbage collection, Zeitmessung selbst auch Zeit
914                    // benötigt);
915                    // es empfiehlt sich folgendes Vorgehen:
916                    // - Messung der Zeit eines Loops im Algorithmus
917                    // - Messung der Zeit für alle node extensions innerhalb eines Loops
918                    // => als Normalisierungsbasis empfehlen sich dann die Loopzeit statt
919                    // Algorithmuslaufzeit
920                    // ... momentan kann es aber auch erstmal so lassen
921    
922                    long algorithmRuntime = System.nanoTime() - algorithmStartTime;
923    
924                    if (!finalStats) {
925    
926                            ExampleBasedNode bestNode = candidatesStable.last();
927                            // double accuracy = 100 * ((bestNode.getCoveredPositives().size()
928                            // + nrOfNegativeExamples -
929                            // bestNode.getCoveredNegatives().size())/(double)nrOfExamples);
930                            // Refinementoperator auf Konzept anwenden
931                            // String bestNodeString = "currently best node: " + bestNode + "
932                            // accuracy: " + df.format(accuracy) + "%";
933                            logger.debug("start node: "
934                                            + startNode.getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples,
935                                                            baseURI));
936                            String bestNodeString = "currently best node: "
937                                            + bestNode.getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples,
938                                                            baseURI);
939                            String bestNodeStringKBSyntax = "currently best node KBSyntax: "
940                                            + bestNode.getConcept().toKBSyntaxString();
941    
942                            // searchTree += bestNodeString + "\n";
943                            logger.debug(bestNodeString);
944                            logger.trace(bestNode.getStats(nrOfPositiveExamples, nrOfNegativeExamples));
945                            logger.debug(bestNodeStringKBSyntax);
946                            if (bestNode.getCoveredNegatives().size() <= 5)
947                                    logger.trace("covered negs: " + bestNode.getCoveredNegatives());
948                            String expandedNodeString = "next expanded node: "
949                                            + candidates.last().getShortDescription(nrOfPositiveExamples,
950                                                            nrOfNegativeExamples, baseURI);
951                            // searchTree += expandedNodeString + "\n";
952                            logger.debug(expandedNodeString);
953                            logger.debug("algorithm runtime " + Helper.prettyPrintNanoSeconds(algorithmRuntime));
954                            logger.debug("size of candidate set: " + candidates.size());
955                            // System.out.println("properness max recursion depth: " +
956                            // maxRecDepth);
957                            // System.out.println("max. number of one-step refinements: " +
958                            // maxNrOfRefinements);
959                            // System.out.println("max. number of children of a node: " +
960                            // maxNrOfChildren);
961                            logger.debug("subsumption time: "
962                                            + Helper.prettyPrintNanoSeconds(rs.getSubsumptionReasoningTimeNs()));
963                            logger.debug("instance check time: "
964                                            + Helper.prettyPrintNanoSeconds(rs.getInstanceCheckReasoningTimeNs()));
965                            logger.debug("retrieval time: "
966                                            + Helper.prettyPrintNanoSeconds(rs.getRetrievalReasoningTimeNs()));
967                    }
968    
969                    if (computeBenchmarkInformation) {
970    
971                            long reasoningTime = rs.getOverallReasoningTimeNs();
972                            double reasoningPercentage = 100 * reasoningTime / (double) algorithmRuntime;
973                            long propWithoutReasoning = propernessCalcTimeNs - propernessCalcReasoningTimeNs;
974                            double propPercentage = 100 * propWithoutReasoning / (double) algorithmRuntime;
975                            double deletionPercentage = 100 * childConceptsDeletionTimeNs
976                                            / (double) algorithmRuntime;
977                            long subTime = rs.getSubsumptionReasoningTimeNs();
978                            double subPercentage = 100 * subTime / (double) algorithmRuntime;
979                            double refinementPercentage = 100 * refinementCalcTimeNs / (double) algorithmRuntime;
980                            double redundancyCheckPercentage = 100 * redundancyCheckTimeNs
981                                            / (double) algorithmRuntime;
982                            double evaluateSetCreationTimePercentage = 100 * evaluateSetCreationTimeNs
983                                            / (double) algorithmRuntime;
984                            double improperConceptsRemovalTimePercentage = 100 * improperConceptsRemovalTimeNs
985                                            / (double) algorithmRuntime;
986                            double mComputationTimePercentage = 100 * operator.mComputationTimeNs
987                                            / (double) algorithmRuntime;
988                            double topComputationTimePercentage = 100 * operator.topComputationTimeNs
989                                            / (double) algorithmRuntime;
990                            double cleanTimePercentage = 100 * ConceptTransformation.cleaningTimeNs
991                                            / (double) algorithmRuntime;
992                            double onnfTimePercentage = 100 * ConceptTransformation.onnfTimeNs
993                                            / (double) algorithmRuntime;
994                            double shorteningTimePercentage = 100 * ConceptTransformation.shorteningTimeNs
995                                            / (double) algorithmRuntime;
996    
997                            logger.debug("reasoning percentage: " + df.format(reasoningPercentage) + "%");
998                            logger.debug("   subsumption check time: " + df.format(subPercentage) + "%");
999                            logger.debug("proper calculation percentage (wo. reasoning): "
1000                                            + df.format(propPercentage) + "%");
1001                            logger.debug("   deletion time percentage: " + df.format(deletionPercentage) + "%");
1002                            logger.debug("   refinement calculation percentage: " + df.format(refinementPercentage)
1003                                            + "%");
1004                            logger.debug("      m calculation percentage: " + df.format(mComputationTimePercentage)
1005                                            + "%");
1006                            logger.debug("      top calculation percentage: "
1007                                            + df.format(topComputationTimePercentage) + "%");
1008                            logger.debug("   redundancy check percentage: " + df.format(redundancyCheckPercentage)
1009                                            + "%");
1010                            logger.debug("   evaluate set creation time percentage: "
1011                                            + df.format(evaluateSetCreationTimePercentage) + "%");
1012                            logger.debug("   improper concepts removal time percentage: "
1013                                            + df.format(improperConceptsRemovalTimePercentage) + "%");
1014                            logger.debug("clean time percentage: " + df.format(cleanTimePercentage) + "%");
1015                            logger.debug("onnf time percentage: " + df.format(onnfTimePercentage) + "%");
1016                            logger
1017                                            .debug("shortening time percentage: " + df.format(shorteningTimePercentage)
1018                                                            + "%");
1019                    }
1020    
1021                    logger.debug("properness tests (reasoner/short concept/too weak list): "
1022                                    + propernessTestsReasoner + "/" + propernessTestsAvoidedByShortConceptConstruction
1023                                    + "/" + propernessTestsAvoidedByTooWeakList);
1024                    logger
1025                                    .debug("concept tests (reasoner/too weak list/overly general list/redundant concepts): "
1026                                                    + conceptTestsReasoner
1027                                                    + "/"
1028                                                    + conceptTestsTooWeakList
1029                                                    + "/"
1030                                                    + conceptTestsOverlyGeneralList + "/" + redundantConcepts);
1031            }
1032    
1033            // @SuppressWarnings({"unused"})
1034            // private int coveredNegativesOrTooWeak(Description concept) {
1035            // if(posOnly)
1036            // return
1037            // posOnlyLearningProblem.coveredPseudoNegativeExamplesOrTooWeak(concept);
1038            // else
1039            // return learningProblem.coveredNegativeExamplesOrTooWeak(concept);
1040            // }
1041    
1042            // private int getNumberOfNegatives() {
1043            // if(posOnly)
1044            // return posOnlyLearningProblem.getPseudoNegatives().size();
1045            // else
1046            // return learningProblem.getNegativeExamples().size();
1047            // }
1048    
1049            private boolean containsTooWeakElement(Intersection mc) {
1050                    for (Description child : mc.getChildren()) {
1051                            if (tooWeakList.contains(child))
1052                                    return true;
1053                    }
1054                    return false;
1055            }
1056    
1057            private boolean containsOverlyGeneralElement(Union md) {
1058                    for (Description child : md.getChildren()) {
1059                            if (overlyGeneralList.contains(child))
1060                                    return true;
1061                    }
1062                    return false;
1063            }
1064    
1065            // TODO: investigate whether it makes sense not to store all individuals
1066            // in the nodes, but instead perform instance checks in tree traversal
1067            // (it is only run in large intervals so it shouldn't be too expensive)
1068            private void traverseTree() {
1069                    // ExampleBasedNode startNode = candidatesStable.last();
1070                    ExampleBasedNode startNode = findBestTraversalStartNode();
1071                    Description currentDescription = startNode.getConcept();
1072                    Set<Individual> currentCoveredPos = startNode.getCoveredPositives();
1073                    Set<Individual> currentCoveredNeg = startNode.getCoveredNegatives();
1074                    double currentAccuracy = startNode.getAccuracy(nrOfPositiveExamples, nrOfNegativeExamples);
1075                    int currentMisclassifications = nrOfPositiveExamples - currentCoveredPos.size()
1076                                    + currentCoveredNeg.size();
1077                    logger.debug("tree traversal start node "
1078                                    + startNode
1079                                                    .getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples, baseURI));
1080                    logger.debug("tree traversal start accuracy: " + currentAccuracy);
1081                    int i = 0;
1082                    // start from the most promising nodes
1083                    NavigableSet<ExampleBasedNode> reverseView = candidatesStable.descendingSet();
1084                    for (ExampleBasedNode currNode : reverseView) {
1085                            // compute covered positives and negatives
1086                            SortedSet<Individual> newCoveredPositives = new TreeSet<Individual>(currentCoveredPos);
1087                            newCoveredPositives.retainAll(currNode.getCoveredPositives());
1088                            SortedSet<Individual> newCoveredNegatives = new TreeSet<Individual>(currentCoveredNeg);
1089                            newCoveredNegatives.retainAll(currNode.getCoveredNegatives());
1090    
1091                            // compute the accuracy we would get by adding this node
1092                            double accuracy = (newCoveredPositives.size() + nrOfNegativeExamples - newCoveredNegatives
1093                                            .size())
1094                                            / (double) (nrOfPositiveExamples + nrOfNegativeExamples);
1095                            int misclassifications = nrOfPositiveExamples - newCoveredPositives.size()
1096                                            + newCoveredNegatives.size();
1097                            int misclassifiedPositives = nrOfPositiveExamples - newCoveredPositives.size();
1098    
1099                            int lostPositives = currentCoveredPos.size() - newCoveredPositives.size();
1100    
1101                            // TODO: maybe we should also consider a minimum improvement when
1102                            // adding something
1103                            // otherwise we could overfit
1104                            // we give double weith to lost positives, i.e. when one positive is
1105                            // lost at least
1106                            // two negatives need to be uncovered
1107                            boolean consider = (misclassifications + lostPositives < currentMisclassifications)
1108                                            && (misclassifiedPositives <= allowedMisclassifications);
1109                            // boolean consider = (misclassifications <
1110                            // currentMisclassifications) &&
1111                            // (misclassifiedPositives <= allowedMisclassifications);
1112    
1113                            // concept has been chosen, so construct it
1114                            if (consider) {
1115    
1116                                    // construct a new concept as intersection of both
1117                                    Intersection mc = new Intersection(currentDescription, currNode.getConcept());
1118    
1119                                    ConceptTransformation.cleanConceptNonRecursive(mc);
1120                                    ConceptTransformation.transformToOrderedNegationNormalFormNonRecursive(mc,
1121                                                    conceptComparator);
1122    
1123                                    // System.out.println("extended concept to: " + mc);
1124                                    logger.debug("misclassifications: " + misclassifications);
1125                                    logger.debug("misclassified positives: " + misclassifiedPositives);
1126                                    logger.debug("accuracy: " + accuracy);
1127    
1128                                    // update variables
1129                                    currentDescription = mc;
1130                                    currentCoveredPos = newCoveredPositives;
1131                                    currentCoveredNeg = newCoveredNegatives;
1132                                    currentMisclassifications = misclassifications;
1133                                    currentAccuracy = accuracy;
1134    
1135                                    if (accuracy > 1 - noise) {
1136                                            logger.info("traversal found " + mc);
1137                                            logger.info("accuracy: " + accuracy);
1138                                            System.exit(0);
1139                                    }
1140                            }
1141    
1142                            i++;
1143                            if (i == 1000)
1144                                    break;
1145                    }
1146    
1147            }
1148    
1149            // we look for a node covering many positives and hopefully
1150            // few negatives; we give a strong penalty on uncovered positives
1151            private ExampleBasedNode findBestTraversalStartNode() {
1152                    // 2 points for each covered pos + 1 point for each uncovered neg
1153                    int currScore = 0;
1154                    int i = 0;
1155                    ExampleBasedNode currNode = null;
1156                    NavigableSet<ExampleBasedNode> reverseView = candidatesStable.descendingSet();
1157                    for (ExampleBasedNode node : reverseView) {
1158                            int score = 2 * node.getCoveredPositives().size()
1159                                            + (nrOfNegativeExamples - node.getCoveredNegatives().size());
1160                            if (score > currScore) {
1161                                    currScore = score;
1162                                    currNode = node;
1163                            }
1164                            i++;
1165                            // limit search because stable candidate set can grow very large
1166                            if (i == 10000)
1167                                    break;
1168                    }
1169                    return currNode;
1170            }
1171    
1172            private void reduceCandidates() {
1173                    Iterator<ExampleBasedNode> it = candidatesStable.descendingIterator();
1174                    Set<ExampleBasedNode> promisingNodes = new HashSet<ExampleBasedNode>();
1175                    int i = 0;
1176                    while (it.hasNext() && promisingNodes.size() < candidatePostReductionSize) {
1177                            ExampleBasedNode node = it.next();
1178                            // System.out.println(node.getShortDescription(nrOfPositiveExamples,
1179                            // nrOfNegativeExamples, baseURI));
1180                            // first criterion: the considered node should have an accuracy gain
1181                            // over its parent
1182                            // (avoids to use only the most promising node + all its refinements
1183                            // with equal accuracy)
1184                            boolean hasAccuracyGain = (node.getParent() == null)
1185                                            || (node.getCoveredPositives().size() != node.getParent().getCoveredPositives()
1186                                                            .size())
1187                                            || (node.getCoveredNegatives().size() != node.getParent().getCoveredNegatives()
1188                                                            .size());
1189                            // second criterion: uncovered positives; it does not make much
1190                            // sense to pick nodes with
1191                            // low potential for reaching a solution (already at the limit of
1192                            // misclassified positives)
1193                            int misclassifiedPositives = nrOfPositiveExamples - node.getCoveredPositives().size();
1194                            boolean hasRefinementPotential = (misclassifiedPositives <= Math
1195                                            .floor(0.65d * allowedMisclassifications));
1196                            boolean keep = hasAccuracyGain && hasRefinementPotential;
1197                            if (keep) {
1198                                    promisingNodes.add(node);
1199                            }
1200                            i++;
1201                    }
1202                    candidates.retainAll(promisingNodes);
1203                    logger.debug("searched " + i + " nodes and picked the following promising descriptions:");
1204                    for (ExampleBasedNode node : promisingNodes)
1205                            logger.debug(node.getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples,
1206                                            baseURI));
1207            }
1208    
1209            /*
1210             * private Set<Individual> computeQuality(Description refinement, Set<Individual>
1211             * coveredPositives) { Set<Individual> ret = new TreeSet<Individual>();
1212             * int misclassifications; for(Individual i : coveredPositives) { boolean
1213             * covered = rs.instanceCheck(refinement, i); if(!covered)
1214             * misclassifications++; else ret.add(i);
1215             * 
1216             * if(misclassifications > allowedMisclassifications) return null; } }
1217             */
1218    
1219            public void stop() {
1220                    stop = true;
1221            }
1222    
1223            public Description getBestSolution() {
1224                    return candidatesStable.last().getConcept();
1225            }
1226    
1227            public List<Description> getCurrentlyBestDescriptions() {
1228                    List<Description> best = new LinkedList<Description>();
1229                    int i = 0;
1230                    int nrOfSolutions = 200;
1231                    for (ExampleBasedNode n : candidatesStable.descendingSet()) {
1232                            best.add(n.getConcept());
1233                            if (i == nrOfSolutions)
1234                                    return best;
1235                            i++;
1236                    }
1237                    return best;
1238            }
1239            
1240            public SortedSet<EvaluatedDescriptionPosNeg> getCurrentlyBestEvaluatedDescriptions() {
1241                    Iterator<ExampleBasedNode> it = candidatesStable.descendingIterator();
1242                    int count = 0;
1243                    SortedSet<EvaluatedDescriptionPosNeg> cbd = new TreeSet<EvaluatedDescriptionPosNeg>(edComparator);
1244                    while(it.hasNext()) {
1245                            ExampleBasedNode eb = it.next();
1246                            cbd.add(new EvaluatedDescriptionPosNeg(eb.getConcept(), getScore(eb.getConcept())));
1247                            // return a maximum of 200 elements (we need a maximum, because the
1248                            // candidate set can be very large)
1249                            if (count > 200)
1250                                    return cbd;
1251                            count++;
1252                    }
1253                    return cbd;
1254            }
1255    
1256            public void printBestSolutions(int nrOfSolutions, boolean showOrderedSolutions) {
1257                    // QUALITY: could be optimized
1258                    if (!logger.isTraceEnabled())
1259                            return;
1260                    // if(!logger.getLevel().toString().equalsIgnoreCase("TRACE"))return;
1261                    if (nrOfSolutions == 0)
1262                            nrOfSolutions = candidatesStable.size();
1263                    int i = 0;
1264                    for (ExampleBasedNode n : candidatesStable.descendingSet()) {
1265                            if (n.getAccuracy(nrOfPositiveExamples, nrOfNegativeExamples) < 1)
1266                                    break;
1267                            logger.trace("best: "
1268                                            + n.getShortDescription(nrOfPositiveExamples, nrOfNegativeExamples, baseURI));
1269                            if (i == nrOfSolutions)
1270                                    break;
1271                            i++;
1272                    }
1273    
1274                    if (showOrderedSolutions) {
1275                            logger.trace("ordered by generality (most special solutions first):");
1276                            SubsumptionComparator sc = new SubsumptionComparator(rs);
1277                            TreeSet<Description> solutionsOrderedBySubsumption = new TreeSet<Description>(sc);
1278    //                      solutionsOrderedBySubsumption.addAll(solutions);
1279                            for (Description d : solutionsOrderedBySubsumption)
1280                                    logger.trace("special: " + d);
1281                            throw new Error("implementation needs to be updated to show ordered solutions");                        
1282                    }
1283                    /*
1284                     * for (int j = 0; j < solutions.size(); j++) { Description d =
1285                     * solutions.get(j); logger.trace(d.toString()); }
1286                     */
1287    
1288            }
1289    
1290            public ScorePosNeg getSolutionScore() {
1291                    return (ScorePosNeg) learningProblem.computeScore(getBestSolution());
1292            }
1293    
1294            private ScorePosNeg getScore(Description d) {
1295                    return (ScorePosNeg) learningProblem.computeScore(d);
1296            }
1297    
1298            public ExampleBasedNode getStartNode() {
1299                    return startNode;
1300            }
1301    
1302            // returns true if there is any meaningful node in the subtree
1303            private boolean checkSubtreePosOnly(ExampleBasedNode node) {
1304                    for (ExampleBasedNode refinement : node.getChildren()) {
1305    
1306                            if (!node.isTooWeak()) {
1307                                    // refinement meaningful
1308                                    if (isPosOnlyRefinementMeaningful(node, refinement))
1309                                            return true;
1310    
1311                                    // subtree with refinement as root contains a meaningful node
1312                                    if (checkSubtreePosOnly(refinement))
1313                                            return true;
1314                            }
1315    
1316                    }
1317                    return false;
1318            }
1319    
1320            // returns whether the refinement is "meaningful", i.e. the refinement
1321            // actually represents a different concept
1322            // than its parent; this is needed to determine when a positive only
1323            // learning algorithm should stop (when a node
1324            // has been expaned x times without yielding any meaningful refinements, it
1325            // is considered a possible solution)
1326            private boolean isPosOnlyRefinementMeaningful(ExampleBasedNode node, ExampleBasedNode refinement) {
1327                    Description d1 = node.getConcept();
1328                    Description d2 = refinement.getConcept();
1329                    // check whether d2 can be shortened, e.g. male AND male => male
1330                    Description shortConcept = ConceptTransformation.getShortConcept(d2, conceptComparator);
1331                    if (conceptComparator.compare(d1, shortConcept) != 0)
1332                            return false;
1333                    return true;
1334            }
1335    
1336            
1337            
1338            /**
1339             * In this function it is calculated if the algorithm should stop.
1340             * This is not always depends whether an actual solution was found
1341             * The algorithm stops if:
1342             * 1. the object attribute stop is set to true (possibly by an outside source)
1343             * 2. the maximimum execution time is reached
1344             * 3. the maximum number of class description tests is reached
1345             *
1346             * Continuation criteria and result improvement
1347             * The algorithm continues (although it would normally stop) if
1348             * 1. Minimum execution time is not reached (default 0)
1349             * 2. not enough good solutions are found (default 1)
1350             * otherwise it stops
1351             * 
1352             * @return true if the algorithm should stop, this is mostly indepent of the question if a solution was found
1353             */
1354            private boolean isTerminationCriteriaReached(){
1355                    if(this.stop){
1356                            return true;
1357                    }
1358                    long totalTimeNeeded = System.currentTimeMillis() - this.runtime;
1359                    long maxMilliSeconds = maxExecutionTimeInSeconds * 1000;
1360                    long minMilliSeconds = minExecutionTimeInSeconds * 1000;
1361                    int conceptTests = conceptTestsReasoner + conceptTestsTooWeakList + conceptTestsOverlyGeneralList;
1362                    boolean result = false;
1363                    
1364                    //ignore default
1365                    if (maxExecutionTimeInSeconds == 0)
1366                            result = false;
1367                    //alreadyReached
1368                    else if (maxExecutionTimeAlreadyReached)
1369                            return true;
1370                    //test
1371                    else if (maxMilliSeconds < totalTimeNeeded) {
1372                            this.stop();
1373                            logger.info("Maximum time (" + maxExecutionTimeInSeconds
1374                                            + " seconds) reached, stopping now...");
1375                            maxExecutionTimeAlreadyReached = true;
1376                            return true;
1377                    }
1378                    
1379                    //ignore default
1380                    if(maxClassDescriptionTests == 0) 
1381                            result = false;
1382                    //test
1383                    else if(conceptTests >= maxClassDescriptionTests){
1384                            logger.info("Maximum Class Description tests (" + maxClassDescriptionTests
1385                                            + " tests [actual: "+conceptTests+"]) reached, stopping now...");
1386                            return true;
1387                    }
1388                    
1389                    
1390                    if (guaranteeXgoodAlreadyReached){
1391                            result = true;
1392                    } else if(solutions.size() >= guaranteeXgoodDescriptions) {
1393                                    if(guaranteeXgoodDescriptions != 1) {
1394                                    logger.info("Minimum number (" + guaranteeXgoodDescriptions
1395                                                    + ") of good descriptions reached.");
1396                                    }
1397                                    guaranteeXgoodAlreadyReached = true;
1398                                    result = true;
1399                    }
1400                    
1401                                    
1402                    if (minExecutionTimeAlreadyReached){
1403                            result = result && true;
1404                    }else if(minMilliSeconds < totalTimeNeeded) {
1405                            if(minExecutionTimeInSeconds != 0) {
1406                                    logger.info("Minimum time (" + minExecutionTimeInSeconds
1407                                                    + " seconds) reached.");
1408                            }
1409                            minExecutionTimeAlreadyReached = true;
1410                            result = result && true;
1411                    } 
1412                    
1413                    return result;
1414            
1415            }
1416            
1417            public boolean isRunning() {
1418                    return isRunning;
1419            }
1420    
1421    }