001    /**
002     * Copyright (C) 2007-2011, Jens Lehmann
003     *
004     * This file is part of DL-Learner.
005     *
006     * DL-Learner is free software; you can redistribute it and/or modify
007     * it under the terms of the GNU General Public License as published by
008     * the Free Software Foundation; either version 3 of the License, or
009     * (at your option) any later version.
010     *
011     * DL-Learner is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014     * GNU General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018     */
019    
020    package org.dllearner.learningproblems;
021    
022    import java.util.Collection;
023    import java.util.Collections;
024    import java.util.Iterator;
025    import java.util.LinkedList;
026    import java.util.List;
027    import java.util.Random;
028    import java.util.Set;
029    import java.util.TreeSet;
030    
031    import org.apache.log4j.Logger;
032    import org.dllearner.core.ComponentInitException;
033    import org.dllearner.core.ComponentManager;
034    import org.dllearner.core.AbstractLearningProblem;
035    import org.dllearner.core.AbstractReasonerComponent;
036    import org.dllearner.core.configurators.ClassLearningProblemConfigurator;
037    import org.dllearner.core.options.BooleanConfigOption;
038    import org.dllearner.core.options.CommonConfigOptions;
039    import org.dllearner.core.options.ConfigOption;
040    import org.dllearner.core.options.DoubleConfigOption;
041    import org.dllearner.core.options.StringConfigOption;
042    import org.dllearner.core.options.URLConfigOption;
043    import org.dllearner.core.owl.Axiom;
044    import org.dllearner.core.owl.Description;
045    import org.dllearner.core.owl.EquivalentClassesAxiom;
046    import org.dllearner.core.owl.Individual;
047    import org.dllearner.core.owl.NamedClass;
048    import org.dllearner.core.owl.Negation;
049    import org.dllearner.core.owl.SubClassAxiom;
050    import org.dllearner.learningproblems.Heuristics.HeuristicType;
051    import org.dllearner.utilities.Helper;
052    
053    /**
054     * The problem of learning the description of an existing class
055     * in an OWL ontology.
056     * 
057     * @author Jens Lehmann
058     *
059     */
060    public class ClassLearningProblem extends AbstractLearningProblem {
061            
062            private static Logger logger = Logger.getLogger(ClassLearningProblem.class);
063        private long nanoStartTime;
064            private static int maxExecutionTimeInSeconds = 10;
065            
066            private NamedClass classToDescribe;
067            private List<Individual> classInstances;
068            private TreeSet<Individual> classInstancesSet;
069            private boolean equivalence = true;
070            private ClassLearningProblemConfigurator configurator;
071            // approximation of accuracy
072            private double approxDelta = 0.05;
073            
074            private boolean useApproximations;
075            
076            // factor for higher weight on recall (needed for subclass learning)
077            private double coverageFactor;
078            
079            // instances of super classes excluding instances of the class itself
080            private List<Individual> superClassInstances;
081            // instances of super classes including instances of the class itself
082            private List<Individual> classAndSuperClassInstances;
083            // specific variables for generalised F-measure
084            private TreeSet<Individual> negatedClassInstances;
085            
086            private HeuristicType heuristic = HeuristicType.AMEASURE;
087            
088            @Override
089            public ClassLearningProblemConfigurator getConfigurator(){
090                    return configurator;
091            }       
092            
093            public ClassLearningProblem(AbstractReasonerComponent reasoner) {
094                    super(reasoner);
095                    configurator = new ClassLearningProblemConfigurator(this);
096            }
097            
098            public static Collection<ConfigOption<?>> createConfigOptions() {
099                    Collection<ConfigOption<?>> options = new LinkedList<ConfigOption<?>>();
100                    URLConfigOption classToDescribeOption = new URLConfigOption("classToDescribe", "class of which a description should be learned", null, true, false);
101                    classToDescribeOption.setRefersToOWLClass(true);
102                    options.add(classToDescribeOption);
103                    StringConfigOption type = new StringConfigOption("type", "whether to learn an equivalence class or super class axiom","equivalence"); //  or domain/range of a property.
104                    type.setAllowedValues(new String[] {"equivalence", "superClass"}); // , "domain", "range"});
105                    options.add(type);      
106                    BooleanConfigOption approx = new BooleanConfigOption("useApproximations", "whether to use stochastic approximations for computing accuracy", true);
107                    options.add(approx);
108                    DoubleConfigOption approxAccuracy = new DoubleConfigOption("approxAccuracy", "accuracy of the approximation (only for expert use)", 0.05);
109                    options.add(approxAccuracy);
110                    StringConfigOption accMethod = new StringConfigOption("accuracyMethod", "Specifies, which method/function to use for computing accuracy.","standard"); //  or domain/range of a property.
111                    accMethod.setAllowedValues(new String[] {"standard", "fmeasure", "pred_acc", "generalised_fmeasure", "jaccard"});
112                    options.add(accMethod);
113                    BooleanConfigOption consistency = new BooleanConfigOption("checkConsistency", "Specify whether to check consistency for solution candidates. This is convenient for user interfaces, but can be performance intensive.", true);
114                    options.add(consistency);       
115                    options.add(CommonConfigOptions.maxExecutionTimeInSeconds(10));
116                    DoubleConfigOption betaSC = new DoubleConfigOption("betaSC", "Higher values of beta rate recall higher than precision or in other words, covering the instances of the class to describe is more important even at the cost of covering additional instances. The actual implementation depends on the selected heuristic. This values is used only for super class learning.", 3.0);
117                    options.add(betaSC);
118                    DoubleConfigOption betaEq = new DoubleConfigOption("betaEq", "Higher values of beta rate recall higher than precision or in other words, covering the instances of the class to describe is more important even at the cost of covering additional instances. The actual implementation depends on the selected heuristic. This values is used only for equivalence class learning.", 1.0);
119                    options.add(betaEq);
120                    return options;
121            }
122    
123            public static String getName() {
124                    return "class learning problem";
125            }       
126            
127            @Override
128            public void init() throws ComponentInitException {
129                    classToDescribe = new NamedClass(configurator.getClassToDescribe().toString());
130                    useApproximations = configurator.getUseApproximations();
131                    
132                    String accM = configurator.getAccuracyMethod();
133                    if(accM.equals("standard")) {
134                            heuristic = HeuristicType.AMEASURE;
135                    } else if(accM.equals("fmeasure")) {
136                            heuristic = HeuristicType.FMEASURE;
137                    } else if(accM.equals("generalised_fmeasure")) {
138                            heuristic = HeuristicType.GEN_FMEASURE;
139                    } else if(accM.equals("jaccard")) {
140                            heuristic = HeuristicType.JACCARD;
141                    } else if(accM.equals("pred_acc")) {
142                            heuristic = HeuristicType.PRED_ACC;
143                    }
144                    
145                    if(useApproximations && heuristic.equals(HeuristicType.PRED_ACC)) {
146                            System.err.println("Approximating predictive accuracy is an experimental feature. USE IT AT YOUR OWN RISK. If you consider to use it for anything serious, please extend the unit tests at org.dllearner.test.junit.HeuristicTests first to verify that it works.");
147                    }               
148                    
149                    if(useApproximations && !(heuristic.equals(HeuristicType.PRED_ACC) || heuristic.equals(HeuristicType.AMEASURE) || heuristic.equals(HeuristicType.FMEASURE))) {
150                            throw new ComponentInitException("Approximations only supported for F-Measure or Standard-Measure. It is unsupported for \"" + accM + ".\"");
151                    }
152                    
153    //              useFMeasure = configurator.getAccuracyMethod().equals("fmeasure");
154                    approxDelta = configurator.getApproxAccuracy();
155                    
156                    if(!reasoner.getNamedClasses().contains(classToDescribe)) {
157                            throw new ComponentInitException("The class \"" + configurator.getClassToDescribe() + "\" does not exist. Make sure you spelled it correctly.");
158                    }
159                    
160                    classInstances = new LinkedList<Individual>(reasoner.getIndividuals(classToDescribe));
161                    // sanity check
162                    if(classInstances.size() == 0) {
163                            throw new ComponentInitException("Class " + classToDescribe + " has 0 instances according to \"" + ComponentManager.getInstance().getComponentName(reasoner.getClass()) + "\". Cannot perform class learning with 0 instances.");
164                    }
165                    
166                    classInstancesSet = new TreeSet<Individual>(classInstances);
167                    equivalence = (configurator.getType().equals("equivalence"));
168                    maxExecutionTimeInSeconds = configurator.getMaxExecutionTimeInSeconds();
169                    
170                    if(equivalence) {
171                            coverageFactor = configurator.getBetaEq();
172                    } else {
173                            coverageFactor = configurator.getBetaSC();
174                    }
175                    
176                    // we compute the instances of the super class to perform
177                    // optimisations later on
178                    Set<Description> superClasses = reasoner.getClassHierarchy().getSuperClasses(classToDescribe);
179                    TreeSet<Individual> superClassInstancesTmp = new TreeSet<Individual>(reasoner.getIndividuals());
180                    for(Description superClass : superClasses) {
181                            superClassInstancesTmp.retainAll(reasoner.getIndividuals(superClass));
182                    }
183                    // we create one list, which includes instances of the class (an instance of the class is also instance of all super classes) ...
184                    classAndSuperClassInstances = new LinkedList<Individual>(superClassInstancesTmp);
185                    // ... and a second list not including them
186                    superClassInstancesTmp.removeAll(classInstances);
187                    // since we use the instance list for approximations, we want to avoid
188                    // any bias through URI names, so we shuffle the list once pseudo-randomly
189                    superClassInstances = new LinkedList<Individual>(superClassInstancesTmp);
190                    Random rand = new Random(1);
191                    Collections.shuffle(classInstances, rand);
192                    Collections.shuffle(superClassInstances, rand);
193                    
194                    if(heuristic.equals(HeuristicType.GEN_FMEASURE)) {
195                            Description classToDescribeNeg = new Negation(classToDescribe);
196                            negatedClassInstances = new TreeSet<Individual>();
197                            for(Individual ind : superClassInstances) {
198                                    if(reasoner.hasType(classToDescribeNeg, ind)) {
199                                            negatedClassInstances.add(ind);
200                                    }
201                            }
202    //                      System.out.println("negated class instances: " + negatedClassInstances);
203                    }
204                    
205    //              System.out.println(classInstances.size() + " " + superClassInstances.size());
206            }
207                    
208            @Override
209            public ClassScore computeScore(Description description) {
210                    
211                    // TODO: reuse code to ensure that we never return inconsistent results
212                    // between getAccuracy, getAccuracyOrTooWeak and computeScore
213                    
214                    // overhang
215                    Set<Individual> additionalInstances = new TreeSet<Individual>();
216                    for(Individual ind : superClassInstances) {
217                            if(reasoner.hasType(description, ind)) {
218                                    additionalInstances.add(ind);
219                            }
220                    }
221                    
222                    // coverage
223                    Set<Individual> coveredInstances = new TreeSet<Individual>();
224                    for(Individual ind : classInstances) {
225                            if(reasoner.hasType(description, ind)) {
226                                    coveredInstances.add(ind);
227                            }
228                    }
229                    
230                    double recall = coveredInstances.size()/(double)classInstances.size();
231                    double precision = (additionalInstances.size() + coveredInstances.size() == 0) ? 0 : coveredInstances.size()/(double)(coveredInstances.size()+additionalInstances.size());
232                    // for each description with less than 100% coverage, we check whether it is
233                    // leads to an inconsistent knowledge base
234                    
235                    double acc = 0;
236                    if(heuristic.equals(HeuristicType.FMEASURE)) {
237                            acc = getFMeasure(recall, precision);
238                    } else if(heuristic.equals(HeuristicType.AMEASURE)) {
239                            acc = Heuristics.getAScore(recall, precision, coverageFactor);
240                    } else {
241                            // TODO: some superfluous instance checks are required to compute accuracy => 
242                            // move accuracy computation here if possible 
243                            acc = getAccuracyOrTooWeakExact(description, 1);
244                    }
245                    
246                    if(configurator.getCheckConsistency()) {
247                            
248                            // we check whether the axiom already follows from the knowledge base
249    //                      boolean followsFromKB = reasoner.isSuperClassOf(description, classToDescribe);                  
250                            
251    //                      boolean followsFromKB = equivalence ? reasoner.isEquivalentClass(description, classToDescribe) : reasoner.isSuperClassOf(description, classToDescribe);
252                            boolean followsFromKB = followsFromKB(description);
253                            
254                            // workaround due to a bug (see http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319)
255    //                      boolean isConsistent = coverage >= 0.999999 || isConsistent(description);
256                            // (if the axiom follows, then the knowledge base remains consistent)
257                            boolean isConsistent = followsFromKB || isConsistent(description);
258                            
259    //                      double acc = useFMeasure ? getFMeasure(coverage, protusion) : getAccuracy(coverage, protusion);
260                            return new ClassScore(coveredInstances, Helper.difference(classInstancesSet, coveredInstances), recall, additionalInstances, precision, acc, isConsistent, followsFromKB);
261                    
262                    } else {
263                            return new ClassScore(coveredInstances, Helper.difference(classInstancesSet, coveredInstances), recall, additionalInstances, precision, acc);
264                    }
265            }       
266            
267            public boolean isEquivalenceProblem() {
268                    return equivalence;
269            }
270    
271            /* (non-Javadoc)
272             * @see org.dllearner.core.LearningProblem#getAccuracy(org.dllearner.core.owl.Description)
273             */
274            @Override
275            public double getAccuracy(Description description) {
276                    // a noise value of 1.0 means that we never return too weak (-1.0) 
277                    return getAccuracyOrTooWeak(description, 1.0);
278            }
279    
280            @Override
281            public double getAccuracyOrTooWeak(Description description, double noise) {
282                    // delegates to the appropriate methods
283                    return useApproximations ? getAccuracyOrTooWeakApprox(description, noise) : getAccuracyOrTooWeakExact(description, noise);              
284            }
285            
286            // instead of using the standard operation, we use optimisation
287            // and approximation here
288            public double getAccuracyOrTooWeakApprox(Description description, double noise) {
289                    if(heuristic.equals(HeuristicType.FMEASURE)) {
290                            // we abort when there are too many uncovered positives
291                            int maxNotCovered = (int) Math.ceil(noise*classInstances.size());
292                            int instancesCovered = 0;
293                            int instancesNotCovered = 0;
294                            
295                            for(Individual ind : classInstances) {
296                                    if(reasoner.hasType(description, ind)) {
297                                            instancesCovered++;
298                                    } else {
299                                            instancesNotCovered ++;
300                                            if(instancesNotCovered > maxNotCovered) {
301                                                    return -1;
302                                            }
303                                    }
304                            }       
305                            
306                            double recall = instancesCovered/(double)classInstances.size();
307                            
308                            int testsPerformed = 0;
309                            int instancesDescription = 0;
310                            
311                            for(Individual ind : superClassInstances) {
312    
313                                    if(reasoner.hasType(description, ind)) {
314                                            instancesDescription++;
315                                    }
316                                    testsPerformed++;
317                                    
318                                    // check whether approximation is sufficiently accurate
319                                    double[] approx = Heuristics.getFScoreApproximation(instancesCovered, recall, coverageFactor, superClassInstances.size(), testsPerformed, instancesDescription);
320                                    if(approx[1]<approxDelta) {
321                                            return approx[0];
322                                    }
323                                    
324                            }               
325                            
326                            // standard computation (no approximation)
327                            double precision = instancesCovered/(double)(instancesDescription+instancesCovered);
328    //                      if(instancesCovered + instancesDescription == 0) {
329    //                              precision = 0;
330    //                      }
331                            return Heuristics.getFScore(recall, precision, coverageFactor);
332                            
333                    } else if(heuristic.equals(HeuristicType.AMEASURE)) {
334                            // the F-MEASURE implementation is now separate (different optimisation
335                            // strategy)
336                            
337                            // we abort when there are too many uncovered positives
338                            int maxNotCovered = (int) Math.ceil(noise*classInstances.size());
339                            int instancesCovered = 0;
340                            int instancesNotCovered = 0;
341                            int total = 0;
342                            boolean estimatedA = false;
343                            
344                            double lowerBorderA = 0;
345                            int lowerEstimateA = 0;
346                            double upperBorderA = 1;
347                            int upperEstimateA = classInstances.size();
348                            
349                            for(Individual ind : classInstances) {
350                                    if(reasoner.hasType(description, ind)) {
351                                            instancesCovered++;
352                                    } else {
353                                            instancesNotCovered ++;
354                                            if(instancesNotCovered > maxNotCovered) {
355                                                    return -1;
356                                            }
357                                    }
358                                    
359                                    // approximation step (starting after 10 tests)
360                                    total = instancesCovered + instancesNotCovered;
361                                    if(total > 10) {
362                                            // compute confidence interval
363                                            double p1 = p1(instancesCovered, total);
364                                            double p2 = p3(p1, total);
365                                            lowerBorderA = Math.max(0, p1 - p2);
366                                            upperBorderA = Math.min(1, p1 + p2);
367                                            double size = upperBorderA - lowerBorderA;
368                                            // if the interval has a size smaller than 10%, we can be confident
369                                            if(size < 2 * approxDelta) {
370                                                    // we have to distinguish the cases that the accuracy limit is
371                                                    // below, within, or above the limit and that the mean is below
372                                                    // or above the limit
373                                                    double mean = instancesCovered/(double)total;
374                                                    
375                                                    // we can estimate the best possible concept to reach with downward refinement
376                                                    // by setting precision to 1 and recall = mean stays as it is
377                                                    double optimumEstimate = heuristic.equals(HeuristicType.FMEASURE) ? ((1+Math.sqrt(coverageFactor))*mean)/(Math.sqrt(coverageFactor)+1) : (coverageFactor*mean+1)/(double)(coverageFactor+1);
378                                                    
379                                                    // if the mean is greater than the required minimum, we can accept;
380                                                    // we also accept if the interval is small and close to the minimum
381                                                    // (worst case is to accept a few inaccurate descriptions)
382                                                    if(optimumEstimate > 1-noise-0.03) {
383    //                                                              || (upperBorderA > mean && size < 0.03)) {
384                                                            instancesCovered = (int) (instancesCovered/(double)total * classInstances.size());
385                                                            upperEstimateA = (int) (upperBorderA * classInstances.size());
386                                                            lowerEstimateA = (int) (lowerBorderA * classInstances.size());
387                                                            estimatedA = true;
388                                                            break;
389                                                    }
390                                                    
391                                                    // reject only if the upper border is far away (we are very
392                                                    // certain not to lose a potential solution)
393    //                                              if(upperBorderA + 0.1 < 1-noise) {
394                                                    double optimumEstimateUpperBorder = heuristic.equals(HeuristicType.FMEASURE) ? ((1+Math.sqrt(coverageFactor))*(upperBorderA+0.1))/(Math.sqrt(coverageFactor)+1) : (coverageFactor*(upperBorderA+0.1)+1)/(double)(coverageFactor+1);
395                                                    if(optimumEstimateUpperBorder < 1 - noise) {
396                                                            return -1;
397                                                    }
398                                            }                               
399                                    }
400                            }       
401                            
402                            double recall = instancesCovered/(double)classInstances.size();
403                            
404    //                      MonitorFactory.add("estimatedA","count", estimatedA ? 1 : 0);
405    //                      MonitorFactory.add("aInstances","count", total);
406                            
407                            // we know that a definition candidate is always subclass of the
408                            // intersection of all super classes, so we test only the relevant instances
409                            // (leads to undesired effects for descriptions not following this rule,
410                            // but improves performance a lot);
411                            // for learning a superclass of a defined class, similar observations apply;
412    
413    
414                            int testsPerformed = 0;
415                            int instancesDescription = 0;
416    //                      boolean estimatedB = false;
417                            
418                            for(Individual ind : superClassInstances) {
419    
420                                    if(reasoner.hasType(description, ind)) {
421                                            instancesDescription++;
422                                    }
423                                    
424                                    testsPerformed++;
425                                    
426                                    if(testsPerformed > 10) {
427                                            
428                                            // compute confidence interval
429                                            double p1 = p1(instancesDescription, testsPerformed);
430                                            double p2 = p3(p1, testsPerformed);
431                                            double lowerBorder = Math.max(0, p1 - p2);
432                                            double upperBorder = Math.min(1, p1 + p2);
433                                            int lowerEstimate = (int) (lowerBorder * superClassInstances.size());
434                                            int upperEstimate = (int) (upperBorder * superClassInstances.size());
435                                            
436                                            double size;
437                                            if(estimatedA) {
438    //                                              size = 1/(coverageFactor+1) * (coverageFactor * (upperBorderA-lowerBorderA) + Math.sqrt(upperEstimateA/(upperEstimateA+lowerEstimate)) + Math.sqrt(lowerEstimateA/(lowerEstimateA+upperEstimate)));
439                                                    size = heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(upperBorderA, upperEstimateA/(double)(upperEstimateA+lowerEstimate)) - getFMeasure(lowerBorderA, lowerEstimateA/(double)(lowerEstimateA+upperEstimate)) : Heuristics.getAScore(upperBorderA, upperEstimateA/(double)(upperEstimateA+lowerEstimate), coverageFactor) - Heuristics.getAScore(lowerBorderA, lowerEstimateA/(double)(lowerEstimateA+upperEstimate),coverageFactor);                                   
440                                            } else {
441    //                                              size = 1/(coverageFactor+1) * (coverageFactor * coverage + Math.sqrt(instancesCovered/(instancesCovered+lowerEstimate)) + Math.sqrt(instancesCovered/(instancesCovered+upperEstimate)));
442                                                    size = heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(recall, instancesCovered/(double)(instancesCovered+lowerEstimate)) - getFMeasure(recall, instancesCovered/(double)(instancesCovered+upperEstimate)) : Heuristics.getAScore(recall, instancesCovered/(double)(instancesCovered+lowerEstimate),coverageFactor) - Heuristics.getAScore(recall, instancesCovered/(double)(instancesCovered+upperEstimate),coverageFactor);
443                                            }
444                                            
445                                            if(size < 0.1) {
446    //                                              System.out.println(instancesDescription + " of " + testsPerformed);
447    //                                              System.out.println("interval from " + lowerEstimate + " to " + upperEstimate);
448    //                                              System.out.println("size: " + size);
449                                                    
450    //                                              estimatedB = true;
451                                                    // calculate total number of instances
452                                                    instancesDescription = (int) (instancesDescription/(double)testsPerformed * superClassInstances.size());
453                                                    break;
454                                            }
455                                    }
456                            }
457                            
458                            // since we measured/estimated accuracy only on instances outside A (superClassInstances
459                            // does not include instances of A), we need to add it in the denominator
460                            double precision = instancesCovered/(double)(instancesDescription+instancesCovered);
461                            if(instancesCovered + instancesDescription == 0) {
462                                    precision = 0;
463                            }
464                            
465                            return heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(recall, precision) : Heuristics.getAScore(recall, precision, coverageFactor);
466                                                    
467                    } else if(heuristic.equals(HeuristicType.FMEASURE)) {
468                            int maxNotCovered = (int) Math.ceil(noise*classInstances.size());
469                            
470                            int notCoveredPos = 0;
471    //                      int notCoveredNeg = 0;
472                            
473                            int posClassifiedAsPos = 0;
474                            int negClassifiedAsNeg = 0;
475                            
476                            int nrOfPosChecks = 0;
477                            int nrOfNegChecks = 0;
478                            
479                            // special case: we test positive and negative examples in turn
480                            Iterator<Individual> itPos = classInstances.iterator();
481                            Iterator<Individual> itNeg = superClassInstances.iterator();
482                            
483                            do {
484                                    // in each loop we pick 0 or 1 positives and 0 or 1 negative
485                                    // and classify it
486                                    
487                                    if(itPos.hasNext()) {
488                                            Individual posExample = itPos.next();
489    //                                      System.out.println(posExample);
490                                            
491                                            if(reasoner.hasType(description, posExample)) {
492                                                    posClassifiedAsPos++;
493                                            } else {
494                                                    notCoveredPos++;
495                                            }
496                                            nrOfPosChecks++;
497                                            
498                                            // take noise into account
499                                            if(notCoveredPos > maxNotCovered) {
500                                                    return -1;
501                                            }
502                                    }
503                                    
504                                    if(itNeg.hasNext()) {
505                                            Individual negExample = itNeg.next();
506                                            if(!reasoner.hasType(description, negExample)) {
507                                                    negClassifiedAsNeg++;
508                                            }
509                                            nrOfNegChecks++;
510                                    }
511                            
512                                    // compute how accurate our current approximation is and return if it is sufficiently accurate
513                                    double approx[] = Heuristics.getPredAccApproximation(classInstances.size(), superClassInstances.size(), 1, nrOfPosChecks, posClassifiedAsPos, nrOfNegChecks, negClassifiedAsNeg);
514                                    if(approx[1]<approxDelta) {
515    //                                      System.out.println(approx[0]);
516                                            return approx[0];
517                                    }
518                                    
519                            } while(itPos.hasNext() || itNeg.hasNext());
520                            
521                            double ret = Heuristics.getPredictiveAccuracy(classInstances.size(), superClassInstances.size(), posClassifiedAsPos, negClassifiedAsNeg, 1);
522                            return ret;                     
523                    } else {
524                            throw new Error("Approximation for " + heuristic + " not implemented.");
525                    }
526                    
527            }
528            
529    
530            // exact computation for 5 heuristics; each one adapted to super class learning;
531            // each one takes the noise parameter into account
532            public double getAccuracyOrTooWeakExact(Description description, double noise) {
533    
534                    nanoStartTime = System.nanoTime();
535                    
536                    if(heuristic.equals(HeuristicType.JACCARD)) {
537                            
538                            // computing R(A)
539                            TreeSet<Individual> coveredInstancesSet = new TreeSet<Individual>();
540                            for(Individual ind : classInstances) {
541                                    if(reasoner.hasType(description, ind)) {
542                                            coveredInstancesSet.add(ind);
543                                    }
544                                    if(terminationTimeExpired()){
545                                            return 0;
546                                    }
547                            }                               
548                            
549                            // if even the optimal case (no additional instances covered) is not sufficient, 
550                            // the concept is too weak
551                            if(coveredInstancesSet.size() / (double) classInstances.size() <= 1 - noise) {
552                                    return -1;
553                            }
554                            
555                            // computing R(C) restricted to relevant instances
556                            TreeSet<Individual> additionalInstancesSet = new TreeSet<Individual>();
557                            for(Individual ind : superClassInstances) {
558                                    if(reasoner.hasType(description, ind)) {
559                                            additionalInstancesSet.add(ind);
560                                    }
561                                    if(terminationTimeExpired()){
562                                            return 0;
563                                    }
564                            }
565                                            
566                            Set<Individual> union = Helper.union(classInstancesSet, additionalInstancesSet);
567                            return Heuristics.getJaccardCoefficient(coveredInstancesSet.size(), union.size());
568                            
569                    } else if (heuristic.equals(HeuristicType.AMEASURE) || heuristic.equals(HeuristicType.FMEASURE) || heuristic.equals(HeuristicType.PRED_ACC)) {
570                            
571                            // computing R(C) restricted to relevant instances
572                            int additionalInstances = 0;
573                            for(Individual ind : superClassInstances) {
574                                    if(reasoner.hasType(description, ind)) {
575                                            additionalInstances++;
576                                    }
577                                    if(terminationTimeExpired()){
578                                            return 0;
579                                    }
580                            }
581                            
582                            // computing R(A)
583                            int coveredInstances = 0;
584                            for(Individual ind : classInstances) {
585                                    if(reasoner.hasType(description, ind)) {
586                                            coveredInstances++;
587                                    }
588                                    if(terminationTimeExpired()){
589                                            return 0;
590                                    }
591                            }
592                            
593                            double recall = coveredInstances/(double)classInstances.size();
594                            
595                            // noise computation is incorrect
596    //                      if(recall < 1 - noise) {
597    //                              return -1;
598    //                      }
599                            
600                            double precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances);
601    
602                            if(heuristic.equals(HeuristicType.AMEASURE)) {
603                                    // best reachable concept has same recall and precision 1:
604                                    // 1/t+1 * (t*r + 1)
605                                    if((coverageFactor*recall+1)/(double)(coverageFactor+1) <(1-noise)) {
606                                            return -1;
607                                    } else {
608                                            return Heuristics.getAScore(recall, precision, coverageFactor);
609                                    }
610                            } else if(heuristic.equals(HeuristicType.FMEASURE)) {
611                                    // best reachable concept has same recall and precision 1:
612                                    if(((1+Math.sqrt(coverageFactor))*recall)/(Math.sqrt(coverageFactor)+1)<1-noise) {
613                                            return -1;
614                                    } else {
615                                            return getFMeasure(recall, precision);
616                                    }
617                            } else if(heuristic.equals(HeuristicType.PRED_ACC)) {
618                                    if((coverageFactor * coveredInstances + superClassInstances.size()) / (double) (coverageFactor * classInstances.size() + superClassInstances.size()) < 1 -noise) {
619                                            return -1;
620                                    } else {
621                                            // correctly classified divided by all examples
622                                            return (coverageFactor * coveredInstances + superClassInstances.size() - additionalInstances) / (double) (coverageFactor * classInstances.size() + superClassInstances.size());                                 
623                                    }
624                            }
625    
626    //                      return heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(recall, precision) : getAccuracy(recall, precision);                      
627                    } else if (heuristic.equals(HeuristicType.GEN_FMEASURE)) {
628                            
629                            // implementation is based on:
630                            // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf
631                            // default negation should be turned off when using fast instance checker
632                            
633                            // compute I_C (negated and non-negated concepts separately)
634                            TreeSet<Individual> icPos = new TreeSet<Individual>();
635                            TreeSet<Individual> icNeg = new TreeSet<Individual>();
636                            Description descriptionNeg = new Negation(description);
637                            // loop through all relevant instances
638                            for(Individual ind : classAndSuperClassInstances) {
639                                    if(reasoner.hasType(description, ind)) {
640                                            icPos.add(ind);
641                                    } else if(reasoner.hasType(descriptionNeg, ind)) {
642                                            icNeg.add(ind);
643                                    }
644                                    if(terminationTimeExpired()){
645                                            return 0;
646                                    }
647                            }
648                            
649                            // semantic precision
650                            // first compute I_C \cap Cn(DC)
651                            // it seems that in our setting, we can ignore Cn, because the examples (class instances)
652                            // are already part of the background knowledge
653                            Set<Individual> tmp1Pos = Helper.intersection(icPos, classInstancesSet);
654                            Set<Individual> tmp1Neg = Helper.intersection(icNeg, negatedClassInstances);
655                            int tmp1Size = tmp1Pos.size() + tmp1Neg.size();
656                            
657                            // Cn(I_C) \cap D_C is the same set if we ignore Cn ...
658                            
659                            int icSize = icPos.size() + icNeg.size();
660                            double prec = (icSize == 0) ? 0 : tmp1Size / (double) icSize;
661                            double rec = tmp1Size / (double) (classInstances.size() + negatedClassInstances.size());
662                            
663    //                      System.out.println(description);
664                            
665    //                      System.out.println("I_C pos: " + icPos);
666    //                      System.out.println("I_C neg: " + icNeg);
667    //                      System.out.println("class instances: " + classInstances);
668    //                      System.out.println("negated class instances: " + negatedClassInstances);
669                            
670    //                      System.out.println(prec);
671    //                      System.out.println(rec);
672    //                      System.out.println(coverageFactor);
673                            
674                            // too weak: see F-measure above
675                            // => does not work for generalised F-measure, because even very general 
676                            // concepts do not have a recall of 1
677    //                      if(((1+Math.sqrt(coverageFactor))*rec)/(Math.sqrt(coverageFactor)+1)<1-noise) {
678    //                              return -1;
679    //                      }
680                            // we only return too weak if there is no recall
681                            if(rec <= 0.0000001) {
682                                    return -1;
683                            }
684                            
685                            return getFMeasure(rec,prec);
686                    }
687                    
688                    throw new Error("ClassLearningProblem error: not implemented");
689            }
690            
691            private boolean terminationTimeExpired(){
692                    boolean val = ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSeconds*1000000000l));
693                    if(val) {
694                            logger.warn("Description test aborted, because it took longer than " + maxExecutionTimeInSeconds + " seconds.");
695                    }
696                    return val;
697            }
698            
699    //      @Deprecated
700    //      public double getAccuracyOrTooWeakStandard(Description description, double minAccuracy) {
701    //              // since we have to perform a retrieval operation anyway, we cannot easily
702    //              // get a benefit from the accuracy limit
703    //              double accuracy = getAccuracy(description);
704    //              if(accuracy >= minAccuracy) {
705    //                      return accuracy;
706    //              } else {
707    //                      return -1;
708    //              }
709    //      }
710            
711            // please note that getting recall and precision wastes some computational
712            // resource, because both methods need to compute the covered instances
713            public double getRecall(Description description) {
714                    int coveredInstances = 0;
715                    for(Individual ind : classInstances) {
716                            if(reasoner.hasType(description, ind)) {
717                                    coveredInstances++;
718                            }
719                    }               
720                    return coveredInstances/(double)classInstances.size();
721            }
722            
723            public double getPrecision(Description description) {
724    
725                    int additionalInstances = 0;
726                    for(Individual ind : superClassInstances) {
727                            if(reasoner.hasType(description, ind)) {
728                                    additionalInstances++;
729                            }
730                    }
731                    
732                    int coveredInstances = 0;
733                    for(Individual ind : classInstances) {
734                            if(reasoner.hasType(description, ind)) {
735                                    coveredInstances++;
736                            }
737                    }
738    
739                    return (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances);
740            }
741            
742            public double getPredictiveAccuracy() {
743                    return 0;
744            }
745            
746            // see http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf
747            // for all methods below (currently dummies)
748            public double getMatchRate() {
749                    return 0;
750            }
751            
752            public double getOmissionError() {
753                    return 0;
754            }
755            
756            public double getInductionRate() {
757                    return 0;
758            }
759            
760            public double getComissionError() {
761                    return 0;
762            }
763            
764            public double getGeneralisedRecall() {
765                    return 0;
766            }       
767            
768            public double getGeneralisedPrecision() {
769                    return 0;
770            }               
771            
772            @SuppressWarnings("unused")
773            private double getInverseJaccardDistance(TreeSet<Individual> set1, TreeSet<Individual> set2) {
774                    Set<Individual> intersection = Helper.intersection(set1, set2);
775                    Set<Individual> union = Helper.union(set1, set2);
776                    return 1 - (union.size() - intersection.size()) / (double) union.size();
777            }
778            
779            // computes accuracy from coverage and protusion (changing this function may
780            // make it necessary to change the appoximation too) => not the case anymore
781    //      private double getAccuracy(double recall, double precision) {
782    //              return (coverageFactor * coverage + Math.sqrt(protusion)) / (coverageFactor + 1);
783                    // log: changed from precision^^0.5 (root) to precision^^0.8 as the root is too optimistic in some cases
784    //              return (coverageFactor * recall + Math.pow(precision, 0.8)) / (coverageFactor + 1);
785    //      }
786            
787            private double getFMeasure(double recall, double precision) {
788                    // balanced F measure
789    //              return (precision + recall == 0) ? 0 : 2 * precision * recall / (precision + recall);
790                    // see e.g. http://en.wikipedia.org/wiki/F-measure
791                    return (precision + recall == 0) ? 0 :
792                      ( (1+Math.sqrt(coverageFactor)) * (precision * recall)
793                                    / (Math.sqrt(coverageFactor) * precision + recall) ); 
794            }
795            
796            // see paper: expression used in confidence interval estimation
797            public static double p3(double p1, int total) {
798                    return 1.96 * Math.sqrt(p1*(1-p1)/(total+4));
799            }               
800            
801            // see paper: expression used in confidence interval estimation
802    //      private static double p2(int success, int total) {
803    //              double p1 = p1(success, total);
804    //              return 1.96 * Math.sqrt(p1*(1-p1)/(total+4));
805    //      }       
806            
807            // see paper: p'
808            public static double p1(int success, int total) {
809                    return (success+2)/(double)(total+4);
810            }
811            
812            /**
813             * @return the classToDescribe
814             */
815            public NamedClass getClassToDescribe() {
816                    return classToDescribe;
817            }
818    
819            /* (non-Javadoc)
820             * @see org.dllearner.core.LearningProblem#evaluate(org.dllearner.core.owl.Description)
821             */
822            @Override
823            public EvaluatedDescriptionClass evaluate(Description description) {
824                    ClassScore score = computeScore(description);
825                    return new EvaluatedDescriptionClass(description, score);
826            }
827    
828            /**
829             * @return the isConsistent
830             */
831            public boolean isConsistent(Description description) {
832                    Axiom axiom;
833                    if(equivalence) {
834                            axiom = new EquivalentClassesAxiom(classToDescribe, description);
835                    } else {
836                            axiom = new SubClassAxiom(classToDescribe, description);
837                    }
838                    return reasoner.remainsSatisfiable(axiom);
839            }
840            
841            public boolean followsFromKB(Description description) {
842                    return equivalence ? reasoner.isEquivalentClass(description, classToDescribe) : reasoner.isSuperClassOf(description, classToDescribe);
843            }
844    }