001    /**
002     * Copyright (C) 2007-2011, Jens Lehmann
003     *
004     * This file is part of DL-Learner.
005     *
006     * DL-Learner is free software; you can redistribute it and/or modify
007     * it under the terms of the GNU General Public License as published by
008     * the Free Software Foundation; either version 3 of the License, or
009     * (at your option) any later version.
010     *
011     * DL-Learner is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014     * GNU General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018     */
019    
020    package org.dllearner.learningproblems;
021    
022    import java.util.Collection;
023    import java.util.Iterator;
024    import java.util.LinkedList;
025    import java.util.Set;
026    import java.util.SortedSet;
027    import java.util.TreeSet;
028    
029    import org.dllearner.core.EvaluatedDescription;
030    import org.dllearner.core.AbstractReasonerComponent;
031    import org.dllearner.core.configurators.PosNegLPStandardConfigurator;
032    import org.dllearner.core.options.BooleanConfigOption;
033    import org.dllearner.core.options.ConfigOption;
034    import org.dllearner.core.options.DoubleConfigOption;
035    import org.dllearner.core.options.StringConfigOption;
036    import org.dllearner.core.owl.Description;
037    import org.dllearner.core.owl.Individual;
038    import org.dllearner.learningproblems.Heuristics.HeuristicType;
039    import org.dllearner.utilities.Helper;
040    
041    /**
042     * The aim of this learning problem is to learn a concept definition such that
043     * the positive examples and the negative examples do not follow. It is
044     * 2-valued, because we only distinguish between covered and non-covered
045     * examples. (A 3-valued problem distinguishes between covered examples,
046     * examples covered by the negation of the concept, and all other examples.) The
047     * 2-valued learning problem is often more useful for Description Logics due to
048     * (the Open World Assumption and) the fact that negative knowledge, e.g. that a
049     * person does not have a child, is or cannot be expressed.
050     * 
051     * @author Jens Lehmann
052     * 
053     */
054    public class PosNegLPStandard extends PosNegLP {
055            
056            private PosNegLPStandardConfigurator configurator;
057            
058            // approximation and F-measure
059            // (taken from class learning => super class instances corresponds to negative examples
060            // and class instances to positive examples)
061            private double approxDelta = 0.05;
062            private boolean useApproximations;
063    //      private boolean useFMeasure;    
064            private boolean useOldDIGOptions = false;
065            
066            private HeuristicType heuristic = HeuristicType.PRED_ACC;
067            
068            @Override
069            public PosNegLPStandardConfigurator getConfigurator() {
070                    return configurator;
071            }
072    
073            public PosNegLPStandard(AbstractReasonerComponent reasoningService) {
074                    super(reasoningService);
075                    this.configurator = new PosNegLPStandardConfigurator(this);
076            }
077    
078            public PosNegLPStandard(AbstractReasonerComponent reasoningService, SortedSet<Individual> positiveExamples, SortedSet<Individual> negativeExamples) {
079                    super(reasoningService);
080                    this.positiveExamples = positiveExamples;
081                    this.negativeExamples = negativeExamples;
082                    this.configurator = new PosNegLPStandardConfigurator(this);
083            }
084            
085            @Override
086            public void init() {
087                    super.init();
088                    useApproximations = configurator.getUseApproximations();
089                    approxDelta = configurator.getApproxAccuracy();
090                    
091                    String accM = configurator.getAccuracyMethod();
092                    if(accM.equals("standard")) {
093                            heuristic = HeuristicType.AMEASURE;
094                    } else if(accM.equals("fmeasure")) {
095                            heuristic = HeuristicType.FMEASURE;
096                    } else if(accM.equals("generalised_fmeasure")) {
097                            heuristic = HeuristicType.GEN_FMEASURE;
098                    } else if(accM.equals("jaccard")) {
099                            heuristic = HeuristicType.JACCARD;
100                    } else if(accM.equals("pred_acc")) {
101                            heuristic = HeuristicType.PRED_ACC;
102                    }
103                    
104    //              useFMeasure = configurator.getAccuracyMethod().equals("fmeasure");
105                    
106    //              if((!useApproximations && useFMeasure) || (useApproximations && !useFMeasure)) {
107    //                      System.err.println("Currently F measure can only be used in combination with approximated reasoning.");
108    //                      System.exit(0);
109    //              }
110                    
111                    if(useApproximations && heuristic.equals(HeuristicType.PRED_ACC)) {
112                            System.err.println("Approximating predictive accuracy is an experimental feature. USE IT AT YOUR OWN RISK. If you consider to use it for anything serious, please extend the unit tests at org.dllearner.test.junit.HeuristicTests first and verify that it works.");
113                    }
114                    
115            }
116            
117            /*
118             * (non-Javadoc)
119             * 
120             * @see org.dllearner.core.Component#getName()
121             */
122            public static String getName() {
123                    return "pos neg learning problem";
124            }
125            
126            public static Collection<ConfigOption<?>> createConfigOptions() {
127                    Collection<ConfigOption<?>> options = new LinkedList<ConfigOption<?>>(PosNegLP.createConfigOptions());
128                    BooleanConfigOption approx = new BooleanConfigOption("useApproximations", "whether to use stochastic approximations for computing accuracy", false);
129                    options.add(approx);
130                    DoubleConfigOption approxAccuracy = new DoubleConfigOption("approxAccuracy", "accuracy of the approximation (only for expert use)", 0.05);
131                    options.add(approxAccuracy);
132                    StringConfigOption accMethod = new StringConfigOption("accuracyMethod", "Specifies, which method/function to use for computing accuracy.","predacc"); //  or domain/range of a property.
133                    accMethod.setAllowedValues(new String[] {"fmeasure", "predacc"});
134                    options.add(accMethod);         
135                    return options;
136            }
137            
138            /**
139             * This method computes (using the reasoner) whether a concept is too weak.
140             * If it is not weak, it returns the number of covered negative examples. It
141             * can use retrieval or instance checks for classification.
142             * 
143             * @see org.dllearner.learningproblems.PosNegLP.UseMultiInstanceChecks
144             * TODO: Performance could be slightly improved by counting the number of
145             *       covers instead of using sets and counting their size.
146             * @param concept
147             *            The concept to test.
148             * @return -1 if concept is too weak and the number of covered negative
149             *         examples otherwise.
150             */
151            @Override
152            public int coveredNegativeExamplesOrTooWeak(Description concept) {
153                    
154                    if (useRetrievalForClassification) {
155                            SortedSet<Individual> posClassified = reasoner.getIndividuals(concept);
156                            SortedSet<Individual> negAsPos = Helper.intersection(negativeExamples, posClassified);
157                            SortedSet<Individual> posAsNeg = new TreeSet<Individual>();
158    
159                            // the set is constructed piecewise to avoid expensive set
160                            // operations
161                            // on a large number of individuals
162                            for (Individual posExample : positiveExamples) {
163                                    if (!posClassified.contains(posExample))
164                                            posAsNeg.add(posExample);
165                            }
166    
167                            // too weak
168                            if (posAsNeg.size() > 0)
169                                    return -1;
170                            // number of covered negatives
171                            else
172                                    return negAsPos.size();
173                    } else {
174                            if (useMultiInstanceChecks != UseMultiInstanceChecks.NEVER) {
175                                    // two checks
176                                    if (useMultiInstanceChecks == UseMultiInstanceChecks.TWOCHECKS) {
177                                            Set<Individual> s = reasoner.hasType(concept, positiveExamples);
178                                            // if the concept is too weak, then do not query negative
179                                            // examples
180                                            if (s.size() != positiveExamples.size())
181                                                    return -1;
182                                            else {
183                                                    s = reasoner.hasType(concept, negativeExamples);
184                                                    return s.size();
185                                            }
186                                            // one check
187                                    } else {
188                                            Set<Individual> s = reasoner.hasType(concept, allExamples);
189                                            // test whether all positive examples are covered
190                                            if (s.containsAll(positiveExamples))
191                                                    return s.size() - positiveExamples.size();
192                                            else
193                                                    return -1;
194                                    }
195                            } else {
196                                    // SortedSet<Individual> posAsNeg = new TreeSet<Individual>();
197                                    SortedSet<Individual> negAsPos = new TreeSet<Individual>();
198    
199                                    for (Individual example : positiveExamples) {
200                                            if (!reasoner.hasType(concept, example))
201                                                    return -1;
202                                            // posAsNeg.add(example);
203                                    }
204                                    for (Individual example : negativeExamples) {
205                                            if (reasoner.hasType(concept, example))
206                                                    negAsPos.add(example);
207                                    }
208    
209                                    return negAsPos.size();
210                            }
211                    }
212            }
213    
214            /**
215             * Computes score of a given concept using the reasoner. Either retrieval or
216             * instance check are used. For the latter, this method treats
217             * <code>UseMultiInstanceChecks.TWO_CHECKS</code> as if it were 
218             * <code>UseMultiInstanceChecks.ONE_CHECKS</code> (it does not make much sense
219             * to implement TWO_CHECKS in this function, because we have to test all
220             * examples to create a score object anyway).
221             * 
222             * NOTE: The options above are no longer supported, because of interface changes (the options
223             * are more or less only relevant in combination with DIG).
224             * 
225             * @see org.dllearner.learningproblems.PosNegLP.UseMultiInstanceChecks
226             * @param concept
227             *            The concept to test.
228             * @return Corresponding Score object.
229             */
230            @Override
231            public ScorePosNeg computeScore(Description concept) {
232                    if(useOldDIGOptions) {
233                            if (useRetrievalForClassification) {
234                                    SortedSet<Individual> posClassified = reasoner.getIndividuals(concept);
235                                    SortedSet<Individual> posAsPos = Helper.intersection(positiveExamples, posClassified);
236                                    SortedSet<Individual> negAsPos = Helper.intersection(negativeExamples, posClassified);
237                                    SortedSet<Individual> posAsNeg = new TreeSet<Individual>();
238    
239                                    // piecewise set construction
240                                    for (Individual posExample : positiveExamples) {
241                                            if (!posClassified.contains(posExample))
242                                                    posAsNeg.add(posExample);
243                                    }
244                                    SortedSet<Individual> negAsNeg = new TreeSet<Individual>();
245                                    for (Individual negExample : negativeExamples) {
246                                            if (!posClassified.contains(negExample))
247                                                    negAsNeg.add(negExample);
248                                    }
249                                    return new ScoreTwoValued(concept.getLength(), percentPerLengthUnit, posAsPos, posAsNeg, negAsPos, negAsNeg);
250                            // instance checks for classification
251                            } else {                
252                                    SortedSet<Individual> posAsPos = new TreeSet<Individual>();
253                                    SortedSet<Individual> posAsNeg = new TreeSet<Individual>();
254                                    SortedSet<Individual> negAsPos = new TreeSet<Individual>();
255                                    SortedSet<Individual> negAsNeg = new TreeSet<Individual>();
256                                    
257                                    if (useMultiInstanceChecks != UseMultiInstanceChecks.NEVER) {
258                                            SortedSet<Individual> posClassified = reasoner.hasType(concept,
259                                                            allExamples);
260                                            SortedSet<Individual> negClassified = Helper.difference(allExamples,
261                                                            posClassified);
262                                            posAsPos = Helper.intersection(positiveExamples, posClassified);
263                                            posAsNeg = Helper.intersection(positiveExamples, negClassified);
264                                            negAsPos = Helper.intersection(negativeExamples, posClassified);
265                                            negAsNeg = Helper.intersection(negativeExamples, negClassified);
266                                            
267                                            // System.out.println("pos classified: " + posClassified);
268                                            
269                                            return new ScoreTwoValued(concept.getLength(), percentPerLengthUnit, posAsPos, posAsNeg, negAsPos,
270                                                            negAsNeg);
271                                    } else {
272                                            
273                                            for (Individual example : positiveExamples) {
274                                                    if (reasoner.hasType(concept, example)) {
275                                                            posAsPos.add(example);
276                                                    } else {
277                                                            posAsNeg.add(example);
278                                                    }
279                                            }
280                                            for (Individual example : negativeExamples) {
281                                                    if (reasoner.hasType(concept, example))
282                                                            negAsPos.add(example);
283                                                    else
284                                                            negAsNeg.add(example);
285                                            }
286                                            return new ScoreTwoValued(concept.getLength(), percentPerLengthUnit, posAsPos, posAsNeg, negAsPos,
287                                                            negAsNeg);
288                                    }
289                            }
290                    } else {
291                            
292                            SortedSet<Individual> posAsPos = new TreeSet<Individual>();
293                            SortedSet<Individual> posAsNeg = new TreeSet<Individual>();
294                            SortedSet<Individual> negAsPos = new TreeSet<Individual>();
295                            SortedSet<Individual> negAsNeg = new TreeSet<Individual>();
296                            
297                            for (Individual example : positiveExamples) {
298                                    if (reasoner.hasType(concept, example)) {
299                                            posAsPos.add(example);
300                                    } else {
301                                            posAsNeg.add(example);
302                                    }
303                            }
304                            for (Individual example : negativeExamples) {
305                                    if (reasoner.hasType(concept, example))
306                                            negAsPos.add(example);
307                                    else
308                                            negAsNeg.add(example);
309                            }
310                            
311                            // TODO: this computes accuracy twice - more elegant method should be implemented 
312                            double accuracy = getAccuracyOrTooWeakExact(concept,1);
313                            
314                            return new ScoreTwoValued(concept.getLength(), percentPerLengthUnit, posAsPos, posAsNeg, negAsPos,
315                                                    negAsNeg, accuracy);
316                    }
317    
318            }
319    
320            /* (non-Javadoc)
321             * @see org.dllearner.core.LearningProblem#getAccuracy(org.dllearner.core.owl.Description)
322             */
323            @Override
324            public double getAccuracy(Description description) {
325                    // a noise value of 1.0 means that we never return too weak (-1.0) 
326                    return getAccuracyOrTooWeak(description, 1.0);          
327                    /*                              
328                    int coveredPos = 0;
329                    int coveredNeg = 0;
330                    
331                    for (Individual example : positiveExamples) {
332                            if (reasoner.hasType(description, example)) {
333                                    coveredPos++;
334                            } 
335                    }
336                    for (Individual example : negativeExamples) {
337                            if (reasoner.hasType(description, example)) {
338                                    coveredNeg++;
339                            }
340                    }
341                    
342                    return coveredPos + negativeExamples.size() - coveredNeg / (double) allExamples.size();
343                    */
344            }
345    
346            @Override
347            public double getAccuracyOrTooWeak(Description description, double noise) {     
348                    // delegates to the appropriate methods
349                    return useApproximations ? getAccuracyOrTooWeakApprox(description, noise) : getAccuracyOrTooWeakExact(description, noise);                              
350            }       
351            
352            public double getAccuracyOrTooWeakApprox(Description description, double noise) {
353                    if(heuristic.equals(HeuristicType.PRED_ACC)) {
354                            int maxNotCovered = (int) Math.ceil(noise*positiveExamples.size());
355                            
356                            int notCoveredPos = 0;
357    //                      int notCoveredNeg = 0;
358                            
359                            int posClassifiedAsPos = 0;
360                            int negClassifiedAsNeg = 0;
361                            
362                            int nrOfPosChecks = 0;
363                            int nrOfNegChecks = 0;
364                            
365                            // special case: we test positive and negative examples in turn
366                            Iterator<Individual> itPos = positiveExamples.iterator();
367                            Iterator<Individual> itNeg = negativeExamples.iterator();
368                            
369                            do {
370                                    // in each loop we pick 0 or 1 positives and 0 or 1 negative
371                                    // and classify it
372                                    
373                                    if(itPos.hasNext()) {
374                                            Individual posExample = itPos.next();
375    //                                      System.out.println(posExample);
376                                            
377                                            if(reasoner.hasType(description, posExample)) {
378                                                    posClassifiedAsPos++;
379                                            } else {
380                                                    notCoveredPos++;
381                                            }
382                                            nrOfPosChecks++;
383                                            
384                                            // take noise into account
385                                            if(notCoveredPos > maxNotCovered) {
386                                                    return -1;
387                                            }
388                                    }
389                                    
390                                    if(itNeg.hasNext()) {
391                                            Individual negExample = itNeg.next();
392                                            if(!reasoner.hasType(description, negExample)) {
393                                                    negClassifiedAsNeg++;
394                                            }
395                                            nrOfNegChecks++;
396                                    }
397                            
398                                    // compute how accurate our current approximation is and return if it is sufficiently accurate
399                                    double approx[] = Heuristics.getPredAccApproximation(positiveExamples.size(), negativeExamples.size(), 1, nrOfPosChecks, posClassifiedAsPos, nrOfNegChecks, negClassifiedAsNeg);
400                                    if(approx[1]<approxDelta) {
401    //                                      System.out.println(approx[0]);
402                                            return approx[0];
403                                    }
404                                    
405                            } while(itPos.hasNext() || itNeg.hasNext());
406                            
407                            double ret = Heuristics.getPredictiveAccuracy(positiveExamples.size(), negativeExamples.size(), posClassifiedAsPos, negClassifiedAsNeg, 1);
408                            return ret;
409                                            
410                    } else if(heuristic.equals(HeuristicType.FMEASURE)) {
411    //                      System.out.println("Testing " + description);
412                            
413                            // we abort when there are too many uncovered positives
414                            int maxNotCovered = (int) Math.ceil(noise*positiveExamples.size());
415                            int instancesCovered = 0;
416                            int instancesNotCovered = 0;
417                            
418                            for(Individual ind : positiveExamples) {
419                                    if(reasoner.hasType(description, ind)) {
420                                            instancesCovered++;
421                                    } else {
422                                            instancesNotCovered ++;
423                                            if(instancesNotCovered > maxNotCovered) {
424                                                    return -1;
425                                            }
426                                    }
427                            }       
428                            
429                            double recall = instancesCovered/(double)positiveExamples.size();
430                            
431                            int testsPerformed = 0;
432                            int instancesDescription = 0;
433                            
434                            for(Individual ind : negativeExamples) {
435    
436                                    if(reasoner.hasType(description, ind)) {
437                                            instancesDescription++;
438                                    }
439                                    testsPerformed++;
440                                    
441                                    // check whether approximation is sufficiently accurate
442                                    double[] approx = Heuristics.getFScoreApproximation(instancesCovered, recall, 1, negativeExamples.size(), testsPerformed, instancesDescription);
443                                    if(approx[1]<approxDelta) {
444                                            return approx[0];
445                                    }
446                                    
447                            }               
448                            
449                            // standard computation (no approximation)
450                            double precision = instancesCovered/(double)(instancesDescription+instancesCovered);
451    //                      if(instancesCovered + instancesDescription == 0) {
452    //                              precision = 0;
453    //                      }
454                            return Heuristics.getFScore(recall, precision, 1);                      
455                    } else {
456                            throw new Error("Approximation for " + heuristic + " not implemented.");
457                    }
458            }
459            
460            public double getAccuracyOrTooWeakExact(Description description, double noise) {
461                    if(heuristic.equals(HeuristicType.PRED_ACC)) {
462                            return getPredAccuracyOrTooWeakExact(description, noise);
463                    } else if(heuristic.equals(HeuristicType.FMEASURE)) {
464                            return getFMeasureOrTooWeakExact(description, noise);
465                            /*
466                            // computing R(C) restricted to relevant instances
467                            int additionalInstances = 0;
468                            for(Individual ind : negativeExamples) {
469                                    if(reasoner.hasType(description, ind)) {
470                                            additionalInstances++;
471                                    }
472                            }
473                            
474                            // computing R(A)
475                            int coveredInstances = 0;
476                            for(Individual ind : positiveExamples) {
477                                    if(reasoner.hasType(description, ind)) {
478                                            coveredInstances++;
479                                    }
480                            }
481                            
482                            double recall = coveredInstances/(double)positiveExamples.size();
483                            double precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances);                    
484                            
485                            return Heuristics.getFScore(recall, precision);
486                            */
487                    } else {
488                            throw new Error("Heuristic " + heuristic + " not implemented.");
489                    }               
490            }
491            
492            /* (non-Javadoc)
493             * @see org.dllearner.core.LearningProblem#getAccuracyOrTooWeak(org.dllearner.core.owl.Description, double)
494             */
495            public double getPredAccuracyOrTooWeakExact(Description description, double noise) {
496                    
497                    int maxNotCovered = (int) Math.ceil(noise*positiveExamples.size());
498                    
499                    int notCoveredPos = 0;
500                    int notCoveredNeg = 0;
501                    
502                    for (Individual example : positiveExamples) {
503                            if (!reasoner.hasType(description, example)) {
504                                    notCoveredPos++;
505                                    if(notCoveredPos >= maxNotCovered) {
506                                            return -1;
507                                    }
508                            } 
509                    }
510                    for (Individual example : negativeExamples) {
511                            if (!reasoner.hasType(description, example)) {
512                                    notCoveredNeg++;
513                            }
514                    }
515                    
516    //              if(useFMeasure) {
517    //                      double recall = (positiveExamples.size() - notCoveredPos) / (double) positiveExamples.size();
518    //                      double precision = (positiveExamples.size() - notCoveredPos) / (double) (allExamples.size() - notCoveredPos - notCoveredNeg);
519    //                      return getFMeasure(recall, precision);
520    //              } else {
521                            return (positiveExamples.size() - notCoveredPos + notCoveredNeg) / (double) allExamples.size();
522    //              }
523            }
524    
525            public double getFMeasureOrTooWeakExact(Description description, double noise) {
526                    int additionalInstances = 0;
527                    for(Individual ind : negativeExamples) {
528                            if(reasoner.hasType(description, ind)) {
529                                    additionalInstances++;
530                            }
531                    }
532                    
533                    int coveredInstances = 0;
534                    for(Individual ind : positiveExamples) {
535                            if(reasoner.hasType(description, ind)) {
536                                    coveredInstances++;
537                            }
538                    }
539                    
540                    double recall = coveredInstances/(double)positiveExamples.size();
541                    
542                    if(recall < 1 - noise) {
543                            return -1;
544                    }
545                    
546                    double precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances);
547                    
548    //              return getFMeasure(recall, precision);
549                    return Heuristics.getFScore(recall, precision);         
550            }
551            
552            // instead of using the standard operation, we use optimisation
553            // and approximation here;
554            // now deprecated because the Heuristics helper class is used
555            @Deprecated
556            public double getFMeasureOrTooWeakApprox(Description description, double noise) {
557                    // we abort when there are too many uncovered positives
558                    int maxNotCovered = (int) Math.ceil(noise*positiveExamples.size());
559                    int instancesCovered = 0;
560                    int instancesNotCovered = 0;
561                    int total = 0;
562                    boolean estimatedA = false;
563                    
564                    double lowerBorderA = 0;
565                    int lowerEstimateA = 0;
566                    double upperBorderA = 1;
567                    int upperEstimateA = positiveExamples.size();
568                    
569                    for(Individual ind : positiveExamples) {
570                            if(reasoner.hasType(description, ind)) {
571                                    instancesCovered++;
572                            } else {
573                                    instancesNotCovered ++;
574                                    if(instancesNotCovered > maxNotCovered) {
575                                            return -1;
576                                    }
577                            }
578                            
579                            // approximation step (starting after 10 tests)
580                            total = instancesCovered + instancesNotCovered;
581                            if(total > 10) {
582                                    // compute confidence interval
583                                    double p1 = ClassLearningProblem.p1(instancesCovered, total);
584                                    double p2 = ClassLearningProblem.p3(p1, total);
585                                    lowerBorderA = Math.max(0, p1 - p2);
586                                    upperBorderA = Math.min(1, p1 + p2);
587                                    double size = upperBorderA - lowerBorderA;
588                                    // if the interval has a size smaller than 10%, we can be confident
589                                    if(size < 2 * approxDelta) {
590                                            // we have to distinguish the cases that the accuracy limit is
591                                            // below, within, or above the limit and that the mean is below
592                                            // or above the limit
593                                            double mean = instancesCovered/(double)total;
594                                            
595                                            // if the mean is greater than the required minimum, we can accept;
596                                            // we also accept if the interval is small and close to the minimum
597                                            // (worst case is to accept a few inaccurate descriptions)
598                                            if(mean > 1-noise || (upperBorderA > mean && size < 0.03)) {
599                                                    instancesCovered = (int) (instancesCovered/(double)total * positiveExamples.size());
600                                                    upperEstimateA = (int) (upperBorderA * positiveExamples.size());
601                                                    lowerEstimateA = (int) (lowerBorderA * positiveExamples.size());
602                                                    estimatedA = true;
603                                                    break;
604                                            }
605                                            
606                                            // reject only if the upper border is far away (we are very
607                                            // certain not to lose a potential solution)
608                                            if(upperBorderA + 0.1 < 1-noise) {
609                                                    return -1;
610                                            }
611                                    }                               
612                            }
613                    }       
614                    
615                    double recall = instancesCovered/(double)positiveExamples.size();
616                    
617    //              MonitorFactory.add("estimatedA","count", estimatedA ? 1 : 0);
618    //              MonitorFactory.add("aInstances","count", total);
619                    
620                    // we know that a definition candidate is always subclass of the
621                    // intersection of all super classes, so we test only the relevant instances
622                    // (leads to undesired effects for descriptions not following this rule,
623                    // but improves performance a lot);
624                    // for learning a superclass of a defined class, similar observations apply;
625    
626    
627                    int testsPerformed = 0;
628                    int instancesDescription = 0;
629    //              boolean estimatedB = false;
630                    
631                    for(Individual ind : negativeExamples) {
632    
633                            if(reasoner.hasType(description, ind)) {
634                                    instancesDescription++;
635                            }
636                            
637                            testsPerformed++;
638                            
639                            if(testsPerformed > 10) {
640                                    
641                                    // compute confidence interval
642                                    double p1 = ClassLearningProblem.p1(instancesDescription, testsPerformed);
643                                    double p2 = ClassLearningProblem.p3(p1, testsPerformed);
644                                    double lowerBorder = Math.max(0, p1 - p2);
645                                    double upperBorder = Math.min(1, p1 + p2);
646                                    int lowerEstimate = (int) (lowerBorder * negativeExamples.size());
647                                    int upperEstimate = (int) (upperBorder * negativeExamples.size());
648                                    
649                                    double size;
650                                    if(estimatedA) {
651                                            size = getFMeasure(upperBorderA, upperEstimateA/(double)(upperEstimateA+lowerEstimate)) - getFMeasure(lowerBorderA, lowerEstimateA/(double)(lowerEstimateA+upperEstimate));                                     
652                                    } else {
653                                            size = getFMeasure(recall, instancesCovered/(double)(instancesCovered+lowerEstimate)) - getFMeasure(recall, instancesCovered/(double)(instancesCovered+upperEstimate));
654                                    }
655                                    
656                                    if(size < 0.1) {
657                                            instancesDescription = (int) (instancesDescription/(double)testsPerformed * negativeExamples.size());
658                                            break;
659                                    }
660                            }
661                    }
662                    
663                    double precision = instancesCovered/(double)(instancesDescription+instancesCovered);
664                    if(instancesCovered + instancesDescription == 0) {
665                            precision = 0;
666                    }       
667    
668    //              System.out.println("description: " + description);
669    //              System.out.println("recall: " + recall);
670    //              System.out.println("precision: " + precision);
671    //              System.out.println("F-measure: " + getFMeasure(recall, precision));
672    //              System.out.println("exact: " + getAccuracyOrTooWeakExact(description, noise));
673                    
674                    return getFMeasure(recall, precision);
675            }
676                    
677            
678            /* (non-Javadoc)
679             * @see org.dllearner.core.LearningProblem#evaluate(org.dllearner.core.owl.Description)
680             */
681            @Override
682            public EvaluatedDescription evaluate(Description description) {
683                    ScorePosNeg score = computeScore(description);
684                    return new EvaluatedDescriptionPosNeg(description, score);
685            }
686    
687            private double getFMeasure(double recall, double precision) {
688                    return 2 * precision * recall / (precision + recall);
689            }       
690            
691    }