001 /**
002 * Copyright (C) 2007-2011, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program. If not, see <http://www.gnu.org/licenses/>.
018 */
019
020 package org.dllearner.learningproblems;
021
022 import java.util.Collection;
023 import java.util.Collections;
024 import java.util.Iterator;
025 import java.util.LinkedList;
026 import java.util.List;
027 import java.util.Random;
028 import java.util.Set;
029 import java.util.TreeSet;
030
031 import org.apache.log4j.Logger;
032 import org.dllearner.core.ComponentInitException;
033 import org.dllearner.core.ComponentManager;
034 import org.dllearner.core.AbstractLearningProblem;
035 import org.dllearner.core.AbstractReasonerComponent;
036 import org.dllearner.core.configurators.ClassLearningProblemConfigurator;
037 import org.dllearner.core.options.BooleanConfigOption;
038 import org.dllearner.core.options.CommonConfigOptions;
039 import org.dllearner.core.options.ConfigOption;
040 import org.dllearner.core.options.DoubleConfigOption;
041 import org.dllearner.core.options.StringConfigOption;
042 import org.dllearner.core.options.URLConfigOption;
043 import org.dllearner.core.owl.Axiom;
044 import org.dllearner.core.owl.Description;
045 import org.dllearner.core.owl.EquivalentClassesAxiom;
046 import org.dllearner.core.owl.Individual;
047 import org.dllearner.core.owl.NamedClass;
048 import org.dllearner.core.owl.Negation;
049 import org.dllearner.core.owl.SubClassAxiom;
050 import org.dllearner.learningproblems.Heuristics.HeuristicType;
051 import org.dllearner.utilities.Helper;
052
053 /**
054 * The problem of learning the description of an existing class
055 * in an OWL ontology.
056 *
057 * @author Jens Lehmann
058 *
059 */
060 public class ClassLearningProblem extends AbstractLearningProblem {
061
062 private static Logger logger = Logger.getLogger(ClassLearningProblem.class);
063 private long nanoStartTime;
064 private static int maxExecutionTimeInSeconds = 10;
065
066 private NamedClass classToDescribe;
067 private List<Individual> classInstances;
068 private TreeSet<Individual> classInstancesSet;
069 private boolean equivalence = true;
070 private ClassLearningProblemConfigurator configurator;
071 // approximation of accuracy
072 private double approxDelta = 0.05;
073
074 private boolean useApproximations;
075
076 // factor for higher weight on recall (needed for subclass learning)
077 private double coverageFactor;
078
079 // instances of super classes excluding instances of the class itself
080 private List<Individual> superClassInstances;
081 // instances of super classes including instances of the class itself
082 private List<Individual> classAndSuperClassInstances;
083 // specific variables for generalised F-measure
084 private TreeSet<Individual> negatedClassInstances;
085
086 private HeuristicType heuristic = HeuristicType.AMEASURE;
087
088 @Override
089 public ClassLearningProblemConfigurator getConfigurator(){
090 return configurator;
091 }
092
093 public ClassLearningProblem(AbstractReasonerComponent reasoner) {
094 super(reasoner);
095 configurator = new ClassLearningProblemConfigurator(this);
096 }
097
098 public static Collection<ConfigOption<?>> createConfigOptions() {
099 Collection<ConfigOption<?>> options = new LinkedList<ConfigOption<?>>();
100 URLConfigOption classToDescribeOption = new URLConfigOption("classToDescribe", "class of which a description should be learned", null, true, false);
101 classToDescribeOption.setRefersToOWLClass(true);
102 options.add(classToDescribeOption);
103 StringConfigOption type = new StringConfigOption("type", "whether to learn an equivalence class or super class axiom","equivalence"); // or domain/range of a property.
104 type.setAllowedValues(new String[] {"equivalence", "superClass"}); // , "domain", "range"});
105 options.add(type);
106 BooleanConfigOption approx = new BooleanConfigOption("useApproximations", "whether to use stochastic approximations for computing accuracy", true);
107 options.add(approx);
108 DoubleConfigOption approxAccuracy = new DoubleConfigOption("approxAccuracy", "accuracy of the approximation (only for expert use)", 0.05);
109 options.add(approxAccuracy);
110 StringConfigOption accMethod = new StringConfigOption("accuracyMethod", "Specifies, which method/function to use for computing accuracy.","standard"); // or domain/range of a property.
111 accMethod.setAllowedValues(new String[] {"standard", "fmeasure", "pred_acc", "generalised_fmeasure", "jaccard"});
112 options.add(accMethod);
113 BooleanConfigOption consistency = new BooleanConfigOption("checkConsistency", "Specify whether to check consistency for solution candidates. This is convenient for user interfaces, but can be performance intensive.", true);
114 options.add(consistency);
115 options.add(CommonConfigOptions.maxExecutionTimeInSeconds(10));
116 DoubleConfigOption betaSC = new DoubleConfigOption("betaSC", "Higher values of beta rate recall higher than precision or in other words, covering the instances of the class to describe is more important even at the cost of covering additional instances. The actual implementation depends on the selected heuristic. This values is used only for super class learning.", 3.0);
117 options.add(betaSC);
118 DoubleConfigOption betaEq = new DoubleConfigOption("betaEq", "Higher values of beta rate recall higher than precision or in other words, covering the instances of the class to describe is more important even at the cost of covering additional instances. The actual implementation depends on the selected heuristic. This values is used only for equivalence class learning.", 1.0);
119 options.add(betaEq);
120 return options;
121 }
122
123 public static String getName() {
124 return "class learning problem";
125 }
126
127 @Override
128 public void init() throws ComponentInitException {
129 classToDescribe = new NamedClass(configurator.getClassToDescribe().toString());
130 useApproximations = configurator.getUseApproximations();
131
132 String accM = configurator.getAccuracyMethod();
133 if(accM.equals("standard")) {
134 heuristic = HeuristicType.AMEASURE;
135 } else if(accM.equals("fmeasure")) {
136 heuristic = HeuristicType.FMEASURE;
137 } else if(accM.equals("generalised_fmeasure")) {
138 heuristic = HeuristicType.GEN_FMEASURE;
139 } else if(accM.equals("jaccard")) {
140 heuristic = HeuristicType.JACCARD;
141 } else if(accM.equals("pred_acc")) {
142 heuristic = HeuristicType.PRED_ACC;
143 }
144
145 if(useApproximations && heuristic.equals(HeuristicType.PRED_ACC)) {
146 System.err.println("Approximating predictive accuracy is an experimental feature. USE IT AT YOUR OWN RISK. If you consider to use it for anything serious, please extend the unit tests at org.dllearner.test.junit.HeuristicTests first to verify that it works.");
147 }
148
149 if(useApproximations && !(heuristic.equals(HeuristicType.PRED_ACC) || heuristic.equals(HeuristicType.AMEASURE) || heuristic.equals(HeuristicType.FMEASURE))) {
150 throw new ComponentInitException("Approximations only supported for F-Measure or Standard-Measure. It is unsupported for \"" + accM + ".\"");
151 }
152
153 // useFMeasure = configurator.getAccuracyMethod().equals("fmeasure");
154 approxDelta = configurator.getApproxAccuracy();
155
156 if(!reasoner.getNamedClasses().contains(classToDescribe)) {
157 throw new ComponentInitException("The class \"" + configurator.getClassToDescribe() + "\" does not exist. Make sure you spelled it correctly.");
158 }
159
160 classInstances = new LinkedList<Individual>(reasoner.getIndividuals(classToDescribe));
161 // sanity check
162 if(classInstances.size() == 0) {
163 throw new ComponentInitException("Class " + classToDescribe + " has 0 instances according to \"" + ComponentManager.getInstance().getComponentName(reasoner.getClass()) + "\". Cannot perform class learning with 0 instances.");
164 }
165
166 classInstancesSet = new TreeSet<Individual>(classInstances);
167 equivalence = (configurator.getType().equals("equivalence"));
168 maxExecutionTimeInSeconds = configurator.getMaxExecutionTimeInSeconds();
169
170 if(equivalence) {
171 coverageFactor = configurator.getBetaEq();
172 } else {
173 coverageFactor = configurator.getBetaSC();
174 }
175
176 // we compute the instances of the super class to perform
177 // optimisations later on
178 Set<Description> superClasses = reasoner.getClassHierarchy().getSuperClasses(classToDescribe);
179 TreeSet<Individual> superClassInstancesTmp = new TreeSet<Individual>(reasoner.getIndividuals());
180 for(Description superClass : superClasses) {
181 superClassInstancesTmp.retainAll(reasoner.getIndividuals(superClass));
182 }
183 // we create one list, which includes instances of the class (an instance of the class is also instance of all super classes) ...
184 classAndSuperClassInstances = new LinkedList<Individual>(superClassInstancesTmp);
185 // ... and a second list not including them
186 superClassInstancesTmp.removeAll(classInstances);
187 // since we use the instance list for approximations, we want to avoid
188 // any bias through URI names, so we shuffle the list once pseudo-randomly
189 superClassInstances = new LinkedList<Individual>(superClassInstancesTmp);
190 Random rand = new Random(1);
191 Collections.shuffle(classInstances, rand);
192 Collections.shuffle(superClassInstances, rand);
193
194 if(heuristic.equals(HeuristicType.GEN_FMEASURE)) {
195 Description classToDescribeNeg = new Negation(classToDescribe);
196 negatedClassInstances = new TreeSet<Individual>();
197 for(Individual ind : superClassInstances) {
198 if(reasoner.hasType(classToDescribeNeg, ind)) {
199 negatedClassInstances.add(ind);
200 }
201 }
202 // System.out.println("negated class instances: " + negatedClassInstances);
203 }
204
205 // System.out.println(classInstances.size() + " " + superClassInstances.size());
206 }
207
208 @Override
209 public ClassScore computeScore(Description description) {
210
211 // TODO: reuse code to ensure that we never return inconsistent results
212 // between getAccuracy, getAccuracyOrTooWeak and computeScore
213
214 // overhang
215 Set<Individual> additionalInstances = new TreeSet<Individual>();
216 for(Individual ind : superClassInstances) {
217 if(reasoner.hasType(description, ind)) {
218 additionalInstances.add(ind);
219 }
220 }
221
222 // coverage
223 Set<Individual> coveredInstances = new TreeSet<Individual>();
224 for(Individual ind : classInstances) {
225 if(reasoner.hasType(description, ind)) {
226 coveredInstances.add(ind);
227 }
228 }
229
230 double recall = coveredInstances.size()/(double)classInstances.size();
231 double precision = (additionalInstances.size() + coveredInstances.size() == 0) ? 0 : coveredInstances.size()/(double)(coveredInstances.size()+additionalInstances.size());
232 // for each description with less than 100% coverage, we check whether it is
233 // leads to an inconsistent knowledge base
234
235 double acc = 0;
236 if(heuristic.equals(HeuristicType.FMEASURE)) {
237 acc = getFMeasure(recall, precision);
238 } else if(heuristic.equals(HeuristicType.AMEASURE)) {
239 acc = Heuristics.getAScore(recall, precision, coverageFactor);
240 } else {
241 // TODO: some superfluous instance checks are required to compute accuracy =>
242 // move accuracy computation here if possible
243 acc = getAccuracyOrTooWeakExact(description, 1);
244 }
245
246 if(configurator.getCheckConsistency()) {
247
248 // we check whether the axiom already follows from the knowledge base
249 // boolean followsFromKB = reasoner.isSuperClassOf(description, classToDescribe);
250
251 // boolean followsFromKB = equivalence ? reasoner.isEquivalentClass(description, classToDescribe) : reasoner.isSuperClassOf(description, classToDescribe);
252 boolean followsFromKB = followsFromKB(description);
253
254 // workaround due to a bug (see http://sourceforge.net/tracker/?func=detail&aid=2866610&group_id=203619&atid=986319)
255 // boolean isConsistent = coverage >= 0.999999 || isConsistent(description);
256 // (if the axiom follows, then the knowledge base remains consistent)
257 boolean isConsistent = followsFromKB || isConsistent(description);
258
259 // double acc = useFMeasure ? getFMeasure(coverage, protusion) : getAccuracy(coverage, protusion);
260 return new ClassScore(coveredInstances, Helper.difference(classInstancesSet, coveredInstances), recall, additionalInstances, precision, acc, isConsistent, followsFromKB);
261
262 } else {
263 return new ClassScore(coveredInstances, Helper.difference(classInstancesSet, coveredInstances), recall, additionalInstances, precision, acc);
264 }
265 }
266
267 public boolean isEquivalenceProblem() {
268 return equivalence;
269 }
270
271 /* (non-Javadoc)
272 * @see org.dllearner.core.LearningProblem#getAccuracy(org.dllearner.core.owl.Description)
273 */
274 @Override
275 public double getAccuracy(Description description) {
276 // a noise value of 1.0 means that we never return too weak (-1.0)
277 return getAccuracyOrTooWeak(description, 1.0);
278 }
279
280 @Override
281 public double getAccuracyOrTooWeak(Description description, double noise) {
282 // delegates to the appropriate methods
283 return useApproximations ? getAccuracyOrTooWeakApprox(description, noise) : getAccuracyOrTooWeakExact(description, noise);
284 }
285
286 // instead of using the standard operation, we use optimisation
287 // and approximation here
288 public double getAccuracyOrTooWeakApprox(Description description, double noise) {
289 if(heuristic.equals(HeuristicType.FMEASURE)) {
290 // we abort when there are too many uncovered positives
291 int maxNotCovered = (int) Math.ceil(noise*classInstances.size());
292 int instancesCovered = 0;
293 int instancesNotCovered = 0;
294
295 for(Individual ind : classInstances) {
296 if(reasoner.hasType(description, ind)) {
297 instancesCovered++;
298 } else {
299 instancesNotCovered ++;
300 if(instancesNotCovered > maxNotCovered) {
301 return -1;
302 }
303 }
304 }
305
306 double recall = instancesCovered/(double)classInstances.size();
307
308 int testsPerformed = 0;
309 int instancesDescription = 0;
310
311 for(Individual ind : superClassInstances) {
312
313 if(reasoner.hasType(description, ind)) {
314 instancesDescription++;
315 }
316 testsPerformed++;
317
318 // check whether approximation is sufficiently accurate
319 double[] approx = Heuristics.getFScoreApproximation(instancesCovered, recall, coverageFactor, superClassInstances.size(), testsPerformed, instancesDescription);
320 if(approx[1]<approxDelta) {
321 return approx[0];
322 }
323
324 }
325
326 // standard computation (no approximation)
327 double precision = instancesCovered/(double)(instancesDescription+instancesCovered);
328 // if(instancesCovered + instancesDescription == 0) {
329 // precision = 0;
330 // }
331 return Heuristics.getFScore(recall, precision, coverageFactor);
332
333 } else if(heuristic.equals(HeuristicType.AMEASURE)) {
334 // the F-MEASURE implementation is now separate (different optimisation
335 // strategy)
336
337 // we abort when there are too many uncovered positives
338 int maxNotCovered = (int) Math.ceil(noise*classInstances.size());
339 int instancesCovered = 0;
340 int instancesNotCovered = 0;
341 int total = 0;
342 boolean estimatedA = false;
343
344 double lowerBorderA = 0;
345 int lowerEstimateA = 0;
346 double upperBorderA = 1;
347 int upperEstimateA = classInstances.size();
348
349 for(Individual ind : classInstances) {
350 if(reasoner.hasType(description, ind)) {
351 instancesCovered++;
352 } else {
353 instancesNotCovered ++;
354 if(instancesNotCovered > maxNotCovered) {
355 return -1;
356 }
357 }
358
359 // approximation step (starting after 10 tests)
360 total = instancesCovered + instancesNotCovered;
361 if(total > 10) {
362 // compute confidence interval
363 double p1 = p1(instancesCovered, total);
364 double p2 = p3(p1, total);
365 lowerBorderA = Math.max(0, p1 - p2);
366 upperBorderA = Math.min(1, p1 + p2);
367 double size = upperBorderA - lowerBorderA;
368 // if the interval has a size smaller than 10%, we can be confident
369 if(size < 2 * approxDelta) {
370 // we have to distinguish the cases that the accuracy limit is
371 // below, within, or above the limit and that the mean is below
372 // or above the limit
373 double mean = instancesCovered/(double)total;
374
375 // we can estimate the best possible concept to reach with downward refinement
376 // by setting precision to 1 and recall = mean stays as it is
377 double optimumEstimate = heuristic.equals(HeuristicType.FMEASURE) ? ((1+Math.sqrt(coverageFactor))*mean)/(Math.sqrt(coverageFactor)+1) : (coverageFactor*mean+1)/(double)(coverageFactor+1);
378
379 // if the mean is greater than the required minimum, we can accept;
380 // we also accept if the interval is small and close to the minimum
381 // (worst case is to accept a few inaccurate descriptions)
382 if(optimumEstimate > 1-noise-0.03) {
383 // || (upperBorderA > mean && size < 0.03)) {
384 instancesCovered = (int) (instancesCovered/(double)total * classInstances.size());
385 upperEstimateA = (int) (upperBorderA * classInstances.size());
386 lowerEstimateA = (int) (lowerBorderA * classInstances.size());
387 estimatedA = true;
388 break;
389 }
390
391 // reject only if the upper border is far away (we are very
392 // certain not to lose a potential solution)
393 // if(upperBorderA + 0.1 < 1-noise) {
394 double optimumEstimateUpperBorder = heuristic.equals(HeuristicType.FMEASURE) ? ((1+Math.sqrt(coverageFactor))*(upperBorderA+0.1))/(Math.sqrt(coverageFactor)+1) : (coverageFactor*(upperBorderA+0.1)+1)/(double)(coverageFactor+1);
395 if(optimumEstimateUpperBorder < 1 - noise) {
396 return -1;
397 }
398 }
399 }
400 }
401
402 double recall = instancesCovered/(double)classInstances.size();
403
404 // MonitorFactory.add("estimatedA","count", estimatedA ? 1 : 0);
405 // MonitorFactory.add("aInstances","count", total);
406
407 // we know that a definition candidate is always subclass of the
408 // intersection of all super classes, so we test only the relevant instances
409 // (leads to undesired effects for descriptions not following this rule,
410 // but improves performance a lot);
411 // for learning a superclass of a defined class, similar observations apply;
412
413
414 int testsPerformed = 0;
415 int instancesDescription = 0;
416 // boolean estimatedB = false;
417
418 for(Individual ind : superClassInstances) {
419
420 if(reasoner.hasType(description, ind)) {
421 instancesDescription++;
422 }
423
424 testsPerformed++;
425
426 if(testsPerformed > 10) {
427
428 // compute confidence interval
429 double p1 = p1(instancesDescription, testsPerformed);
430 double p2 = p3(p1, testsPerformed);
431 double lowerBorder = Math.max(0, p1 - p2);
432 double upperBorder = Math.min(1, p1 + p2);
433 int lowerEstimate = (int) (lowerBorder * superClassInstances.size());
434 int upperEstimate = (int) (upperBorder * superClassInstances.size());
435
436 double size;
437 if(estimatedA) {
438 // size = 1/(coverageFactor+1) * (coverageFactor * (upperBorderA-lowerBorderA) + Math.sqrt(upperEstimateA/(upperEstimateA+lowerEstimate)) + Math.sqrt(lowerEstimateA/(lowerEstimateA+upperEstimate)));
439 size = heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(upperBorderA, upperEstimateA/(double)(upperEstimateA+lowerEstimate)) - getFMeasure(lowerBorderA, lowerEstimateA/(double)(lowerEstimateA+upperEstimate)) : Heuristics.getAScore(upperBorderA, upperEstimateA/(double)(upperEstimateA+lowerEstimate), coverageFactor) - Heuristics.getAScore(lowerBorderA, lowerEstimateA/(double)(lowerEstimateA+upperEstimate),coverageFactor);
440 } else {
441 // size = 1/(coverageFactor+1) * (coverageFactor * coverage + Math.sqrt(instancesCovered/(instancesCovered+lowerEstimate)) + Math.sqrt(instancesCovered/(instancesCovered+upperEstimate)));
442 size = heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(recall, instancesCovered/(double)(instancesCovered+lowerEstimate)) - getFMeasure(recall, instancesCovered/(double)(instancesCovered+upperEstimate)) : Heuristics.getAScore(recall, instancesCovered/(double)(instancesCovered+lowerEstimate),coverageFactor) - Heuristics.getAScore(recall, instancesCovered/(double)(instancesCovered+upperEstimate),coverageFactor);
443 }
444
445 if(size < 0.1) {
446 // System.out.println(instancesDescription + " of " + testsPerformed);
447 // System.out.println("interval from " + lowerEstimate + " to " + upperEstimate);
448 // System.out.println("size: " + size);
449
450 // estimatedB = true;
451 // calculate total number of instances
452 instancesDescription = (int) (instancesDescription/(double)testsPerformed * superClassInstances.size());
453 break;
454 }
455 }
456 }
457
458 // since we measured/estimated accuracy only on instances outside A (superClassInstances
459 // does not include instances of A), we need to add it in the denominator
460 double precision = instancesCovered/(double)(instancesDescription+instancesCovered);
461 if(instancesCovered + instancesDescription == 0) {
462 precision = 0;
463 }
464
465 return heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(recall, precision) : Heuristics.getAScore(recall, precision, coverageFactor);
466
467 } else if(heuristic.equals(HeuristicType.FMEASURE)) {
468 int maxNotCovered = (int) Math.ceil(noise*classInstances.size());
469
470 int notCoveredPos = 0;
471 // int notCoveredNeg = 0;
472
473 int posClassifiedAsPos = 0;
474 int negClassifiedAsNeg = 0;
475
476 int nrOfPosChecks = 0;
477 int nrOfNegChecks = 0;
478
479 // special case: we test positive and negative examples in turn
480 Iterator<Individual> itPos = classInstances.iterator();
481 Iterator<Individual> itNeg = superClassInstances.iterator();
482
483 do {
484 // in each loop we pick 0 or 1 positives and 0 or 1 negative
485 // and classify it
486
487 if(itPos.hasNext()) {
488 Individual posExample = itPos.next();
489 // System.out.println(posExample);
490
491 if(reasoner.hasType(description, posExample)) {
492 posClassifiedAsPos++;
493 } else {
494 notCoveredPos++;
495 }
496 nrOfPosChecks++;
497
498 // take noise into account
499 if(notCoveredPos > maxNotCovered) {
500 return -1;
501 }
502 }
503
504 if(itNeg.hasNext()) {
505 Individual negExample = itNeg.next();
506 if(!reasoner.hasType(description, negExample)) {
507 negClassifiedAsNeg++;
508 }
509 nrOfNegChecks++;
510 }
511
512 // compute how accurate our current approximation is and return if it is sufficiently accurate
513 double approx[] = Heuristics.getPredAccApproximation(classInstances.size(), superClassInstances.size(), 1, nrOfPosChecks, posClassifiedAsPos, nrOfNegChecks, negClassifiedAsNeg);
514 if(approx[1]<approxDelta) {
515 // System.out.println(approx[0]);
516 return approx[0];
517 }
518
519 } while(itPos.hasNext() || itNeg.hasNext());
520
521 double ret = Heuristics.getPredictiveAccuracy(classInstances.size(), superClassInstances.size(), posClassifiedAsPos, negClassifiedAsNeg, 1);
522 return ret;
523 } else {
524 throw new Error("Approximation for " + heuristic + " not implemented.");
525 }
526
527 }
528
529
530 // exact computation for 5 heuristics; each one adapted to super class learning;
531 // each one takes the noise parameter into account
532 public double getAccuracyOrTooWeakExact(Description description, double noise) {
533
534 nanoStartTime = System.nanoTime();
535
536 if(heuristic.equals(HeuristicType.JACCARD)) {
537
538 // computing R(A)
539 TreeSet<Individual> coveredInstancesSet = new TreeSet<Individual>();
540 for(Individual ind : classInstances) {
541 if(reasoner.hasType(description, ind)) {
542 coveredInstancesSet.add(ind);
543 }
544 if(terminationTimeExpired()){
545 return 0;
546 }
547 }
548
549 // if even the optimal case (no additional instances covered) is not sufficient,
550 // the concept is too weak
551 if(coveredInstancesSet.size() / (double) classInstances.size() <= 1 - noise) {
552 return -1;
553 }
554
555 // computing R(C) restricted to relevant instances
556 TreeSet<Individual> additionalInstancesSet = new TreeSet<Individual>();
557 for(Individual ind : superClassInstances) {
558 if(reasoner.hasType(description, ind)) {
559 additionalInstancesSet.add(ind);
560 }
561 if(terminationTimeExpired()){
562 return 0;
563 }
564 }
565
566 Set<Individual> union = Helper.union(classInstancesSet, additionalInstancesSet);
567 return Heuristics.getJaccardCoefficient(coveredInstancesSet.size(), union.size());
568
569 } else if (heuristic.equals(HeuristicType.AMEASURE) || heuristic.equals(HeuristicType.FMEASURE) || heuristic.equals(HeuristicType.PRED_ACC)) {
570
571 // computing R(C) restricted to relevant instances
572 int additionalInstances = 0;
573 for(Individual ind : superClassInstances) {
574 if(reasoner.hasType(description, ind)) {
575 additionalInstances++;
576 }
577 if(terminationTimeExpired()){
578 return 0;
579 }
580 }
581
582 // computing R(A)
583 int coveredInstances = 0;
584 for(Individual ind : classInstances) {
585 if(reasoner.hasType(description, ind)) {
586 coveredInstances++;
587 }
588 if(terminationTimeExpired()){
589 return 0;
590 }
591 }
592
593 double recall = coveredInstances/(double)classInstances.size();
594
595 // noise computation is incorrect
596 // if(recall < 1 - noise) {
597 // return -1;
598 // }
599
600 double precision = (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances);
601
602 if(heuristic.equals(HeuristicType.AMEASURE)) {
603 // best reachable concept has same recall and precision 1:
604 // 1/t+1 * (t*r + 1)
605 if((coverageFactor*recall+1)/(double)(coverageFactor+1) <(1-noise)) {
606 return -1;
607 } else {
608 return Heuristics.getAScore(recall, precision, coverageFactor);
609 }
610 } else if(heuristic.equals(HeuristicType.FMEASURE)) {
611 // best reachable concept has same recall and precision 1:
612 if(((1+Math.sqrt(coverageFactor))*recall)/(Math.sqrt(coverageFactor)+1)<1-noise) {
613 return -1;
614 } else {
615 return getFMeasure(recall, precision);
616 }
617 } else if(heuristic.equals(HeuristicType.PRED_ACC)) {
618 if((coverageFactor * coveredInstances + superClassInstances.size()) / (double) (coverageFactor * classInstances.size() + superClassInstances.size()) < 1 -noise) {
619 return -1;
620 } else {
621 // correctly classified divided by all examples
622 return (coverageFactor * coveredInstances + superClassInstances.size() - additionalInstances) / (double) (coverageFactor * classInstances.size() + superClassInstances.size());
623 }
624 }
625
626 // return heuristic.equals(HeuristicType.FMEASURE) ? getFMeasure(recall, precision) : getAccuracy(recall, precision);
627 } else if (heuristic.equals(HeuristicType.GEN_FMEASURE)) {
628
629 // implementation is based on:
630 // http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf
631 // default negation should be turned off when using fast instance checker
632
633 // compute I_C (negated and non-negated concepts separately)
634 TreeSet<Individual> icPos = new TreeSet<Individual>();
635 TreeSet<Individual> icNeg = new TreeSet<Individual>();
636 Description descriptionNeg = new Negation(description);
637 // loop through all relevant instances
638 for(Individual ind : classAndSuperClassInstances) {
639 if(reasoner.hasType(description, ind)) {
640 icPos.add(ind);
641 } else if(reasoner.hasType(descriptionNeg, ind)) {
642 icNeg.add(ind);
643 }
644 if(terminationTimeExpired()){
645 return 0;
646 }
647 }
648
649 // semantic precision
650 // first compute I_C \cap Cn(DC)
651 // it seems that in our setting, we can ignore Cn, because the examples (class instances)
652 // are already part of the background knowledge
653 Set<Individual> tmp1Pos = Helper.intersection(icPos, classInstancesSet);
654 Set<Individual> tmp1Neg = Helper.intersection(icNeg, negatedClassInstances);
655 int tmp1Size = tmp1Pos.size() + tmp1Neg.size();
656
657 // Cn(I_C) \cap D_C is the same set if we ignore Cn ...
658
659 int icSize = icPos.size() + icNeg.size();
660 double prec = (icSize == 0) ? 0 : tmp1Size / (double) icSize;
661 double rec = tmp1Size / (double) (classInstances.size() + negatedClassInstances.size());
662
663 // System.out.println(description);
664
665 // System.out.println("I_C pos: " + icPos);
666 // System.out.println("I_C neg: " + icNeg);
667 // System.out.println("class instances: " + classInstances);
668 // System.out.println("negated class instances: " + negatedClassInstances);
669
670 // System.out.println(prec);
671 // System.out.println(rec);
672 // System.out.println(coverageFactor);
673
674 // too weak: see F-measure above
675 // => does not work for generalised F-measure, because even very general
676 // concepts do not have a recall of 1
677 // if(((1+Math.sqrt(coverageFactor))*rec)/(Math.sqrt(coverageFactor)+1)<1-noise) {
678 // return -1;
679 // }
680 // we only return too weak if there is no recall
681 if(rec <= 0.0000001) {
682 return -1;
683 }
684
685 return getFMeasure(rec,prec);
686 }
687
688 throw new Error("ClassLearningProblem error: not implemented");
689 }
690
691 private boolean terminationTimeExpired(){
692 boolean val = ((System.nanoTime() - nanoStartTime) >= (maxExecutionTimeInSeconds*1000000000l));
693 if(val) {
694 logger.warn("Description test aborted, because it took longer than " + maxExecutionTimeInSeconds + " seconds.");
695 }
696 return val;
697 }
698
699 // @Deprecated
700 // public double getAccuracyOrTooWeakStandard(Description description, double minAccuracy) {
701 // // since we have to perform a retrieval operation anyway, we cannot easily
702 // // get a benefit from the accuracy limit
703 // double accuracy = getAccuracy(description);
704 // if(accuracy >= minAccuracy) {
705 // return accuracy;
706 // } else {
707 // return -1;
708 // }
709 // }
710
711 // please note that getting recall and precision wastes some computational
712 // resource, because both methods need to compute the covered instances
713 public double getRecall(Description description) {
714 int coveredInstances = 0;
715 for(Individual ind : classInstances) {
716 if(reasoner.hasType(description, ind)) {
717 coveredInstances++;
718 }
719 }
720 return coveredInstances/(double)classInstances.size();
721 }
722
723 public double getPrecision(Description description) {
724
725 int additionalInstances = 0;
726 for(Individual ind : superClassInstances) {
727 if(reasoner.hasType(description, ind)) {
728 additionalInstances++;
729 }
730 }
731
732 int coveredInstances = 0;
733 for(Individual ind : classInstances) {
734 if(reasoner.hasType(description, ind)) {
735 coveredInstances++;
736 }
737 }
738
739 return (additionalInstances + coveredInstances == 0) ? 0 : coveredInstances / (double) (coveredInstances + additionalInstances);
740 }
741
742 public double getPredictiveAccuracy() {
743 return 0;
744 }
745
746 // see http://sunsite.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-426/swap2008_submission_14.pdf
747 // for all methods below (currently dummies)
748 public double getMatchRate() {
749 return 0;
750 }
751
752 public double getOmissionError() {
753 return 0;
754 }
755
756 public double getInductionRate() {
757 return 0;
758 }
759
760 public double getComissionError() {
761 return 0;
762 }
763
764 public double getGeneralisedRecall() {
765 return 0;
766 }
767
768 public double getGeneralisedPrecision() {
769 return 0;
770 }
771
772 @SuppressWarnings("unused")
773 private double getInverseJaccardDistance(TreeSet<Individual> set1, TreeSet<Individual> set2) {
774 Set<Individual> intersection = Helper.intersection(set1, set2);
775 Set<Individual> union = Helper.union(set1, set2);
776 return 1 - (union.size() - intersection.size()) / (double) union.size();
777 }
778
779 // computes accuracy from coverage and protusion (changing this function may
780 // make it necessary to change the appoximation too) => not the case anymore
781 // private double getAccuracy(double recall, double precision) {
782 // return (coverageFactor * coverage + Math.sqrt(protusion)) / (coverageFactor + 1);
783 // log: changed from precision^^0.5 (root) to precision^^0.8 as the root is too optimistic in some cases
784 // return (coverageFactor * recall + Math.pow(precision, 0.8)) / (coverageFactor + 1);
785 // }
786
787 private double getFMeasure(double recall, double precision) {
788 // balanced F measure
789 // return (precision + recall == 0) ? 0 : 2 * precision * recall / (precision + recall);
790 // see e.g. http://en.wikipedia.org/wiki/F-measure
791 return (precision + recall == 0) ? 0 :
792 ( (1+Math.sqrt(coverageFactor)) * (precision * recall)
793 / (Math.sqrt(coverageFactor) * precision + recall) );
794 }
795
796 // see paper: expression used in confidence interval estimation
797 public static double p3(double p1, int total) {
798 return 1.96 * Math.sqrt(p1*(1-p1)/(total+4));
799 }
800
801 // see paper: expression used in confidence interval estimation
802 // private static double p2(int success, int total) {
803 // double p1 = p1(success, total);
804 // return 1.96 * Math.sqrt(p1*(1-p1)/(total+4));
805 // }
806
807 // see paper: p'
808 public static double p1(int success, int total) {
809 return (success+2)/(double)(total+4);
810 }
811
812 /**
813 * @return the classToDescribe
814 */
815 public NamedClass getClassToDescribe() {
816 return classToDescribe;
817 }
818
819 /* (non-Javadoc)
820 * @see org.dllearner.core.LearningProblem#evaluate(org.dllearner.core.owl.Description)
821 */
822 @Override
823 public EvaluatedDescriptionClass evaluate(Description description) {
824 ClassScore score = computeScore(description);
825 return new EvaluatedDescriptionClass(description, score);
826 }
827
828 /**
829 * @return the isConsistent
830 */
831 public boolean isConsistent(Description description) {
832 Axiom axiom;
833 if(equivalence) {
834 axiom = new EquivalentClassesAxiom(classToDescribe, description);
835 } else {
836 axiom = new SubClassAxiom(classToDescribe, description);
837 }
838 return reasoner.remainsSatisfiable(axiom);
839 }
840
841 public boolean followsFromKB(Description description) {
842 return equivalence ? reasoner.isEquivalentClass(description, classToDescribe) : reasoner.isSuperClassOf(description, classToDescribe);
843 }
844 }