001 /**
002 * Copyright (C) 2007-2011, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program. If not, see <http://www.gnu.org/licenses/>.
018 */
019
020 package org.dllearner.algorithms.celoe;
021
022 import java.io.File;
023 import java.text.DecimalFormat;
024 import java.util.Collection;
025 import java.util.Iterator;
026 import java.util.LinkedList;
027 import java.util.List;
028 import java.util.Map;
029 import java.util.Set;
030 import java.util.SortedSet;
031 import java.util.TreeSet;
032
033 import org.apache.log4j.Logger;
034 import org.dllearner.core.ComponentAnn;
035 import org.dllearner.core.ComponentInitException;
036 import org.dllearner.core.EvaluatedDescription;
037 import org.dllearner.core.AbstractCELA;
038 import org.dllearner.core.AbstractLearningProblem;
039 import org.dllearner.core.AbstractReasonerComponent;
040 import org.dllearner.core.configurators.CELOEConfigurator;
041 import org.dllearner.core.options.BooleanConfigOption;
042 import org.dllearner.core.options.CommonConfigMappings;
043 import org.dllearner.core.options.CommonConfigOptions;
044 import org.dllearner.core.options.ConfigOption;
045 import org.dllearner.core.options.DoubleConfigOption;
046 import org.dllearner.core.options.StringConfigOption;
047 import org.dllearner.core.owl.ClassHierarchy;
048 import org.dllearner.core.owl.Description;
049 import org.dllearner.core.owl.Individual;
050 import org.dllearner.core.owl.Intersection;
051 import org.dllearner.core.owl.NamedClass;
052 import org.dllearner.core.owl.Restriction;
053 import org.dllearner.core.owl.Thing;
054 import org.dllearner.learningproblems.ClassLearningProblem;
055 import org.dllearner.learningproblems.PosNegLP;
056 import org.dllearner.learningproblems.PosNegLPStandard;
057 import org.dllearner.learningproblems.PosOnlyLP;
058 import org.dllearner.refinementoperators.OperatorInverter;
059 import org.dllearner.refinementoperators.RefinementOperator;
060 import org.dllearner.refinementoperators.RhoDRDown;
061 import org.dllearner.utilities.Files;
062 import org.dllearner.utilities.Helper;
063 import org.dllearner.utilities.owl.ConceptComparator;
064 import org.dllearner.utilities.owl.ConceptTransformation;
065 import org.dllearner.utilities.owl.DescriptionMinimizer;
066 import org.dllearner.utilities.owl.EvaluatedDescriptionSet;
067 import org.dllearner.utilities.owl.PropertyContext;
068
069 import com.jamonapi.Monitor;
070 import com.jamonapi.MonitorFactory;
071
072 /**
073 * The CELOE (Class Expression Learner for Ontology Engineering) algorithm.
074 * It adapts and extends the standard supervised learning algorithm for the
075 * ontology engineering use case.
076 *
077 * @author Jens Lehmann
078 *
079 */
080 @ComponentAnn(name="CELOE", shortName="celoe", version=1.0, description="CELOE is an adapted and extended version of the OCEL algorithm applied for the ontology engineering use case. See http://jens-lehmann.org/files/2011/celoe.pdf for reference.")
081 public class CELOE extends AbstractCELA {
082
083 private static Logger logger = Logger.getLogger(CELOE.class);
084 private CELOEConfigurator configurator;
085
086 private boolean isRunning = false;
087 private boolean stop = false;
088
089 // private OEHeuristicStable heuristicStable = new OEHeuristicStable();
090 // private OEHeuristicRuntime heuristicRuntime = new OEHeuristicRuntime();
091
092 private RefinementOperator operator;
093 private DescriptionMinimizer minimizer;
094
095 // all nodes in the search tree (used for selecting most promising node)
096 private TreeSet<OENode> nodes;
097 private OEHeuristicRuntime heuristic; // = new OEHeuristicRuntime();
098 // root of search tree
099 private OENode startNode;
100 // the class with which we start the refinement process
101 private Description startClass;
102
103 // all descriptions in the search tree plus those which were too weak (for fast redundancy check)
104 private TreeSet<Description> descriptions;
105
106 private EvaluatedDescriptionSet bestEvaluatedDescriptions;
107
108 // if true, then each solution is evaluated exactly instead of approximately
109 // private boolean exactBestDescriptionEvaluation = false;
110 private boolean singleSuggestionMode;
111 private Description bestDescription;
112 private double bestAccuracy = Double.MIN_VALUE;
113
114 private NamedClass classToDescribe;
115 // examples are either 1.) instances of the class to describe 2.) positive examples
116 // 3.) union of pos.+neg. examples depending on the learning problem at hand
117 private Set<Individual> examples;
118
119 // CELOE was originally created for learning classes in ontologies, but also
120 // works for other learning problem types
121 private boolean isClassLearningProblem;
122 private boolean isEquivalenceProblem;
123
124 private long nanoStartTime;
125
126 // important parameters
127 private double noise;
128 private double maxDepth;
129 private boolean filterFollowsFromKB;
130
131 // less important parameters
132 // forces that one solution cannot be subexpression of another expression; this option is useful to get diversity
133 // but it can also suppress quite useful expressions
134 private boolean forceMutualDifference = false;
135
136 // utility variables
137 private String baseURI;
138 private Map<String, String> prefixes;
139 private DecimalFormat dfPercent = new DecimalFormat("0.00%");
140 private ConceptComparator descriptionComparator = new ConceptComparator();
141
142 // statistical variables
143 private int expressionTests = 0;
144 private int minHorizExp = 0;
145 private int maxHorizExp = 0;
146
147 @Override
148 public CELOEConfigurator getConfigurator() {
149 return configurator;
150 }
151
152 public CELOE(AbstractLearningProblem problem, AbstractReasonerComponent reasoner) {
153 super(problem, reasoner);
154 configurator = new CELOEConfigurator(this);
155 }
156
157 public static Collection<Class<? extends AbstractLearningProblem>> supportedLearningProblems() {
158 Collection<Class<? extends AbstractLearningProblem>> problems = new LinkedList<Class<? extends AbstractLearningProblem>>();
159 problems.add(AbstractLearningProblem.class);
160 return problems;
161 }
162
163 public static Collection<ConfigOption<?>> createConfigOptions() {
164 Collection<ConfigOption<?>> options = new LinkedList<ConfigOption<?>>();
165 options.add(CommonConfigOptions.useAllConstructor());
166 options.add(CommonConfigOptions.useExistsConstructor());
167 options.add(CommonConfigOptions.useHasValueConstructor());
168 options.add(CommonConfigOptions.useDataHasValueConstructor());
169 options.add(CommonConfigOptions.valueFreqencyThreshold());
170 options.add(CommonConfigOptions.useCardinalityRestrictions());
171 options.add(CommonConfigOptions.cardinalityLimit());
172 // by default, we do not use negation (should be configurable in GUI)
173 options.add(CommonConfigOptions.useNegation(false));
174 options.add(CommonConfigOptions.useBooleanDatatypes());
175 options.add(CommonConfigOptions.useDoubleDatatypes());
176 options.add(CommonConfigOptions.maxExecutionTimeInSeconds(10));
177 options.add(CommonConfigOptions.getNoisePercentage());
178 options.add(CommonConfigOptions.getTerminateOnNoiseReached(false));
179 options.add(CommonConfigOptions.getMaxDepth(7));
180 options.add(CommonConfigOptions.maxNrOfResults(10));
181 options.add(CommonConfigOptions.maxClassDescriptionTests());
182 options.add(new BooleanConfigOption("singleSuggestionMode", "Use this if you are interested in only one suggestion and your learning problem has many (more than 1000) examples.", false));
183 options.add(CommonConfigOptions.getInstanceBasedDisjoints());
184 options.add(new BooleanConfigOption("filterDescriptionsFollowingFromKB", "If true, then the results will not contain suggestions, which already follow logically from the knowledge base. Be careful, since this requires a potentially expensive consistency check for candidate solutions.", false));
185 options.add(new BooleanConfigOption("reuseExistingDescription", "If true, the algorithm tries to find a good starting point close to an existing definition/super class of the given class in the knowledge base.", false));
186 options.add(new BooleanConfigOption("writeSearchTree", "specifies whether to write a search tree", false));
187 options.add(new StringConfigOption("searchTreeFile","file to use for the search tree", "log/searchTree.txt"));
188 options.add(new BooleanConfigOption("replaceSearchTree","specifies whether to replace the search tree in the log file after each run or append the new search tree", false));
189 options.add(new DoubleConfigOption("expansionPenaltyFactor","heuristic penalty per syntactic construct used (lower = finds more complex expression, but might miss simple ones)", 0.1));
190 options.add(CommonConfigOptions.allowedConcepts());
191 options.add(CommonConfigOptions.ignoredConcepts());
192 return options;
193 }
194
195 public static String getName() {
196 return "CELOE";
197 }
198
199 @Override
200 public void init() throws ComponentInitException {
201
202 // compute used concepts/roles from allowed/ignored
203 // concepts/roles
204 Set<NamedClass> usedConcepts;
205 Set<NamedClass> allowedConcepts = configurator.getAllowedConcepts()==null ? null : CommonConfigMappings.getAtomicConceptSet(configurator.getAllowedConcepts());
206 Set<NamedClass> ignoredConcepts = configurator.getIgnoredConcepts()==null ? null : CommonConfigMappings.getAtomicConceptSet(configurator.getIgnoredConcepts());
207 if(allowedConcepts != null) {
208 // sanity check to control if no non-existing concepts are in the list
209 Helper.checkConcepts(reasoner, allowedConcepts);
210 usedConcepts = allowedConcepts;
211 } else if(ignoredConcepts != null) {
212 usedConcepts = Helper.computeConceptsUsingIgnoreList(reasoner, ignoredConcepts);
213 } else {
214 usedConcepts = Helper.computeConcepts(reasoner);
215 }
216
217 // copy class hierarchy and modify it such that each class is only
218 // reachable via a single path
219 // ClassHierarchy classHierarchy = reasoner.getClassHierarchy().clone();
220 ClassHierarchy classHierarchy = reasoner.getClassHierarchy().cloneAndRestrict(usedConcepts);
221 classHierarchy.thinOutSubsumptionHierarchy();
222
223 heuristic = new OEHeuristicRuntime(configurator);
224
225 minimizer = new DescriptionMinimizer(reasoner);
226
227 startClass = Thing.instance;
228
229 singleSuggestionMode = configurator.getSingleSuggestionMode();
230
231 // create refinement operator
232 operator = new RhoDRDown(reasoner, classHierarchy, startClass, configurator);
233 baseURI = reasoner.getBaseURI();
234 prefixes = reasoner.getPrefixes();
235 if(configurator.getWriteSearchTree()) {
236 File f = new File(configurator.getSearchTreeFile());
237 // System.out.println(f.getAbsolutePath());
238 Files.clearFile(f);
239 }
240
241 bestEvaluatedDescriptions = new EvaluatedDescriptionSet(configurator.getMaxNrOfResults());
242
243 isClassLearningProblem = (learningProblem instanceof ClassLearningProblem);
244
245 // we put important parameters in class variables
246 noise = configurator.getNoisePercentage()/100d;
247 // System.out.println("noise " + noise);
248 maxDepth = configurator.getMaxDepth();
249 // (filterFollowsFromKB is automatically set to false if the problem
250 // is not a class learning problem
251 filterFollowsFromKB = configurator.getFilterDescriptionsFollowingFromKB()
252 && isClassLearningProblem;
253
254 // actions specific to ontology engineering
255 if(isClassLearningProblem) {
256 ClassLearningProblem problem = (ClassLearningProblem) learningProblem;
257 classToDescribe = problem.getClassToDescribe();
258 isEquivalenceProblem = problem.isEquivalenceProblem();
259
260 examples = reasoner.getIndividuals(classToDescribe);
261
262 // start class: intersection of super classes for definitions (since it needs to
263 // capture all instances), but owl:Thing for learning subclasses (since it is
264 // superfluous to add super classes in this case)
265 if(isEquivalenceProblem) {
266 Set<Description> existingDefinitions = reasoner.getAssertedDefinitions(classToDescribe);
267 if(configurator.getReuseExistingDescription() && (existingDefinitions.size() > 0)) {
268 // the existing definition is reused, which in the simplest case means to
269 // use it as a start class or, if it is already too specific, generalise it
270
271 // pick the longest existing definition as candidate
272 Description existingDefinition = null;
273 int highestLength = 0;
274 for(Description exDef : existingDefinitions) {
275 if(exDef.getLength() > highestLength) {
276 existingDefinition = exDef;
277 highestLength = exDef.getLength();
278 }
279 }
280
281 LinkedList<Description> startClassCandidates = new LinkedList<Description>();
282 startClassCandidates.add(existingDefinition);
283 ((RhoDRDown)operator).setDropDisjuncts(true);
284 RefinementOperator upwardOperator = new OperatorInverter(operator);
285
286 // use upward refinement until we find an appropriate start class
287 boolean startClassFound = false;
288 Description candidate;
289 do {
290 candidate = startClassCandidates.pollFirst();
291 if(((ClassLearningProblem)learningProblem).getRecall(candidate)<1.0) {
292 // add upward refinements to list
293 Set<Description> refinements = upwardOperator.refine(candidate, candidate.getLength());
294 // System.out.println("ref: " + refinements);
295 LinkedList<Description> refinementList = new LinkedList<Description>(refinements);
296 // Collections.reverse(refinementList);
297 // System.out.println("list: " + refinementList);
298 startClassCandidates.addAll(refinementList);
299 // System.out.println("candidates: " + startClassCandidates);
300 } else {
301 startClassFound = true;
302 }
303 } while(!startClassFound);
304 startClass = candidate;
305
306 if(startClass.equals(existingDefinition)) {
307 logger.info("Reusing existing description " + startClass.toManchesterSyntaxString(baseURI, prefixes) + " as start class for learning algorithm.");
308 } else {
309 logger.info("Generalised existing description " + existingDefinition.toManchesterSyntaxString(baseURI, prefixes) + " to " + startClass.toManchesterSyntaxString(baseURI, prefixes) + ", which is used as start class for the learning algorithm.");
310 }
311
312 // System.out.println("start class: " + startClass);
313 // System.out.println("existing def: " + existingDefinition);
314 // System.out.println(reasoner.getIndividuals(existingDefinition));
315
316 ((RhoDRDown)operator).setDropDisjuncts(false);
317
318 } else {
319 Set<Description> superClasses = reasoner.getClassHierarchy().getSuperClasses(classToDescribe);
320 if(superClasses.size() > 1) {
321 startClass = new Intersection(new LinkedList<Description>(superClasses));
322 } else if(superClasses.size() == 1){
323 startClass = (Description) superClasses.toArray()[0];
324 } else {
325 startClass = Thing.instance;
326 logger.warn(classToDescribe + " is equivalent to owl:Thing. Usually, it is not " +
327 "sensible to learn a description in this case.");
328 }
329 }
330 }
331 } else if(learningProblem instanceof PosOnlyLP) {
332 examples = ((PosOnlyLP)learningProblem).getPositiveExamples();
333 } else if(learningProblem instanceof PosNegLP) {
334 examples = Helper.union(((PosNegLP)learningProblem).getPositiveExamples(),((PosNegLP)learningProblem).getNegativeExamples());
335 }
336 }
337
338 @Override
339 public Description getCurrentlyBestDescription() {
340 EvaluatedDescription ed = getCurrentlyBestEvaluatedDescription();
341 return ed == null ? null : ed.getDescription();
342 }
343
344 @Override
345 public List<Description> getCurrentlyBestDescriptions() {
346 return bestEvaluatedDescriptions.toDescriptionList();
347 }
348
349 @Override
350 public EvaluatedDescription getCurrentlyBestEvaluatedDescription() {
351 return bestEvaluatedDescriptions.getBest();
352 }
353
354 @Override
355 public TreeSet<? extends EvaluatedDescription> getCurrentlyBestEvaluatedDescriptions() {
356 return bestEvaluatedDescriptions.getSet();
357 }
358
359 public double getCurrentlyBestAccuracy() {
360 return bestEvaluatedDescriptions.getBest().getAccuracy();
361 }
362
363 @Override
364 public void start() {
365 // System.out.println(configurator.getMaxExecutionTimeInSeconds());
366
367 stop = false;
368 isRunning = true;
369 reset();
370 nanoStartTime = System.nanoTime();
371
372 // highest accuracy so far
373 double highestAccuracy = 0.0;
374 OENode nextNode;
375
376 addNode(startClass, null);
377
378 int loop = 0;
379 while (!terminationCriteriaSatisfied()) {
380 // System.out.println("loop " + loop);
381
382 if(!singleSuggestionMode && bestEvaluatedDescriptions.getBestAccuracy() > highestAccuracy) {
383 highestAccuracy = bestEvaluatedDescriptions.getBestAccuracy();
384 logger.info("more accurate (" + dfPercent.format(highestAccuracy) + ") class expression found: " + descriptionToString(bestEvaluatedDescriptions.getBest().getDescription()));
385 }
386
387 // chose best node according to heuristics
388 nextNode = getNextNodeToExpand();
389 int horizExp = nextNode.getHorizontalExpansion();
390
391 // apply operator
392 Monitor mon = MonitorFactory.start("refineNode");
393 TreeSet<Description> refinements = refineNode(nextNode);
394 mon.stop();
395
396 // System.out.println("next node: " + nextNode);
397 // for(Description refinement : refinements) {
398 // System.out.println("refinement: " + refinement);
399 // }
400
401 while(refinements.size() != 0) {
402 // pick element from set
403 Description refinement = refinements.pollFirst();
404 int length = refinement.getLength();
405
406 // we ignore all refinements with lower length and too high depth
407 // (this also avoids duplicate node children)
408 if(length > horizExp && refinement.getDepth() <= maxDepth) {
409
410 // System.out.println("potentially adding " + refinement + " to search tree as child of " + nextNode + " " + new Date());
411 Monitor mon2 = MonitorFactory.start("addNode");
412 addNode(refinement, nextNode);
413 mon2.stop();
414 // adding nodes is potentially computationally expensive, so we have
415 // to check whether max time is exceeded
416 if(terminationCriteriaSatisfied()) {
417 break;
418 }
419 // System.out.println("addNode finished" + " " + new Date());
420 }
421
422 // System.out.println(" refinement queue length: " + refinements.size());
423 }
424
425 updateMinMaxHorizExp(nextNode);
426
427 // writing the search tree (if configured)
428 if (configurator.getWriteSearchTree()) {
429 String treeString = "best node: " + bestEvaluatedDescriptions.getBest() + "\n";
430 if (refinements.size() > 1) {
431 treeString += "all expanded nodes:\n";
432 for (Description n : refinements) {
433 treeString += " " + n + "\n";
434 }
435 }
436 treeString += startNode.toTreeString(baseURI);
437 treeString += "\n";
438
439 if (configurator.getReplaceSearchTree())
440 Files.createFile(new File(configurator.getSearchTreeFile()), treeString);
441 else
442 Files.appendFile(new File(configurator.getSearchTreeFile()), treeString);
443 }
444
445 // System.out.println(loop);
446 loop++;
447 }
448
449 if (stop) {
450 logger.info("Algorithm stopped ("+expressionTests+" descriptions tested). " + nodes.size() + " nodes in the search tree.\n");
451 } else {
452 logger.info("Algorithm terminated successfully ("+expressionTests+" descriptions tested). " + nodes.size() + " nodes in the search tree.\n");
453 }
454
455 if(singleSuggestionMode) {
456 bestEvaluatedDescriptions.add(bestDescription, bestAccuracy, learningProblem);
457 }
458
459 // print solution(s)
460 logger.info("solutions:\n" + getSolutionString());
461
462 // System.out.println(startNode.toTreeString(baseURI));
463
464 isRunning = false;
465 // System.out.println("isRunning: " + isRunning);
466 }
467
468 private OENode getNextNodeToExpand() {
469 // we expand the best node of those, which have not achieved 100% accuracy
470 // already and have a horizontal expansion equal to their length
471 // (rationale: further extension is likely to add irrelevant syntactical constructs)
472 Iterator<OENode> it = nodes.descendingIterator();
473 while(it.hasNext()) {
474 OENode node = it.next();
475 if(node.getAccuracy() < 1.0 || node.getHorizontalExpansion() < node.getDescription().getLength()) {
476 return node;
477 }
478 }
479
480 // this should practically never be called, since for any reasonable learning
481 // task, we will always have at least one node with less than 100% accuracy
482 return nodes.last();
483 }
484
485 // expand node horizontically
486 private TreeSet<Description> refineNode(OENode node) {
487 // we have to remove and add the node since its heuristic evaluation changes through the expansion
488 // (you *must not* include any criteria in the heuristic which are modified outside of this method,
489 // otherwise you may see rarely occurring but critical false ordering in the nodes set)
490 nodes.remove(node);
491 // System.out.println("refining: " + node);
492 int horizExp = node.getHorizontalExpansion();
493 TreeSet<Description> refinements = (TreeSet<Description>) operator.refine(node.getDescription(), horizExp+1);
494 node.incHorizontalExpansion();
495 node.setRefinementCount(refinements.size());
496 nodes.add(node);
497 return refinements;
498 }
499
500 // add node to search tree if it is not too weak
501 // returns true if node was added and false otherwise
502 private boolean addNode(Description description, OENode parentNode) {
503
504 // System.out.println(description);
505
506 // redundancy check (return if redundant)
507 boolean nonRedundant = descriptions.add(description);
508 if(!nonRedundant) {
509 return false;
510 }
511
512 // check whether the description is allowed
513 if(!isDescriptionAllowed(description, parentNode)) {
514 return false;
515 }
516
517 // System.out.println("Test " + new Date());
518 // quality of description (return if too weak)
519 double accuracy = learningProblem.getAccuracyOrTooWeak(description, noise);
520 // issue a warning if accuracy is not between 0 and 1 or -1 (too weak)
521 if(accuracy > 1.0 || (accuracy < 0.0 && accuracy != -1)) {
522 logger.warn("Invalid accuracy value " + accuracy + " for description " + description + ". This could be caused by a bug in the heuristic measure and should be reported to the DL-Learner bug tracker.");
523 System.exit(0);
524 }
525
526 // System.out.println("Test2 " + new Date());
527 expressionTests++;
528 // System.out.println("acc: " + accuracy);
529 // System.out.println(description + " " + accuracy);
530 if(accuracy == -1) {
531 return false;
532 }
533
534 OENode node = new OENode(parentNode, description, accuracy);
535
536 // link to parent (unless start node)
537 if(parentNode == null) {
538 startNode = node;
539 } else {
540 parentNode.addChild(node);
541 }
542
543 nodes.add(node);
544 // System.out.println("Test3 " + new Date());
545
546 // in some cases (e.g. mutation) fully evaluating even a single description is too expensive
547 // due to the high number of examples -- so we just stick to the approximate accuracy
548 if(singleSuggestionMode) {
549 if(accuracy > bestAccuracy) {
550 bestAccuracy = accuracy;
551 bestDescription = description;
552 logger.info("more accurate (" + dfPercent.format(bestAccuracy) + ") class expression found: " + descriptionToString(bestDescription)); // + getTemporaryString(bestDescription));
553 }
554 return true;
555 }
556
557 // System.out.println("description " + description + " accuracy " + accuracy);
558
559 // maybe add to best descriptions (method keeps set size fixed);
560 // we need to make sure that this does not get called more often than
561 // necessary since rewriting is expensive
562 boolean isCandidate = !bestEvaluatedDescriptions.isFull();
563 if(!isCandidate) {
564 EvaluatedDescription worst = bestEvaluatedDescriptions.getWorst();
565 double accThreshold = worst.getAccuracy();
566 isCandidate =
567 (accuracy > accThreshold ||
568 (accuracy >= accThreshold && description.getLength() < worst.getDescriptionLength()));
569 }
570
571 // System.out.println(isCandidate);
572
573 // System.out.println("Test4 " + new Date());
574 if(isCandidate) {
575
576 Description niceDescription = rewriteNode(node);
577 ConceptTransformation.transformToOrderedForm(niceDescription, descriptionComparator);
578 // Description niceDescription = node.getDescription();
579
580 // another test: none of the other suggested descriptions should be
581 // a subdescription of this one unless accuracy is different
582 // => comment: on the one hand, this appears to be too strict, because once A is a solution then everything containing
583 // A is not a candidate; on the other hand this suppresses many meaningless extensions of A
584 boolean shorterDescriptionExists = false;
585 if(forceMutualDifference) {
586 for(EvaluatedDescription ed : bestEvaluatedDescriptions.getSet()) {
587 if(Math.abs(ed.getAccuracy()-accuracy) <= 0.00001 && ConceptTransformation.isSubdescription(niceDescription, ed.getDescription())) {
588 // System.out.println("shorter: " + ed.getDescription());
589 shorterDescriptionExists = true;
590 break;
591 }
592 }
593 }
594
595 // System.out.println("shorter description? " + shorterDescriptionExists + " nice: " + niceDescription);
596
597 if(!shorterDescriptionExists) {
598 if(!filterFollowsFromKB || !((ClassLearningProblem)learningProblem).followsFromKB(niceDescription)) {
599 // System.out.println("Test2");
600 bestEvaluatedDescriptions.add(niceDescription, accuracy, learningProblem);
601 // System.out.println("acc: " + accuracy);
602 // System.out.println(bestEvaluatedDescriptions);
603 }
604 }
605
606 // System.out.println(bestEvaluatedDescriptions.getSet().size());
607 }
608
609 // System.out.println("Test5 " + new Date());
610 // System.out.println("best evaluated descriptions size: " + bestEvaluatedDescriptions.size() + " worst: " + bestEvaluatedDescriptions.getWorst());
611 return true;
612 }
613
614 // checks whether the description is allowed
615 private boolean isDescriptionAllowed(Description description, OENode parentNode) {
616 if(isClassLearningProblem) {
617 if(isEquivalenceProblem) {
618 // the class to learn must not appear on the outermost property level
619 if(occursOnFirstLevel(description, classToDescribe)) {
620 return false;
621 }
622 } else {
623 // none of the superclasses of the class to learn must appear on the
624 // outermost property level
625 TreeSet<Description> toTest = new TreeSet<Description>(descriptionComparator);
626 toTest.add(classToDescribe);
627 while(!toTest.isEmpty()) {
628 Description d = toTest.pollFirst();
629 if(occursOnFirstLevel(description, d)) {
630 return false;
631 }
632 toTest.addAll(reasoner.getClassHierarchy().getSuperClasses(d));
633 }
634 }
635 }
636
637 // perform forall sanity tests
638 if(parentNode != null && ConceptTransformation.getForallOccurences(description) > ConceptTransformation.getForallOccurences(parentNode.getDescription())) {
639 // we have an additional \forall construct, so we now fetch the contexts
640 // in which it occurs
641 SortedSet<PropertyContext> contexts = ConceptTransformation.getForallContexts(description);
642 SortedSet<PropertyContext> parentContexts = ConceptTransformation.getForallContexts(parentNode.getDescription());
643 contexts.removeAll(parentContexts);
644 // System.out.println("parent description: " + parentNode.getDescription());
645 // System.out.println("description: " + description);
646 // System.out.println("contexts: " + contexts);
647 // we now have to perform sanity checks: if \forall is used, then there
648 // should be at least on class instance which has a filler at the given context
649 for(PropertyContext context : contexts) {
650 // transform [r,s] to \exists r.\exists s.\top
651 Description existentialContext = context.toExistentialContext();
652 boolean fillerFound = false;
653 for(Individual instance : examples) {
654 if(reasoner.hasType(existentialContext, instance)) {
655 // System.out.println(instance + " " + existentialContext);
656 fillerFound = true;
657 break;
658 }
659 }
660 // if we do not find a filler, this means that putting \forall at
661 // that position is not meaningful
662 if(!fillerFound) {
663 return false;
664 }
665 }
666 }
667
668 // we do not want to have negations of sibling classes on the outermost level
669 // (they are expressed more naturally by saying that the siblings are disjoint,
670 // so it is reasonable not to include them in solutions)
671 // Set<Description> siblingClasses = reasoner.getClassHierarchy().getSiblingClasses(classToDescribe);
672 // for now, we just disable negation
673
674 return true;
675 }
676
677 // determine whether a named class occurs on the outermost level, i.e. property depth 0
678 // (it can still be at higher depth, e.g. if intersections are nested in unions)
679 private boolean occursOnFirstLevel(Description description, Description clazz) {
680 if(description instanceof NamedClass) {
681 if(description.equals(clazz)) {
682 return true;
683 }
684 }
685
686 if(description instanceof Restriction) {
687 return false;
688 }
689
690 for(Description child : description.getChildren()) {
691 if(occursOnFirstLevel(child, clazz)) {
692 return true;
693 }
694 }
695
696 return false;
697 }
698
699 // check whether the node is a potential solution candidate
700 private Description rewriteNode(OENode node) {
701 Description description = node.getDescription();
702 // minimize description (expensive!) - also performes some human friendly rewrites
703 Description niceDescription = minimizer.minimizeClone(description);
704 // replace \exists r.\top with \exists r.range(r) which is easier to read for humans
705 ConceptTransformation.replaceRange(niceDescription, reasoner);
706 return niceDescription;
707 }
708
709 private boolean terminationCriteriaSatisfied() {
710 return
711 stop ||
712 (configurator.getMaxClassDescriptionTests() != 0 && (expressionTests >= configurator.getMaxClassDescriptionTests())) ||
713 (configurator.getMaxExecutionTimeInSeconds() != 0 && ((System.nanoTime() - nanoStartTime) >= (configurator.getMaxExecutionTimeInSeconds()*1000000000l))) ||
714 (configurator.getTerminateOnNoiseReached() && (100*getCurrentlyBestAccuracy()>100-configurator.getNoisePercentage()));
715 }
716
717 private void reset() {
718 // set all values back to their default values (used for running
719 // the algorithm more than once)
720 nodes = new TreeSet<OENode>(heuristic);
721 descriptions = new TreeSet<Description>(new ConceptComparator());
722 bestEvaluatedDescriptions.getSet().clear();
723 expressionTests = 0;
724 }
725
726 @Override
727 public boolean isRunning() {
728 return isRunning;
729 }
730
731 @Override
732 public void stop() {
733 stop = true;
734 }
735
736 public OENode getSearchTreeRoot() {
737 return startNode;
738 }
739
740 // central function for printing description
741 private String descriptionToString(Description description) {
742 return description.toManchesterSyntaxString(baseURI, prefixes);
743 }
744
745 @SuppressWarnings("unused")
746 private String bestDescriptionToString() {
747 EvaluatedDescription best = bestEvaluatedDescriptions.getBest();
748 return best.getDescription().toManchesterSyntaxString(baseURI, prefixes) + " (accuracy: " + dfPercent.format(best.getAccuracy()) + ")";
749 }
750
751 private String getSolutionString() {
752 int current = 1;
753 String str = "";
754 for(EvaluatedDescription ed : bestEvaluatedDescriptions.getSet().descendingSet()) {
755 // temporary code
756 if(learningProblem instanceof PosNegLPStandard) {
757 str += current + ": " + descriptionToString(ed.getDescription()) + " (pred. acc.: " + dfPercent.format(((PosNegLPStandard)learningProblem).getPredAccuracyOrTooWeakExact(ed.getDescription(),1)) + ", F-measure: "+ dfPercent.format(((PosNegLPStandard)learningProblem).getFMeasureOrTooWeakExact(ed.getDescription(),1)) + ")\n";
758 } else {
759 str += current + ": " + descriptionToString(ed.getDescription()) + " " + dfPercent.format(ed.getAccuracy()) + "\n";
760 // System.out.println(ed);
761 }
762 current++;
763 }
764 return str;
765 }
766
767 // private String getTemporaryString(Description description) {
768 // return descriptionToString(description) + " (pred. acc.: " + dfPercent.format(((PosNegLPStandard)learningProblem).getPredAccuracyOrTooWeakExact(description,1)) + ", F-measure: "+ dfPercent.format(((PosNegLPStandard)learningProblem).getFMeasureOrTooWeakExact(description,1)) + ")";
769 // }
770
771 private void updateMinMaxHorizExp(OENode node) {
772 int newHorizExp = node.getHorizontalExpansion();
773
774 // update maximum value
775 maxHorizExp = Math.max(maxHorizExp, newHorizExp);
776
777 // we just expanded a node with minimum horizontal expansion;
778 // we need to check whether it was the last one
779 if(minHorizExp == newHorizExp - 1) {
780
781 // the best accuracy that a node can achieve
782 double scoreThreshold = heuristic.getNodeScore(node) + 1 - node.getAccuracy();
783
784 for(OENode n : nodes.descendingSet()) {
785 if(n != node) {
786 if(n.getHorizontalExpansion() == minHorizExp) {
787 // we can stop instantly when another node with min.
788 return;
789 }
790 if(heuristic.getNodeScore(n) < scoreThreshold) {
791 // we can stop traversing nodes when their score is too low
792 break;
793 }
794 }
795 }
796
797 // inc. minimum since we found no other node which also has min. horiz. exp.
798 minHorizExp++;
799
800 // System.out.println("minimum horizontal expansion is now " + minHorizExp);
801 }
802 }
803
804 public int getMaximumHorizontalExpansion() {
805 return maxHorizExp;
806 }
807
808 public int getMinimumHorizontalExpansion() {
809 return minHorizExp;
810 }
811
812 /**
813 * @return the expressionTests
814 */
815 public int getClassExpressionTests() {
816 return expressionTests;
817 }
818
819 }