001 /**
002 * Copyright (C) 2007-2011, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program. If not, see <http://www.gnu.org/licenses/>.
018 */
019
020 package org.dllearner.refinementoperators;
021
022 import java.util.Collection;
023 import java.util.Collections;
024 import java.util.HashMap;
025 import java.util.HashSet;
026 import java.util.Iterator;
027 import java.util.LinkedList;
028 import java.util.List;
029 import java.util.Map;
030 import java.util.Set;
031 import java.util.SortedSet;
032 import java.util.TreeMap;
033 import java.util.TreeSet;
034 import java.util.Map.Entry;
035
036 import org.apache.log4j.Logger;
037 import org.dllearner.core.AbstractReasonerComponent;
038 import org.dllearner.core.configurators.OCELConfigurator;
039 import org.dllearner.core.configurators.RefinementOperatorConfigurator;
040 import org.dllearner.core.options.CommonConfigOptions;
041 import org.dllearner.core.owl.BooleanValueRestriction;
042 import org.dllearner.core.owl.ClassHierarchy;
043 import org.dllearner.core.owl.Constant;
044 import org.dllearner.core.owl.DataRange;
045 import org.dllearner.core.owl.DatatypeProperty;
046 import org.dllearner.core.owl.DatatypeSomeRestriction;
047 import org.dllearner.core.owl.Description;
048 import org.dllearner.core.owl.DoubleMaxValue;
049 import org.dllearner.core.owl.DoubleMinValue;
050 import org.dllearner.core.owl.Individual;
051 import org.dllearner.core.owl.Intersection;
052 import org.dllearner.core.owl.NamedClass;
053 import org.dllearner.core.owl.Negation;
054 import org.dllearner.core.owl.Nothing;
055 import org.dllearner.core.owl.ObjectAllRestriction;
056 import org.dllearner.core.owl.ObjectCardinalityRestriction;
057 import org.dllearner.core.owl.ObjectMaxCardinalityRestriction;
058 import org.dllearner.core.owl.ObjectMinCardinalityRestriction;
059 import org.dllearner.core.owl.ObjectProperty;
060 import org.dllearner.core.owl.ObjectPropertyExpression;
061 import org.dllearner.core.owl.ObjectQuantorRestriction;
062 import org.dllearner.core.owl.ObjectSomeRestriction;
063 import org.dllearner.core.owl.ObjectValueRestriction;
064 import org.dllearner.core.owl.StringValueRestriction;
065 import org.dllearner.core.owl.Thing;
066 import org.dllearner.core.owl.Union;
067 import org.dllearner.utilities.Helper;
068 import org.dllearner.utilities.owl.ConceptComparator;
069 import org.dllearner.utilities.owl.ConceptTransformation;
070
071 /**
072 * A downward refinement operator, which makes use of domains
073 * and ranges of properties. The operator is currently under
074 * development. Its aim is to span a much "cleaner" and smaller search
075 * tree compared to RhoDown by omitting many class descriptions,
076 * which are obviously too weak, because they violate
077 * domain/range restrictions. Furthermore, it makes use of disjoint
078 * classes in the knowledge base.
079 *
080 * TODO Some of the code has moved to {@link Utility} in a modified
081 * form to make it accessible for implementations of other refinement
082 * operators. These utility methods may be completed and carefully
083 * integrated back later.
084 *
085 * @author Jens Lehmann
086 *
087 */
088 public class RhoDRDown extends RefinementOperatorAdapter {
089
090 private static Logger logger = Logger
091 .getLogger(RhoDRDown.class);
092
093 private AbstractReasonerComponent rs;
094
095 // hierarchies
096 private ClassHierarchy subHierarchy;
097
098 // domains and ranges
099 private Map<ObjectProperty,Description> opDomains = new TreeMap<ObjectProperty,Description>();
100 private Map<DatatypeProperty,Description> dpDomains = new TreeMap<DatatypeProperty,Description>();
101 private Map<ObjectProperty,Description> opRanges = new TreeMap<ObjectProperty,Description>();
102
103 // maximum number of fillers for eeach role
104 private Map<ObjectProperty,Integer> maxNrOfFillers = new TreeMap<ObjectProperty,Integer>();
105 // limit for cardinality restrictions (this makes sense if we e.g. have compounds with up to
106 // more than 200 atoms but we are only interested in atoms with certain characteristics and do
107 // not want something like e.g. >= 204 hasAtom.NOT Carbon-87; which blows up the search space
108 private int cardinalityLimit = 5;
109
110 // start concept (can be used to start from an arbitrary concept, needs
111 // to be Thing or NamedClass), note that when you use e.g. Compound as
112 // start class, then the algorithm should start the search with class
113 // Compound (and not with Thing), because otherwise concepts like
114 // NOT Carbon-87 will be returned which itself is not a subclass of Compound
115 private Description startClass = new Thing();
116
117 // the length of concepts of top refinements, the first values is
118 // for refinements of \rho_\top(\top), the second one for \rho_A(\top)
119 private int topRefinementsLength = 0;
120 private Map<NamedClass, Integer> topARefinementsLength = new TreeMap<NamedClass, Integer>();
121 // M is finite and this value is the maximum length of any value in M
122 private static int mMaxLength = 4;
123
124 // the sets M_\top and M_A
125 private Map<Integer,SortedSet<Description>> m = new TreeMap<Integer,SortedSet<Description>>();
126 private Map<NamedClass,Map<Integer,SortedSet<Description>>> mA = new TreeMap<NamedClass,Map<Integer,SortedSet<Description>>>();
127
128 // @see MathOperations.getCombos
129 private Map<Integer, List<List<Integer>>> combos = new HashMap<Integer, List<List<Integer>>>();
130
131 // refinements of the top concept ordered by length
132 private Map<Integer, SortedSet<Description>> topRefinements = new TreeMap<Integer, SortedSet<Description>>();
133 private Map<NamedClass,Map<Integer, SortedSet<Description>>> topARefinements = new TreeMap<NamedClass,Map<Integer, SortedSet<Description>>>();
134
135 // cumulated refinements of top (all from length one to the specified length)
136 private Map<Integer, TreeSet<Description>> topRefinementsCumulative = new HashMap<Integer, TreeSet<Description>>();
137 private Map<NamedClass,Map<Integer, TreeSet<Description>>> topARefinementsCumulative = new TreeMap<NamedClass,Map<Integer, TreeSet<Description>>>();
138
139 // app_A set of applicable properties for a given class (separate for
140 // object properties, boolean datatypes, and double datatypes)
141 private Map<NamedClass, Set<ObjectProperty>> appOP = new TreeMap<NamedClass, Set<ObjectProperty>>();
142 private Map<NamedClass, Set<DatatypeProperty>> appBD = new TreeMap<NamedClass, Set<DatatypeProperty>>();
143 private Map<NamedClass, Set<DatatypeProperty>> appDD = new TreeMap<NamedClass, Set<DatatypeProperty>>();
144
145 // most general applicable properties
146 private Map<NamedClass,Set<ObjectProperty>> mgr = new TreeMap<NamedClass,Set<ObjectProperty>>();
147 private Map<NamedClass,Set<DatatypeProperty>> mgbd = new TreeMap<NamedClass,Set<DatatypeProperty>>();
148 private Map<NamedClass,Set<DatatypeProperty>> mgdd = new TreeMap<NamedClass,Set<DatatypeProperty>>();
149 private Map<NamedClass,Set<DatatypeProperty>> mgsd = new TreeMap<NamedClass,Set<DatatypeProperty>>();
150
151 // concept comparator
152 private ConceptComparator conceptComparator = new ConceptComparator();
153
154 // splits for double datatype properties in ascening order
155 private Map<DatatypeProperty,List<Double>> splits = new TreeMap<DatatypeProperty,List<Double>>();
156 private int maxNrOfSplits = 10;
157
158 // data structure for a simple frequent pattern matching preprocessing phase
159 private int frequencyThreshold = CommonConfigOptions.valueFrequencyThresholdDefault;
160 private Map<ObjectProperty, Map<Individual, Integer>> valueFrequency = new HashMap<ObjectProperty, Map<Individual, Integer>>();
161 // data structure with identified frequent values
162 private Map<ObjectProperty, Set<Individual>> frequentValues = new HashMap<ObjectProperty, Set<Individual>>();
163 // frequent data values
164 private Map<DatatypeProperty, Set<Constant>> frequentDataValues = new HashMap<DatatypeProperty, Set<Constant>>();
165 private Map<DatatypeProperty, Map<Constant, Integer>> dataValueFrequency = new HashMap<DatatypeProperty, Map<Constant, Integer>>();
166 private boolean useDataHasValueConstructor = false;
167
168 // staistics
169 public long mComputationTimeNs = 0;
170 public long topComputationTimeNs = 0;
171
172 private boolean applyAllFilter = true;
173 private boolean applyExistsFilter = true;
174 private boolean useAllConstructor = true;
175 private boolean useExistsConstructor = true;
176 private boolean useHasValueConstructor = false;
177 private boolean useCardinalityRestrictions = true;
178 private boolean useNegation = true;
179 private boolean useBooleanDatatypes = true;
180 private boolean useDoubleDatatypes = true;
181 @SuppressWarnings("unused")
182 private boolean useStringDatatypes = false;
183 private boolean disjointChecks = true;
184 private boolean instanceBasedDisjoints = true;
185
186 private boolean dropDisjuncts = false;
187
188 // caches for reasoner queries
189 private Map<Description,Map<Description,Boolean>> cachedDisjoints = new TreeMap<Description,Map<Description,Boolean>>(conceptComparator);
190
191 // private Map<NamedClass,Map<NamedClass,Boolean>> abDisjoint = new TreeMap<NamedClass,Map<NamedClass,Boolean>>();
192 // private Map<NamedClass,Map<NamedClass,Boolean>> notABDisjoint = new TreeMap<NamedClass,Map<NamedClass,Boolean>>();
193 // private Map<NamedClass,Map<NamedClass,Boolean>> notABMeaningful = new TreeMap<NamedClass,Map<NamedClass,Boolean>>();
194
195 public RhoDRDown(AbstractReasonerComponent reasoningService) {
196 // this(reasoningService, reasoningService.getClassHierarchy(), null, true, true, true, true, true, 3, true, true, true, true, null);
197 this.rs = reasoningService;
198 this.subHierarchy = rs.getClassHierarchy();
199 init();
200 }
201
202 public RhoDRDown(AbstractReasonerComponent reasoner, ClassHierarchy subHierarchy, Description startClass, RefinementOperatorConfigurator configurator) {
203 this.rs = reasoner;
204 this.subHierarchy = subHierarchy;
205 this.startClass = startClass;
206 useAllConstructor = configurator.getUseAllConstructor();
207 useExistsConstructor = configurator.getUseExistsConstructor();
208 useHasValueConstructor = configurator.getUseHasValueConstructor();
209 useDataHasValueConstructor = configurator.getUseDataHasValueConstructor();
210 frequencyThreshold = configurator.getValueFrequencyThreshold();
211 useCardinalityRestrictions = configurator.getUseCardinalityRestrictions();
212 cardinalityLimit = configurator.getCardinalityLimit();
213 useNegation = configurator.getUseNegation();
214 useBooleanDatatypes = configurator.getUseBooleanDatatypes();
215 useDoubleDatatypes = configurator.getUseDoubleDatatypes();
216 useStringDatatypes = configurator.getUseStringDatatypes();
217 init();
218 }
219
220 // TODO constructor which takes a RhoDRDownConfigurator object;
221 // this should be an interface implemented e.g. by ExampleBasedROLComponentConfigurator;
222 // the goal is to use the configurator system while still being flexible enough to
223 // use one refinement operator in several learning algorithms
224 public RhoDRDown(AbstractReasonerComponent reasoningService, ClassHierarchy subHierarchy, OCELConfigurator configurator, boolean applyAllFilter, boolean applyExistsFilter, boolean useAllConstructor,
225 boolean useExistsConstructor, boolean useHasValueConstructor, int valueFrequencyThreshold, boolean useCardinalityRestrictions,boolean useNegation, boolean useBooleanDatatypes, boolean useDoubleDatatypes, NamedClass startClass) {
226 this.rs = reasoningService;
227 this.subHierarchy = subHierarchy;
228 this.applyAllFilter = applyAllFilter;
229 this.applyExistsFilter = applyExistsFilter;
230 this.useAllConstructor = useAllConstructor;
231 this.useExistsConstructor = useExistsConstructor;
232 this.useHasValueConstructor = useHasValueConstructor;
233 this.frequencyThreshold = valueFrequencyThreshold;
234 this.useCardinalityRestrictions = useCardinalityRestrictions;
235 cardinalityLimit = configurator.getCardinalityLimit();
236 this.useDataHasValueConstructor = configurator.getUseDataHasValueConstructor();
237 this.useNegation = useNegation;
238 this.useBooleanDatatypes = useBooleanDatatypes;
239 this.useDoubleDatatypes = useDoubleDatatypes;
240 useStringDatatypes = configurator.getUseStringDatatypes();
241 instanceBasedDisjoints = configurator.getInstanceBasedDisjoints();
242 if(startClass != null) {
243 this.startClass = startClass;
244 }
245 init();
246 }
247
248 // subHierarchy = rs.getClassHierarchy();
249 public void init() {
250 // query reasoner for domains and ranges
251 // (because they are used often in the operator)
252 for(ObjectProperty op : rs.getObjectProperties()) {
253 opDomains.put(op, rs.getDomain(op));
254 opRanges.put(op, rs.getRange(op));
255
256 if(useHasValueConstructor) {
257 // init
258 Map<Individual, Integer> opMap = new TreeMap<Individual, Integer>();
259 valueFrequency.put(op, opMap);
260
261 // sets ordered by corresponding individual (which we ignore)
262 Collection<SortedSet<Individual>> fillerSets = rs.getPropertyMembers(op).values();
263 for(SortedSet<Individual> fillerSet : fillerSets) {
264 for(Individual i : fillerSet) {
265 // System.out.println("op " + op + " i " + i);
266 Integer value = opMap.get(i);
267
268 if(value != null) {
269 opMap.put(i, value+1);
270 } else {
271 opMap.put(i, 1);
272 }
273 }
274 }
275
276 // keep only frequent patterns
277 Set<Individual> frequentInds = new TreeSet<Individual>();
278 for(Individual i : opMap.keySet()) {
279 if(opMap.get(i) >= frequencyThreshold) {
280 frequentInds.add(i);
281 // break;
282 }
283 }
284 frequentValues.put(op, frequentInds);
285
286 }
287
288 }
289
290 for(DatatypeProperty dp : rs.getDatatypeProperties()) {
291 dpDomains.put(dp, rs.getDomain(dp));
292
293 if(useDataHasValueConstructor) {
294 Map<Constant, Integer> dpMap = new TreeMap<Constant, Integer>();
295 dataValueFrequency.put(dp, dpMap);
296
297 // sets ordered by corresponding individual (which we ignore)
298 Collection<SortedSet<Constant>> fillerSets = rs.getDatatypeMembers(dp).values();
299 for(SortedSet<Constant> fillerSet : fillerSets) {
300 for(Constant i : fillerSet) {
301 // System.out.println("op " + op + " i " + i);
302 Integer value = dpMap.get(i);
303
304 if(value != null) {
305 dpMap.put(i, value+1);
306 } else {
307 dpMap.put(i, 1);
308 }
309 }
310 }
311
312 // keep only frequent patterns
313 Set<Constant> frequentInds = new TreeSet<Constant>();
314 for(Constant i : dpMap.keySet()) {
315 if(dpMap.get(i) >= frequencyThreshold) {
316 logger.trace("adding value "+i+", because "+dpMap.get(i) +">="+frequencyThreshold);
317 frequentInds.add(i);
318 }
319 }
320 frequentDataValues.put(dp, frequentInds);
321 }
322 }
323
324 // we do not need the temporary set anymore and let the
325 // garbage collector take care of it
326 valueFrequency = null;
327 dataValueFrequency = null;
328
329 // compute splits for double datatype properties
330 for(DatatypeProperty dp : rs.getDoubleDatatypeProperties()) {
331 computeSplits(dp);
332 }
333
334 // determine the maximum number of fillers for each role
335 // (up to a specified cardinality maximum)
336 if(useCardinalityRestrictions) {
337 for(ObjectProperty op : rs.getObjectProperties()) {
338 int maxFillers = 0;
339 Map<Individual,SortedSet<Individual>> opMembers = rs.getPropertyMembers(op);
340 for(SortedSet<Individual> inds : opMembers.values()) {
341 if(inds.size()>maxFillers)
342 maxFillers = inds.size();
343 if(maxFillers >= cardinalityLimit) {
344 maxFillers = cardinalityLimit;
345 break;
346 }
347 }
348 maxNrOfFillers.put(op, maxFillers);
349 }
350 }
351
352 /*
353 String conceptStr = "(\"http://dl-learner.org/carcinogenesis#Compound\" AND (>= 2 \"http://dl-learner.org/carcinogenesis#hasStructure\".\"http://dl-learner.org/carcinogenesis#Ar_halide\" OR ((\"http://dl-learner.org/carcinogenesis#amesTestPositive\" IS TRUE) AND >= 5 \"http://dl-learner.org/carcinogenesis#hasBond\". TOP)))";
354 try {
355 NamedClass struc = new NamedClass("http://dl-learner.org/carcinogenesis#Compound");
356 Description d = KBParser.parseConcept(conceptStr);
357 SortedSet<Description> ds = (SortedSet<Description>) refine(d,15,null,struc);
358 System.out.println(ds);
359
360 Individual i = new Individual("http://dl-learner.org/carcinogenesis#d101");
361 rs.instanceCheck(ds.first(), i);
362
363 } catch (ParseException e) {
364 // TODO Auto-generated catch block
365 e.printStackTrace();
366 }
367 System.exit(0);
368 */
369
370 /*
371 NamedClass struc = new NamedClass("http://dl-learner.org/carcinogenesis#Atom");
372 ObjectProperty op = new ObjectProperty("http://dl-learner.org/carcinogenesis#hasAtom");
373 ObjectSomeRestriction oar = new ObjectSomeRestriction(op,Thing.instance);
374
375 Set<Description> ds = refine(Thing.instance,3,null,struc);
376 // Set<Description> improper = new HashSet<Description>();
377 for(Description d : ds) {
378 // if(rs.subsumes(d, struc)) {
379 // improper.add(d);
380 System.out.println(d);
381 // }
382 }
383 System.out.println(ds.size());
384 // System.out.println(improper.size());
385 System.exit(0);
386 */
387
388 }
389
390 /* (non-Javadoc)
391 * @see org.dllearner.algorithms.refinement.RefinementOperator#refine(org.dllearner.core.owl.Description)
392 */
393 @Override
394 public Set<Description> refine(Description concept) {
395 throw new RuntimeException();
396 }
397
398 @Override
399 public Set<Description> refine(Description description, int maxLength) {
400 // check that maxLength is valid
401 if(maxLength < description.getLength()) {
402 throw new Error("length has to be at least description length (description: " + description + ", max length: " + maxLength + ")");
403 }
404 return refine(description, maxLength, null, startClass);
405 }
406
407 /* (non-Javadoc)
408 * @see org.dllearner.algorithms.refinement.RefinementOperator#refine(org.dllearner.core.owl.Description, int, java.util.List)
409 */
410 @Override
411 public Set<Description> refine(Description description, int maxLength,
412 List<Description> knownRefinements) {
413 return refine(description, maxLength, knownRefinements, startClass);
414 }
415
416 @SuppressWarnings({"unchecked"})
417 public Set<Description> refine(Description description, int maxLength,
418 List<Description> knownRefinements, Description currDomain) {
419
420 // System.out.println("|- " + description + " " + currDomain + " " + maxLength);
421
422 // actions needing to be performed if this is the first time the
423 // current domain is used
424 if(!(currDomain instanceof Thing) && !topARefinementsLength.containsKey(currDomain))
425 topARefinementsLength.put((NamedClass)currDomain, 0);
426
427 // check whether using list or set makes more sense
428 // here; and whether HashSet or TreeSet should be used
429 // => TreeSet because duplicates are possible
430 Set<Description> refinements = new TreeSet<Description>(conceptComparator);
431
432 // used as temporary variable
433 Set<Description> tmp = new HashSet<Description>();
434
435 if(description instanceof Thing) {
436 // extends top refinements if necessary
437 if(currDomain instanceof Thing) {
438 if(maxLength>topRefinementsLength)
439 computeTopRefinements(maxLength);
440 refinements = (TreeSet<Description>) topRefinementsCumulative.get(maxLength).clone();
441 } else {
442 if(maxLength>topARefinementsLength.get(currDomain)) {
443 computeTopRefinements(maxLength, (NamedClass) currDomain);
444 }
445 refinements = (TreeSet<Description>) topARefinementsCumulative.get(currDomain).get(maxLength).clone();
446 }
447 // refinements.addAll(subHierarchy.getMoreSpecialConcepts(description));
448 } else if(description instanceof Nothing) {
449 // cannot be further refined
450 } else if(description instanceof NamedClass) {
451 refinements.addAll(subHierarchy.getSubClasses(description));
452 refinements.remove(new Nothing());
453 } else if (description instanceof Negation && description.getChild(0) instanceof NamedClass) {
454
455 tmp = subHierarchy.getSuperClasses(description.getChild(0));
456
457 for(Description c : tmp) {
458 if(!(c instanceof Thing))
459 refinements.add(new Negation(c));
460 }
461
462 } else if (description instanceof Intersection) {
463
464 // refine one of the elements
465 for(Description child : description.getChildren()) {
466
467 // refine the child; the new max length is the current max length minus
468 // the currently considered concept plus the length of the child
469 // TODO: add better explanation
470 tmp = refine(child, maxLength - description.getLength()+child.getLength(),null,currDomain);
471
472 // create new intersection
473 for(Description c : tmp) {
474 List<Description> newChildren = (List<Description>)((LinkedList<Description>)description.getChildren()).clone();
475 newChildren.add(c);
476 newChildren.remove(child);
477 Intersection mc = new Intersection(newChildren);
478
479 // clean concept and transform it to ordered negation normal form
480 // (non-recursive variant because only depth 1 was modified)
481 ConceptTransformation.cleanConceptNonRecursive(mc);
482 ConceptTransformation.transformToOrderedNegationNormalFormNonRecursive(mc, conceptComparator);
483
484 // check whether the intersection is OK (sanity checks), then add it
485 if(checkIntersection(mc))
486 refinements.add(mc);
487 }
488
489 }
490
491 } else if (description instanceof Union) {
492 // refine one of the elements
493 for(Description child : description.getChildren()) {
494
495 // System.out.println("union child: " + child + " " + maxLength + " " + description.getLength() + " " + child.getLength());
496
497 // refine child
498 tmp = refine(child, maxLength - description.getLength()+child.getLength(),null,currDomain);
499
500 // construct intersection (see above)
501 for(Description c : tmp) {
502 List<Description> newChildren = new LinkedList<Description>(description.getChildren());
503 newChildren.remove(child);
504 newChildren.add(c);
505 Union md = new Union(newChildren);
506
507 // transform to ordered negation normal form
508 ConceptTransformation.transformToOrderedNegationNormalFormNonRecursive(md, conceptComparator);
509 // note that we do not have to call clean here because a disjunction will
510 // never be nested in another disjunction in this operator
511
512 refinements.add(md);
513 }
514
515 }
516
517 // if enabled, we can remove elements of the disjunction
518 if(dropDisjuncts) {
519 // A1 OR A2 => {A1,A2}
520 if(description.getChildren().size() == 2) {
521 refinements.add(description.getChild(0));
522 refinements.add(description.getChild(1));
523 } else {
524 // copy children list and remove a different element in each turn
525 for(int i=0; i<description.getChildren().size(); i++) {
526 List<Description> newChildren = new LinkedList<Description>(description.getChildren());
527 newChildren.remove(i);
528 Union md = new Union(newChildren);
529 refinements.add(md);
530 }
531 }
532 }
533
534 } else if (description instanceof ObjectSomeRestriction) {
535 ObjectPropertyExpression role = ((ObjectQuantorRestriction)description).getRole();
536 Description range = opRanges.get(role);
537
538 // rule 1: EXISTS r.D => EXISTS r.E
539 tmp = refine(description.getChild(0), maxLength-2, null, range);
540
541 for(Description c : tmp)
542 refinements.add(new ObjectSomeRestriction(((ObjectQuantorRestriction)description).getRole(),c));
543
544 // rule 2: EXISTS r.D => EXISTS s.D or EXISTS r^-1.D => EXISTS s^-1.D
545 // currently inverse roles are not supported
546 ObjectProperty ar = (ObjectProperty) role;
547 Set<ObjectProperty> moreSpecialRoles = rs.getSubProperties(ar);
548 for(ObjectProperty moreSpecialRole : moreSpecialRoles)
549 refinements.add(new ObjectSomeRestriction(moreSpecialRole, description.getChild(0)));
550
551 // rule 3: EXISTS r.D => >= 2 r.D
552 // (length increases by 1 so we have to check whether max length is sufficient)
553 if(useCardinalityRestrictions) {
554 if(maxLength > description.getLength() && maxNrOfFillers.get(ar)>1) {
555 ObjectMinCardinalityRestriction min = new ObjectMinCardinalityRestriction(2,role,description.getChild(0));
556 refinements.add(min);
557 }
558 }
559
560 // rule 4: EXISTS r.TOP => EXISTS r.{value}
561 if(useHasValueConstructor && description.getChild(0) instanceof Thing) {
562 // watch out for frequent patterns
563 Set<Individual> frequentInds = frequentValues.get(role);
564 if(frequentInds != null) {
565 for(Individual ind : frequentInds) {
566 ObjectValueRestriction ovr = new ObjectValueRestriction((ObjectProperty)role, ind);
567 refinements.add(ovr);
568 }
569 }
570 }
571
572 } else if (description instanceof ObjectAllRestriction) {
573 ObjectPropertyExpression role = ((ObjectQuantorRestriction)description).getRole();
574 Description range = opRanges.get(role);
575
576 // rule 1: ALL r.D => ALL r.E
577 tmp = refine(description.getChild(0), maxLength-2, null, range);
578
579 for(Description c : tmp) {
580 refinements.add(new ObjectAllRestriction(((ObjectQuantorRestriction)description).getRole(),c));
581 }
582
583 // rule 2: ALL r.D => ALL r.BOTTOM if D is a most specific atomic concept
584 if(description.getChild(0) instanceof NamedClass && tmp.size()==0) {
585 refinements.add(new ObjectAllRestriction(((ObjectQuantorRestriction)description).getRole(),new Nothing()));
586 }
587
588 // rule 3: ALL r.D => ALL s.D or ALL r^-1.D => ALL s^-1.D
589 // currently inverse roles are not supported
590 ObjectProperty ar = (ObjectProperty) role;
591 Set<ObjectProperty> moreSpecialRoles = rs.getSubProperties(ar);
592 for(ObjectProperty moreSpecialRole : moreSpecialRoles) {
593 refinements.add(new ObjectAllRestriction(moreSpecialRole, description.getChild(0)));
594 }
595
596 // rule 4: ALL r.D => <= (maxFillers-1) r.D
597 // (length increases by 1 so we have to check whether max length is sufficient)
598 // => commented out because this is acutally not a downward refinement
599 // if(useCardinalityRestrictions) {
600 // if(maxLength > description.getLength() && maxNrOfFillers.get(ar)>1) {
601 // ObjectMaxCardinalityRestriction max = new ObjectMaxCardinalityRestriction(maxNrOfFillers.get(ar)-1,role,description.getChild(0));
602 // refinements.add(max);
603 // }
604 // }
605 } else if (description instanceof ObjectCardinalityRestriction) {
606 ObjectPropertyExpression role = ((ObjectCardinalityRestriction)description).getRole();
607 Description range = opRanges.get(role);
608 int number = ((ObjectCardinalityRestriction)description).getCardinality();
609 if(description instanceof ObjectMaxCardinalityRestriction) {
610 // rule 1: <= x r.C => <= x r.D
611 tmp = refine(description.getChild(0), maxLength-3, null, range);
612
613 for(Description d : tmp) {
614 refinements.add(new ObjectMaxCardinalityRestriction(number,role,d));
615 }
616
617 // rule 2: <= x r.C => <= (x-1) r.C
618 ObjectMaxCardinalityRestriction max = (ObjectMaxCardinalityRestriction) description;
619 // int number = max.getNumber();
620 if(number > 1)
621 refinements.add(new ObjectMaxCardinalityRestriction(number-1,max.getRole(),max.getChild(0)));
622
623 } else if(description instanceof ObjectMinCardinalityRestriction) {
624 tmp = refine(description.getChild(0), maxLength-3, null, range);
625
626 for(Description d : tmp) {
627 refinements.add(new ObjectMinCardinalityRestriction(number,role,d));
628 }
629
630 // >= x r.C => >= (x+1) r.C
631 ObjectMinCardinalityRestriction min = (ObjectMinCardinalityRestriction) description;
632 // int number = min.getNumber();
633 if(number < maxNrOfFillers.get(min.getRole()))
634 refinements.add(new ObjectMinCardinalityRestriction(number+1,min.getRole(),min.getChild(0)));
635 }
636 } else if (description instanceof DatatypeSomeRestriction) {
637
638 DatatypeSomeRestriction dsr = (DatatypeSomeRestriction) description;
639 DatatypeProperty dp = (DatatypeProperty) dsr.getRestrictedPropertyExpression();
640 DataRange dr = dsr.getDataRange();
641 if(dr instanceof DoubleMaxValue) {
642 double value = ((DoubleMaxValue)dr).getValue();
643 // find out which split value was used
644 int splitIndex = splits.get(dp).lastIndexOf(value);
645 if(splitIndex == -1)
646 throw new Error("split error");
647 int newSplitIndex = splitIndex - 1;
648 if(newSplitIndex >= 0) {
649 DoubleMaxValue max = new DoubleMaxValue(splits.get(dp).get(newSplitIndex));
650 DatatypeSomeRestriction newDSR = new DatatypeSomeRestriction(dp,max);
651 refinements.add(newDSR);
652 // System.out.println(description + " => " + newDSR);
653 }
654 } else if(dr instanceof DoubleMinValue) {
655 double value = ((DoubleMinValue)dr).getValue();
656 // find out which split value was used
657 int splitIndex = splits.get(dp).lastIndexOf(value);
658 if(splitIndex == -1)
659 throw new Error("split error");
660 int newSplitIndex = splitIndex + 1;
661 if(newSplitIndex < splits.get(dp).size()) {
662 DoubleMinValue min = new DoubleMinValue(splits.get(dp).get(newSplitIndex));
663 DatatypeSomeRestriction newDSR = new DatatypeSomeRestriction(dp,min);
664 refinements.add(newDSR);
665 }
666 }
667 } else if (description instanceof StringValueRestriction) {
668 StringValueRestriction svr = (StringValueRestriction) description;
669 DatatypeProperty dp = svr.getRestrictedPropertyExpression();
670 Set<DatatypeProperty> subDPs = rs.getSubProperties(dp);
671 for(DatatypeProperty subDP : subDPs) {
672 refinements.add(new StringValueRestriction(subDP, svr.getStringValue()));
673 }
674 }
675
676 // if a refinement is not Bottom, Top, ALL r.Bottom a refinement of top can be appended
677 if(!(description instanceof Thing) && !(description instanceof Nothing)
678 && !(description instanceof ObjectAllRestriction && description.getChild(0) instanceof Nothing)) {
679 // -1 because of the AND symbol which is appended
680 int topRefLength = maxLength - description.getLength() - 1;
681
682 // maybe we have to compute new top refinements here
683 if(currDomain instanceof Thing) {
684 if(topRefLength > topRefinementsLength)
685 computeTopRefinements(topRefLength);
686 } else if(topRefLength > topARefinementsLength.get(currDomain))
687 computeTopRefinements(topRefLength,(NamedClass)currDomain);
688
689 if(topRefLength>0) {
690 Set<Description> topRefs;
691 if(currDomain instanceof Thing)
692 topRefs = topRefinementsCumulative.get(topRefLength);
693 else
694 topRefs = topARefinementsCumulative.get(currDomain).get(topRefLength);
695
696 for(Description c : topRefs) {
697 // true if refinement should be skipped due to filters,
698 // false otherwise
699 boolean skip = false;
700
701 // if a refinement of of the form ALL r, we check whether ALL r
702 // does not occur already
703 if(applyAllFilter) {
704 if(c instanceof ObjectAllRestriction) {
705 for(Description child : description.getChildren()) {
706 if(child instanceof ObjectAllRestriction) {
707 ObjectPropertyExpression r1 = ((ObjectAllRestriction)c).getRole();
708 ObjectPropertyExpression r2 = ((ObjectAllRestriction)child).getRole();
709 if(r1.toString().equals(r2.toString()))
710 skip = true;
711 }
712 }
713 }
714 }
715
716 // check for double datatype properties
717 /*
718 if(c instanceof DatatypeSomeRestriction &&
719 description instanceof DatatypeSomeRestriction) {
720 DataRange dr = ((DatatypeSomeRestriction)c).getDataRange();
721 DataRange dr2 = ((DatatypeSomeRestriction)description).getDataRange();
722 // it does not make sense to have statements like height >= 1.8 AND height >= 1.7
723 if((dr instanceof DoubleMaxValue && dr2 instanceof DoubleMaxValue)
724 ||(dr instanceof DoubleMinValue && dr2 instanceof DoubleMinValue))
725 skip = true;
726 }*/
727
728 // perform a disjointness check when named classes are added;
729 // this can avoid a lot of superfluous computation in the algorithm e.g.
730 // when A1 looks good, so many refinements of the form (A1 OR (A2 AND A3))
731 // are generated which are all equal to A1 due to disjointness of A2 and A3
732 if(disjointChecks && c instanceof NamedClass && description instanceof NamedClass && isDisjoint(description, c)) {
733 skip = true;
734 // System.out.println(c + " ignored when refining " + description);
735 }
736
737 if(!skip) {
738 Intersection mc = new Intersection();
739 mc.addChild(description);
740 mc.addChild(c);
741
742 // clean and transform to ordered negation normal form
743 ConceptTransformation.cleanConceptNonRecursive(mc);
744 ConceptTransformation.transformToOrderedNegationNormalFormNonRecursive(mc, conceptComparator);
745
746 // last check before intersection is added
747 if(checkIntersection(mc))
748 refinements.add(mc);
749 }
750 }
751 }
752 }
753
754 // for(Description refinement : refinements) {
755 // if((refinement instanceof Intersection || refinement instanceof Union) && refinement.getChildren().size()<2) {
756 // System.out.println(description + " " + refinement + " " + currDomain + " " + maxLength);
757 // System.exit(0);
758 // }
759 // }
760
761 return refinements;
762 }
763
764 // when a child of an intersection is refined and reintegrated into the
765 // intersection, we can perform some sanity checks;
766 // method returns true if everything is OK and false otherwise
767 // TODO: can be implemented more efficiently if the newly added child
768 // is given as parameter
769 public static boolean checkIntersection(Intersection intersection) {
770 // rule 1: max. restrictions at most once
771 boolean maxDoubleOccurence = false;
772 // rule 2: min restrictions at most once
773 boolean minDoubleOccurence = false;
774 // rule 3: no double occurences of boolean datatypes
775 TreeSet<DatatypeProperty> occuredDP = new TreeSet<DatatypeProperty>();
776 // rule 4: no double occurences of hasValue restrictions
777 TreeSet<ObjectProperty> occuredVR = new TreeSet<ObjectProperty>();
778
779 for(Description child : intersection.getChildren()) {
780 if(child instanceof DatatypeSomeRestriction) {
781 DataRange dr = ((DatatypeSomeRestriction)child).getDataRange();
782 if(dr instanceof DoubleMaxValue) {
783 if(maxDoubleOccurence)
784 return false;
785 else
786 maxDoubleOccurence = true;
787 } else if(dr instanceof DoubleMinValue) {
788 if(minDoubleOccurence)
789 return false;
790 else
791 minDoubleOccurence = true;
792 }
793 } else if(child instanceof BooleanValueRestriction) {
794 DatatypeProperty dp = (DatatypeProperty) ((BooleanValueRestriction)child).getRestrictedPropertyExpression();
795 // System.out.println("dp: " + dp);
796 // return false if the boolean property exists already
797 if(!occuredDP.add(dp))
798 return false;
799 } else if(child instanceof ObjectValueRestriction) {
800 ObjectProperty op = (ObjectProperty) ((ObjectValueRestriction)child).getRestrictedPropertyExpression();
801 if(!occuredVR.add(op))
802 return false;
803 }
804 // System.out.println(child.getClass());
805 }
806 return true;
807 }
808
809 /**
810 * By default, the operator does not specialize e.g. (A or B) to A, because
811 * it only guarantees weak completeness. Under certain circumstances, e.g.
812 * refinement of a fixed given concept, it can be useful to allow such
813 * refinements, which can be done by passing the parameter true to this method.
814 * @param dropDisjuncts Whether to remove disjuncts in refinement process.
815 */
816 public void setDropDisjuncts(boolean dropDisjuncts) {
817 this.dropDisjuncts = dropDisjuncts;
818 }
819
820 private void computeTopRefinements(int maxLength) {
821 computeTopRefinements(maxLength, null);
822 }
823
824 private void computeTopRefinements(int maxLength, NamedClass domain) {
825 long topComputationTimeStartNs = System.nanoTime();
826
827 if(domain == null && m.size() == 0)
828 computeM();
829
830 if(domain != null && !mA.containsKey(domain))
831 computeM(domain);
832
833 int refinementsLength;
834
835 if(domain == null) {
836 refinementsLength = topRefinementsLength;
837 } else {
838 if(!topARefinementsLength.containsKey(domain))
839 topARefinementsLength.put(domain,0);
840
841 refinementsLength = topARefinementsLength.get(domain);
842 }
843
844 // compute all possible combinations of the disjunction
845 for(int i = refinementsLength+1; i <= maxLength; i++) {
846 combos.put(i,MathOperations.getCombos(i, mMaxLength));
847
848 // initialise the refinements with empty sets
849 if(domain == null) {
850 topRefinements.put(i, new TreeSet<Description>(conceptComparator));
851 } else {
852 if(!topARefinements.containsKey(domain))
853 topARefinements.put(domain, new TreeMap<Integer,SortedSet<Description>>());
854 topARefinements.get(domain).put(i, new TreeSet<Description>(conceptComparator));
855 }
856
857 for(List<Integer> combo : combos.get(i)) {
858
859 // combination is a single number => try to use M
860 if(combo.size()==1) {
861 // note we cannot use "put" instead of "addAll" because there
862 // can be several combos for one length
863 if(domain == null)
864 topRefinements.get(i).addAll(m.get(i));
865 else
866 topARefinements.get(domain).get(i).addAll(mA.get(domain).get(i));
867 // combinations has several numbers => generate disjunct
868 } else {
869
870 // check whether the combination makes sense, i.e. whether
871 // all lengths mentioned in it have corresponding elements
872 // e.g. when negation is deactivated there won't be elements of
873 // length 2 in M
874 boolean validCombo = true;
875 for(Integer j : combo) {
876 if((domain == null && m.get(j).size()==0) ||
877 (domain != null && mA.get(domain).get(j).size()==0))
878 validCombo = false;
879 }
880
881 if(validCombo) {
882
883 SortedSet<Union> baseSet = new TreeSet<Union>(conceptComparator);
884 for(Integer j : combo) {
885 if(domain == null)
886 baseSet = MathOperations.incCrossProduct(baseSet, m.get(j));
887 else
888 baseSet = MathOperations.incCrossProduct(baseSet, mA.get(domain).get(j));
889 }
890
891 // convert all concepts in ordered negation normal form
892 for(Description concept : baseSet) {
893 ConceptTransformation.transformToOrderedForm(concept, conceptComparator);
894 }
895
896 // apply the exists filter (throwing out all refinements with
897 // double \exists r for any r)
898 // TODO: similar filtering can be done for boolean datatype
899 // properties
900 if(applyExistsFilter) {
901 Iterator<Union> it = baseSet.iterator();
902 while(it.hasNext()) {
903 if(MathOperations.containsDoubleObjectSomeRestriction(it.next()))
904 it.remove();
905 }
906 }
907
908 // add computed refinements
909 if(domain == null)
910 topRefinements.get(i).addAll(baseSet);
911 else
912 topARefinements.get(domain).get(i).addAll(baseSet);
913
914 }
915 }
916 }
917
918 // create cumulative versions of refinements such that they can
919 // be accessed easily
920 TreeSet<Description> cumulativeRefinements = new TreeSet<Description>(conceptComparator);
921 for(int j=1; j<=i; j++) {
922 if(domain == null) {
923 cumulativeRefinements.addAll(topRefinements.get(j));
924 } else {
925 cumulativeRefinements.addAll(topARefinements.get(domain).get(j));
926 }
927 }
928
929 if(domain == null) {
930 topRefinementsCumulative.put(i, cumulativeRefinements);
931 } else {
932 if(!topARefinementsCumulative.containsKey(domain))
933 topARefinementsCumulative.put(domain, new TreeMap<Integer, TreeSet<Description>>());
934 topARefinementsCumulative.get(domain).put(i, cumulativeRefinements);
935 }
936 }
937
938 // register new top refinements length
939 if(domain == null)
940 topRefinementsLength = maxLength;
941 else
942 topARefinementsLength.put(domain,maxLength);
943
944 topComputationTimeNs += System.nanoTime() - topComputationTimeStartNs;
945 }
946
947 // compute M_\top
948 private void computeM() {
949 long mComputationTimeStartNs = System.nanoTime();
950
951 // initialise all possible lengths (1 to 3)
952 for(int i=1; i<=mMaxLength; i++) {
953 m.put(i, new TreeSet<Description>(conceptComparator));
954 }
955
956 SortedSet<Description> m1 = subHierarchy.getSubClasses(new Thing());
957 m.put(1,m1);
958
959 SortedSet<Description> m2 = new TreeSet<Description>(conceptComparator);
960 if(useNegation) {
961 Set<Description> m2tmp = subHierarchy.getSuperClasses(new Nothing());
962 for(Description c : m2tmp) {
963 if(!(c instanceof Thing)) {
964 m2.add(new Negation(c));
965 }
966 }
967 }
968
969 // boolean datatypes, e.g. testPositive = true
970 if(useBooleanDatatypes) {
971 Set<DatatypeProperty> booleanDPs = rs.getBooleanDatatypeProperties();
972 for(DatatypeProperty dp : booleanDPs) {
973 m2.add(new BooleanValueRestriction(dp,true));
974 m2.add(new BooleanValueRestriction(dp,false));
975 }
976 }
977 m.put(2,m2);
978
979 SortedSet<Description> m3 = new TreeSet<Description>(conceptComparator);
980 if(useExistsConstructor) {
981 // only uses most general roles
982 for(ObjectProperty r : rs.getMostGeneralProperties()) {
983 m3.add(new ObjectSomeRestriction(r, new Thing()));
984 }
985 }
986
987 if(useAllConstructor) {
988 // we allow \forall r.\top here because otherwise the operator
989 // becomes too difficult to manage due to dependencies between
990 // M_A and M_A' where A'=ran(r)
991 for(ObjectProperty r : rs.getMostGeneralProperties()) {
992 m3.add(new ObjectAllRestriction(r, new Thing()));
993 }
994 }
995
996 if(useDoubleDatatypes) {
997 Set<DatatypeProperty> doubleDPs = rs.getDoubleDatatypeProperties();
998 for(DatatypeProperty dp : doubleDPs) {
999 if(splits.get(dp).size()>0) {
1000 DoubleMaxValue max = new DoubleMaxValue(splits.get(dp).get(splits.get(dp).size()-1));
1001 DoubleMinValue min = new DoubleMinValue(splits.get(dp).get(0));
1002 m3.add(new DatatypeSomeRestriction(dp,max));
1003 m3.add(new DatatypeSomeRestriction(dp,min));
1004 }
1005 }
1006 }
1007
1008 if(useDataHasValueConstructor) {
1009 Set<DatatypeProperty> stringDPs = rs.getStringDatatypeProperties();
1010 for(DatatypeProperty dp : stringDPs) {
1011 // loop over frequent values
1012 Set<Constant> freqValues = frequentDataValues.get(dp);
1013 for(Constant c : freqValues) {
1014 m3.add(new StringValueRestriction(dp, c.getLiteral()));
1015 }
1016 }
1017 }
1018
1019 m.put(3,m3);
1020
1021 SortedSet<Description> m4 = new TreeSet<Description>(conceptComparator);
1022 if(useCardinalityRestrictions) {
1023 for(ObjectProperty r : rs.getMostGeneralProperties()) {
1024 int maxFillers = maxNrOfFillers.get(r);
1025 // zero fillers: <= -1 r.C does not make sense
1026 // one filler: <= 0 r.C is equivalent to NOT EXISTS r.C,
1027 // but we still keep it, because ALL r.NOT C may be difficult to reach
1028 if(maxFillers > 0)
1029 m4.add(new ObjectMaxCardinalityRestriction(maxFillers-1, r, new Thing()));
1030 }
1031 }
1032 m.put(4,m4);
1033
1034 mComputationTimeNs += System.nanoTime() - mComputationTimeStartNs;
1035 }
1036
1037 // computation of the set M_A
1038 // a major difference compared to the ILP 2007 \rho operator is that
1039 // M is finite and contains elements of length (currently) at most 3
1040 private void computeM(NamedClass nc) {
1041 long mComputationTimeStartNs = System.nanoTime();
1042
1043 // System.out.println(nc);
1044
1045 mA.put(nc, new TreeMap<Integer,SortedSet<Description>>());
1046 // initialise all possible lengths (1 to 3)
1047 for(int i=1; i<=mMaxLength; i++) {
1048 mA.get(nc).put(i, new TreeSet<Description>(conceptComparator));
1049 }
1050
1051 // incomplete, prior implementation
1052 // SortedSet<Description> m1 = subHierarchy.getSubClasses(nc);
1053 // mA.get(nc).put(1,m1);
1054
1055 // most general classes, which are not disjoint with nc and provide real refinement
1056 SortedSet<Description> m1 = getClassCandidates(nc);
1057 mA.get(nc).put(1,m1);
1058
1059 // most specific negated classes, which are not disjoint with nc
1060 SortedSet<Description> m2 = new TreeSet<Description>();
1061 if(useNegation) {
1062 m2 = getNegClassCandidates(nc);
1063 mA.get(nc).put(2,m2);
1064 }
1065
1066 // System.out.println("m1 " + "(" + nc + "): " + m1);
1067 // System.out.println("m2 " + "(" + nc + "): " + m2);
1068
1069 /*
1070 SortedSet<Description> m2 = new TreeSet<Description>(conceptComparator);
1071 if(useNegation) {
1072 // the definition in the paper is more complex, but actually
1073 // we only have to insert the most specific concepts satisfying
1074 // the mentioned restrictions; there is no need to implement a
1075 // recursive method because for A subClassOf A' we have not A'
1076 // subClassOf A and thus: if A and B are disjoint then also A'
1077 // and B; if not A AND B = B then also not A' AND B = B
1078 // 2010/03: the latter is not correct => a recursive method is needed
1079 SortedSet<Description> m2tmp = subHierarchy.getSuperClasses(new Nothing());
1080
1081 for(Description c : m2tmp) {
1082 // if(c instanceof Thing)
1083 // m2.add(c);
1084 // else {
1085 // we obviously do not add \top (\top refines \top does not make sense)
1086 if(!(c instanceof Thing)) {
1087 NamedClass a = (NamedClass) c;
1088 if(!isNotADisjoint(a, nc) && isNotAMeaningful(a, nc))
1089 m2.add(new Negation(a));
1090 }
1091 }
1092 }
1093 */
1094
1095 // compute applicable properties
1096 computeMg(nc);
1097
1098 // boolean datatypes, e.g. testPositive = true
1099 if(useBooleanDatatypes) {
1100 Set<DatatypeProperty> booleanDPs = mgbd.get(nc);
1101 for(DatatypeProperty dp : booleanDPs) {
1102 m2.add(new BooleanValueRestriction(dp,true));
1103 m2.add(new BooleanValueRestriction(dp,false));
1104 }
1105 }
1106
1107 mA.get(nc).put(2,m2);
1108
1109 SortedSet<Description> m3 = new TreeSet<Description>(conceptComparator);
1110 if(useExistsConstructor) {
1111 for(ObjectProperty r : mgr.get(nc)) {
1112 m3.add(new ObjectSomeRestriction(r, new Thing()));
1113 }
1114 }
1115
1116 if(useAllConstructor) {
1117 // we allow \forall r.\top here because otherwise the operator
1118 // becomes too difficult to manage due to dependencies between
1119 // M_A and M_A' where A'=ran(r)
1120 for(ObjectProperty r : mgr.get(nc)) {
1121 m3.add(new ObjectAllRestriction(r, new Thing()));
1122 }
1123 }
1124
1125 if(useDoubleDatatypes) {
1126 Set<DatatypeProperty> doubleDPs = mgdd.get(nc);
1127 // System.out.println("cached disjoints " + cachedDisjoints);
1128 // System.out.println("appOP " + appOP);
1129 // System.out.println("appBD " + appBD);
1130 // System.out.println("appDD " + appDD);
1131 // System.out.println("mgr " + mgr);
1132 // System.out.println("mgbd " + mgbd);
1133 // System.out.println("mgdd " + mgdd);
1134
1135 for(DatatypeProperty dp : doubleDPs) {
1136 if(splits.get(dp).size() > 0) {
1137 DoubleMaxValue max = new DoubleMaxValue(splits.get(dp).get(splits.get(dp).size()-1));
1138 DoubleMinValue min = new DoubleMinValue(splits.get(dp).get(0));
1139 m3.add(new DatatypeSomeRestriction(dp,max));
1140 m3.add(new DatatypeSomeRestriction(dp,min));
1141 }
1142 }
1143 }
1144
1145 if(useDataHasValueConstructor) {
1146 Set<DatatypeProperty> stringDPs = mgsd.get(nc);
1147 for(DatatypeProperty dp : stringDPs) {
1148 // loop over frequent values
1149 Set<Constant> freqValues = frequentDataValues.get(dp);
1150 for(Constant c : freqValues) {
1151 m3.add(new StringValueRestriction(dp, c.getLiteral()));
1152 }
1153 }
1154 }
1155
1156 mA.get(nc).put(3,m3);
1157
1158 SortedSet<Description> m4 = new TreeSet<Description>(conceptComparator);
1159 if(useCardinalityRestrictions) {
1160 for(ObjectProperty r : mgr.get(nc)) {
1161 int maxFillers = maxNrOfFillers.get(r);
1162 // zero fillers: <= -1 r.C does not make sense
1163 // one filler: <= 0 r.C is equivalent to NOT EXISTS r.C,
1164 // but we still keep it, because ALL r.NOT C may be difficult to reach
1165 if(maxFillers > 0)
1166 m4.add(new ObjectMaxCardinalityRestriction(maxFillers-1, r, new Thing()));
1167 }
1168 }
1169 mA.get(nc).put(4,m4);
1170
1171 // System.out.println(mA.get(nc));
1172
1173 mComputationTimeNs += System.nanoTime() - mComputationTimeStartNs;
1174 }
1175
1176 // get candidates for a refinement of \top restricted to a class B
1177 public SortedSet<Description> getClassCandidates(NamedClass index) {
1178 return getClassCandidatesRecursive(index, Thing.instance);
1179 }
1180
1181 private SortedSet<Description> getClassCandidatesRecursive(Description index, Description upperClass) {
1182 SortedSet<Description> candidates = new TreeSet<Description>();
1183 // System.out.println("index " + index + " upper class " + upperClass);
1184
1185 // we descend the subsumption hierarchy to ensure that we get
1186 // the most general concepts satisfying the criteria
1187 for(Description candidate : subHierarchy.getSubClasses(upperClass)) {
1188 // System.out.println("testing " + candidate + " ... ");
1189
1190 // NamedClass candidate = (NamedClass) d;
1191 // check disjointness with index (if not no further traversal downwards is necessary)
1192 if(!isDisjoint(candidate,index)) {
1193 // System.out.println( " passed disjointness test ... ");
1194 // check whether the class is meaningful, i.e. adds something to the index
1195 // to do this, we need to make sure that the class is not a superclass of the
1196 // index (otherwise we get nothing new) - for instance based disjoints, we
1197 // make sure that there is at least one individual, which is not already in the
1198 // upper class
1199 boolean meaningful;
1200 if(instanceBasedDisjoints) {
1201 // bug: tests should be performed against the index, not the upper class
1202 // SortedSet<Individual> tmp = rs.getIndividuals(upperClass);
1203 SortedSet<Individual> tmp = rs.getIndividuals(index);
1204 tmp.removeAll(rs.getIndividuals(candidate));
1205 // System.out.println(" instances of " + index + " and not " + candidate + ": " + tmp.size());
1206 meaningful = tmp.size() != 0;
1207 } else {
1208 meaningful = !isDisjoint(new Negation(candidate),index);
1209 }
1210
1211 if(meaningful) {
1212 // candidate went successfully through all checks
1213 candidates.add(candidate);
1214 // System.out.println(" real refinement");
1215 } else {
1216 // descend subsumption hierarchy to find candidates
1217 // System.out.println(" enter recursion");
1218 candidates.addAll(getClassCandidatesRecursive(index, candidate));
1219 }
1220 }
1221 // else {
1222 // System.out.println(" ruled out, because it is disjoint");
1223 // }
1224 }
1225 // System.out.println("cc method exit");
1226 return candidates;
1227 }
1228
1229 // get candidates for a refinement of \top restricted to a class B
1230 public SortedSet<Description> getNegClassCandidates(NamedClass index) {
1231 return getNegClassCandidatesRecursive(index, Nothing.instance);
1232 }
1233
1234 private SortedSet<Description> getNegClassCandidatesRecursive(Description index, Description lowerClass) {
1235 SortedSet<Description> candidates = new TreeSet<Description>(conceptComparator);
1236 // System.out.println("index " + index + " lower class " + lowerClass);
1237
1238 for(Description candidate : subHierarchy.getSuperClasses(lowerClass)) {
1239 if(!(candidate instanceof Thing)) {
1240 // System.out.println("candidate: " + candidate);
1241 // check disjointness with index/range (should not be disjoint otherwise not useful)
1242 if(!isDisjoint(new Negation(candidate),index)) {
1243 boolean meaningful;
1244 // System.out.println("not disjoint");
1245 if(instanceBasedDisjoints) {
1246 SortedSet<Individual> tmp = rs.getIndividuals(index);
1247 tmp.removeAll(rs.getIndividuals(new Negation(candidate)));
1248 meaningful = tmp.size() != 0;
1249 // System.out.println("instances " + tmp.size());
1250 } else {
1251 meaningful = !isDisjoint(candidate,index);
1252 }
1253
1254 if(meaningful) {
1255 candidates.add(new Negation(candidate));
1256 } else {
1257 candidates.addAll(getNegClassCandidatesRecursive(index, candidate));
1258 }
1259 }
1260 }
1261 }
1262 return candidates;
1263 }
1264
1265 private void computeMg(NamedClass domain) {
1266 // compute the applicable properties if this has not been done yet
1267 if(appOP.get(domain) == null)
1268 computeApp(domain);
1269
1270 // initialise mgr, mgbd, mgdd, mgsd
1271 mgr.put(domain, new TreeSet<ObjectProperty>());
1272 mgbd.put(domain, new TreeSet<DatatypeProperty>());
1273 mgdd.put(domain, new TreeSet<DatatypeProperty>());
1274 mgsd.put(domain, new TreeSet<DatatypeProperty>());
1275
1276 SortedSet<ObjectProperty> mostGeneral = rs.getMostGeneralProperties();
1277 computeMgrRecursive(domain, mostGeneral, mgr.get(domain));
1278 SortedSet<DatatypeProperty> mostGeneralDP = rs.getMostGeneralDatatypeProperties();
1279 // we make the (reasonable) assumption here that all sub and super
1280 // datatype properties have the same type (e.g. boolean, integer, double)
1281 Set<DatatypeProperty> mostGeneralBDP = Helper.intersection(mostGeneralDP, rs.getBooleanDatatypeProperties());
1282 Set<DatatypeProperty> mostGeneralDDP = Helper.intersection(mostGeneralDP, rs.getDoubleDatatypeProperties());
1283 Set<DatatypeProperty> mostGeneralSDP = Helper.intersection(mostGeneralDP, rs.getStringDatatypeProperties());
1284 computeMgbdRecursive(domain, mostGeneralBDP, mgbd.get(domain));
1285 computeMgddRecursive(domain, mostGeneralDDP, mgdd.get(domain));
1286 computeMgsdRecursive(domain, mostGeneralSDP, mgsd.get(domain));
1287 }
1288
1289 private void computeMgrRecursive(NamedClass domain, Set<ObjectProperty> currProperties, Set<ObjectProperty> mgrTmp) {
1290 for(ObjectProperty prop : currProperties) {
1291 if(appOP.get(domain).contains(prop))
1292 mgrTmp.add(prop);
1293 else
1294 computeMgrRecursive(domain, rs.getSubProperties(prop), mgrTmp);
1295 }
1296 }
1297
1298 private void computeMgbdRecursive(NamedClass domain, Set<DatatypeProperty> currProperties, Set<DatatypeProperty> mgbdTmp) {
1299 for(DatatypeProperty prop : currProperties) {
1300 if(appBD.get(domain).contains(prop))
1301 mgbdTmp.add(prop);
1302 else
1303 computeMgbdRecursive(domain, rs.getSubProperties(prop), mgbdTmp);
1304 }
1305 }
1306
1307 private void computeMgddRecursive(NamedClass domain, Set<DatatypeProperty> currProperties, Set<DatatypeProperty> mgddTmp) {
1308 for(DatatypeProperty prop : currProperties) {
1309 if(appDD.get(domain).contains(prop))
1310 mgddTmp.add(prop);
1311 else
1312 computeMgddRecursive(domain, rs.getSubProperties(prop), mgddTmp);
1313 }
1314 }
1315
1316 private void computeMgsdRecursive(NamedClass domain, Set<DatatypeProperty> currProperties, Set<DatatypeProperty> mgsdTmp) {
1317 for(DatatypeProperty prop : currProperties) {
1318 if(appDD.get(domain).contains(prop))
1319 mgsdTmp.add(prop);
1320 else
1321 computeMgsdRecursive(domain, rs.getSubProperties(prop), mgsdTmp);
1322 }
1323 }
1324
1325 // computes the set of applicable properties for a given class
1326 private void computeApp(NamedClass domain) {
1327 // object properties
1328 Set<ObjectProperty> mostGeneral = rs.getObjectProperties();
1329 Set<ObjectProperty> applicableRoles = new TreeSet<ObjectProperty>();
1330 for(ObjectProperty role : mostGeneral) {
1331 // TODO: currently we just rely on named classes as roles,
1332 // instead of computing dom(r) and ran(r)
1333 Description d = rs.getDomain(role);
1334 if(!isDisjoint(domain,d))
1335 applicableRoles.add(role);
1336 }
1337 appOP.put(domain, applicableRoles);
1338
1339 // boolean datatype properties
1340 Set<DatatypeProperty> mostGeneralBDPs = rs.getBooleanDatatypeProperties();
1341 Set<DatatypeProperty> applicableBDPs = new TreeSet<DatatypeProperty>();
1342 for(DatatypeProperty role : mostGeneralBDPs) {
1343 // Description d = (NamedClass) rs.getDomain(role);
1344 Description d = rs.getDomain(role);
1345 if(!isDisjoint(domain,d))
1346 applicableBDPs.add(role);
1347 }
1348 appBD.put(domain, applicableBDPs);
1349
1350 // double datatype properties
1351 Set<DatatypeProperty> mostGeneralDDPs = rs.getDoubleDatatypeProperties();
1352 Set<DatatypeProperty> applicableDDPs = new TreeSet<DatatypeProperty>();
1353 for(DatatypeProperty role : mostGeneralDDPs) {
1354 // Description d = (NamedClass) rs.getDomain(role);
1355 Description d = rs.getDomain(role);
1356 // System.out.println("domain: " + d);
1357 if(!isDisjoint(domain,d))
1358 applicableDDPs.add(role);
1359 }
1360 appDD.put(domain, applicableDDPs);
1361 }
1362
1363 // returns true of the intersection contains elements disjoint
1364 // to the given description (if true adding the description to
1365 // the intersection results in a description equivalent to bottom)
1366 // e.g. OldPerson AND YoungPerson; Nitrogen-34 AND Tin-113
1367 // Note: currently we only check named classes in the intersection,
1368 // it would be interesting to see whether it makes sense to extend this
1369 // (advantage: less refinements, drawback: operator will need infinitely many
1370 // reasoner queries in the long run)
1371 @SuppressWarnings({"unused"})
1372 private boolean containsDisjoints(Intersection intersection, Description d) {
1373 List<Description> children = intersection.getChildren();
1374 for(Description child : children) {
1375 if(d instanceof Nothing)
1376 return true;
1377 else if(child instanceof NamedClass) {
1378 if(isDisjoint((NamedClass)child, d))
1379 return true;
1380 }
1381 }
1382 return false;
1383 }
1384
1385 private boolean isDisjoint(Description d1, Description d2) {
1386
1387 // System.out.println("| " + d1 + " " + d2);
1388 // System.out.println("| " + cachedDisjoints);
1389
1390 // check whether we have cached this query
1391 Map<Description,Boolean> tmp = cachedDisjoints.get(d1);
1392 Boolean tmp2 = null;
1393 if(tmp != null)
1394 tmp2 = tmp.get(d2);
1395
1396 // System.out.println("| " + tmp + " " + tmp2);
1397
1398 if(tmp2==null) {
1399 Boolean result;
1400 if(instanceBasedDisjoints) {
1401 result = isDisjointInstanceBased(d1,d2);
1402 } else {
1403 Description d = new Intersection(d1, d2);
1404 result = rs.isSuperClassOf(new Nothing(), d);
1405 }
1406 // add the result to the cache (we add it twice such that
1407 // the order of access does not matter)
1408
1409 // System.out.println("| result: " + result);
1410
1411 // create new entries if necessary
1412 Map<Description,Boolean> map1 = new TreeMap<Description,Boolean>(conceptComparator);
1413 Map<Description,Boolean> map2 = new TreeMap<Description,Boolean>(conceptComparator);
1414 if(tmp == null)
1415 cachedDisjoints.put(d1, map1);
1416 if(!cachedDisjoints.containsKey(d2))
1417 cachedDisjoints.put(d2, map2);
1418
1419 // add result symmetrically in the description matrix
1420 cachedDisjoints.get(d1).put(d2, result);
1421 cachedDisjoints.get(d2).put(d1, result);
1422 // System.out.println("---");
1423 return result;
1424 } else {
1425 // System.out.println("===");
1426 return tmp2;
1427 }
1428 }
1429
1430 private boolean isDisjointInstanceBased(Description d1, Description d2) {
1431 SortedSet<Individual> d1Instances = rs.getIndividuals(d1);
1432 SortedSet<Individual> d2Instances = rs.getIndividuals(d2);
1433 // System.out.println(d1 + " " + d2);
1434 // System.out.println(d1 + " " + d1Instances);
1435 // System.out.println(d2 + " " + d2Instances);
1436 for(Individual d1Instance : d1Instances) {
1437 if(d2Instances.contains(d1Instance))
1438 return false;
1439 }
1440 return true;
1441 }
1442
1443 /*
1444 // computes whether two classes are disjoint; this should be computed
1445 // by the reasoner only ones and otherwise taken from a matrix
1446 private boolean isDisjoint(NamedClass a, Description d) {
1447 // we need to test whether A AND B is equivalent to BOTTOM
1448 Description d2 = new Intersection(a, d);
1449 return rs.subsumes(new Nothing(), d2);
1450 }*/
1451
1452 // we need to test whether NOT A AND B is equivalent to BOTTOM
1453 @SuppressWarnings("unused")
1454 private boolean isNotADisjoint(NamedClass a, NamedClass b) {
1455 // Map<NamedClass,Boolean> tmp = notABDisjoint.get(a);
1456 // Boolean tmp2 = null;
1457 // if(tmp != null)
1458 // tmp2 = tmp.get(b);
1459 //
1460 // if(tmp2==null) {
1461 Description notA = new Negation(a);
1462 Description d = new Intersection(notA, b);
1463 Boolean result = rs.isSuperClassOf(new Nothing(), d);
1464 // ... add to cache ...
1465 return result;
1466 // } else
1467 // return tmp2;
1468 }
1469
1470 // we need to test whether NOT A AND B = B
1471 // (if not then NOT A is not meaningful in the sense that it does
1472 // not semantically add anything to B)
1473 @SuppressWarnings("unused")
1474 private boolean isNotAMeaningful(NamedClass a, NamedClass b) {
1475 Description notA = new Negation(a);
1476 Description d = new Intersection(notA, b);
1477 // check b subClassOf b AND NOT A (if yes then it is not meaningful)
1478 return !rs.isSuperClassOf(d, b);
1479 }
1480
1481 private void computeSplits(DatatypeProperty dp) {
1482 Set<Double> valuesSet = new TreeSet<Double>();
1483 // Set<Individual> individuals = rs.getIndividuals();
1484 Map<Individual,SortedSet<Double>> valueMap = rs.getDoubleDatatypeMembers(dp);
1485 // add all values to the set (duplicates will be remove automatically)
1486 for(Entry<Individual,SortedSet<Double>> e : valueMap.entrySet())
1487 valuesSet.addAll(e.getValue());
1488 // convert set to a list where values are sorted
1489 List<Double> values = new LinkedList<Double>(valuesSet);
1490 Collections.sort(values);
1491
1492 int nrOfValues = values.size();
1493 // create split set
1494 List<Double> splitsDP = new LinkedList<Double>();
1495 for(int splitNr=0; splitNr < Math.min(maxNrOfSplits,nrOfValues-1); splitNr++) {
1496 int index;
1497 if(nrOfValues<=maxNrOfSplits)
1498 index = splitNr;
1499 else
1500 index = (int) Math.floor(splitNr * (double)nrOfValues/(maxNrOfSplits+1));
1501
1502 double value = 0.5*(values.get(index)+values.get(index+1));
1503 splitsDP.add(value);
1504 }
1505 splits.put(dp, splitsDP);
1506
1507 // System.out.println(values);
1508 // System.out.println(splits);
1509 // System.exit(0);
1510 }
1511 }