001 /**
002 * Copyright (C) 2007-2011, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program. If not, see <http://www.gnu.org/licenses/>.
018 */
019
020 package org.dllearner.utilities.examples;
021
022 import java.util.Iterator;
023 import java.util.SortedSet;
024 import java.util.TreeSet;
025
026 import org.apache.log4j.Logger;
027 import org.dllearner.kb.sparql.SPARQLTasks;
028 import org.dllearner.utilities.datastructures.SetManipulation;
029 import org.dllearner.utilities.owl.OWLVocabulary;
030
031 public class AutomaticNegativeExampleFinderSPARQL {
032
033 // LOGGER: ComponentManager
034 private static Logger logger = Logger.getLogger(AutomaticNegativeExampleFinderSPARQL.class);
035
036 private SPARQLTasks sparqltasks;
037
038 private SortedSet<String> filterClasses;
039
040 private SortedSet<String> fullPositiveSet;
041
042 private SortedSet<String> fromRelated = new TreeSet<String>();
043 private SortedSet<String> fromNearbyClasses = new TreeSet<String>();
044 private SortedSet<String> fromSuperclasses = new TreeSet<String>();;
045 private SortedSet<String> fromParallelClasses = new TreeSet<String>();;
046 private SortedSet<String> fromRandom = new TreeSet<String>();;
047 private SortedSet<String> fromDomain = new TreeSet<String>();;
048 private SortedSet<String> fromRange = new TreeSet<String>();;
049
050 static int poslimit = 10;
051 static int neglimit = 20;
052
053
054 /**
055 * takes as input a full positive set to make sure no negatives are added as positives
056 *
057 * @param fullPositiveSet
058 * @param st
059 */
060 public AutomaticNegativeExampleFinderSPARQL(
061 SortedSet<String> fullPositiveSet,
062 SPARQLTasks st, SortedSet<String> filterClasses) {
063 super();
064 this.fullPositiveSet = new TreeSet<String>();
065 this.fullPositiveSet.addAll(fullPositiveSet);
066 this.sparqltasks = st;
067 this.filterClasses=filterClasses;
068 }
069
070
071
072 /**
073 * see <code> getNegativeExamples(int neglimit, boolean stable )</code>
074 * @param neglimit
075 */
076 public SortedSet<String> getNegativeExamples(int neglimit ) {
077 return getNegativeExamples(neglimit, false);
078 }
079
080 /**
081 * aggregates all collected neg examples
082 * CAVE: it is necessary to call one of the make functions before calling this
083 * OTHERWISE it will choose random examples
084 *
085 * @param neglimit size of negative Example set, 0 means all, which can be quite large several thousands
086 * @param stable decides whether neg Examples are randomly picked, default false, faster for developing, since the cache can be used
087 */
088 public SortedSet<String> getNegativeExamples(int neglimit, boolean stable ) {
089 SortedSet<String> negatives = new TreeSet<String>();
090 negatives.addAll(fromNearbyClasses);
091 negatives.addAll(fromParallelClasses);
092 negatives.addAll(fromRelated);
093 negatives.addAll(fromSuperclasses);
094 if(negatives.isEmpty()) {
095 negatives.addAll(fromRandom);
096 }
097 if(neglimit<=0){
098 logger.debug("neg Example size NO shrinking: " + negatives.size());
099 return negatives;
100 }
101
102 logger.debug("neg Example size before shrinking: " + negatives.size());
103 if (stable) {
104 negatives = SetManipulation.stableShrink(negatives,neglimit);
105 }
106 else {
107 negatives = SetManipulation.fuzzyShrink(negatives,neglimit);
108 }
109 logger.debug("neg Example size after shrinking: " + negatives.size());
110 return negatives;
111 }
112
113
114 public void makeNegativeExamplesFromRandomInstances() {
115 logger.debug("making random examples ");
116 String variable = "subject";
117 String sparqlQueryString="SELECT ?"+variable+" WHERE {" +
118 "?"+variable+" <" +OWLVocabulary.RDF_TYPE+">" + " ?o" +
119 "}";
120
121 fromRandom = sparqltasks.queryAsSet(sparqlQueryString, variable);
122 fromRandom.removeAll(fullPositiveSet);
123 logger.debug("|-negExample size from random: " + fromRandom.size());
124 }
125
126 /**
127 * makes neg ex from related instances, that take part in a role R(pos,neg)
128 * filters all objects, that don't use the given namespace
129 * @param instances
130 * @param objectNamespace
131 */
132 public void makeNegativeExamplesFromRelatedInstances(SortedSet<String> instances,
133 String objectNamespace) {
134 logger.debug("making examples from related instances");
135 for (String oneInstance : instances) {
136 makeNegativeExamplesFromRelatedInstances(oneInstance, objectNamespace);
137 }
138 logger.debug("|-negExample size from related: " + fromRelated.size());
139 }
140
141 private void makeNegativeExamplesFromRelatedInstances(String oneInstance, String objectnamespace) {
142 // SortedSet<String> result = new TreeSet<String>();
143
144 String SPARQLquery = "SELECT * WHERE { \n" + "<" + oneInstance + "> " + "?p ?object. \n"
145 + "FILTER (REGEX(str(?object), '" + objectnamespace + "')).\n" + "}";
146
147 fromRelated.addAll(sparqltasks.queryAsSet(SPARQLquery, "object"));
148 fromRelated.removeAll(fullPositiveSet);
149
150 }
151
152 // keep a while may still be needed
153 /*public void dbpediaMakeNegativeExamplesFromRelatedInstances(String subject) {
154 // SortedSet<String> result = new TreeSet<String>();
155
156 String SPARQLquery = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. \n"
157 + "FILTER (REGEX(str(?o), 'http://dbpedia.org/resource/')).\n"
158 + "FILTER (!REGEX(str(?p), 'http://www.w3.org/2004/02/skos'))\n" + "}";
159
160 this.fromRelated.addAll(sparqltasks.queryAsSet(SPARQLquery, "o"));
161
162 }*/
163
164 public void makeNegativeExamplesFromNearbyClasses(SortedSet<String> positiveSet, int sparqlResultLimit){
165 SortedSet<String> classes = new TreeSet<String>();
166 Iterator<String> instanceIter = positiveSet.iterator();
167 while(classes.isEmpty() && instanceIter.hasNext()) {
168 classes.addAll(sparqltasks.getClassesForInstance(instanceIter.next(), 100));
169 }
170 String concept=classes.first();
171 if (filterClasses!=null&&filterClasses.size()>0){
172 boolean br=false;
173 for (String oneClass : classes){
174 Iterator<String> iter=filterClasses.iterator();
175 while (iter.hasNext()){
176 if (oneClass.startsWith(iter.next())){
177 break;
178 }
179 else{
180 concept=oneClass;
181 br=true;
182 break;
183 }
184 }
185 if (br) break;
186 }
187 }
188 concept = concept.replaceAll("\"", "");
189 SortedSet<String> superClasses = sparqltasks.getSuperClasses(concept, 1);
190
191 classes = new TreeSet<String>();
192 for (String oneSuperClass : superClasses) {
193 classes.addAll(sparqltasks.getSubClasses(oneSuperClass, 1));
194 }
195 classes.remove(concept);
196 for (String oneClass : classes) {
197 try{
198 fromNearbyClasses.addAll(sparqltasks.retrieveInstancesForClassDescription("\""
199 + oneClass + "\"", sparqlResultLimit));
200 } catch (Exception e){}
201 }
202
203 this.fromNearbyClasses.removeAll(fullPositiveSet);
204 }
205
206 /**
207 * makes negEx from classes, the posEx belong to.
208 * Gets all Classes from PosEx, gets Instances from these Classes, returns all
209 * @param positiveSet
210 * @param sparqlResultLimit
211 */
212 public void makeNegativeExamplesFromParallelClasses(SortedSet<String> positiveSet, int sparqlResultLimit){
213 makeNegativeExamplesFromClassesOfInstances(positiveSet, sparqlResultLimit);
214 }
215
216 private void makeNegativeExamplesFromClassesOfInstances(SortedSet<String> positiveSet,
217 int sparqlResultLimit) {
218 logger.debug("making neg Examples from parallel classes");
219 SortedSet<String> classes = new TreeSet<String>();
220 // superClasses.add(concept.replace("\"", ""));
221 // logger.debug("before"+superClasses);
222 // superClasses = dbpediaGetSuperClasses( superClasses, 4);
223 // logger.debug("getting negExamples from "+superClasses.size()+"
224 // superclasses");
225
226 for (String instance : positiveSet) {
227 try{
228 classes.addAll(sparqltasks.getClassesForInstance(instance, sparqlResultLimit));
229 }catch (Exception e) {
230 e.printStackTrace();
231 logger.warn("ignoring SPARQLQuery failure, see log/sparql.txt");
232 }
233 }
234 logger.debug("getting negExamples from " + classes.size() + " parallel classes");
235 for (String oneClass : classes) {
236 logger.debug(oneClass);
237 // rsc = new
238 // JenaResultSetConvenience(queryConcept("\""+oneClass+"\"",limit));
239 try{
240 this.fromParallelClasses.addAll(sparqltasks.retrieveInstancesForClassDescription("\"" + oneClass
241 + "\"", sparqlResultLimit));
242 }catch (Exception e) {
243 logger.warn("ignoring SPARQLQuery failure, see log/sparql.txt");
244 }
245 }
246
247 fromParallelClasses.removeAll(fullPositiveSet);
248 logger.debug("|-neg Example size from parallelclass: " + fromParallelClasses.size());
249
250 }
251
252
253
254 /**
255 * it gets the first class of an arbitrary instance and queries the superclasses of it,
256 * could be more elaborate.
257 * It is better to use makeNegativeExamplesFromSuperClasses
258 * @param positiveSet
259 * @param sparqlResultSetLimit
260 */
261 public void makeNegativeExamplesFromSuperClassesOfInstances(SortedSet<String> positiveSet,
262 int sparqlResultSetLimit) {
263 SortedSet<String> classes = new TreeSet<String>();
264 Iterator<String> instanceIter = positiveSet.iterator();
265 while(classes.isEmpty() && instanceIter.hasNext()) {
266 classes.addAll(sparqltasks.getClassesForInstance(instanceIter.next(), sparqlResultSetLimit));
267
268 }
269 makeNegativeExamplesFromSuperClasses(classes.first(), sparqlResultSetLimit);
270 }
271
272
273 public void makeNegativeExamplesFromSuperClasses(String concept, int sparqlResultSetLimit) {
274 makeNegativeExamplesFromSuperClasses( concept, sparqlResultSetLimit, 2);
275 }
276
277 /**
278 * if pos ex derive from one class, then neg ex are taken from a superclass
279 * @param concept
280 * @param sparqlResultSetLimit
281 */
282 public void makeNegativeExamplesFromSuperClasses(String concept, int sparqlResultSetLimit, int depth) {
283
284 concept = concept.replaceAll("\"", "");
285 // superClasses.add(concept.replace("\"", ""));
286 // logger.debug("before"+superClasses);
287 SortedSet<String> superClasses = sparqltasks.getSuperClasses(concept, depth);
288 logger.debug("making neg Examples from " + superClasses.size() + " superclasses");
289
290 for (String oneSuperClass : superClasses) {
291 logger.debug(oneSuperClass);
292 fromSuperclasses.addAll(sparqltasks.retrieveInstancesForClassDescription("\""
293 + oneSuperClass + "\"", sparqlResultSetLimit));
294
295 }
296 this.fromSuperclasses.removeAll(fullPositiveSet);
297 logger.debug("|-neg Example from superclass: " + fromSuperclasses.size());
298 }
299
300 @SuppressWarnings("unused")
301 private void makeNegativeExamplesFromDomain(String role, int sparqlResultSetLimit){
302 logger.debug("making Negative Examples from Domain of : "+role);
303 fromDomain.addAll(sparqltasks.getDomainInstances(role, sparqlResultSetLimit));
304 fromDomain.removeAll(fullPositiveSet);
305 logger.debug("|-neg Example size from Domain: "+this.fromDomain.size());
306 }
307
308 @SuppressWarnings("unused")
309 private void makeNegativeExamplesFromRange(String role, int sparqlResultSetLimit){
310 logger.debug("making Negative Examples from Range of : "+role);
311 fromRange.addAll(sparqltasks.getRangeInstances(role, sparqlResultSetLimit));
312 fromRange.removeAll(fullPositiveSet);
313 logger.debug("|-neg Example size from Range: "+fromRange.size());
314 }
315 }