001    /**
002     * Copyright (C) 2007-2011, Jens Lehmann
003     *
004     * This file is part of DL-Learner.
005     *
006     * DL-Learner is free software; you can redistribute it and/or modify
007     * it under the terms of the GNU General Public License as published by
008     * the Free Software Foundation; either version 3 of the License, or
009     * (at your option) any later version.
010     *
011     * DL-Learner is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014     * GNU General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018     */
019    
020    package org.dllearner.utilities.examples;
021    
022    import java.util.Iterator;
023    import java.util.SortedSet;
024    import java.util.TreeSet;
025    
026    import org.apache.log4j.Logger;
027    import org.dllearner.kb.sparql.SPARQLTasks;
028    import org.dllearner.utilities.datastructures.SetManipulation;
029    import org.dllearner.utilities.owl.OWLVocabulary;
030    
031    public class AutomaticNegativeExampleFinderSPARQL {
032    
033            // LOGGER: ComponentManager
034            private static Logger logger = Logger.getLogger(AutomaticNegativeExampleFinderSPARQL.class);
035    
036            private SPARQLTasks sparqltasks;
037    
038            private SortedSet<String> filterClasses;
039            
040            private SortedSet<String> fullPositiveSet;
041            
042            private SortedSet<String> fromRelated  = new TreeSet<String>();
043            private SortedSet<String> fromNearbyClasses  = new TreeSet<String>();
044            private SortedSet<String> fromSuperclasses = new TreeSet<String>();;
045            private SortedSet<String> fromParallelClasses = new TreeSet<String>();;
046            private SortedSet<String> fromRandom = new TreeSet<String>();;
047            private SortedSet<String> fromDomain = new TreeSet<String>();;
048            private SortedSet<String> fromRange = new TreeSet<String>();;
049            
050            static int poslimit = 10;
051            static int neglimit = 20;
052    
053            
054            /**
055             * takes as input a full positive set to make sure no negatives are added as positives
056             *  
057             * @param fullPositiveSet
058             * @param st
059             */
060            public AutomaticNegativeExampleFinderSPARQL(
061                            SortedSet<String> fullPositiveSet,
062                            SPARQLTasks st, SortedSet<String> filterClasses) {
063                    super();
064                    this.fullPositiveSet = new TreeSet<String>(); 
065                    this.fullPositiveSet.addAll(fullPositiveSet);
066                    this.sparqltasks = st;
067                    this.filterClasses=filterClasses;
068            }
069            
070            
071            
072            /**
073             * see <code>  getNegativeExamples(int neglimit, boolean stable )</code>
074             * @param neglimit
075             */
076            public SortedSet<String> getNegativeExamples(int neglimit ) {
077                    return getNegativeExamples(neglimit, false);
078            }
079    
080            /**
081             * aggregates all collected neg examples
082             * CAVE: it is necessary to call one of the make functions before calling this
083             * OTHERWISE it will choose random examples
084             * 
085             * @param neglimit size of negative Example set, 0 means all, which can be quite large several thousands
086             * @param stable decides whether neg Examples are randomly picked, default false, faster for developing, since the cache can be used
087             */
088            public SortedSet<String> getNegativeExamples(int neglimit, boolean stable ) {
089                    SortedSet<String> negatives = new TreeSet<String>();
090                    negatives.addAll(fromNearbyClasses);
091                    negatives.addAll(fromParallelClasses);
092                    negatives.addAll(fromRelated);
093                    negatives.addAll(fromSuperclasses);
094                    if(negatives.isEmpty()) {
095                            negatives.addAll(fromRandom);
096                    }
097                    if(neglimit<=0){
098                            logger.debug("neg Example size NO shrinking: " + negatives.size());
099                            return negatives;
100                    }
101                    
102                    logger.debug("neg Example size before shrinking: " + negatives.size());
103                    if (stable) {
104                            negatives = SetManipulation.stableShrink(negatives,neglimit);
105                    }
106                    else {
107                            negatives = SetManipulation.fuzzyShrink(negatives,neglimit);
108                    }
109                    logger.debug("neg Example size after shrinking: " + negatives.size());
110                    return negatives;
111            }
112    
113            
114            public void makeNegativeExamplesFromRandomInstances() {
115                    logger.debug("making random examples ");
116                    String variable = "subject";
117                    String sparqlQueryString="SELECT ?"+variable+" WHERE {" +
118                                    "?"+variable+" <" +OWLVocabulary.RDF_TYPE+">" + " ?o" + 
119                                    "}";
120                    
121                    fromRandom = sparqltasks.queryAsSet(sparqlQueryString, variable);
122                    fromRandom.removeAll(fullPositiveSet);
123                    logger.debug("|-negExample size from random: " + fromRandom.size());
124            }
125            
126            /**
127             * makes neg ex from related instances, that take part in a role R(pos,neg)
128             * filters all objects, that don't use the given namespace 
129             * @param instances
130             * @param objectNamespace
131             */
132            public void makeNegativeExamplesFromRelatedInstances(SortedSet<String> instances,
133                            String objectNamespace) {
134                    logger.debug("making examples from related instances");
135                    for (String oneInstance : instances) {
136                            makeNegativeExamplesFromRelatedInstances(oneInstance, objectNamespace);
137                    }
138                    logger.debug("|-negExample size from related: " + fromRelated.size());
139            }
140    
141            private void makeNegativeExamplesFromRelatedInstances(String oneInstance, String objectnamespace) {
142                    // SortedSet<String> result = new TreeSet<String>();
143    
144                    String SPARQLquery = "SELECT * WHERE { \n" + "<" + oneInstance + "> " + "?p ?object. \n"
145                                    + "FILTER (REGEX(str(?object), '" + objectnamespace + "')).\n" + "}";
146    
147                    fromRelated.addAll(sparqltasks.queryAsSet(SPARQLquery, "object"));
148                    fromRelated.removeAll(fullPositiveSet);
149    
150            }
151    
152            // keep a while may still be needed
153            /*public void dbpediaMakeNegativeExamplesFromRelatedInstances(String subject) {
154                    // SortedSet<String> result = new TreeSet<String>();
155    
156                    String SPARQLquery = "SELECT * WHERE { \n" + "<" + subject + "> " + "?p ?o. \n"
157                                    + "FILTER (REGEX(str(?o), 'http://dbpedia.org/resource/')).\n"
158                                    + "FILTER (!REGEX(str(?p), 'http://www.w3.org/2004/02/skos'))\n" + "}";
159    
160                    this.fromRelated.addAll(sparqltasks.queryAsSet(SPARQLquery, "o"));
161    
162            }*/
163            
164            public void makeNegativeExamplesFromNearbyClasses(SortedSet<String> positiveSet, int sparqlResultLimit){
165                    SortedSet<String> classes = new TreeSet<String>();
166                    Iterator<String> instanceIter = positiveSet.iterator();
167                    while(classes.isEmpty() && instanceIter.hasNext()) {
168                            classes.addAll(sparqltasks.getClassesForInstance(instanceIter.next(), 100));
169                    }
170                    String concept=classes.first();
171                    if (filterClasses!=null&&filterClasses.size()>0){
172                            boolean br=false;
173                            for (String oneClass : classes){
174                                    Iterator<String> iter=filterClasses.iterator();
175                                    while (iter.hasNext()){
176                                            if (oneClass.startsWith(iter.next())){
177                                                    break;
178                                            }
179                                            else{
180                                                    concept=oneClass;
181                                                    br=true;
182                                                    break;
183                                            }
184                                    }
185                                    if (br) break;
186                            }
187                    }
188                    concept = concept.replaceAll("\"", "");
189                    SortedSet<String> superClasses = sparqltasks.getSuperClasses(concept, 1);
190                    
191                    classes = new TreeSet<String>();
192                    for (String oneSuperClass : superClasses) {
193                            classes.addAll(sparqltasks.getSubClasses(oneSuperClass, 1));
194                    }
195                    classes.remove(concept);
196                    for (String oneClass : classes) {
197                            try{
198                                    fromNearbyClasses.addAll(sparqltasks.retrieveInstancesForClassDescription("\""
199                                                    + oneClass + "\"", sparqlResultLimit));
200                            } catch (Exception e){}
201                    }
202                    
203                    this.fromNearbyClasses.removeAll(fullPositiveSet);
204            }
205            
206            /**
207             * makes negEx from classes, the posEx belong to.
208             * Gets all Classes from PosEx, gets Instances from these Classes, returns all
209             * @param positiveSet
210             * @param sparqlResultLimit
211             */
212            public void makeNegativeExamplesFromParallelClasses(SortedSet<String> positiveSet, int sparqlResultLimit){
213                    makeNegativeExamplesFromClassesOfInstances(positiveSet, sparqlResultLimit);
214            }
215            
216            private void makeNegativeExamplesFromClassesOfInstances(SortedSet<String> positiveSet,
217                            int sparqlResultLimit) {
218                    logger.debug("making neg Examples from parallel classes");
219                    SortedSet<String> classes = new TreeSet<String>();
220                    // superClasses.add(concept.replace("\"", ""));
221                    // logger.debug("before"+superClasses);
222                    // superClasses = dbpediaGetSuperClasses( superClasses, 4);
223                    // logger.debug("getting negExamples from "+superClasses.size()+"
224                    // superclasses");
225    
226                    for (String instance : positiveSet) {
227                            try{
228                            classes.addAll(sparqltasks.getClassesForInstance(instance, sparqlResultLimit));
229                            }catch (Exception e) {
230                                    e.printStackTrace();
231                                    logger.warn("ignoring SPARQLQuery failure, see log/sparql.txt");
232                            }
233                    }
234                    logger.debug("getting negExamples from " + classes.size() + " parallel classes");
235                    for (String oneClass : classes) {
236                            logger.debug(oneClass);
237                            // rsc = new
238                            // JenaResultSetConvenience(queryConcept("\""+oneClass+"\"",limit));
239                            try{
240                            this.fromParallelClasses.addAll(sparqltasks.retrieveInstancesForClassDescription("\"" + oneClass
241                                            + "\"", sparqlResultLimit));
242                            }catch (Exception e) {
243                                    logger.warn("ignoring SPARQLQuery failure, see log/sparql.txt");
244                            }
245                    }
246                    
247                    fromParallelClasses.removeAll(fullPositiveSet);
248                    logger.debug("|-neg Example size from parallelclass: " + fromParallelClasses.size());
249    
250            }
251    
252            
253            
254            /**
255             * it gets the first class of an arbitrary  instance and queries the superclasses of it,
256             * could be more elaborate.
257             * It is better to use makeNegativeExamplesFromSuperClasses
258             * @param positiveSet
259             * @param sparqlResultSetLimit
260             */
261            public void makeNegativeExamplesFromSuperClassesOfInstances(SortedSet<String> positiveSet, 
262                            int sparqlResultSetLimit) {
263                    SortedSet<String> classes = new TreeSet<String>();
264                    Iterator<String> instanceIter = positiveSet.iterator();
265                    while(classes.isEmpty() && instanceIter.hasNext()) {
266                            classes.addAll(sparqltasks.getClassesForInstance(instanceIter.next(), sparqlResultSetLimit));
267                    
268                    }
269                    makeNegativeExamplesFromSuperClasses(classes.first(), sparqlResultSetLimit);
270            }
271            
272    
273            public void makeNegativeExamplesFromSuperClasses(String concept, int sparqlResultSetLimit) {
274                    makeNegativeExamplesFromSuperClasses( concept,  sparqlResultSetLimit, 2);
275            }
276            
277            /**
278             * if pos ex derive from one class, then neg ex are taken from a superclass
279             * @param concept
280             * @param sparqlResultSetLimit
281             */
282            public void makeNegativeExamplesFromSuperClasses(String concept, int sparqlResultSetLimit, int depth) {
283    
284                    concept = concept.replaceAll("\"", "");
285                    // superClasses.add(concept.replace("\"", ""));
286                    // logger.debug("before"+superClasses);
287                    SortedSet<String> superClasses = sparqltasks.getSuperClasses(concept, depth);
288                    logger.debug("making neg Examples from " + superClasses.size() + " superclasses");
289    
290                    for (String oneSuperClass : superClasses) {
291                            logger.debug(oneSuperClass);
292                            fromSuperclasses.addAll(sparqltasks.retrieveInstancesForClassDescription("\""
293                                            + oneSuperClass + "\"", sparqlResultSetLimit));
294    
295                    }
296                    this.fromSuperclasses.removeAll(fullPositiveSet);
297                    logger.debug("|-neg Example from superclass: " + fromSuperclasses.size());
298            }
299            
300            @SuppressWarnings("unused")
301            private void makeNegativeExamplesFromDomain(String role, int sparqlResultSetLimit){
302                    logger.debug("making Negative Examples from Domain of : "+role);
303                    fromDomain.addAll(sparqltasks.getDomainInstances(role, sparqlResultSetLimit));
304                    fromDomain.removeAll(fullPositiveSet);
305                    logger.debug("|-neg Example size from Domain: "+this.fromDomain.size());
306            }
307            
308            @SuppressWarnings("unused")
309            private void makeNegativeExamplesFromRange(String role, int sparqlResultSetLimit){
310                    logger.debug("making Negative Examples from Range of : "+role);
311                    fromRange.addAll(sparqltasks.getRangeInstances(role, sparqlResultSetLimit));
312                    fromRange.removeAll(fullPositiveSet);
313                    logger.debug("|-neg Example size from Range: "+fromRange.size());
314            }
315    }