001    /**
002     * Copyright (C) 2007-2011, Jens Lehmann
003     *
004     * This file is part of DL-Learner.
005     *
006     * DL-Learner is free software; you can redistribute it and/or modify
007     * it under the terms of the GNU General Public License as published by
008     * the Free Software Foundation; either version 3 of the License, or
009     * (at your option) any later version.
010     *
011     * DL-Learner is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014     * GNU General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018     */
019    
020    package org.dllearner.kb.sparql;
021    
022    import java.io.File;
023    import java.net.URI;
024    import java.net.URL;
025    import java.util.ArrayList;
026    import java.util.Collection;
027    import java.util.LinkedList;
028    import java.util.List;
029    import java.util.TreeSet;
030    
031    import javax.swing.ProgressMonitor;
032    
033    import org.apache.log4j.Logger;
034    import org.dllearner.core.AbstractKnowledgeSource;
035    import org.dllearner.core.OntologyFormat;
036    import org.dllearner.core.OntologyFormatUnsupportedException;
037    import org.dllearner.core.configurators.SparqlKnowledgeSourceConfigurator;
038    import org.dllearner.core.options.BooleanConfigOption;
039    import org.dllearner.core.options.CommonConfigOptions;
040    import org.dllearner.core.options.ConfigEntry;
041    import org.dllearner.core.options.ConfigOption;
042    import org.dllearner.core.options.IntegerConfigOption;
043    import org.dllearner.core.options.InvalidConfigOptionValueException;
044    import org.dllearner.core.options.StringConfigOption;
045    import org.dllearner.core.options.StringSetConfigOption;
046    import org.dllearner.core.options.StringTupleListConfigOption;
047    import org.dllearner.core.options.URLConfigOption;
048    import org.dllearner.core.owl.KB;
049    import org.dllearner.kb.aquisitors.SparqlTupleAquisitor;
050    import org.dllearner.kb.aquisitors.SparqlTupleAquisitorImproved;
051    import org.dllearner.kb.aquisitors.TupleAquisitor;
052    import org.dllearner.kb.extraction.Configuration;
053    import org.dllearner.kb.extraction.Manager;
054    import org.dllearner.kb.extraction.Node;
055    import org.dllearner.kb.manipulator.Manipulator;
056    import org.dllearner.kb.manipulator.ObjectReplacementRule;
057    import org.dllearner.kb.manipulator.PredicateReplacementRule;
058    import org.dllearner.kb.manipulator.Rule.Months;
059    import org.dllearner.utilities.Files;
060    import org.dllearner.utilities.JamonMonitorLogger;
061    import org.dllearner.utilities.datastructures.StringTuple;
062    import org.dllearner.utilities.statistics.SimpleClock;
063    import org.semanticweb.owlapi.model.OWLOntology;
064    
065    import com.jamonapi.Monitor;
066    import com.jamonapi.MonitorFactory;
067    
068    /**
069     * Represents the SPARQL Endpoint Component.
070     * 
071     * @author Jens Lehmann
072     * @author Sebastian Knappe
073     * @author Sebastian Hellmann
074     */
075    public class SparqlKnowledgeSource extends AbstractKnowledgeSource {
076    
077            private ProgressMonitor mon;
078            
079            private static final boolean debugExitAfterExtraction = false; // switches
080    
081    
082            private SparqlKnowledgeSourceConfigurator configurator;
083    
084            /**
085             * @return the configurator for this Knowledgesource
086             */
087            @Override
088            public SparqlKnowledgeSourceConfigurator getConfigurator() {
089                    return configurator;
090            }
091    
092            public SparqlKnowledgeSource() {
093                    this.configurator = new SparqlKnowledgeSourceConfigurator(this);
094            }
095    
096            // these are saved for further reference
097            private URL url;
098            private SparqlEndpoint endpoint = null;
099    
100            //private String format = "N-TRIPLES";
101            //private String format = "RDF/XML";
102    
103            private URL ontologyFragmentURL;
104    
105            
106            private OWLOntology fragment;
107            
108            private Manipulator manipulator = null;
109            
110            
111    
112            // received ontology as array, used if format=Array(an element of the
113            // array consists of the subject, predicate and object separated by '<'
114            //private String[] ontArray;
115    
116            // received ontology as KB, the internal format
117            //private KB kb;
118    
119            // mainly used for statistic
120            private int nrOfExtractedAxioms = 0;
121    
122    
123            public static String getName() {
124                    return "SPARQL Endpoint";
125            }
126    
127            private static Logger logger = Logger
128                            .getLogger(SparqlKnowledgeSource.class);
129    
130            /**
131             * Specifies the configuration options for this knowledge source.
132             * 
133             * @see org.dllearner.core.AbstractComponent#createConfigOptions()
134             * @return Options of this component.
135             */
136            public static Collection<ConfigOption<?>> createConfigOptions() {
137                    Collection<ConfigOption<?>> options = new LinkedList<ConfigOption<?>>();
138                    options.add(new URLConfigOption("url", "URL of SPARQL Endpoint",
139                                    null, true, true));
140                    options.add(new StringConfigOption("cacheDir", "dir of cache", "cache",
141                                    false, true));
142                    options.add(new BooleanConfigOption("useCache",
143                                    "If true a Cache is used", true, false, true));
144                    options.add(new BooleanConfigOption("useCacheDatabase", "If true, H2 database is used, otherwise one file per query is written.", false));
145                    options
146                                    .add(new StringSetConfigOption(
147                                                    "instances",
148                                                    "relevant instances e.g. positive and negative examples in a learning problem",
149                                                    null, true, true));
150                    options.add(new IntegerConfigOption("recursionDepth",
151                                    "recursion depth of KB fragment selection", 1, false, true));
152                    options
153                                    .add(new StringConfigOption(
154                                                    "predefinedFilter",
155                                                    "the mode of the SPARQL Filter, use one of YAGO,SKOS,YAGOSKOS , YAGOSPECIALHIERARCHY, TEST",
156                                                    null, false, true));
157                    options
158                                    .add(new StringConfigOption(
159                                                    "predefinedEndpoint",
160                                                    "the mode of the SPARQL Filter, use one of DBPEDIA, LOCAL, GOVTRACK, REVYU, MYOPENLINK, FACTBOOK",
161                                                    null, false, true));
162                    options
163                                    .add(new StringConfigOption(
164                                                    "predefinedManipulator",
165                                                    "the mode of the Manipulator, use one of STANDARD, DBPEDIA-NAVIGATOR",
166                                                    null, false, true));
167                    options.add(new StringSetConfigOption("predList",
168                                    "list of all ignored roles", new TreeSet<String>(), false, true));
169                    options.add(new StringSetConfigOption("objList",
170                                    "list of all ignored objects", new TreeSet<String>(), false, true));
171                    options
172                                    .add(new BooleanConfigOption(
173                                                    "saveExtractedFragment",
174                                                    "Specifies whether the extracted ontology is written to a file or not. " +
175                                                    "The OWL file is written to the cache dir." +
176                                                    "Some DBpedia URI will make the XML invalid",
177                                                    false, false, true));
178                    options.add(new StringTupleListConfigOption("replacePredicate",
179                                    "rule for replacing predicates", new ArrayList<StringTuple>(), false, true));
180                    options.add(new StringTupleListConfigOption("replaceObject",
181                                    "rule for replacing predicates", new ArrayList<StringTuple>(), false, true));
182                    options.add(new IntegerConfigOption("breakSuperClassRetrievalAfter",
183                                    "stops a cyclic hierarchy after specified number of classes",
184                                    1000, false, true));
185    
186                    options.add(new BooleanConfigOption("useLits",
187                                    "use Literals in SPARQL query", true, false, true));
188                    options
189                    .add(new BooleanConfigOption(
190                                    "getAllSuperClasses",
191                                    "If true then all superclasses are retrieved until the most general class (owl:Thing) is reached.",
192                                    true, false, true));
193                    options.add(new BooleanConfigOption("closeAfterRecursion",
194                                    "gets all classes for all instances", true, false, true));
195                    options.add(new BooleanConfigOption("getPropertyInformation",
196                                    "gets all types for extracted ObjectProperties", false, false,
197                                    true));
198                    options.add(new BooleanConfigOption("dissolveBlankNodes",
199                                    "determines whether Blanknodes are dissolved. This is a costly function.", true, false,
200                                    true));
201                    options.add(new BooleanConfigOption("useImprovedSparqlTupelAquisitor",
202                                    "uses deeply nested SparqlQueries, according to recursion depth, still EXPERIMENTAL", false, false,
203                                    true));
204                    options.add(CommonConfigOptions.getVerbosityOption());
205    
206                    options.add(new StringSetConfigOption("defaultGraphURIs",
207                                    "a list of all default Graph URIs", new TreeSet<String>(), false, true));
208                    options.add(new StringSetConfigOption("namedGraphURIs",
209                                    "a list of all named Graph URIs", new TreeSet<String>(), false, true));
210                    return options;
211            }
212    
213            /*
214             * @see org.dllearner.core.Component#applyConfigEntry(org.dllearner.core.ConfigEntry)
215             */
216            @Override
217            public <T> void applyConfigEntry(ConfigEntry<T> entry)
218                            throws InvalidConfigOptionValueException {
219                    //TODO remove this function
220                    
221            }
222    
223            /*
224             * (non-Javadoc)
225             * 
226             * @see org.dllearner.core.Component#init()
227             */
228            @Override
229            public void init() {
230                    logger.info("SparqlModul: Collecting Ontology");
231                    SimpleClock totalTime = new SimpleClock();
232                    //SimpleClock extractionTime = new SimpleClock();
233                    if(mon != null){
234                            mon.setNote("Collecting Ontology");
235                    }
236                    logger.trace(getURL());
237                    logger.trace(getSparqlEndpoint());
238                    logger.trace(configurator.getInstances());
239                    Manager m = new Manager();
240                    m.addProgressMonitor(mon);
241    
242                    // get Options for Manipulator
243                    Manipulator manipulator = getManipulator();
244    
245                    TupleAquisitor tupleAquisitor = getTupleAquisitor();
246    
247                    Configuration configuration = new Configuration(tupleAquisitor,
248                                    manipulator, configurator.getRecursionDepth(), configurator
249                                                    .getGetAllSuperClasses(), configurator
250                                                    .getCloseAfterRecursion(), configurator
251                                                    .getGetPropertyInformation(), configurator
252                                                    .getBreakSuperClassRetrievalAfter(),
253                                                    configurator.getDissolveBlankNodes());
254    
255                    // give everything to the manager
256                    m.useConfiguration(configuration);
257    
258                    //String ont = "";
259                    try {
260    
261                            // the actual extraction is started here
262                            Monitor extractionTime = JamonMonitorLogger.getTimeMonitor(SparqlKnowledgeSource.class, "total extraction time").start();
263                            List<Node> seedNodes=new ArrayList<Node>();
264                            
265                            //if(!threaded){
266                                    seedNodes = m.extract(configurator.getInstances());
267                            /*}else{
268                                    int maxPoolSize = configurator.getInstances().size();
269                                    ThreadPoolExecutor ex = new ThreadPoolExecutor(5,maxPoolSize,1,TimeUnit.SECONDS,new ArrayBlockingQueue<Runnable>(100));
270                                    List<FutureTask<Node>> tasks = new ArrayList<FutureTask<Node>>();
271                                                            
272                                    for (String uri : configurator.getInstances()) {
273                                            
274                                            ExtractOneInstance e = new ExtractOneInstance(m,uri);
275                                            
276                                            FutureTask<Node> ft = new FutureTask<Node>(e);
277                                            ex.submit(ft);
278                                            tasks.add(ft);
279                                            //System.out.println(f.get());
280                                            //seedNodes.add(f.get());
281                                            //System.out.println("finished FutureTask "+seedNodes.size());
282                                    }
283                                    for(FutureTask<Node> ft : tasks){
284                                            //System.out.println(ft.get());
285                                            //System.out.println("aaa");
286                                            seedNodes.add(ft.get());
287                                            
288                                    }
289                            }*/
290                            extractionTime.stop();
291                    
292                            
293                            fragment = m.getOWLAPIOntologyForNodes(seedNodes, configurator.getSaveExtractedFragment());
294                            
295    
296                            logger.info("Finished collecting fragment. needed "+extractionTime.getLastValue()+" ms");
297    
298                            ontologyFragmentURL = m.getPhysicalOntologyURL();
299                            
300                            nrOfExtractedAxioms = configuration.getOwlAPIOntologyCollector().getNrOfExtractedAxioms();
301                            
302                    
303                    } catch (Exception e) {
304                            e.printStackTrace();
305                    }
306                    //nrOfExtractedTriples = m.getNrOfExtractedTriples();
307                    logger.info("SparqlModul: ****Finished " + totalTime.getAndSet(""));
308                    if (debugExitAfterExtraction) {
309    
310                            File jamonlog = new File("log/jamon.html");
311                            Files.createFile(jamonlog, MonitorFactory.getReport());
312                            Files.appendFile(jamonlog, "<xmp>\n"
313                                            + JamonMonitorLogger.getStringForAllSortedByLabel());
314                            System.exit(0);
315                    }
316            }
317            
318            public List<Node> extractParallel(){
319                    return null;
320            }
321            
322            /*private class ExtractOneInstance  implements Callable{
323                    Manager m;
324                    Node n;
325                    String uri;
326                    
327                    private ExtractOneInstance(Manager m, String uri){
328                            super();
329                            this.m = m;
330                            this.uri = uri;
331                    }
332                    
333                    
334                    
335                    public Node call(){
336                            System.out.println("funky");
337                            return m.extractOneURI(uri);
338                    }
339            }*/
340    
341            /*
342             * (non-Javadoc)
343             * 
344             * @see org.dllearner.core.KnowledgeSource#toDIG()
345             */
346            @Override
347            public String toDIG(URI kbURI) {
348                throw new RuntimeException("Inside Dig Converter - this doesn't work in our custom version as we have upgraded to jena 2.6.2 which doesn't support DIG");
349    //                      return JenaOWLDIGConverter.getTellsString(ontologyFragmentURL,
350    //                                      OntologyFormat.RDF_XML, kbURI);
351            }
352    
353            /*
354             * (non-Javadoc)
355             * 
356             * @see org.dllearner.core.KnowledgeSource#export(java.io.File,
357             *      org.dllearner.core.OntologyFormat)
358             */
359            @Override
360            public void export(File file, OntologyFormat format)
361                            throws OntologyFormatUnsupportedException {
362                    // currently no export functions implemented, so we just throw an
363                    // exception
364                    throw new OntologyFormatUnsupportedException("export", format);
365            }
366    
367            /**
368             * @return the URL of the used sparql endpoint
369             */
370            public URL getURL() {
371                    if(endpoint == null){
372                            if(url==null){
373                                    if(configurator.getPredefinedEndpoint() == null){
374                                                    url = configurator.getUrl();
375                                            return url;
376                                    }else{
377                                            return getSparqlEndpoint().getURL();
378                                    }
379                                    
380                            }else{
381                                    return url;
382                            }
383                    }else {
384                            return endpoint.getURL();
385                    }
386                    
387            }
388    
389    
390            public SparqlQuery sparqlQuery(String query) {
391                    return new SparqlQuery(query, getSparqlEndpoint());
392            }
393    
394            
395            public SparqlEndpoint getSparqlEndpoint(){
396                    if(endpoint==null) {
397                            if (configurator.getPredefinedEndpoint() == null) {
398                                    endpoint = new SparqlEndpoint(getURL(), new LinkedList<String>(
399                                                    configurator.getDefaultGraphURIs()),
400                                                    new LinkedList<String>(configurator.getNamedGraphURIs()));
401                            } else {
402                                    endpoint = SparqlEndpoint.getEndpointByName(configurator
403                                                    .getPredefinedEndpoint());
404                                    // System.out.println(endpoint);
405            
406                            }
407                    }
408                    return endpoint;
409    
410            }
411            
412            public SPARQLTasks getSPARQLTasks() {
413    
414                    // get Options for endpoints
415                    
416                    if (configurator.getUseCache()){
417                            return new SPARQLTasks(new Cache(configurator.getCacheDir(), configurator.getUseCacheDatabase()),
418                                            getSparqlEndpoint());
419                    }else {
420                            return new SPARQLTasks(getSparqlEndpoint());
421                    }
422            }
423    
424            public SparqlQueryMaker getSparqlQueryMaker() {
425                    // get Options for Filters
426                    if (configurator.getPredefinedFilter() == null) {
427                            return new SparqlQueryMaker("forbid", configurator.getObjList(),
428                                            configurator.getPredList(), configurator.getUseLits());
429    
430                    } else {
431    
432                            return SparqlQueryMaker.getSparqlQueryMakerByName(configurator
433                                            .getPredefinedFilter());
434                    }
435    
436            }
437    
438            public Manipulator getManipulator() {
439                    
440                    if(this.manipulator!=null){
441                            return this.manipulator;
442                    }
443                    
444                    // get Options for Filters
445                    if (configurator.getPredefinedManipulator() != null) {
446                            return Manipulator.getManipulatorByName(configurator
447                                            .getPredefinedManipulator());
448    
449                    } else {
450                            Manipulator m = Manipulator.getDefaultManipulator();
451                            for (StringTuple st : configurator.getReplacePredicate()) {
452                                    m.addRule(new PredicateReplacementRule(Months.MAY, st.a, st.b));
453                            }
454                            for (StringTuple st : configurator.getReplaceObject()) {
455                                    m.addRule(new ObjectReplacementRule(Months.MAY, st.a, st.b));
456                            }
457                            return m;
458                    }
459    
460            }
461            
462            public void setManipulator(Manipulator m ){
463                    this.manipulator = m;
464                    
465            }
466    
467            public TupleAquisitor getTupleAquisitor() {
468                    TupleAquisitor ret = null;
469                    if (configurator.getUseImprovedSparqlTupelAquisitor()) {
470                            ret = new SparqlTupleAquisitorImproved(getSparqlQueryMaker(),
471                                            getSPARQLTasks(), configurator.getRecursionDepth());
472                    } else {
473                            ret = new SparqlTupleAquisitor(getSparqlQueryMaker(),
474                                            getSPARQLTasks());
475                    }
476                    return ret;
477    
478            }
479    
480            /*
481             * (non-Javadoc)
482             * 
483             * @see org.dllearner.core.KnowledgeSource#toKB()
484             */
485            @Override
486            public KB toKB() {
487                    // TODO Does this work?
488                    return new KB();
489            }
490    
491            public URL getOntologyFragmentURL() {
492                    return ontologyFragmentURL;
493            }
494            
495            public OWLOntology getOWLAPIOntology() {
496                    return fragment;
497            }
498    
499            public boolean isUseCache() {
500                    return configurator.getUseCache();
501            }
502    
503            public String getCacheDir() {
504                    return configurator.getCacheDir();
505            }
506    
507            public int getNrOfExtractedAxioms() {
508                    return nrOfExtractedAxioms;
509            }
510            
511            public void addProgressMonitor(ProgressMonitor mon){
512                    this.mon = mon;
513            }
514    
515            
516    
517    }