001 /**
002 * Copyright (C) 2007-2011, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program. If not, see <http://www.gnu.org/licenses/>.
018 */
019
020 package org.dllearner.kb.sparql;
021
022 import java.io.File;
023 import java.net.URI;
024 import java.net.URL;
025 import java.util.ArrayList;
026 import java.util.Collection;
027 import java.util.LinkedList;
028 import java.util.List;
029 import java.util.TreeSet;
030
031 import javax.swing.ProgressMonitor;
032
033 import org.apache.log4j.Logger;
034 import org.dllearner.core.AbstractKnowledgeSource;
035 import org.dllearner.core.OntologyFormat;
036 import org.dllearner.core.OntologyFormatUnsupportedException;
037 import org.dllearner.core.configurators.SparqlKnowledgeSourceConfigurator;
038 import org.dllearner.core.options.BooleanConfigOption;
039 import org.dllearner.core.options.CommonConfigOptions;
040 import org.dllearner.core.options.ConfigEntry;
041 import org.dllearner.core.options.ConfigOption;
042 import org.dllearner.core.options.IntegerConfigOption;
043 import org.dllearner.core.options.InvalidConfigOptionValueException;
044 import org.dllearner.core.options.StringConfigOption;
045 import org.dllearner.core.options.StringSetConfigOption;
046 import org.dllearner.core.options.StringTupleListConfigOption;
047 import org.dllearner.core.options.URLConfigOption;
048 import org.dllearner.core.owl.KB;
049 import org.dllearner.kb.aquisitors.SparqlTupleAquisitor;
050 import org.dllearner.kb.aquisitors.SparqlTupleAquisitorImproved;
051 import org.dllearner.kb.aquisitors.TupleAquisitor;
052 import org.dllearner.kb.extraction.Configuration;
053 import org.dllearner.kb.extraction.Manager;
054 import org.dllearner.kb.extraction.Node;
055 import org.dllearner.kb.manipulator.Manipulator;
056 import org.dllearner.kb.manipulator.ObjectReplacementRule;
057 import org.dllearner.kb.manipulator.PredicateReplacementRule;
058 import org.dllearner.kb.manipulator.Rule.Months;
059 import org.dllearner.utilities.Files;
060 import org.dllearner.utilities.JamonMonitorLogger;
061 import org.dllearner.utilities.datastructures.StringTuple;
062 import org.dllearner.utilities.statistics.SimpleClock;
063 import org.semanticweb.owlapi.model.OWLOntology;
064
065 import com.jamonapi.Monitor;
066 import com.jamonapi.MonitorFactory;
067
068 /**
069 * Represents the SPARQL Endpoint Component.
070 *
071 * @author Jens Lehmann
072 * @author Sebastian Knappe
073 * @author Sebastian Hellmann
074 */
075 public class SparqlKnowledgeSource extends AbstractKnowledgeSource {
076
077 private ProgressMonitor mon;
078
079 private static final boolean debugExitAfterExtraction = false; // switches
080
081
082 private SparqlKnowledgeSourceConfigurator configurator;
083
084 /**
085 * @return the configurator for this Knowledgesource
086 */
087 @Override
088 public SparqlKnowledgeSourceConfigurator getConfigurator() {
089 return configurator;
090 }
091
/**
 * Creates the knowledge source and binds a new configurator to it.
 */
public SparqlKnowledgeSource() {
    configurator = new SparqlKnowledgeSourceConfigurator(this);
}
095
096 // these are saved for further reference
097 private URL url;
098 private SparqlEndpoint endpoint = null;
099
100 //private String format = "N-TRIPLES";
101 //private String format = "RDF/XML";
102
103 private URL ontologyFragmentURL;
104
105
106 private OWLOntology fragment;
107
108 private Manipulator manipulator = null;
109
110
111
112 // received ontology as array, used if format=Array(an element of the
113 // array consists of the subject, predicate and object separated by '<'
114 //private String[] ontArray;
115
116 // received ontology as KB, the internal format
117 //private KB kb;
118
119 // mainly used for statistic
120 private int nrOfExtractedAxioms = 0;
121
122
123 public static String getName() {
124 return "SPARQL Endpoint";
125 }
126
127 private static Logger logger = Logger
128 .getLogger(SparqlKnowledgeSource.class);
129
130 /**
131 * Specifies the configuration options for this knowledge source.
132 *
133 * @see org.dllearner.core.AbstractComponent#createConfigOptions()
134 * @return Options of this component.
135 */
136 public static Collection<ConfigOption<?>> createConfigOptions() {
137 Collection<ConfigOption<?>> options = new LinkedList<ConfigOption<?>>();
138 options.add(new URLConfigOption("url", "URL of SPARQL Endpoint",
139 null, true, true));
140 options.add(new StringConfigOption("cacheDir", "dir of cache", "cache",
141 false, true));
142 options.add(new BooleanConfigOption("useCache",
143 "If true a Cache is used", true, false, true));
144 options.add(new BooleanConfigOption("useCacheDatabase", "If true, H2 database is used, otherwise one file per query is written.", false));
145 options
146 .add(new StringSetConfigOption(
147 "instances",
148 "relevant instances e.g. positive and negative examples in a learning problem",
149 null, true, true));
150 options.add(new IntegerConfigOption("recursionDepth",
151 "recursion depth of KB fragment selection", 1, false, true));
152 options
153 .add(new StringConfigOption(
154 "predefinedFilter",
155 "the mode of the SPARQL Filter, use one of YAGO,SKOS,YAGOSKOS , YAGOSPECIALHIERARCHY, TEST",
156 null, false, true));
157 options
158 .add(new StringConfigOption(
159 "predefinedEndpoint",
160 "the mode of the SPARQL Filter, use one of DBPEDIA, LOCAL, GOVTRACK, REVYU, MYOPENLINK, FACTBOOK",
161 null, false, true));
162 options
163 .add(new StringConfigOption(
164 "predefinedManipulator",
165 "the mode of the Manipulator, use one of STANDARD, DBPEDIA-NAVIGATOR",
166 null, false, true));
167 options.add(new StringSetConfigOption("predList",
168 "list of all ignored roles", new TreeSet<String>(), false, true));
169 options.add(new StringSetConfigOption("objList",
170 "list of all ignored objects", new TreeSet<String>(), false, true));
171 options
172 .add(new BooleanConfigOption(
173 "saveExtractedFragment",
174 "Specifies whether the extracted ontology is written to a file or not. " +
175 "The OWL file is written to the cache dir." +
176 "Some DBpedia URI will make the XML invalid",
177 false, false, true));
178 options.add(new StringTupleListConfigOption("replacePredicate",
179 "rule for replacing predicates", new ArrayList<StringTuple>(), false, true));
180 options.add(new StringTupleListConfigOption("replaceObject",
181 "rule for replacing predicates", new ArrayList<StringTuple>(), false, true));
182 options.add(new IntegerConfigOption("breakSuperClassRetrievalAfter",
183 "stops a cyclic hierarchy after specified number of classes",
184 1000, false, true));
185
186 options.add(new BooleanConfigOption("useLits",
187 "use Literals in SPARQL query", true, false, true));
188 options
189 .add(new BooleanConfigOption(
190 "getAllSuperClasses",
191 "If true then all superclasses are retrieved until the most general class (owl:Thing) is reached.",
192 true, false, true));
193 options.add(new BooleanConfigOption("closeAfterRecursion",
194 "gets all classes for all instances", true, false, true));
195 options.add(new BooleanConfigOption("getPropertyInformation",
196 "gets all types for extracted ObjectProperties", false, false,
197 true));
198 options.add(new BooleanConfigOption("dissolveBlankNodes",
199 "determines whether Blanknodes are dissolved. This is a costly function.", true, false,
200 true));
201 options.add(new BooleanConfigOption("useImprovedSparqlTupelAquisitor",
202 "uses deeply nested SparqlQueries, according to recursion depth, still EXPERIMENTAL", false, false,
203 true));
204 options.add(CommonConfigOptions.getVerbosityOption());
205
206 options.add(new StringSetConfigOption("defaultGraphURIs",
207 "a list of all default Graph URIs", new TreeSet<String>(), false, true));
208 options.add(new StringSetConfigOption("namedGraphURIs",
209 "a list of all named Graph URIs", new TreeSet<String>(), false, true));
210 return options;
211 }
212
213 /*
214 * @see org.dllearner.core.Component#applyConfigEntry(org.dllearner.core.ConfigEntry)
215 */
216 @Override
217 public <T> void applyConfigEntry(ConfigEntry<T> entry)
218 throws InvalidConfigOptionValueException {
219 //TODO remove this function
220
221 }
222
223 /*
224 * (non-Javadoc)
225 *
226 * @see org.dllearner.core.Component#init()
227 */
228 @Override
229 public void init() {
230 logger.info("SparqlModul: Collecting Ontology");
231 SimpleClock totalTime = new SimpleClock();
232 //SimpleClock extractionTime = new SimpleClock();
233 if(mon != null){
234 mon.setNote("Collecting Ontology");
235 }
236 logger.trace(getURL());
237 logger.trace(getSparqlEndpoint());
238 logger.trace(configurator.getInstances());
239 Manager m = new Manager();
240 m.addProgressMonitor(mon);
241
242 // get Options for Manipulator
243 Manipulator manipulator = getManipulator();
244
245 TupleAquisitor tupleAquisitor = getTupleAquisitor();
246
247 Configuration configuration = new Configuration(tupleAquisitor,
248 manipulator, configurator.getRecursionDepth(), configurator
249 .getGetAllSuperClasses(), configurator
250 .getCloseAfterRecursion(), configurator
251 .getGetPropertyInformation(), configurator
252 .getBreakSuperClassRetrievalAfter(),
253 configurator.getDissolveBlankNodes());
254
255 // give everything to the manager
256 m.useConfiguration(configuration);
257
258 //String ont = "";
259 try {
260
261 // the actual extraction is started here
262 Monitor extractionTime = JamonMonitorLogger.getTimeMonitor(SparqlKnowledgeSource.class, "total extraction time").start();
263 List<Node> seedNodes=new ArrayList<Node>();
264
265 //if(!threaded){
266 seedNodes = m.extract(configurator.getInstances());
267 /*}else{
268 int maxPoolSize = configurator.getInstances().size();
269 ThreadPoolExecutor ex = new ThreadPoolExecutor(5,maxPoolSize,1,TimeUnit.SECONDS,new ArrayBlockingQueue<Runnable>(100));
270 List<FutureTask<Node>> tasks = new ArrayList<FutureTask<Node>>();
271
272 for (String uri : configurator.getInstances()) {
273
274 ExtractOneInstance e = new ExtractOneInstance(m,uri);
275
276 FutureTask<Node> ft = new FutureTask<Node>(e);
277 ex.submit(ft);
278 tasks.add(ft);
279 //System.out.println(f.get());
280 //seedNodes.add(f.get());
281 //System.out.println("finished FutureTask "+seedNodes.size());
282 }
283 for(FutureTask<Node> ft : tasks){
284 //System.out.println(ft.get());
285 //System.out.println("aaa");
286 seedNodes.add(ft.get());
287
288 }
289 }*/
290 extractionTime.stop();
291
292
293 fragment = m.getOWLAPIOntologyForNodes(seedNodes, configurator.getSaveExtractedFragment());
294
295
296 logger.info("Finished collecting fragment. needed "+extractionTime.getLastValue()+" ms");
297
298 ontologyFragmentURL = m.getPhysicalOntologyURL();
299
300 nrOfExtractedAxioms = configuration.getOwlAPIOntologyCollector().getNrOfExtractedAxioms();
301
302
303 } catch (Exception e) {
304 e.printStackTrace();
305 }
306 //nrOfExtractedTriples = m.getNrOfExtractedTriples();
307 logger.info("SparqlModul: ****Finished " + totalTime.getAndSet(""));
308 if (debugExitAfterExtraction) {
309
310 File jamonlog = new File("log/jamon.html");
311 Files.createFile(jamonlog, MonitorFactory.getReport());
312 Files.appendFile(jamonlog, "<xmp>\n"
313 + JamonMonitorLogger.getStringForAllSortedByLabel());
314 System.exit(0);
315 }
316 }
317
318 public List<Node> extractParallel(){
319 return null;
320 }
321
322 /*private class ExtractOneInstance implements Callable{
323 Manager m;
324 Node n;
325 String uri;
326
327 private ExtractOneInstance(Manager m, String uri){
328 super();
329 this.m = m;
330 this.uri = uri;
331 }
332
333
334
335 public Node call(){
336 System.out.println("funky");
337 return m.extractOneURI(uri);
338 }
339 }*/
340
341 /*
342 * (non-Javadoc)
343 *
344 * @see org.dllearner.core.KnowledgeSource#toDIG()
345 */
346 @Override
347 public String toDIG(URI kbURI) {
348 throw new RuntimeException("Inside Dig Converter - this doesn't work in our custom version as we have upgraded to jena 2.6.2 which doesn't support DIG");
349 // return JenaOWLDIGConverter.getTellsString(ontologyFragmentURL,
350 // OntologyFormat.RDF_XML, kbURI);
351 }
352
353 /*
354 * (non-Javadoc)
355 *
356 * @see org.dllearner.core.KnowledgeSource#export(java.io.File,
357 * org.dllearner.core.OntologyFormat)
358 */
359 @Override
360 public void export(File file, OntologyFormat format)
361 throws OntologyFormatUnsupportedException {
362 // currently no export functions implemented, so we just throw an
363 // exception
364 throw new OntologyFormatUnsupportedException("export", format);
365 }
366
367 /**
368 * @return the URL of the used sparql endpoint
369 */
370 public URL getURL() {
371 if(endpoint == null){
372 if(url==null){
373 if(configurator.getPredefinedEndpoint() == null){
374 url = configurator.getUrl();
375 return url;
376 }else{
377 return getSparqlEndpoint().getURL();
378 }
379
380 }else{
381 return url;
382 }
383 }else {
384 return endpoint.getURL();
385 }
386
387 }
388
389
390 public SparqlQuery sparqlQuery(String query) {
391 return new SparqlQuery(query, getSparqlEndpoint());
392 }
393
394
395 public SparqlEndpoint getSparqlEndpoint(){
396 if(endpoint==null) {
397 if (configurator.getPredefinedEndpoint() == null) {
398 endpoint = new SparqlEndpoint(getURL(), new LinkedList<String>(
399 configurator.getDefaultGraphURIs()),
400 new LinkedList<String>(configurator.getNamedGraphURIs()));
401 } else {
402 endpoint = SparqlEndpoint.getEndpointByName(configurator
403 .getPredefinedEndpoint());
404 // System.out.println(endpoint);
405
406 }
407 }
408 return endpoint;
409
410 }
411
412 public SPARQLTasks getSPARQLTasks() {
413
414 // get Options for endpoints
415
416 if (configurator.getUseCache()){
417 return new SPARQLTasks(new Cache(configurator.getCacheDir(), configurator.getUseCacheDatabase()),
418 getSparqlEndpoint());
419 }else {
420 return new SPARQLTasks(getSparqlEndpoint());
421 }
422 }
423
424 public SparqlQueryMaker getSparqlQueryMaker() {
425 // get Options for Filters
426 if (configurator.getPredefinedFilter() == null) {
427 return new SparqlQueryMaker("forbid", configurator.getObjList(),
428 configurator.getPredList(), configurator.getUseLits());
429
430 } else {
431
432 return SparqlQueryMaker.getSparqlQueryMakerByName(configurator
433 .getPredefinedFilter());
434 }
435
436 }
437
438 public Manipulator getManipulator() {
439
440 if(this.manipulator!=null){
441 return this.manipulator;
442 }
443
444 // get Options for Filters
445 if (configurator.getPredefinedManipulator() != null) {
446 return Manipulator.getManipulatorByName(configurator
447 .getPredefinedManipulator());
448
449 } else {
450 Manipulator m = Manipulator.getDefaultManipulator();
451 for (StringTuple st : configurator.getReplacePredicate()) {
452 m.addRule(new PredicateReplacementRule(Months.MAY, st.a, st.b));
453 }
454 for (StringTuple st : configurator.getReplaceObject()) {
455 m.addRule(new ObjectReplacementRule(Months.MAY, st.a, st.b));
456 }
457 return m;
458 }
459
460 }
461
462 public void setManipulator(Manipulator m ){
463 this.manipulator = m;
464
465 }
466
467 public TupleAquisitor getTupleAquisitor() {
468 TupleAquisitor ret = null;
469 if (configurator.getUseImprovedSparqlTupelAquisitor()) {
470 ret = new SparqlTupleAquisitorImproved(getSparqlQueryMaker(),
471 getSPARQLTasks(), configurator.getRecursionDepth());
472 } else {
473 ret = new SparqlTupleAquisitor(getSparqlQueryMaker(),
474 getSPARQLTasks());
475 }
476 return ret;
477
478 }
479
480 /*
481 * (non-Javadoc)
482 *
483 * @see org.dllearner.core.KnowledgeSource#toKB()
484 */
485 @Override
486 public KB toKB() {
487 // TODO Does this work?
488 return new KB();
489 }
490
491 public URL getOntologyFragmentURL() {
492 return ontologyFragmentURL;
493 }
494
495 public OWLOntology getOWLAPIOntology() {
496 return fragment;
497 }
498
499 public boolean isUseCache() {
500 return configurator.getUseCache();
501 }
502
503 public String getCacheDir() {
504 return configurator.getCacheDir();
505 }
506
507 public int getNrOfExtractedAxioms() {
508 return nrOfExtractedAxioms;
509 }
510
511 public void addProgressMonitor(ProgressMonitor mon){
512 this.mon = mon;
513 }
514
515
516
517 }