001 /**
002 * Copyright (C) 2007-2011, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program. If not, see <http://www.gnu.org/licenses/>.
018 */
019
020 package org.dllearner.kb.extraction;
021
022 import java.util.ArrayList;
023 import java.util.List;
024 import java.util.SortedSet;
025 import java.util.TreeSet;
026
027 import org.apache.log4j.Logger;
028 import org.dllearner.kb.aquisitors.SparqlTupleAquisitorImproved;
029 import org.dllearner.kb.aquisitors.TupleAquisitor;
030 import org.dllearner.utilities.JamonMonitorLogger;
031 import org.dllearner.utilities.statistics.SimpleClock;
032
033 import com.jamonapi.Monitor;
034
035 /**
036 * This class is used to extract the information .
037 *
038 * @author Sebastian Hellmann
039 */
040 public class ExtractionAlgorithm {
041
042 private Configuration configuration;
043 private SortedSet<String> alreadyQueriedSuperClasses = new TreeSet<String>();
044 private boolean stop = false;
045
046
047 private static Logger logger = Logger
048 .getLogger(ExtractionAlgorithm.class);
049
050 public ExtractionAlgorithm(Configuration configuration) {
051 this.configuration = configuration;
052 }
053
054
055 public void stop(){
056 stop=true;
057 }
058
059 private boolean stopCondition(){
060 return stop;
061 }
062
063 void reset(){
064 stop = false;
065 }
066
067 private Node getFirstNode(String uri) {
068 return new InstanceNode(uri);
069 }
070
071 @SuppressWarnings("unused")
072 private List<Node> expandAll(String[] uris, TupleAquisitor tupelAquisitor) {
073 List<Node> nodeList = new ArrayList<Node>();
074 for (String oneURI : uris) {
075 nodeList.add(expandNode(oneURI, tupelAquisitor));
076 }
077
078 return nodeList;
079 }
080
081 /**
082 * most important function expands one example
083 * CAVE: the recursion is not a
084 * recursion anymore, it was transformed to an iteration
085 *
086 */
087 public Node expandNode(String uri, TupleAquisitor tupleAquisitor) {
088 SimpleClock sc = new SimpleClock();
089 if(tupleAquisitor instanceof SparqlTupleAquisitorImproved){
090 ((SparqlTupleAquisitorImproved)tupleAquisitor).removeFromCache(uri);
091 }
092
093 Node seedNode = getFirstNode(uri);
094 List<Node> newNodes = new ArrayList<Node>();
095 List<Node> collectNodes = new ArrayList<Node>();
096 List<Node> tmp = new ArrayList<Node>();
097
098
099 logger.info("Seed Node: "+seedNode);
100 newNodes.add(seedNode);
101
102
103 Monitor basic = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeBasicExtraction").start();
104 for (int x = 1; x <= configuration.getRecursiondepth(); x++) {
105
106 sc.reset();
107 while (!newNodes.isEmpty() && !stopCondition()) {
108 Node nextNode = newNodes.remove(0);
109 logger.info("Expanding " + nextNode);
110
111 // these are the new not expanded nodes
112 // the others are saved in connection with the original node
113 tupleAquisitor.setNextTaskToNormal();
114 tmp.addAll(nextNode.expand(tupleAquisitor,
115 configuration.getManipulator()));
116 //.out.println(tmpVec);
117
118 }
119 collectNodes.addAll(tmp);
120 newNodes.addAll(tmp);
121 tmp.clear();
122
123 logger.info("Recursion counter: " + x + " with " + newNodes.size()
124 + " Nodes remaining, " + sc.getAndSet(""));
125 }
126 basic.stop();
127
128 if(configuration.isCloseAfterRecursion()&& !stopCondition()){
129 Monitor m = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeCloseAfterRecursion").start();
130 List<InstanceNode> l = getInstanceNodes(newNodes);
131 logger.info("Getting classes for remaining instances: "+l.size() + " instances");
132 tupleAquisitor.setNextTaskToClassesForInstances();
133 collectNodes.addAll(expandCloseAfterRecursion(l, tupleAquisitor));
134 m.stop();
135 }
136 // gets All Class Nodes and expands them further
137 if (configuration.isGetAllSuperClasses()&& !stopCondition()) {
138 Monitor m = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeGetAllSuperClasses").start();
139 List<ClassNode> allClassNodes = getClassNodes(collectNodes);
140 tupleAquisitor.setNextTaskToClassInformation();
141 logger.info("Get all superclasses for "+allClassNodes.size() + " classes");
142 expandAllSuperClassesOfANode(allClassNodes, tupleAquisitor);
143 m.stop();
144 }
145
146
147 if(configuration.isGetPropertyInformation()&& !stopCondition() ){
148 collectNodes.add(seedNode);
149 Monitor m = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeGetPropertyInformation").start();
150 List<ObjectPropertyNode> objectProperties = getObjectPropertyNodes(collectNodes);
151 logger.info("Get info for "+objectProperties.size() + " objectProperties");
152 for (ObjectPropertyNode node : objectProperties) {
153 if(stopCondition()){
154 break;
155 }
156 collectNodes.addAll(node.expandProperties(tupleAquisitor, configuration.getManipulator(), configuration.isDissolveBlankNodes()));
157 }
158 List<DatatypePropertyNode> datatypeProperties = getDatatypeProperties(collectNodes);
159 logger.info("Get info for "+datatypeProperties.size() + " datatypeProperties");
160 for (DatatypePropertyNode node : datatypeProperties) {
161 if(stopCondition()){
162 break;
163 }
164 collectNodes.addAll(node.expandProperties(tupleAquisitor, configuration.getManipulator(), configuration.isDissolveBlankNodes()));
165 }
166 m.stop();
167 }
168
169 Monitor m = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeBlankNode").start();
170 if( configuration.isDissolveBlankNodes() && !stopCondition()){
171 expandBlankNodes(getBlankNodes(collectNodes),tupleAquisitor);
172 }
173 m.stop();
174
175
176 return seedNode;
177
178 }
179
180 private List<Node> expandBlankNodes(List<BlankNode> blankNodes, TupleAquisitor tupelAquisitor) {
181 List<Node> newNodes = new ArrayList<Node>();
182 while (!blankNodes.isEmpty()&& !stopCondition()) {
183 Node next = blankNodes.remove(0);
184 List<Node> l = next.expand(tupelAquisitor, configuration.getManipulator());
185 for (Node node : l) {
186 blankNodes.add((BlankNode) node);
187 }
188
189 }
190 return newNodes;
191 }
192
193
194 private List<Node> expandCloseAfterRecursion(List<InstanceNode> instanceNodes, TupleAquisitor tupelAquisitor) {
195
196 List<Node> newNodes = new ArrayList<Node>();
197 tupelAquisitor.setNextTaskToClassesForInstances();
198 while (!instanceNodes.isEmpty() && !stopCondition()) {
199 logger.trace("Getting classes for remaining instances: "
200 + instanceNodes.size());
201 Node next = instanceNodes.remove(0);
202 if(next.isExpanded()){
203 JamonMonitorLogger.increaseCount(this.getClass(), "skipped nodes");
204 continue;
205 }
206 logger.trace("Getting classes for: " + next);
207 newNodes.addAll(next.expand(tupelAquisitor, configuration.getManipulator()));
208 if (newNodes.size() >= configuration.getBreakSuperClassesAfter()) {
209 break;
210 }//endif
211 }//endwhile
212
213 return newNodes;
214 }
215
216 private void expandAllSuperClassesOfANode(List<ClassNode> allClassNodes, TupleAquisitor tupelAquisitor) {
217
218
219 List<Node> newClasses = new ArrayList<Node>();
220 newClasses.addAll(allClassNodes);
221 //TODO LinkedData incompatibility
222
223 int i = 0;
224
225 while (!newClasses.isEmpty() && !stopCondition()) {
226 logger.trace("Remaining classes: " + newClasses.size());
227 Node next = newClasses.remove(0);
228
229 logger.trace("Getting Superclasses for: " + next);
230
231 if (!alreadyQueriedSuperClasses.contains(next.getURIString().toString())) {
232 logger.trace("" + next+" not in cache retrieving");
233 alreadyQueriedSuperClasses.add(next.getURIString().toString());
234 tupelAquisitor.setNextTaskToClassInformation();
235
236 newClasses.addAll(next.expand(tupelAquisitor, configuration.getManipulator()));
237
238
239
240 if (i > configuration.getBreakSuperClassesAfter()) {
241 break;
242 }//endinnerif
243 i++;
244 }//endouterif
245 else {
246 logger.trace("" + next+" in mem cache skipping");
247 }
248
249 }//endwhile
250 if(!configuration.isOptimizeForDLLearner()){
251 alreadyQueriedSuperClasses.clear();
252 }
253
254 }
255
256 private static List<ClassNode> getClassNodes(List<Node> l ){
257 List<ClassNode> retList = new ArrayList<ClassNode>();
258 for (Node node : l) {
259 if (node instanceof ClassNode) {
260 retList.add( (ClassNode) node);
261
262 }
263
264 }
265 return retList;
266 }
267
268
269 private static List<InstanceNode> getInstanceNodes(List<Node> l ){
270 List<InstanceNode> retList = new ArrayList<InstanceNode>();
271 for (Node node : l) {
272 if (node instanceof InstanceNode) {
273 retList.add( (InstanceNode) node);
274
275 }
276
277 }
278 return retList;
279 }
280
281 private static List<BlankNode> getBlankNodes(List<Node> l ){
282 List<BlankNode> retList = new ArrayList<BlankNode>();
283 for (Node node : l) {
284 if (node instanceof BlankNode) {
285 retList.add( (BlankNode) node);
286
287 }
288
289 }
290 return retList;
291 }
292
293 private static List<ObjectPropertyNode> getObjectPropertyNodes(List<Node> l ){
294 List<ObjectPropertyNode> properties = new ArrayList<ObjectPropertyNode>();
295 for (Node node : l) {
296 if (node instanceof InstanceNode) {
297 properties.addAll(( (InstanceNode) node).getObjectProperties());
298
299 }
300
301 }
302 return properties;
303 }
304
305 private static List<DatatypePropertyNode> getDatatypeProperties(List<Node> l ){
306 List<DatatypePropertyNode> properties = new ArrayList<DatatypePropertyNode>();
307 for (Node node : l) {
308 if (node instanceof InstanceNode) {
309 properties.addAll(( (InstanceNode) node).getDatatypePropertyNode());
310 }
311
312 }
313 return properties;
314 }
315
316 }