001    /**
002     * Copyright (C) 2007-2011, Jens Lehmann
003     *
004     * This file is part of DL-Learner.
005     *
006     * DL-Learner is free software; you can redistribute it and/or modify
007     * it under the terms of the GNU General Public License as published by
008     * the Free Software Foundation; either version 3 of the License, or
009     * (at your option) any later version.
010     *
011     * DL-Learner is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014     * GNU General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018     */
019    
020    package org.dllearner.kb.extraction;
021    
022    import java.util.ArrayList;
023    import java.util.List;
024    import java.util.SortedSet;
025    import java.util.TreeSet;
026    
027    import org.apache.log4j.Logger;
028    import org.dllearner.kb.aquisitors.SparqlTupleAquisitorImproved;
029    import org.dllearner.kb.aquisitors.TupleAquisitor;
030    import org.dllearner.utilities.JamonMonitorLogger;
031    import org.dllearner.utilities.statistics.SimpleClock;
032    
033    import com.jamonapi.Monitor;
034    
035    /**
036     * This class is used to extract the information .
037     * 
038     * @author Sebastian Hellmann
039     */
040    public class ExtractionAlgorithm {
041    
042            private Configuration configuration;
043            private SortedSet<String> alreadyQueriedSuperClasses = new TreeSet<String>();
044            private boolean stop = false;
045    
046            
047            private static Logger logger = Logger
048                    .getLogger(ExtractionAlgorithm.class);
049    
050            public ExtractionAlgorithm(Configuration configuration) {
051                    this.configuration = configuration;
052            }
053    
054            
055            public void stop(){
056                    stop=true;
057            }
058            
059            private boolean stopCondition(){
060                    return stop;
061            }
062            
063            void reset(){
064                    stop = false;
065            }
066            
067            private Node getFirstNode(String uri) {
068                    return new InstanceNode(uri);
069            }
070    
071            @SuppressWarnings("unused")
072            private List<Node> expandAll(String[] uris, TupleAquisitor tupelAquisitor) {
073                    List<Node> nodeList = new ArrayList<Node>();
074                    for (String oneURI : uris) {
075                            nodeList.add(expandNode(oneURI, tupelAquisitor));
076                    }
077                    
078                    return nodeList;
079            }
080    
081            /**
082             * most important function expands one example 
083             * CAVE: the recursion is not a
084             * recursion anymore, it was transformed to an iteration
085             *
086             */
087            public Node expandNode(String uri, TupleAquisitor tupleAquisitor) {
088                    SimpleClock sc = new SimpleClock();
089                    if(tupleAquisitor instanceof SparqlTupleAquisitorImproved){
090                            ((SparqlTupleAquisitorImproved)tupleAquisitor).removeFromCache(uri);
091                    }
092                    
093                    Node seedNode = getFirstNode(uri);
094                    List<Node> newNodes = new ArrayList<Node>();
095                    List<Node> collectNodes = new ArrayList<Node>();
096                    List<Node> tmp = new ArrayList<Node>();
097                    
098                    
099                    logger.info("Seed Node: "+seedNode);
100                    newNodes.add(seedNode);
101                    
102    
103                    Monitor basic = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeBasicExtraction").start();
104                    for (int x = 1; x <= configuration.getRecursiondepth(); x++) {
105                            
106                            sc.reset();
107                            while (!newNodes.isEmpty() && !stopCondition()) {
108                                    Node nextNode = newNodes.remove(0);
109                                    logger.info("Expanding " + nextNode);
110                                    
111                                    // these are the new not expanded nodes
112                                    // the others are saved in connection with the original node
113                                    tupleAquisitor.setNextTaskToNormal();
114                                    tmp.addAll(nextNode.expand(tupleAquisitor,
115                                                    configuration.getManipulator()));
116                                    //.out.println(tmpVec);
117                                    
118                            }
119                            collectNodes.addAll(tmp);
120                            newNodes.addAll(tmp);
121                            tmp.clear();
122                            
123                            logger.info("Recursion counter: " + x + " with " + newNodes.size()
124                                            + " Nodes remaining, " + sc.getAndSet(""));
125                    }
126                    basic.stop();
127                    
128                    if(configuration.isCloseAfterRecursion()&& !stopCondition()){
129                            Monitor m = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeCloseAfterRecursion").start();
130                            List<InstanceNode> l = getInstanceNodes(newNodes);
131                            logger.info("Getting classes for remaining instances: "+l.size() + " instances");
132                            tupleAquisitor.setNextTaskToClassesForInstances();
133                            collectNodes.addAll(expandCloseAfterRecursion(l, tupleAquisitor));
134                            m.stop();
135                    }
136                    // gets All Class Nodes and expands them further
137                    if (configuration.isGetAllSuperClasses()&& !stopCondition()) {
138                            Monitor m = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeGetAllSuperClasses").start();
139                            List<ClassNode> allClassNodes = getClassNodes(collectNodes);
140                            tupleAquisitor.setNextTaskToClassInformation();
141                            logger.info("Get all superclasses for "+allClassNodes.size() + " classes");
142                            expandAllSuperClassesOfANode(allClassNodes, tupleAquisitor);
143                            m.stop();
144                    }
145                            
146                    
147                    if(configuration.isGetPropertyInformation()&& !stopCondition() ){
148                            collectNodes.add(seedNode);
149                            Monitor m = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeGetPropertyInformation").start();
150                            List<ObjectPropertyNode> objectProperties = getObjectPropertyNodes(collectNodes);
151                            logger.info("Get info for "+objectProperties.size() + " objectProperties");
152                            for (ObjectPropertyNode node : objectProperties) {
153                                    if(stopCondition()){
154                                            break;
155                                    }
156                                    collectNodes.addAll(node.expandProperties(tupleAquisitor, configuration.getManipulator(), configuration.isDissolveBlankNodes()));
157                            }
158                            List<DatatypePropertyNode> datatypeProperties = getDatatypeProperties(collectNodes);
159                            logger.info("Get info for "+datatypeProperties.size() + " datatypeProperties");
160                            for (DatatypePropertyNode node : datatypeProperties) {
161                                    if(stopCondition()){
162                                            break;
163                                    }
164                                    collectNodes.addAll(node.expandProperties(tupleAquisitor, configuration.getManipulator(), configuration.isDissolveBlankNodes()));
165                            }
166                            m.stop();
167                    }
168                    
169                    Monitor m = JamonMonitorLogger.getTimeMonitor(ExtractionAlgorithm.class, "TimeBlankNode").start();
170                    if( configuration.isDissolveBlankNodes() && !stopCondition()){
171                            expandBlankNodes(getBlankNodes(collectNodes),tupleAquisitor);
172                    }
173                    m.stop();
174                    
175            
176                    return seedNode;
177    
178            }
179            
180            private List<Node> expandBlankNodes(List<BlankNode> blankNodes, TupleAquisitor tupelAquisitor) {
181                    List<Node> newNodes = new ArrayList<Node>();
182                    while (!blankNodes.isEmpty()&& !stopCondition()) {
183                            Node next = blankNodes.remove(0);
184                            List<Node> l = next.expand(tupelAquisitor, configuration.getManipulator());
185                            for (Node node : l) {
186                                    blankNodes.add((BlankNode) node);
187                            }
188                            
189                    }
190                    return newNodes;
191            }
192                    
193            
194            private List<Node> expandCloseAfterRecursion(List<InstanceNode> instanceNodes, TupleAquisitor tupelAquisitor) {
195                    
196                    List<Node> newNodes = new ArrayList<Node>();
197                    tupelAquisitor.setNextTaskToClassesForInstances();
198                    while (!instanceNodes.isEmpty() && !stopCondition()) {
199                            logger.trace("Getting classes for remaining instances: "
200                                            + instanceNodes.size());
201                            Node next = instanceNodes.remove(0);
202                            if(next.isExpanded()){
203                                    JamonMonitorLogger.increaseCount(this.getClass(), "skipped nodes");
204                                    continue;
205                            }
206                            logger.trace("Getting classes for: " + next);
207                            newNodes.addAll(next.expand(tupelAquisitor, configuration.getManipulator()));
208                            if (newNodes.size() >= configuration.getBreakSuperClassesAfter()) {
209                                    break;
210                            }//endif
211                    }//endwhile
212                    
213                    return newNodes;
214            }
215            
216            private void expandAllSuperClassesOfANode(List<ClassNode> allClassNodes, TupleAquisitor tupelAquisitor) {
217                    
218                    
219                    List<Node> newClasses = new ArrayList<Node>();
220                    newClasses.addAll(allClassNodes);
221                    //TODO LinkedData incompatibility
222                    
223                    int i = 0;
224                    
225                    while (!newClasses.isEmpty() && !stopCondition()) {
226                            logger.trace("Remaining classes: " + newClasses.size());
227                            Node next = newClasses.remove(0);
228                            
229                            logger.trace("Getting Superclasses for: " + next);
230                            
231                            if (!alreadyQueriedSuperClasses.contains(next.getURIString().toString())) {
232                                    logger.trace("" + next+" not in cache retrieving");
233                                    alreadyQueriedSuperClasses.add(next.getURIString().toString());
234                                    tupelAquisitor.setNextTaskToClassInformation();
235                                    
236                                    newClasses.addAll(next.expand(tupelAquisitor, configuration.getManipulator()));
237                                    
238                                    
239                                    
240                                    if (i > configuration.getBreakSuperClassesAfter()) {
241                                            break;
242                                    }//endinnerif
243                                    i++;
244                            }//endouterif
245                            else {
246                                    logger.trace("" + next+"  in mem cache skipping");
247                            }
248    
249                    }//endwhile
250                    if(!configuration.isOptimizeForDLLearner()){
251                            alreadyQueriedSuperClasses.clear();
252                    }
253    
254            }
255            
256            private static List<ClassNode> getClassNodes(List<Node> l ){
257                    List<ClassNode> retList = new ArrayList<ClassNode>();
258                    for (Node node : l) {
259                            if (node instanceof ClassNode) {
260                                    retList.add( (ClassNode) node);
261                                    
262                            }
263                            
264                    }
265                    return retList;
266            }
267            
268    
269            private static List<InstanceNode> getInstanceNodes(List<Node> l ){
270                    List<InstanceNode> retList = new ArrayList<InstanceNode>();
271                    for (Node node : l) {
272                            if (node instanceof InstanceNode) {
273                                    retList.add( (InstanceNode) node);
274                                    
275                            }
276                            
277                    }
278                    return retList;
279            }
280            
281            private static List<BlankNode> getBlankNodes(List<Node> l ){
282                    List<BlankNode> retList = new ArrayList<BlankNode>();
283                    for (Node node : l) {
284                            if (node instanceof BlankNode) {
285                                    retList.add( (BlankNode) node);
286                                    
287                            }
288                            
289                    }
290                    return retList;
291            }
292            
293            private static List<ObjectPropertyNode> getObjectPropertyNodes(List<Node> l ){
294                    List<ObjectPropertyNode> properties = new ArrayList<ObjectPropertyNode>();
295                    for (Node node : l) {
296                            if (node instanceof InstanceNode) {
297                                    properties.addAll(( (InstanceNode) node).getObjectProperties());
298                                    
299                            }
300                            
301                    }
302                    return properties;
303            }
304            
305            private static List<DatatypePropertyNode> getDatatypeProperties(List<Node> l ){
306                    List<DatatypePropertyNode> properties = new ArrayList<DatatypePropertyNode>();
307                    for (Node node : l) {
308                            if (node instanceof InstanceNode) {
309                                    properties.addAll(( (InstanceNode) node).getDatatypePropertyNode());
310                            }
311                            
312                    }
313                    return properties;
314            }
315    
316    }