001    /**
002     * Copyright (C) 2007-2008, Jens Lehmann
003     *
004     * This file is part of DL-Learner.
005     * 
006     * DL-Learner is free software; you can redistribute it and/or modify
007     * it under the terms of the GNU General Public License as published by
008     * the Free Software Foundation; either version 3 of the License, or
009     * (at your option) any later version.
010     *
011     * DL-Learner is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014     * GNU General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018     *
019     */
020    package org.dllearner.scripts.evaluation;
021    
022    import java.io.File;
023    import java.io.IOException;
024    import java.text.DecimalFormat;
025    import java.text.SimpleDateFormat;
026    import java.util.Date;
027    import java.util.List;
028    import java.util.Random;
029    
030    import org.dllearner.algorithms.el.ELDescriptionTree;
031    import org.dllearner.core.ComponentInitException;
032    import org.dllearner.core.ComponentManager;
033    import org.dllearner.core.KnowledgeSource;
034    import org.dllearner.core.ReasonerComponent;
035    import org.dllearner.core.owl.Thing;
036    import org.dllearner.kb.OWLFile;
037    import org.dllearner.reasoning.OWLAPIReasoner;
038    import org.dllearner.refinementoperators.ELDown2;
039    import org.dllearner.utilities.Files;
040    import org.dllearner.utilities.statistics.Stat;
041    
042    import com.jamonapi.Monitor;
043    import com.jamonapi.MonitorFactory;
044    
045    /**
046     * An evaluation of the EL refinement operator {@link ELDown2}. It creates
047     * a set of artificial ontologies with varying complexity and performs
048     * refinement steps on them.
049     * 
050     * @author Jens Lehmann
051     *
052     */
053    public class ELOperatorBenchmark {
054    
055            private static Random rand = new Random(1);
056            private static DecimalFormat df = new DecimalFormat();
057            
058            public static void main(String[] args) throws ComponentInitException, IOException {
059                    
060    //              Logger logger = Logger.getRootLogger();
061    //              logger.setLevel(Level.TRACE);
062    //              SimpleLayout layout = new SimpleLayout();
063    //              FileAppender app = new FileAppender(layout, "log/el/log.txt", false);
064    //              logger.removeAllAppenders();
065    //              logger.addAppender(app);
066                    
067                    // create a directory for log files
068                    Date dt = new Date();
069                    SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd_HH:mm:ss");
070                    String statDir = "log/el/" + df.format(dt) + "/";
071                    new File(statDir).mkdir();              
072                    
073                    // single ontology test
074    //              String example = "/home/jl/promotion/ontologien/galen2.owl";
075    //              for(int i=10; i<17; i++) {
076    //                      rand = new Random(1);
077    //                      testOntology(statDir, example, 100, i);
078    //              }
079                    
080                    // real world ontology tests //
081                    String base = "/home/jl/promotion/ontologien/el_benchmark/";
082                    String[] onts = new String[] {
083                                    "go",
084                                    // is ok at length 10
085                                    "galen2", 
086                                    // is ok at length 8 
087                                    "cton", 
088                                    // is ok at length 8 
089                                    "earthrealm2", 
090                                    // is ok at length 8 
091                                    "process", 
092                                    // is ok at length 8 
093                                    "tambis", 
094                                    // ontology file seems broken "thesaurus", 
095                                    // is ok at length 8 
096                                    "transportation",
097                                    // takes too long to read in 
098                                    "pto",
099                                    // ontology file seems broken "iso_19115", 
100                                    // ontology file seems broken "nci",    
101                                    // cannot even be read 
102                                    "fma_owl_dl_component_1.4.0"                            
103                                    };
104    
105                    for(String ont : onts) {
106                            String file = base + ont + ".owl";
107                            rand = new Random(1);
108                            testOntology(statDir, file, 100, 7);
109                    }
110                    
111                    // artificial ontology tests //
112                    /*
113                    // number of concepts and roles
114                    int[] conceptCounts = new int[] { 5, 10, 50, 100 }; //, 500, 1000 };
115                    int[] roleCounts = new int[] { 5, 10, 50, 100, 500, 1000};
116                    String base = "/home/jl/downloads/uni-leipzig/OTAGen-v1/generated/generated_";
117                    
118                    // loop through all artificial ontologies
119                    for(int conceptCount : conceptCounts) {
120                            for(int roleCount : roleCounts) {
121                                    String file = base + "c" + conceptCount + "_r" + roleCount + ".owl";
122                                    rand = new Random(1);
123                                    testOntology(statDir, file, 100, 7);
124                            }
125                    }
126                    
127                    System.exit(0);
128                    */
129                    /*
130                    
131                    // number of applications of operator
132                    int opApplications = 10;
133                    
134                    // statistics directory
135                    String statDir = "/log/stat/el/";
136                    String statFile = statDir + "stats.txt";
137                    String gnuPlotApplicationTimeFile = statDir + "application.gp";
138                    String gnuPlotRefinementTimeFile = statDir + "refinement.gp";
139                    boolean writeOntologies = true;
140                    String ontologyDir = "/log/stat/el/ontologies/";
141                    
142                    
143                    
144                    for(int conceptCount : conceptCounts) {
145                            for(int roleCount : roleCounts) {
146                                    // code for ontology creation
147                                    KB kb = new KB();
148                                    
149                                    // create class hierarchy (concept 0 is owl:Thing)
150                                    for(int i=1; i<=conceptCount; i++) {
151                                            // create class
152                                            NamedClass nc = new NamedClass("a" + i);
153                                            // pick an existing class as super class
154                                            int j = (i == 0) ? 0 : rand.nextInt(i);
155                                            Description superClass;
156                                            if(j==0) {
157                                                    superClass = Thing.instance;
158                                            } else {
159                                                    superClass = new NamedClass("a" + j);
160                                            }
161                                            kb.addAxiom(new SubClassAxiom(nc, superClass));
162                                            // disjointness with siblings
163                                    }
164                                    
165                                    
166                                    // save ontology
167                                    File f = new File(ontologyDir + "c" + conceptCount + "r" + roleCount + ".owl");
168                                    kb.export(f, OntologyFormat.RDF_XML);
169                                    
170    
171                            }
172                    }
173                    */
174    //              ELDown2 operator = new ELDown2();
175            }
176            
177            private static void testOntology(String statDir, String ont, int nrOfChains, int chainLength) throws ComponentInitException, IOException {
178                    System.out.print("Reading in " + ont + " ... ");
179                    ComponentManager cm = ComponentManager.getInstance();
180                    // reading ontology into a reasoner
181                    KnowledgeSource source = cm.knowledgeSource(OWLFile.class);
182                    File ontFile = new File(ont);
183                    cm.applyConfigEntry(source, "url", ontFile.toURI().toURL());
184                    source.init();
185                    ReasonerComponent reasoner = cm.reasoner(OWLAPIReasoner.class, source);
186                    reasoner.init();
187                    System.out.println("done.");
188                    System.out.println();
189                    
190                    // log file name
191                    String name = ontFile.getName();
192                    String statFileName = name.substring(0, name.lastIndexOf(".")) + "_" + chainLength + ".txt";
193                    File statFile = new File(statDir + statFileName);
194                    
195                    String statString = "";
196                    int refinementMaxSizeOverall = 0;
197                    MonitorFactory.reset();
198                    for(int loop = 0; loop < nrOfChains; loop++) {
199                    
200                            // application of operator and statistics recording     
201                            ELDescriptionTree currTree = new ELDescriptionTree(reasoner, Thing.instance);
202                            ELDown2 operator = new ELDown2(reasoner);
203                            Stat runtime = new Stat();
204                            Stat runtimePerRefinement = new Stat();
205                            
206                            System.out.println("Testing operator (applying it " + chainLength + " times):");
207                            for(int i=0; i < chainLength; i++) {
208    //                              System.out.println(currTree.transformToDescription().toKBSyntaxString());
209                                    System.out.print("current concept: " + currTree.transformToDescription().toString(reasoner.getBaseURI(), reasoner.getPrefixes()));
210                                    // apply operator on current description
211                                    long start = System.nanoTime();
212                                    List<ELDescriptionTree> refinements = operator.refine(currTree);
213                                    long time = System.nanoTime() - start;
214                                    runtime.addNumber(time/1000000d);
215                                    runtimePerRefinement.addNumber(time/1000000d/refinements.size());
216                                    MonitorFactory.add("operator application time", "ms.", time/1000000d);
217                                    MonitorFactory.add("operator application time per refinement", "ms.", time/1000000d/refinements.size());
218                                    MonitorFactory.add("refinement count", "count", refinements.size());
219                                    
220                                    int sizeSum = 0;
221                                    for(ELDescriptionTree tree : refinements) {
222    //                                      System.out.println("   " + tree.toDescriptionString());
223                                            int size = tree.getSize();
224                                            sizeSum += size;
225                                            refinementMaxSizeOverall = Math.max(size, refinementMaxSizeOverall);
226                                    }
227                                    
228                                    MonitorFactory.add("refinement size", "count", sizeSum/(double)refinements.size());
229                                    MonitorFactory.add("refinement size increase", "count", (sizeSum-refinements.size()*currTree.getSize())/(double)refinements.size());
230                                    
231                                    System.out.println("  [has " + refinements.size() + " refinements]");
232                                    
233                                    // pick a refinement randomly - this has the disadvantage that we have huge
234                                    // variations over different runs
235                                    int index = rand.nextInt(refinements.size());
236                                    currTree = refinements.get(index);
237                                    
238                                    // we pick a/the median of the refinements as next refinement
239    //                              ELDescriptionTreeComparator treeComp = new ELDescriptionTreeComparator();
240    //                              TreeSet<ELDescriptionTree> refinementsSet = new TreeSet<ELDescriptionTree>(treeComp);
241    //                              refinementsSet.addAll(refinements);
242    //                              List<ELDescriptionTree> refinementList = new LinkedList<ELDescriptionTree>(refinements);
243                                    // sort by size (first criterion of comparator)
244    //                              Collections.sort(refinementList, treeComp);
245    //                              currTree = refinementList.get((int)(refinementList.size()*0.5));
246    //                              System.out.println(rand.nextGaussian());
247    //                              currTree = refinementList.get((int)(refinementList.size()*rand.nextGaussian()));                                
248                                    
249                                    MonitorFactory.add("picked refinement size", "count", currTree.getSize());
250                            }
251                            System.out.println("operator time: " + runtime.prettyPrint("ms"));
252                            System.out.println("operator time per refinement: " + runtimePerRefinement.prettyPrint("ms"));
253                            System.out.println();
254                            
255                    }
256                    
257                    statString += "file: " + name + "\n";
258                    statString += "nr of refinement chains: " + nrOfChains + "\n";
259                    statString += "refinement chain length: " + chainLength + "\n\n";
260                    
261                    statString += getMonitorData(MonitorFactory.getMonitor("operator application time", "ms."));
262                    statString += getMonitorData(MonitorFactory.getMonitor("operator application time per refinement", "ms."));
263                    statString += "\n";
264            
265                    statString += getMonitorDataCount(MonitorFactory.getMonitor("refinement count", "count"));              
266                    statString += getMonitorDataCount(MonitorFactory.getMonitor("refinement size", "count"));
267                    statString += "refinement max size overall: " + refinementMaxSizeOverall + "\n";
268                    statString += getMonitorDataCount(MonitorFactory.getMonitor("picked refinement size", "count"));
269                    statString += getMonitorDataCount(MonitorFactory.getMonitor("refinement size increase", "count"));
270                    statString += "\n";
271                    
272    //              statString += getMonitorData(MonitorFactory.getMonitor("extend label", "ms."));
273    //              statString += getMonitorData(MonitorFactory.getMonitor("refine label", "ms."));
274    //              statString += getMonitorData(MonitorFactory.getMonitor("refine edge", "ms."));
275    //              statString += getMonitorData(MonitorFactory.getMonitor("attach tree", "ms."));
276    //              statString += getMonitorData(MonitorFactory.getMonitor("as.merge trees", "ms."));
277    //              statString += getMonitorData(MonitorFactory.getMonitor("as.complex check", "ms."));
278    //              statString += getMonitorData(MonitorFactory.getMonitor("as.tmp", "ms."));
279    //              statString += getMonitorData(MonitorFactory.getMonitor("el.tmp", "ms."));
280    //              statString += getMonitorDataBoolean(MonitorFactory.getMonitor("as.minimal", "boolean"));
281    //              statString += getMonitorDataBoolean(MonitorFactory.getMonitor("as.check", "boolean"));          
282    //              statString += getMonitorData(MonitorFactory.getMonitor("tree clone", "ms."));
283    //              statString += getMonitorData(MonitorFactory.getMonitor("simulation update", "ms."));
284                    statString += getMonitorData(MonitorFactory.getMonitor("disjointness reasoning", "ms."));
285                    double reasoningPercentage = 100 * MonitorFactory.getMonitor("disjointness reasoning", "ms.").getTotal()/MonitorFactory.getMonitor("operator application time", "ms.").getTotal();
286                    statString += "disjointness reasoning percentage: " + df.format(reasoningPercentage) + "%\n";
287                    
288                    Files.createFile(statFile, statString);
289                    
290                    reasoner.releaseKB();
291                    cm.freeAllComponents();
292            }
293            
294            private static String getMonitorData(Monitor mon) {
295                    return mon.getLabel() + ": av " + df.format(mon.getAvg()) + "ms  (stddev " + df.format(mon.getStdDev()) + "ms,  min " + df.format(mon.getMin()) +  "ms, max " + df.format(mon.getMax()) + "ms, " +  df.format(mon.getTotal()/1000) + "s total, " + (int)mon.getHits() + " hits)\n";
296            }
297            
298            private static String getMonitorDataCount(Monitor mon) {
299                    return mon.getLabel() + ": av " + df.format(mon.getAvg()) + " (stddev " + df.format(mon.getStdDev()) + ",  min " + df.format(mon.getMin()) +  ", max " + df.format(mon.getMax()) + ", " +  df.format(mon.getTotal()) + " total, " + (int)mon.getHits() + " hits)\n";            
300            }
301            
302            @SuppressWarnings("unused")
303            private static String getMonitorDataBoolean(Monitor mon) {
304                    return mon.getLabel() + ": " + df.format(mon.getAvg()*100) + "%\n";             
305            }       
306    }