001 /**
002 * Copyright (C) 2007-2008, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program. If not, see <http://www.gnu.org/licenses/>.
018 *
019 */
020 package org.dllearner.scripts.evaluation;
021
022 import java.io.File;
023 import java.io.IOException;
024 import java.text.DecimalFormat;
025 import java.text.SimpleDateFormat;
026 import java.util.Date;
027 import java.util.List;
028 import java.util.Random;
029
030 import org.dllearner.algorithms.el.ELDescriptionTree;
031 import org.dllearner.core.ComponentInitException;
032 import org.dllearner.core.ComponentManager;
033 import org.dllearner.core.KnowledgeSource;
034 import org.dllearner.core.ReasonerComponent;
035 import org.dllearner.core.owl.Thing;
036 import org.dllearner.kb.OWLFile;
037 import org.dllearner.reasoning.OWLAPIReasoner;
038 import org.dllearner.refinementoperators.ELDown2;
039 import org.dllearner.utilities.Files;
040 import org.dllearner.utilities.statistics.Stat;
041
042 import com.jamonapi.Monitor;
043 import com.jamonapi.MonitorFactory;
044
045 /**
046 * An evaluation of the EL refinement operator {@link ELDown2}. It creates
047 * a set of artificial ontologies with varying complexity and performs
048 * refinement steps on them.
049 *
050 * @author Jens Lehmann
051 *
052 */
053 public class ELOperatorBenchmark {
054
055 private static Random rand = new Random(1);
056 private static DecimalFormat df = new DecimalFormat();
057
058 public static void main(String[] args) throws ComponentInitException, IOException {
059
060 // Logger logger = Logger.getRootLogger();
061 // logger.setLevel(Level.TRACE);
062 // SimpleLayout layout = new SimpleLayout();
063 // FileAppender app = new FileAppender(layout, "log/el/log.txt", false);
064 // logger.removeAllAppenders();
065 // logger.addAppender(app);
066
067 // create a directory for log files
068 Date dt = new Date();
069 SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd_HH:mm:ss");
070 String statDir = "log/el/" + df.format(dt) + "/";
071 new File(statDir).mkdir();
072
073 // single ontology test
074 // String example = "/home/jl/promotion/ontologien/galen2.owl";
075 // for(int i=10; i<17; i++) {
076 // rand = new Random(1);
077 // testOntology(statDir, example, 100, i);
078 // }
079
080 // real world ontology tests //
081 String base = "/home/jl/promotion/ontologien/el_benchmark/";
082 String[] onts = new String[] {
083 "go",
084 // is ok at length 10
085 "galen2",
086 // is ok at length 8
087 "cton",
088 // is ok at length 8
089 "earthrealm2",
090 // is ok at length 8
091 "process",
092 // is ok at length 8
093 "tambis",
094 // ontology file seems broken "thesaurus",
095 // is ok at length 8
096 "transportation",
097 // takes too long to read in
098 "pto",
099 // ontology file seems broken "iso_19115",
100 // ontology file seems broken "nci",
101 // cannot even be read
102 "fma_owl_dl_component_1.4.0"
103 };
104
105 for(String ont : onts) {
106 String file = base + ont + ".owl";
107 rand = new Random(1);
108 testOntology(statDir, file, 100, 7);
109 }
110
111 // artificial ontology tests //
112 /*
113 // number of concepts and roles
114 int[] conceptCounts = new int[] { 5, 10, 50, 100 }; //, 500, 1000 };
115 int[] roleCounts = new int[] { 5, 10, 50, 100, 500, 1000};
116 String base = "/home/jl/downloads/uni-leipzig/OTAGen-v1/generated/generated_";
117
118 // loop through all artificial ontologies
119 for(int conceptCount : conceptCounts) {
120 for(int roleCount : roleCounts) {
121 String file = base + "c" + conceptCount + "_r" + roleCount + ".owl";
122 rand = new Random(1);
123 testOntology(statDir, file, 100, 7);
124 }
125 }
126
127 System.exit(0);
128 */
129 /*
130
131 // number of applications of operator
132 int opApplications = 10;
133
134 // statistics directory
135 String statDir = "/log/stat/el/";
136 String statFile = statDir + "stats.txt";
137 String gnuPlotApplicationTimeFile = statDir + "application.gp";
138 String gnuPlotRefinementTimeFile = statDir + "refinement.gp";
139 boolean writeOntologies = true;
140 String ontologyDir = "/log/stat/el/ontologies/";
141
142
143
144 for(int conceptCount : conceptCounts) {
145 for(int roleCount : roleCounts) {
146 // code for ontology creation
147 KB kb = new KB();
148
149 // create class hierarchy (concept 0 is owl:Thing)
150 for(int i=1; i<=conceptCount; i++) {
151 // create class
152 NamedClass nc = new NamedClass("a" + i);
153 // pick an existing class as super class
154 int j = (i == 0) ? 0 : rand.nextInt(i);
155 Description superClass;
156 if(j==0) {
157 superClass = Thing.instance;
158 } else {
159 superClass = new NamedClass("a" + j);
160 }
161 kb.addAxiom(new SubClassAxiom(nc, superClass));
162 // disjointness with siblings
163 }
164
165
166 // save ontology
167 File f = new File(ontologyDir + "c" + conceptCount + "r" + roleCount + ".owl");
168 kb.export(f, OntologyFormat.RDF_XML);
169
170
171 }
172 }
173 */
174 // ELDown2 operator = new ELDown2();
175 }
176
177 private static void testOntology(String statDir, String ont, int nrOfChains, int chainLength) throws ComponentInitException, IOException {
178 System.out.print("Reading in " + ont + " ... ");
179 ComponentManager cm = ComponentManager.getInstance();
180 // reading ontology into a reasoner
181 KnowledgeSource source = cm.knowledgeSource(OWLFile.class);
182 File ontFile = new File(ont);
183 cm.applyConfigEntry(source, "url", ontFile.toURI().toURL());
184 source.init();
185 ReasonerComponent reasoner = cm.reasoner(OWLAPIReasoner.class, source);
186 reasoner.init();
187 System.out.println("done.");
188 System.out.println();
189
190 // log file name
191 String name = ontFile.getName();
192 String statFileName = name.substring(0, name.lastIndexOf(".")) + "_" + chainLength + ".txt";
193 File statFile = new File(statDir + statFileName);
194
195 String statString = "";
196 int refinementMaxSizeOverall = 0;
197 MonitorFactory.reset();
198 for(int loop = 0; loop < nrOfChains; loop++) {
199
200 // application of operator and statistics recording
201 ELDescriptionTree currTree = new ELDescriptionTree(reasoner, Thing.instance);
202 ELDown2 operator = new ELDown2(reasoner);
203 Stat runtime = new Stat();
204 Stat runtimePerRefinement = new Stat();
205
206 System.out.println("Testing operator (applying it " + chainLength + " times):");
207 for(int i=0; i < chainLength; i++) {
208 // System.out.println(currTree.transformToDescription().toKBSyntaxString());
209 System.out.print("current concept: " + currTree.transformToDescription().toString(reasoner.getBaseURI(), reasoner.getPrefixes()));
210 // apply operator on current description
211 long start = System.nanoTime();
212 List<ELDescriptionTree> refinements = operator.refine(currTree);
213 long time = System.nanoTime() - start;
214 runtime.addNumber(time/1000000d);
215 runtimePerRefinement.addNumber(time/1000000d/refinements.size());
216 MonitorFactory.add("operator application time", "ms.", time/1000000d);
217 MonitorFactory.add("operator application time per refinement", "ms.", time/1000000d/refinements.size());
218 MonitorFactory.add("refinement count", "count", refinements.size());
219
220 int sizeSum = 0;
221 for(ELDescriptionTree tree : refinements) {
222 // System.out.println(" " + tree.toDescriptionString());
223 int size = tree.getSize();
224 sizeSum += size;
225 refinementMaxSizeOverall = Math.max(size, refinementMaxSizeOverall);
226 }
227
228 MonitorFactory.add("refinement size", "count", sizeSum/(double)refinements.size());
229 MonitorFactory.add("refinement size increase", "count", (sizeSum-refinements.size()*currTree.getSize())/(double)refinements.size());
230
231 System.out.println(" [has " + refinements.size() + " refinements]");
232
233 // pick a refinement randomly - this has the disadvantage that we have huge
234 // variations over different runs
235 int index = rand.nextInt(refinements.size());
236 currTree = refinements.get(index);
237
238 // we pick a/the median of the refinements as next refinement
239 // ELDescriptionTreeComparator treeComp = new ELDescriptionTreeComparator();
240 // TreeSet<ELDescriptionTree> refinementsSet = new TreeSet<ELDescriptionTree>(treeComp);
241 // refinementsSet.addAll(refinements);
242 // List<ELDescriptionTree> refinementList = new LinkedList<ELDescriptionTree>(refinements);
243 // sort by size (first criterion of comparator)
244 // Collections.sort(refinementList, treeComp);
245 // currTree = refinementList.get((int)(refinementList.size()*0.5));
246 // System.out.println(rand.nextGaussian());
247 // currTree = refinementList.get((int)(refinementList.size()*rand.nextGaussian()));
248
249 MonitorFactory.add("picked refinement size", "count", currTree.getSize());
250 }
251 System.out.println("operator time: " + runtime.prettyPrint("ms"));
252 System.out.println("operator time per refinement: " + runtimePerRefinement.prettyPrint("ms"));
253 System.out.println();
254
255 }
256
257 statString += "file: " + name + "\n";
258 statString += "nr of refinement chains: " + nrOfChains + "\n";
259 statString += "refinement chain length: " + chainLength + "\n\n";
260
261 statString += getMonitorData(MonitorFactory.getMonitor("operator application time", "ms."));
262 statString += getMonitorData(MonitorFactory.getMonitor("operator application time per refinement", "ms."));
263 statString += "\n";
264
265 statString += getMonitorDataCount(MonitorFactory.getMonitor("refinement count", "count"));
266 statString += getMonitorDataCount(MonitorFactory.getMonitor("refinement size", "count"));
267 statString += "refinement max size overall: " + refinementMaxSizeOverall + "\n";
268 statString += getMonitorDataCount(MonitorFactory.getMonitor("picked refinement size", "count"));
269 statString += getMonitorDataCount(MonitorFactory.getMonitor("refinement size increase", "count"));
270 statString += "\n";
271
272 // statString += getMonitorData(MonitorFactory.getMonitor("extend label", "ms."));
273 // statString += getMonitorData(MonitorFactory.getMonitor("refine label", "ms."));
274 // statString += getMonitorData(MonitorFactory.getMonitor("refine edge", "ms."));
275 // statString += getMonitorData(MonitorFactory.getMonitor("attach tree", "ms."));
276 // statString += getMonitorData(MonitorFactory.getMonitor("as.merge trees", "ms."));
277 // statString += getMonitorData(MonitorFactory.getMonitor("as.complex check", "ms."));
278 // statString += getMonitorData(MonitorFactory.getMonitor("as.tmp", "ms."));
279 // statString += getMonitorData(MonitorFactory.getMonitor("el.tmp", "ms."));
280 // statString += getMonitorDataBoolean(MonitorFactory.getMonitor("as.minimal", "boolean"));
281 // statString += getMonitorDataBoolean(MonitorFactory.getMonitor("as.check", "boolean"));
282 // statString += getMonitorData(MonitorFactory.getMonitor("tree clone", "ms."));
283 // statString += getMonitorData(MonitorFactory.getMonitor("simulation update", "ms."));
284 statString += getMonitorData(MonitorFactory.getMonitor("disjointness reasoning", "ms."));
285 double reasoningPercentage = 100 * MonitorFactory.getMonitor("disjointness reasoning", "ms.").getTotal()/MonitorFactory.getMonitor("operator application time", "ms.").getTotal();
286 statString += "disjointness reasoning percentage: " + df.format(reasoningPercentage) + "%\n";
287
288 Files.createFile(statFile, statString);
289
290 reasoner.releaseKB();
291 cm.freeAllComponents();
292 }
293
294 private static String getMonitorData(Monitor mon) {
295 return mon.getLabel() + ": av " + df.format(mon.getAvg()) + "ms (stddev " + df.format(mon.getStdDev()) + "ms, min " + df.format(mon.getMin()) + "ms, max " + df.format(mon.getMax()) + "ms, " + df.format(mon.getTotal()/1000) + "s total, " + (int)mon.getHits() + " hits)\n";
296 }
297
298 private static String getMonitorDataCount(Monitor mon) {
299 return mon.getLabel() + ": av " + df.format(mon.getAvg()) + " (stddev " + df.format(mon.getStdDev()) + ", min " + df.format(mon.getMin()) + ", max " + df.format(mon.getMax()) + ", " + df.format(mon.getTotal()) + " total, " + (int)mon.getHits() + " hits)\n";
300 }
301
302 @SuppressWarnings("unused")
303 private static String getMonitorDataBoolean(Monitor mon) {
304 return mon.getLabel() + ": " + df.format(mon.getAvg()*100) + "%\n";
305 }
306 }