001 /**
002 * Copyright (C) 2007-2008, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program. If not, see <http://www.gnu.org/licenses/>.
018 *
019 */
020 package org.dllearner.examples;
021
022 import java.io.File;
023 import java.io.FileNotFoundException;
024 import java.io.IOException;
025 import java.net.URI;
026 import java.util.ArrayList;
027 import java.util.Arrays;
028 import java.util.HashMap;
029 import java.util.HashSet;
030 import java.util.LinkedList;
031 import java.util.List;
032 import java.util.Map;
033 import java.util.Set;
034 import java.util.TreeSet;
035
036 import org.dllearner.core.owl.BooleanDatatypePropertyAssertion;
037 import org.dllearner.core.owl.Description;
038 import org.dllearner.core.owl.DifferentIndividualsAxiom;
039 import org.dllearner.core.owl.DisjointClassesAxiom;
040 import org.dllearner.core.owl.NamedClass;
041 import org.dllearner.core.owl.Axiom;
042 import org.dllearner.core.owl.ClassAssertionAxiom;
043 import org.dllearner.core.owl.DatatypeProperty;
044 import org.dllearner.core.owl.DatatypePropertyAssertion;
045 import org.dllearner.core.owl.DoubleDatatypePropertyAssertion;
046 import org.dllearner.core.owl.Individual;
047 import org.dllearner.core.owl.KB;
048 import org.dllearner.core.owl.ObjectProperty;
049 import org.dllearner.core.owl.ObjectPropertyAssertion;
050 import org.dllearner.core.owl.SubClassAxiom;
051 import org.dllearner.parser.KBParser;
052 import org.dllearner.parser.ParseException;
053 import org.dllearner.parser.PrologParser;
054 import org.dllearner.prolog.Atom;
055 import org.dllearner.prolog.Clause;
056 import org.dllearner.prolog.Program;
057 import org.dllearner.reasoning.OWLAPIReasoner;
058 import org.dllearner.utilities.Files;
059 import org.dllearner.utilities.Helper;
060
061 /**
062 * This class maps the carcinogenesis Prolog files to an OWL file. In a first
063 * step, a Prolog parser is used to read all files. The main step involves
064 * applying mapping Prolog clauses to OWL axioms through domain specific mapping
065 * rules.
066 *
067 * The carcinogenesis Prolog files are available here:
068 * http://web.comlab.ox.ac.uk/oucl/research/areas/machlearn/cancer.html
069 *
070 * .f files contain positive and .n files contain negative examples. pte1.n and
071 * pte.f contain the PTE-1 challenge examples. train.n and train.f contain other
072 * examples which can be used to train for PTE-1.
073 *
074 * The PTE-2 directory contains PTE-2 files, i.e. all substances referred to in
075 * those files are only those of the PTE-2 challenge.
076 *
077 * @author Jens Lehmann
078 *
079 */
080 public class Carcinogenesis {
081
082 private static URI ontologyURI = URI.create("http://dl-learner.org/carcinogenesis");
083
084 // directory of Prolog files
085 private static final String prologDirectory = "examples/carcinogenesis/prolog/";
086
087 // mapping of symbols to names of chemical elements
088 private static Map<String, String> chemElements;
089
090 // structures in newgroups.pl
091 private static Set<String> newGroups = new TreeSet<String>();
092
093 // types of atoms, bonds, and structures
094 private static Set<String> atomTypes = new TreeSet<String>();
095 private static Set<String> bondTypes = new TreeSet<String>();
096 private static Set<String> structureTypes = new TreeSet<String>();
097
098 // we need a counter for bonds, because they are instances in OWL
099 // but not in Prolog
100 private static int bondNr = 0;
101 private static int structureNr = 0;
102
103 // list of all individuals in the knowlege base
104 // private static Set<String> individuals = new TreeSet<String>();
105 // list of all compounds
106 private static Set<String> compounds = new TreeSet<String>();
107 // compounds with positive ames test
108 private static Set<String> compoundsAmes = new TreeSet<String>();
109 // list of all bonds
110 private static Set<String> bonds = new TreeSet<String>();
111
112 // list of all "hasProperty" test
113 private static Set<String> tests = new TreeSet<String>();
114
115 // we ignore the ames test since its distribution in PTE-2 is so
116 // different from the training substances that a different testing
117 // strategy was probably in use
118 private static boolean ignoreAmes = false;
119 private static boolean ignoreSalmonella = false;;
120 private static boolean ignoreCytogenCa = false;
121 private static boolean includeMutagenesis = true;
122 // if true we learn carcinogenic, if false we learn non-carcinogenic
123 private static boolean learnCarcinogenic = true;
124 private static boolean useNewGroups = true;
125
126 private static boolean createPTE1Conf = false;
127 private static boolean createPTE2Conf = false;
128
129 /**
130 * @param args
131 * No arguments supported.
132 * @throws IOException
133 * @throws FileNotFoundException
134 * @throws ParseException
135 */
136 public static void main(String[] args) throws FileNotFoundException, IOException,
137 ParseException {
138
139 String[] files = new String[] { "newgroups.pl", "ames.pl", "atoms.pl", "bonds.pl", "gentoxprops.pl",
140 "ind_nos.pl", "ind_pos.pl"};
141 // "pte2/canc_nos.pl", "pte2/pte2ames.pl", "pte2/pte2atoms.pl",
142 // "pte2/pte2bonds.pl", "pte2/pte2gentox.pl", "pte2/pte2ind_nos.pl", "pte2/pte2newgroups.pl"
143 // "train.b" => not a pure Prolog file but Progol/Aleph specific
144 // };
145 File owlFile = new File("examples/carcinogenesis/carcinogenesis.owl");
146
147 Program program = null;
148 long startTime, duration;
149 String time;
150
151 // reading files
152 System.out.print("Reading in carcinogenesis Prolog files ... ");
153 startTime = System.nanoTime();
154 String content = "";
155 for (String file : files) {
156 content += Files.readFile(new File(prologDirectory + file));
157 }
158 duration = System.nanoTime() - startTime;
159 time = Helper.prettyPrintNanoSeconds(duration, false, false);
160 System.out.println("OK (" + time + ").");
161
162 // parsing files
163 System.out.print("Parsing Prolog files ... ");
164 startTime = System.nanoTime();
165 PrologParser pp = new PrologParser();
166 program = pp.parseProgram(content);
167 duration = System.nanoTime() - startTime;
168 time = Helper.prettyPrintNanoSeconds(duration, false, false);
169 System.out.println("OK (" + time + ").");
170
171 // prepare mapping
172 KB kb = new KB();
173 createChemElementsMapping();
174 createNewGroups();
175 // create subclasses of atom
176 NamedClass atomClass = getAtomicConcept("Atom");
177 for (String element : chemElements.values()) {
178 NamedClass elClass = getAtomicConcept(element);
179 SubClassAxiom sc = new SubClassAxiom(elClass, atomClass);
180 kb.addAxiom(sc);
181 }
182 // define properties including domain and range
183 String kbString = "DPDOMAIN(" + getURI2("charge") + ") = " + getURI2("Atom") + ".\n";
184 kbString += "DPRANGE(" + getURI2("charge") + ") = DOUBLE.\n";
185 if(!ignoreAmes) {
186 kbString += "DPDOMAIN(" + getURI2("amesTestPositive") + ") = " + getURI2("Compound") + ".\n";
187 kbString += "DPRANGE(" + getURI2("amesTestPositive") + ") = BOOLEAN.\n";
188 }
189 if(includeMutagenesis) {
190 kbString += "DPDOMAIN(" + getURI2("isMutagenic") + ") = " + getURI2("Compound") + ".\n";
191 kbString += "DPRANGE(" + getURI2("isMutagenic") + ") = BOOLEAN.\n";
192 }
193 kbString += "OPDOMAIN(" + getURI2("hasAtom") + ") = " + getURI2("Compound") + ".\n";
194 kbString += "OPRANGE(" + getURI2("hasAtom") + ") = " + getURI2("Atom") + ".\n";
195 kbString += "OPDOMAIN(" + getURI2("hasBond") + ") = " + getURI2("Compound") + ".\n";
196 kbString += "OPRANGE(" + getURI2("hasBond") + ") = " + getURI2("Bond") + ".\n";
197 kbString += "OPDOMAIN(" + getURI2("inBond") + ") = " + getURI2("Bond") + ".\n";
198 kbString += "OPRANGE(" + getURI2("inBond") + ") = " + getURI2("Atom") + ".\n";
199 kbString += "OPDOMAIN(" + getURI2("hasStructure") + ") = " + getURI2("Compound") + ".\n";
200 kbString += "OPRANGE(" + getURI2("hasStructure") + ") = " + getURI2("Structure") + ".\n";
201 kbString += getURI2("Di") + " SUB " + getURI2("Structure") + ".\n";
202 kbString += getURI2("Halide") + " SUB " + getURI2("Structure") + ".\n";
203 kbString += getURI2("Ring") + " SUB " + getURI2("Structure") + ".\n";
204 KB kb2 = KBParser.parseKBFile(kbString);
205 kb.addKB(kb2);
206
207 // mapping clauses to axioms
208 System.out.print("Mapping clauses to axioms ... ");
209 startTime = System.nanoTime();
210 ArrayList<Clause> clauses = program.getClauses();
211 for (Clause clause : clauses) {
212 List<Axiom> axioms = mapClause(clause);
213 for (Axiom axiom : axioms)
214 kb.addAxiom(axiom);
215 }
216
217 if(includeMutagenesis)
218 addMutagenesis(kb);
219
220 // special handling for ames test (we assume the ames test
221 // was performed on all compounds but only the positive ones
222 // are in ames.pl [the rest is negative in Prolog by CWA], so
223 // we add negative test results here)
224 for(String compound : compounds) {
225 if(!ignoreAmes && !compoundsAmes.contains(compound)) {
226 BooleanDatatypePropertyAssertion ames = getBooleanDatatypePropertyAssertion(compound, "amesTestPositive", false);
227 kb.addAxiom(ames);
228 }
229 }
230
231 // disjoint classes axioms
232 // OWL API is also buggy here, it adds a strange unused prefix
233 // and cannot parser its own generated file
234 // DisjointClassesAxiom disjointAtomTypes = getDisjointClassesAxiom(atomTypes);
235 // kb.addAxiom(disjointAtomTypes);
236 String[] mainClasses = new String[] {"Compound", "Atom", "Bond", "Structure"};
237 Set<String> mainClassesSet = new HashSet<String>(Arrays.asList(mainClasses));
238 DisjointClassesAxiom disjointAtomTypes = getDisjointClassesAxiom(mainClassesSet);
239 kb.addAxiom(disjointAtomTypes);
240
241 // all different axiom (UNA)
242 // exporting differentIndividuals axioms is broken in OWL API
243 // individuals.addAll(compounds);
244 // individuals.addAll(bonds);
245 // DifferentIndividualsAxiom una = getDifferentIndividualsAxiom(individuals);
246 // kb.addAxiom(una);
247
248 duration = System.nanoTime() - startTime;
249 time = Helper.prettyPrintNanoSeconds(duration, false, false);
250 System.out.println("OK (" + time + ").");
251
252 // writing generated knowledge base
253 System.out.print("Writing OWL file ... ");
254 startTime = System.nanoTime();
255 OWLAPIReasoner.exportKBToOWL(owlFile, kb, ontologyURI);
256 duration = System.nanoTime() - startTime;
257 time = Helper.prettyPrintNanoSeconds(duration, false, false);
258 System.out.println("OK (" + time + ").");
259
260 // generating conf files
261 File confTrainFile = new File("examples/carcinogenesis/train.conf");
262 Files.clearFile(confTrainFile);
263 String confHeader = "import(\"carcinogenesis.owl\");\n\n";
264 confHeader += "reasoner = fastInstanceChecker;\n";
265 confHeader += "algorithm = refexamples;\n";
266 confHeader += "refexamples.noisePercentage = 31;\n";
267 confHeader += "refexamples.startClass = " + getURI2("Compound") + ";\n";
268 confHeader += "refexamples.writeSearchTree = false;\n";
269 confHeader += "refexamples.searchTreeFile = \"log/carcinogenesis/searchTree.log\";\n";
270 confHeader += "\n";
271 Files.appendFile(confTrainFile, confHeader);
272
273 // generating training examples
274 File trainingFilePositives = new File(prologDirectory + "train.f");
275 File trainingFileNegatives = new File(prologDirectory + "train.n");
276
277 List<Individual> posTrainExamples = getExamples(trainingFilePositives);
278 List<Individual> negTrainExamples = getExamples(trainingFileNegatives);
279 appendPosExamples(confTrainFile, posTrainExamples);
280 appendNegExamples(confTrainFile, negTrainExamples);
281
282 // generating test examples for PTE-1
283 // => put all in one file, because they were used as training for PTE-2
284 File confPTE1File = new File("examples/carcinogenesis/testpte1.conf");
285 File testPTE1Positives = new File(prologDirectory + "pte1.f");
286 File testPTE1Negatives = new File(prologDirectory + "pte1.n");
287
288 List<Individual> posPTE1Examples = getExamples(testPTE1Positives);
289 List<Individual> negPTE1Examples = getExamples(testPTE1Negatives);
290 appendPosExamples(confTrainFile, posPTE1Examples);
291 appendNegExamples(confTrainFile, negPTE1Examples);
292 if(createPTE1Conf) {
293 Files.clearFile(confPTE1File);
294 Files.appendFile(confPTE1File, "import(\"pte.owl\");\nreasoner=fastInstanceChecker;\n\n");
295 appendPosExamples(confPTE1File, posPTE1Examples);
296 appendNegExamples(confPTE1File, negPTE1Examples);
297 }
298
299 // create a PTE-2 test file
300 if(createPTE2Conf) {
301 File confPTE2File = new File("examples/carcinogenesis/testpte2.conf");
302 Files.clearFile(confPTE2File);
303 Files.appendFile(confPTE2File, "import(\"pte.owl\");\nreasoner=fastInstanceChecker;\n\n");
304 Files.appendFile(confPTE2File, getPTE2Examples());
305 }
306
307 }
308
309 private static List<Axiom> mapClause(Clause clause) throws IOException, ParseException {
310 List<Axiom> axioms = new LinkedList<Axiom>();
311 Atom head = clause.getHead();
312 String headName = head.getName();
313 // Body body = clause.getBody();
314 // ArrayList<Literal> literals = body.getLiterals();
315 // handle: atm(compound,atom,element,atomtype,charge)
316
317 // Ames-Test: http://en.wikipedia.org/wiki/Ames_test
318 // problem: the file apparently mentions only positive
319 // tests (why is it different from the other tests e.g. in
320 // gentoxprops.pl?) => we need to add negative axioms for the
321 // remaining stuff or use closed world assumption in the
322 // TBox dematerialisation later on
323 if(headName.equals("ames")) {
324 if(!ignoreAmes) {
325 String compoundName = head.getArgument(0).toPLString();
326 BooleanDatatypePropertyAssertion ames = getBooleanDatatypePropertyAssertion(compoundName, "amesTestPositive", true);
327 axioms.add(ames);
328 compoundsAmes.add(compoundName);
329 }
330 } else if (headName.equals("atm")) {
331 String compoundName = head.getArgument(0).toPLString();
332 String atomName = head.getArgument(1).toPLString();
333 String elementName = head.getArgument(2).toPLString();
334 String type = head.getArgument(3).toPLString();
335 double charge = Double.parseDouble(head.getArgument(4).toPLString());
336 // make the compound an instance of the Compound class
337 ClassAssertionAxiom cmpAxiom = getConceptAssertion("Compound", compoundName);
338 axioms.add(cmpAxiom);
339 compounds.add(compoundName);
340 // relate compound and atom
341 ObjectPropertyAssertion ra = getRoleAssertion("hasAtom", compoundName, atomName);
342 axioms.add(ra);
343 // atom is made instance of the correct class
344 String atomClass = getAtomClass(elementName, type);
345 ClassAssertionAxiom ca = getConceptAssertion(atomClass, atomName);
346 axioms.add(ca);
347 // write subclass axiom if doesn't exist already
348 if (!atomTypes.contains(atomClass)) {
349 NamedClass subClass = getAtomicConcept(atomClass);
350 NamedClass superClass = getAtomicConcept(getFullElementName(elementName));
351 SubClassAxiom sc = new SubClassAxiom(subClass, superClass);
352 axioms.add(sc);
353 atomTypes.add(atomClass);
354 }
355 // charge of atom
356 DatatypePropertyAssertion dpa = getDoubleDatatypePropertyAssertion(atomName, "charge",
357 charge);
358 axioms.add(dpa);
359 } else if (headName.equals("bond")) {
360 String compoundName = head.getArgument(0).toPLString();
361 String atom1Name = head.getArgument(1).toPLString();
362 String atom2Name = head.getArgument(2).toPLString();
363 String bondType = head.getArgument(3).toPLString();
364 String bondClass = "Bond-" + bondType;
365 String bondInstance = "bond" + bondNr;
366 bonds.add(bondInstance);
367 ObjectPropertyAssertion op = getRoleAssertion("hasBond", compoundName, "bond" + bondNr);
368 axioms.add(op);
369 // make Bond-X subclass of Bond if that hasn't been done already
370 if (!bondTypes.contains(bondClass)) {
371 NamedClass subClass = getAtomicConcept(bondClass);
372 SubClassAxiom sc = new SubClassAxiom(subClass, getAtomicConcept("Bond"));
373 axioms.add(sc);
374 bondTypes.add(bondClass);
375 }
376 // make e.g. bond382 instance of Bond-3
377 ClassAssertionAxiom ca = getConceptAssertion(bondClass, bondInstance);
378 axioms.add(ca);
379 bondNr++;
380 // connect atoms with bond
381 ObjectPropertyAssertion op1 = getRoleAssertion("inBond", bondInstance, atom1Name);
382 ObjectPropertyAssertion op2 = getRoleAssertion("inBond", bondInstance, atom2Name);
383 axioms.add(op1);
384 axioms.add(op2);
385 } else if (headName.equals("has_property")) {
386 String compoundName = head.getArgument(0).toPLString();
387 String testName = head.getArgument(1).toPLString();
388 if(!(ignoreSalmonella && testName.equals("salmonella"))
389 && !(ignoreCytogenCa && testName.equals("cytogen_ca"))) {
390 String resultStr = head.getArgument(2).toPLString();
391 boolean testResult = (resultStr.equals("p")) ? true : false;
392
393 // create a new datatype property if it does not exist already
394 if(!tests.contains(testName)) {
395 String axiom1 = "DPDOMAIN(" + getURI2(testName) + ") = " + getURI2("Compound") + ".\n";
396 String axiom2 = "DPRANGE(" + getURI2(testName) + ") = BOOLEAN.\n";
397 KB kb = KBParser.parseKBFile(axiom1 + axiom2);
398 axioms.addAll(kb.getAxioms());
399 }
400 // create an axiom with the test result
401 DatatypePropertyAssertion dpa = getBooleanDatatypePropertyAssertion(compoundName, testName,
402 testResult);
403 axioms.add(dpa);
404 }
405 // either parse this or ashby_alert - not both - ashby_alert contains
406 // all information in ind already
407 } else if (headName.equals("ind") || headName.equals("ring_no")) {
408 // parse this only if the new groups are not parsed
409 // if(!useNewGroups) {
410 String compoundName = head.getArgument(0).toPLString();
411 String structureName = head.getArgument(1).toPLString();
412 int count = Integer.parseInt(head.getArgument(2).toPLString());
413 // upper case first letter
414 String structureClass = structureName.substring(0,1).toUpperCase() + structureName.substring(1);;
415 String structureInstance = structureName + "-" + structureNr;
416
417 addStructureSubclass(axioms, structureClass);
418
419 for(int i=0; i<count; i++) {
420 ObjectPropertyAssertion op = getRoleAssertion("hasStructure", compoundName, structureInstance);
421 axioms.add(op);
422 // make e.g. halide10-382 instance of Bond-3
423 ClassAssertionAxiom ca = getConceptAssertion(structureClass, structureInstance);
424 axioms.add(ca);
425 structureNr++;
426 }
427 // }
428 } else if (headName.equals("ashby_alert")) {
429 // ... currently ignored ...
430 } else if (newGroups.contains(headName)) {
431 if(useNewGroups) {
432 String compoundName = head.getArgument(0).toPLString();
433 String structureName = headName;
434 // upper case first letter
435 String structureClass = structureName.substring(0,1).toUpperCase() + structureName.substring(1);;
436 String structureInstance = structureName + "-" + structureNr;
437
438 addStructureSubclass(axioms, structureClass);
439
440 ObjectPropertyAssertion op = getRoleAssertion("hasStructure", compoundName, structureInstance);
441 axioms.add(op);
442 ClassAssertionAxiom ca = getConceptAssertion(structureClass, structureInstance);
443 axioms.add(ca);
444 structureNr++;
445 }
446 } else {
447 // print clauses which are not supported yet
448 System.out.println("unsupported clause");
449 System.out.println(clause.toPLString());
450 System.out.println(clause);
451 System.exit(0);
452 }
453 return axioms;
454 }
455
456 private static void addStructureSubclass(List<Axiom> axioms, String structureClass) {
457 // build in more fine-grained subclasses e.g. Di+number is subclass of Di
458 if (!structureTypes.contains(structureClass)) {
459 NamedClass nc = getAtomicConcept("Structure");
460 if(structureClass.contains("Di"))
461 nc = getAtomicConcept("Di");
462 else if(structureClass.contains("ring") || structureClass.contains("Ring"))
463 nc = getAtomicConcept("Ring");
464 else if(structureClass.contains("halide") || structureClass.contains("Halide"))
465 nc = getAtomicConcept("Halide");
466 NamedClass subClass = getAtomicConcept(structureClass);
467 SubClassAxiom sc = new SubClassAxiom(subClass, nc);
468 axioms.add(sc);
469 structureTypes.add(structureClass);
470 }
471 }
472
473 // takes a *.f or *.n file as input and returns the
474 // contained examples
475 private static List<Individual> getExamples(File file) throws FileNotFoundException, IOException, ParseException {
476 String content = Files.readFile(file);
477 PrologParser pp = new PrologParser();
478 Program programPos = pp.parseProgram(content);
479 List<Individual> ret = new LinkedList<Individual>();
480 for(Clause c : programPos.getClauses()) {
481 String example = c.getHead().getArgument(0).toPLString();
482 ret.add(getIndividual(example));
483 }
484 return ret;
485 }
486
487 private static void appendPosExamples(File file, List<Individual> examples) {
488 StringBuffer content = new StringBuffer();
489 for(Individual example : examples) {
490 if(learnCarcinogenic)
491 content.append("+\""+example.toString()+"\"\n");
492 else
493 content.append("-\""+example.toString()+"\"\n");
494 }
495 Files.appendFile(file, content.toString());
496 }
497
498 private static void appendNegExamples(File file, List<Individual> examples) {
499 StringBuffer content = new StringBuffer();
500 for(Individual example : examples) {
501 if(learnCarcinogenic)
502 content.append("-\""+example.toString()+"\"\n");
503 else
504 content.append("+\""+example.toString()+"\"\n");
505 }
506 Files.appendFile(file, content.toString());
507 }
508
509 private static String getAtomClass(String element, String atomType) {
510 return getFullElementName(element) + "-" + atomType;
511 }
512
513 private static ClassAssertionAxiom getConceptAssertion(String concept, String i) {
514 Individual ind = getIndividual(i);
515 NamedClass c = getAtomicConcept(concept);
516 return new ClassAssertionAxiom(c, ind);
517 }
518
519 private static ObjectPropertyAssertion getRoleAssertion(String role, String i1, String i2) {
520 Individual ind1 = getIndividual(i1);
521 Individual ind2 = getIndividual(i2);
522 ObjectProperty ar = getRole(role);
523 return new ObjectPropertyAssertion(ar, ind1, ind2);
524 }
525
526 private static BooleanDatatypePropertyAssertion getBooleanDatatypePropertyAssertion(
527 String individual, String datatypeProperty, boolean value) {
528 Individual ind = getIndividual(individual);
529 DatatypeProperty dp = getDatatypeProperty(datatypeProperty);
530 return new BooleanDatatypePropertyAssertion(dp, ind, value);
531 }
532
533 private static DoubleDatatypePropertyAssertion getDoubleDatatypePropertyAssertion(
534 String individual, String datatypeProperty, double value) {
535 Individual ind = getIndividual(individual);
536 DatatypeProperty dp = getDatatypeProperty(datatypeProperty);
537 return new DoubleDatatypePropertyAssertion(dp, ind, value);
538 }
539
540 @SuppressWarnings({"unused"})
541 private static DisjointClassesAxiom getDisjointClassesAxiom(Set<String> classes) {
542 Set<Description> descriptions = new HashSet<Description>();
543 for(String namedClass : classes)
544 descriptions.add(new NamedClass(getURI(namedClass)));
545 return new DisjointClassesAxiom(descriptions);
546 }
547
548 @SuppressWarnings({"unused"})
549 private static DifferentIndividualsAxiom getDifferentIndividualsAxiom(Set<String> individuals) {
550 Set<Individual> inds = new HashSet<Individual>();
551 for(String i : individuals)
552 inds.add(new Individual(i));
553 return new DifferentIndividualsAxiom(inds);
554 }
555
556 private static Individual getIndividual(String name) {
557 return new Individual(ontologyURI + "#" + name);
558 }
559
560 private static ObjectProperty getRole(String name) {
561 return new ObjectProperty(ontologyURI + "#" + name);
562 }
563
564 private static DatatypeProperty getDatatypeProperty(String name) {
565 return new DatatypeProperty(ontologyURI + "#" + name);
566 }
567
568 private static NamedClass getAtomicConcept(String name) {
569 return new NamedClass(ontologyURI + "#" + name);
570 }
571
572 private static String getURI(String name) {
573 return ontologyURI + "#" + name;
574 }
575
576 // returns URI including quotationsmark (need for KBparser)
577 private static String getURI2(String name) {
578 return "\"" + getURI(name) + "\"";
579 }
580
581 private static String getFullElementName(String abbreviation) {
582 // return corresponding element or throw an error if it
583 // is not in the list
584 String result = chemElements.get(abbreviation);
585 if (result == null)
586 throw new Error("Unknown element " + abbreviation);
587 else
588 return result;
589 }
590
591 // create chemical element list
592 private static void createChemElementsMapping() {
593 chemElements = new HashMap<String, String>();
594 chemElements.put("as", "Arsenic");
595 chemElements.put("ba", "Barium");
596 chemElements.put("br", "Bromine");
597 chemElements.put("c", "Carbon");
598 chemElements.put("ca", "Calcium");
599 chemElements.put("cl", "Chlorine");
600 chemElements.put("cu", "Copper");
601 chemElements.put("f", "Fluorine");
602 chemElements.put("ga", "Gallium");
603 chemElements.put("h", "Hydrogen");
604 chemElements.put("hg", "Mercury");
605 chemElements.put("i", "Iodine");
606 chemElements.put("k", "Krypton");
607 chemElements.put("mn", "Manganese");
608 chemElements.put("mo", "Molybdenum");
609 chemElements.put("n", "Nitrogen");
610 chemElements.put("na", "Sodium");
611 chemElements.put("o", "Oxygen");
612 chemElements.put("p", "Phosphorus");
613 chemElements.put("pb", "Lead");
614 chemElements.put("s", "Sulfur");
615 chemElements.put("se", "Selenium");
616 chemElements.put("sn", "Tin");
617 chemElements.put("te", "Tellurium");
618 chemElements.put("ti", "Titanium");
619 chemElements.put("v", "Vanadium");
620 chemElements.put("zn", "Zinc");
621 }
622
623 private static void createNewGroups() {
624 String[] groups = new String[] {"six_ring", "non_ar_6c_ring",
625 "ketone", "amine", "alcohol", "ether", "ar_halide",
626 "five_ring", "non_ar_5c_ring", "alkyl_halide",
627 "methyl", "non_ar_hetero_5_ring", "nitro", "sulfo",
628 "methoxy", "amine", "aldehyde", "sulfide",
629 "non_ar_hetero_6_ring", "phenol", "carboxylic_acid",
630 "ester", "imine",
631 };
632
633 List<String> list = Arrays.asList(groups);
634 newGroups.addAll(list);
635 }
636
637 /**
638 * <p>To find out whether a substance is carinogenetic go to
639 * "http://ntp-server.niehs.nih.gov/" and click
640 * on "Testing Status of Agents at NTP".</p>
641 *
642 * Levels:
643 * <ul>
644 * <li>CE = clear evidence</li>
645 * <li>SE = some evidence</li>
646 * <li>E = equivocal evidence</li>
647 * <li>NE = no evidence</li>
648 * </ul>
649 * Levels CE and SE are positive examples. E and NE negative examples.
650 * Experiments are performed on rats and mice of both genders, so we
651 * have four evidence values. An example is positive if at least one
652 * value is SE or CE.
653 *
654 * <p>Some values are taken from the IJCAI-97 paper of Muggleton.</p>
655 *
656 * <p>Positives (19): <br />
657 * <ul>
658 * <li>t3 (SE+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCACAFD4-123F-7908-7B521E4F665EFBD9</li>
659 * <li>t4 (3CE+NE) - contradicts IJCAI-97 paper and should probably be case 75-52-5 instead of 75-52-8: http://ntp.niehs.nih.gov/index.cfm?objectid=BCE49084-123F-7908-7BE127F7AF1FFBB5</li>
660 * <li>t5: paper</li>
661 * <li>t7: paper</li>
662 * <li>t8: paper</li>
663 * <li>t9 (3CE+SE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD7C6869-123F-7908-7BDEA4CFAA55CEA8</li>
664 * <li>t10: paper</li>
665 * <li>t12 (2SE+E+NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCB0ADE0-123F-7908-7BEC101C7309C4DE</li>
666 * <li>t14 (2CE+2NE) probably 111-42-2 instead of 11-42-2: http://ntp.niehs.nih.gov/index.cfm?objectid=BCC60FF1-123F-7908-7B2D579AA48DE90C</li>
667 * <li>t15: paper</li>
668 * <li>t16 (2CE+SE+E): http://ntp.niehs.nih.gov/index.cfm?objectid=BCC5D9CE-123F-7908-7B959CCE5262468A</li>
669 * <li>t18 (2SE+E+NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCA087AA-123F-7908-7B79FDFDE3CDCF87</li>
670 * <li>t19 (2CE+E+NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCAE5690-123F-7908-7B02E35E2BB57694</li>
671 * <li>t20 (2SE+E+NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCF95607-123F-7908-7B0761D3C515CC12</li>
672 * <li>t21 (CE+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCFCB63C-123F-7908-7BF910C2783AE9FE</li>
673 * <li>t22 (SE+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD8345C2-123F-7908-7BC52FEF80F110E1</li>
674 * <li>t23 (4CE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCADD2D9-123F-7908-7B5C8180FE80B22F</li>
675 * <li>t24 (CE+E): http://ntp.niehs.nih.gov/index.cfm?objectid=BCFB19FF-123F-7908-7B845E176F13E6E1</li>
676 * <li>t25 (3CE+SE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD2D2A62-123F-7908-7B0DA824E782754C</li>
677 * <li>t30 (2CE+SE+E) : http://ntp.niehs.nih.gov/index.cfm?objectid=BCB13734-123F-7908-7BEBA533E35A48B7</li>
678 * </ul>
679 * </p>
680 *
681 * <p>Negatives (10):
682 * <ul>
683 * <li>t1 (4NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD9FF53C-123F-7908-7B123DAE0A25B122 </li>
684 * <li>t2 (4NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCF8651E-123F-7908-7B21DD5ED83CD0FF </li>
685 * <li><strike>t4: paper</strike></li>
686 * <li>t6: paper</li>
687 * <li>t11: paper</li>
688 * <li>t13 (4NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD136ED6-123F-7908-7B619EE79F2FD062</li>
689 * <li>t17: paper</li>
690 * <li>t26 (2E+2NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD1E6209-123F-7908-7B95EB8BAE662CE7</li>
691 * <li>t27 (E+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCAC5D00-123F-7908-7BC46ECB72A6C91B</li>
692 * <li>t28 (E+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD34E02A-123F-7908-7BC6791917B591DF</li>
693 * </ul>
694 * </p>
695 *
696 * <p>Unclear (1):
697 * <ul>
698 * <li>t29: probably a negative (see http://ntp.niehs.nih.gov/index.cfm?objectid=BD855EA1-123F-7908-7B573FC3C08188DC) but
699 * no tests directly for this substance</li>
700 * </ul>
701 *
702 * <p>The following examples are probably not part of the IJCAI PTE-2 challenge
703 * (reports younger than 1998):
704 * <ul>
705 * <li>pos: t21 (5/99), t25 (9/04), t30(10/01)</li>
706 * <li>neg: t26 (5/99), t27 (05/01), t28 (05/00), t29 (09/02)</li>
707 * </ul>
708 * </p>
709 * </p>
710 * @return A string for all examples as used in the conf file.
711 */
712 public static String getPTE2Examples() {
713 String[] pos = new String[] {"t3","t4","t5","t7","t8",
714 "t9",
715 "t10","t12",
716 "t14","t15","t16","t18","t19","t20",
717 "t21",
718 "t22",
719 "t23",
720 "t24",
721 "t25",
722 "t30"};
723 String[] neg = new String[] {"t1", "t2",
724 "t6", "t11", "t13",
725 "t17","t26","t27",
726 "t28","t29"
727 };
728
729 String ret = "";
730 for(String posEx : pos) {
731 if(learnCarcinogenic)
732 ret += "+" + getURI2(posEx) + "\n";
733 else
734 ret += "-" + getURI2(posEx) + "\n";
735 }
736 for(String negEx : neg) {
737 if(learnCarcinogenic)
738 ret += "-" + getURI2(negEx) + "\n";
739 else
740 ret += "+" + getURI2(negEx) + "\n";
741 }
742
743 return ret;
744 }
745
746 private static void addMutagenesis(KB kb) {
747 String[] mutagenicCompounds = new String[] {
748 "d101", "d104", "d106", "d107", "d112", "d113", "d117",
749 "d121", "d123", "d126", "d128", "d13", "d135", "d137",
750 "d139", "d140", "d143", "d144", "d145", "d146", "d147",
751 "d152", "d153", "d154", "d155", "d156", "d159", "d160",
752 "d161", "d163", "d164", "d166", "d168", "d171", "d173",
753 "d174", "d177", "d179", "d18", "d180", "d182", "d183",
754 "d185", "d186", "d187", "d188", "d189", "d19", "d191",
755 "d192", "d193", "d195", "d197", "d2", "d201", "d202",
756 "d205", "d206", "d207", "d211", "d214", "d215", "d216",
757 "d224", "d225", "d227", "d228", "d229", "d231", "d235",
758 "d237", "d239", "d242", "d245", "d246", "d249", "d251",
759 "d254", "d257", "d258", "d261", "d264", "d266", "d269",
760 "d27", "d270", "d271", "d28", "d288", "d292", "d297",
761 "d300", "d308", "d309", "d311", "d313", "d314", "d322",
762 "d323", "d324", "d329", "d330", "d332", "d334", "d35",
763 "d36", "d37", "d38", "d41", "d42", "d48", "d50", "d51",
764 "d54", "d58", "d61", "d62", "d63", "d66", "d69", "d72",
765 "d76", "d77", "d78", "d84", "d86", "d89", "d92", "d96"};
766 TreeSet<String> mutagenic = new TreeSet<String>(Arrays.asList(mutagenicCompounds));
767
768 for(String compound : compounds) {
769 if(mutagenic.contains(compound)) {
770 BooleanDatatypePropertyAssertion muta = getBooleanDatatypePropertyAssertion(compound, "isMutagenic", true);
771 kb.addAxiom(muta);
772 } else {
773 BooleanDatatypePropertyAssertion muta = getBooleanDatatypePropertyAssertion(compound, "isMutagenic", false);
774 kb.addAxiom(muta);
775 }
776 }
777 }
778 }