001    /**
002     * Copyright (C) 2007-2008, Jens Lehmann
003     *
004     * This file is part of DL-Learner.
005     * 
006     * DL-Learner is free software; you can redistribute it and/or modify
007     * it under the terms of the GNU General Public License as published by
008     * the Free Software Foundation; either version 3 of the License, or
009     * (at your option) any later version.
010     *
011     * DL-Learner is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014     * GNU General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018     *
019     */
020    package org.dllearner.examples;
021    
022    import java.io.File;
023    import java.io.FileNotFoundException;
024    import java.io.IOException;
025    import java.net.URI;
026    import java.util.ArrayList;
027    import java.util.Arrays;
028    import java.util.HashMap;
029    import java.util.HashSet;
030    import java.util.LinkedList;
031    import java.util.List;
032    import java.util.Map;
033    import java.util.Set;
034    import java.util.TreeSet;
035    
036    import org.dllearner.core.owl.BooleanDatatypePropertyAssertion;
037    import org.dllearner.core.owl.Description;
038    import org.dllearner.core.owl.DifferentIndividualsAxiom;
039    import org.dllearner.core.owl.DisjointClassesAxiom;
040    import org.dllearner.core.owl.NamedClass;
041    import org.dllearner.core.owl.Axiom;
042    import org.dllearner.core.owl.ClassAssertionAxiom;
043    import org.dllearner.core.owl.DatatypeProperty;
044    import org.dllearner.core.owl.DatatypePropertyAssertion;
045    import org.dllearner.core.owl.DoubleDatatypePropertyAssertion;
046    import org.dllearner.core.owl.Individual;
047    import org.dllearner.core.owl.KB;
048    import org.dllearner.core.owl.ObjectProperty;
049    import org.dllearner.core.owl.ObjectPropertyAssertion;
050    import org.dllearner.core.owl.SubClassAxiom;
051    import org.dllearner.parser.KBParser;
052    import org.dllearner.parser.ParseException;
053    import org.dllearner.parser.PrologParser;
054    import org.dllearner.prolog.Atom;
055    import org.dllearner.prolog.Clause;
056    import org.dllearner.prolog.Program;
057    import org.dllearner.reasoning.OWLAPIReasoner;
058    import org.dllearner.utilities.Files;
059    import org.dllearner.utilities.Helper;
060    
061    /**
062     * This class maps the carcinogenesis Prolog files to an OWL file. In a first
063     * step, a Prolog parser is used to read all files. The main step then maps
064     * the Prolog clauses to OWL axioms through domain-specific mapping
065     * rules.
066     * 
067     * The carcinogenesis Prolog files are available here:
068     * http://web.comlab.ox.ac.uk/oucl/research/areas/machlearn/cancer.html
069     * 
070     * .f files contain positive and .n files contain negative examples. pte1.n and
071     * pte1.f contain the PTE-1 challenge examples. train.n and train.f contain other
072     * examples which can be used to train for PTE-1.
073     * 
074     * The PTE-2 directory contains PTE-2 files, i.e. all substances referred to in
075     * those files are only those of the PTE-2 challenge.
076     * 
077     * @author Jens Lehmann
078     * 
079     */
080    public class Carcinogenesis {
081    
082            private static URI ontologyURI = URI.create("http://dl-learner.org/carcinogenesis");
083    
084            // directory of Prolog files
085            private static final String prologDirectory = "examples/carcinogenesis/prolog/";        
086            
087            // mapping of symbols to names of chemical elements
088            private static Map<String, String> chemElements;
089    
090            // structures in newgroups.pl
091            private static Set<String> newGroups = new TreeSet<String>();
092            
093            // types of atoms, bonds, and structures
094            private static Set<String> atomTypes = new TreeSet<String>();
095            private static Set<String> bondTypes = new TreeSet<String>();
096            private static Set<String> structureTypes = new TreeSet<String>();
097    
098            // we need a counter for bonds, because they are instances in OWL
099            // but not in Prolog
100            private static int bondNr = 0;
101            private static int structureNr = 0;
102            
103            // list of all individuals in the knowlege base
104    //      private static Set<String> individuals = new TreeSet<String>();     
105            // list of all compounds
106            private static Set<String> compounds = new TreeSet<String>(); 
107            // compounds with positive ames test
108            private static Set<String> compoundsAmes = new TreeSet<String>();
109            // list of all bonds
110            private static Set<String> bonds = new TreeSet<String>();
111            
112            // list of all "hasProperty" test
113            private static Set<String> tests = new TreeSet<String>();
114            
115            // we ignore the ames test since its distribution in PTE-2 is so
116            // different from the training substances that a different testing
117            // strategy was probably in use
118            private static boolean ignoreAmes = false;
119            private static boolean ignoreSalmonella = false;;
120            private static boolean ignoreCytogenCa = false;
121            private static boolean includeMutagenesis = true;
122            // if true we learn carcinogenic, if false we learn non-carcinogenic
123            private static boolean learnCarcinogenic = true;
124            private static boolean useNewGroups = true;
125            
126            private static boolean createPTE1Conf = false;
127            private static boolean createPTE2Conf = false;
128            
129            /**
130             * @param args
131             *            No arguments supported.
132             * @throws IOException
133             * @throws FileNotFoundException
134             * @throws ParseException
135             */
136            public static void main(String[] args) throws FileNotFoundException, IOException,
137                            ParseException {
138    
139                    String[] files = new String[] { "newgroups.pl", "ames.pl", "atoms.pl", "bonds.pl", "gentoxprops.pl",
140                                    "ind_nos.pl", "ind_pos.pl"};
141                    // "pte2/canc_nos.pl", "pte2/pte2ames.pl", "pte2/pte2atoms.pl",
142                    //              "pte2/pte2bonds.pl", "pte2/pte2gentox.pl", "pte2/pte2ind_nos.pl", "pte2/pte2newgroups.pl"
143                    // "train.b" => not a pure Prolog file but Progol/Aleph specific
144                    // };
145                    File owlFile = new File("examples/carcinogenesis/carcinogenesis.owl");
146    
147                    Program program = null;
148                    long startTime, duration;
149                    String time;
150    
151                    // reading files
152                    System.out.print("Reading in carcinogenesis Prolog files ... ");
153                    startTime = System.nanoTime();
154                    String content = "";
155                    for (String file : files) {
156                            content += Files.readFile(new File(prologDirectory + file));
157                    }
158                    duration = System.nanoTime() - startTime;
159                    time = Helper.prettyPrintNanoSeconds(duration, false, false);
160                    System.out.println("OK (" + time + ").");
161    
162                    // parsing files
163                    System.out.print("Parsing Prolog files ... ");
164                    startTime = System.nanoTime();
165                    PrologParser pp = new PrologParser();
166                    program = pp.parseProgram(content);
167                    duration = System.nanoTime() - startTime;
168                    time = Helper.prettyPrintNanoSeconds(duration, false, false);
169                    System.out.println("OK (" + time + ").");
170    
171                    // prepare mapping
172                    KB kb = new KB();
173                    createChemElementsMapping();
174                    createNewGroups();
175                    // create subclasses of atom
176                    NamedClass atomClass = getAtomicConcept("Atom");
177                    for (String element : chemElements.values()) {
178                            NamedClass elClass = getAtomicConcept(element);
179                            SubClassAxiom sc = new SubClassAxiom(elClass, atomClass);
180                            kb.addAxiom(sc);
181                    }
182                    // define properties including domain and range
183                    String kbString = "DPDOMAIN(" + getURI2("charge") + ") = " + getURI2("Atom") + ".\n";
184                    kbString += "DPRANGE(" + getURI2("charge") + ") = DOUBLE.\n";
185                    if(!ignoreAmes) {
186                            kbString += "DPDOMAIN(" + getURI2("amesTestPositive") + ") = " + getURI2("Compound") + ".\n";
187                            kbString += "DPRANGE(" + getURI2("amesTestPositive") + ") = BOOLEAN.\n";
188                    }
189                    if(includeMutagenesis) {
190                            kbString += "DPDOMAIN(" + getURI2("isMutagenic") + ") = " + getURI2("Compound") + ".\n";
191                            kbString += "DPRANGE(" + getURI2("isMutagenic") + ") = BOOLEAN.\n";
192                    }
193                    kbString += "OPDOMAIN(" + getURI2("hasAtom") + ") = " + getURI2("Compound") + ".\n";
194                    kbString += "OPRANGE(" + getURI2("hasAtom") + ") = " + getURI2("Atom") + ".\n";
195                    kbString += "OPDOMAIN(" + getURI2("hasBond") + ") = " + getURI2("Compound") + ".\n";
196                    kbString += "OPRANGE(" + getURI2("hasBond") + ") = " + getURI2("Bond") + ".\n";
197                    kbString += "OPDOMAIN(" + getURI2("inBond") + ") = " + getURI2("Bond") + ".\n";
198                    kbString += "OPRANGE(" + getURI2("inBond") + ") = " + getURI2("Atom") + ".\n";
199                    kbString += "OPDOMAIN(" + getURI2("hasStructure") + ") = " + getURI2("Compound") + ".\n";
200                    kbString += "OPRANGE(" + getURI2("hasStructure") + ") = " + getURI2("Structure") + ".\n";
201                    kbString += getURI2("Di") + " SUB " + getURI2("Structure") + ".\n";
202                    kbString += getURI2("Halide") + " SUB " + getURI2("Structure") + ".\n";
203                    kbString += getURI2("Ring") + " SUB " + getURI2("Structure") + ".\n";
204                    KB kb2 = KBParser.parseKBFile(kbString);
205                    kb.addKB(kb2);
206    
207                    // mapping clauses to axioms
208                    System.out.print("Mapping clauses to axioms ... ");
209                    startTime = System.nanoTime();
210                    ArrayList<Clause> clauses = program.getClauses();
211                    for (Clause clause : clauses) {
212                            List<Axiom> axioms = mapClause(clause);
213                            for (Axiom axiom : axioms)
214                                    kb.addAxiom(axiom);
215                    }
216                    
217                    if(includeMutagenesis)
218                            addMutagenesis(kb);
219                    
220                    // special handling for ames test (we assume the ames test
221                    // was performed on all compounds but only the positive ones
222                    // are in ames.pl [the rest is negative in Prolog by CWA], so
223                    // we add negative test results here)
224                    for(String compound : compounds) {
225                            if(!ignoreAmes && !compoundsAmes.contains(compound)) {
226                                    BooleanDatatypePropertyAssertion ames = getBooleanDatatypePropertyAssertion(compound, "amesTestPositive", false);
227                                    kb.addAxiom(ames);
228                            }
229                    }
230                    
231                    // disjoint classes axioms
232                    // OWL API is also buggy here, it adds a strange unused prefix
233                    // and cannot parser its own generated file
234    //              DisjointClassesAxiom disjointAtomTypes = getDisjointClassesAxiom(atomTypes);
235    //              kb.addAxiom(disjointAtomTypes);
236                    String[] mainClasses = new String[] {"Compound", "Atom", "Bond", "Structure"};
237                    Set<String> mainClassesSet = new HashSet<String>(Arrays.asList(mainClasses));
238                    DisjointClassesAxiom disjointAtomTypes = getDisjointClassesAxiom(mainClassesSet);
239                    kb.addAxiom(disjointAtomTypes);         
240                    
241                    // all different axiom (UNA)
242                    // exporting differentIndividuals axioms is broken in OWL API
243    //              individuals.addAll(compounds);
244    //              individuals.addAll(bonds);
245    //              DifferentIndividualsAxiom una = getDifferentIndividualsAxiom(individuals);
246    //              kb.addAxiom(una);
247                    
248                    duration = System.nanoTime() - startTime;
249                    time = Helper.prettyPrintNanoSeconds(duration, false, false);
250                    System.out.println("OK (" + time + ").");
251    
252                    // writing generated knowledge base
253                    System.out.print("Writing OWL file ... ");
254                    startTime = System.nanoTime();
255                    OWLAPIReasoner.exportKBToOWL(owlFile, kb, ontologyURI);
256                    duration = System.nanoTime() - startTime;
257                    time = Helper.prettyPrintNanoSeconds(duration, false, false);
258                    System.out.println("OK (" + time + ").");
259    
260                    // generating conf files
261                    File confTrainFile = new File("examples/carcinogenesis/train.conf");
262                    Files.clearFile(confTrainFile);
263                    String confHeader = "import(\"carcinogenesis.owl\");\n\n";
264                    confHeader += "reasoner = fastInstanceChecker;\n";
265                    confHeader += "algorithm = refexamples;\n";
266                    confHeader += "refexamples.noisePercentage = 31;\n";
267                    confHeader += "refexamples.startClass = " + getURI2("Compound") + ";\n";
268                    confHeader += "refexamples.writeSearchTree = false;\n";
269                    confHeader += "refexamples.searchTreeFile = \"log/carcinogenesis/searchTree.log\";\n";
270                    confHeader += "\n";
271                    Files.appendFile(confTrainFile, confHeader);
272                    
273                    // generating training examples
274                    File trainingFilePositives = new File(prologDirectory + "train.f");
275                    File trainingFileNegatives = new File(prologDirectory + "train.n");
276    
277                    List<Individual> posTrainExamples = getExamples(trainingFilePositives);
278                    List<Individual> negTrainExamples = getExamples(trainingFileNegatives);
279                    appendPosExamples(confTrainFile, posTrainExamples);
280                    appendNegExamples(confTrainFile, negTrainExamples);
281                    
282                    // generating test examples for PTE-1
283                    // => put all in one file, because they were used as training for PTE-2
284                    File confPTE1File = new File("examples/carcinogenesis/testpte1.conf");
285                    File testPTE1Positives = new File(prologDirectory + "pte1.f");
286                    File testPTE1Negatives = new File(prologDirectory + "pte1.n");
287                    
288                    List<Individual> posPTE1Examples = getExamples(testPTE1Positives);
289                    List<Individual> negPTE1Examples = getExamples(testPTE1Negatives);
290                    appendPosExamples(confTrainFile, posPTE1Examples);
291                    appendNegExamples(confTrainFile, negPTE1Examples);
292                    if(createPTE1Conf) {
293                            Files.clearFile(confPTE1File);
294                            Files.appendFile(confPTE1File, "import(\"pte.owl\");\nreasoner=fastInstanceChecker;\n\n");
295                            appendPosExamples(confPTE1File, posPTE1Examples);
296                            appendNegExamples(confPTE1File, negPTE1Examples);
297                    }
298                    
299                    // create a PTE-2 test file
300                    if(createPTE2Conf) {
301                            File confPTE2File = new File("examples/carcinogenesis/testpte2.conf");
302                            Files.clearFile(confPTE2File);
303                            Files.appendFile(confPTE2File, "import(\"pte.owl\");\nreasoner=fastInstanceChecker;\n\n");
304                            Files.appendFile(confPTE2File, getPTE2Examples());
305                    }
306    
307            }
308    
309            private static List<Axiom> mapClause(Clause clause) throws IOException, ParseException {
310                    List<Axiom> axioms = new LinkedList<Axiom>();
311                    Atom head = clause.getHead();
312                    String headName = head.getName();
313                    // Body body = clause.getBody();
314                    // ArrayList<Literal> literals = body.getLiterals();
315                    // handle: atm(compound,atom,element,atomtype,charge)
316                    
317                    // Ames-Test: http://en.wikipedia.org/wiki/Ames_test
318                    // problem: the file apparently mentions only positive
319                    // tests (why is it different from the other tests e.g. in
320                    // gentoxprops.pl?) => we need to add negative axioms for the
321                    // remaining stuff or use closed world assumption in the 
322                    // TBox dematerialisation later on
323                    if(headName.equals("ames")) {
324                            if(!ignoreAmes) {
325                            String compoundName = head.getArgument(0).toPLString();
326                            BooleanDatatypePropertyAssertion ames = getBooleanDatatypePropertyAssertion(compoundName, "amesTestPositive", true);
327                            axioms.add(ames);
328                            compoundsAmes.add(compoundName);
329                            }
330                    } else if (headName.equals("atm")) {
331                            String compoundName = head.getArgument(0).toPLString();
332                            String atomName = head.getArgument(1).toPLString();
333                            String elementName = head.getArgument(2).toPLString();
334                            String type = head.getArgument(3).toPLString();
335                            double charge = Double.parseDouble(head.getArgument(4).toPLString());
336                            // make the compound an instance of the Compound class
337                            ClassAssertionAxiom cmpAxiom = getConceptAssertion("Compound", compoundName);
338                            axioms.add(cmpAxiom);
339                            compounds.add(compoundName);
340                            // relate compound and atom
341                            ObjectPropertyAssertion ra = getRoleAssertion("hasAtom", compoundName, atomName);
342                            axioms.add(ra);
343                            // atom is made instance of the correct class
344                            String atomClass = getAtomClass(elementName, type);
345                            ClassAssertionAxiom ca = getConceptAssertion(atomClass, atomName);
346                            axioms.add(ca);
347                            // write subclass axiom if doesn't exist already
348                            if (!atomTypes.contains(atomClass)) {
349                                    NamedClass subClass = getAtomicConcept(atomClass);
350                                    NamedClass superClass = getAtomicConcept(getFullElementName(elementName));
351                                    SubClassAxiom sc = new SubClassAxiom(subClass, superClass);
352                                    axioms.add(sc);
353                                    atomTypes.add(atomClass);
354                            }
355                            // charge of atom
356                            DatatypePropertyAssertion dpa = getDoubleDatatypePropertyAssertion(atomName, "charge",
357                                            charge);
358                            axioms.add(dpa);
359                    } else if (headName.equals("bond")) {
360                            String compoundName = head.getArgument(0).toPLString();
361                            String atom1Name = head.getArgument(1).toPLString();
362                            String atom2Name = head.getArgument(2).toPLString();
363                            String bondType = head.getArgument(3).toPLString();
364                            String bondClass = "Bond-" + bondType;
365                            String bondInstance = "bond" + bondNr;
366                            bonds.add(bondInstance);
367                            ObjectPropertyAssertion op = getRoleAssertion("hasBond", compoundName, "bond" + bondNr);
368                            axioms.add(op);
369                            // make Bond-X subclass of Bond if that hasn't been done already
370                            if (!bondTypes.contains(bondClass)) {
371                                    NamedClass subClass = getAtomicConcept(bondClass);
372                                    SubClassAxiom sc = new SubClassAxiom(subClass, getAtomicConcept("Bond"));
373                                    axioms.add(sc);
374                                    bondTypes.add(bondClass);
375                            }
376                            // make e.g. bond382 instance of Bond-3
377                            ClassAssertionAxiom ca = getConceptAssertion(bondClass, bondInstance);
378                            axioms.add(ca);
379                            bondNr++;
380                            // connect atoms with bond
381                            ObjectPropertyAssertion op1 = getRoleAssertion("inBond", bondInstance, atom1Name);
382                            ObjectPropertyAssertion op2 = getRoleAssertion("inBond", bondInstance, atom2Name);
383                            axioms.add(op1);
384                            axioms.add(op2);
385                    } else if (headName.equals("has_property")) {
386                            String compoundName = head.getArgument(0).toPLString();
387                            String testName = head.getArgument(1).toPLString();
388                            if(!(ignoreSalmonella && testName.equals("salmonella"))
389                                    && !(ignoreCytogenCa && testName.equals("cytogen_ca"))) {
390                                    String resultStr = head.getArgument(2).toPLString();
391                                    boolean testResult = (resultStr.equals("p")) ? true : false;
392                                            
393                                    // create a new datatype property if it does not exist already
394                                    if(!tests.contains(testName)) {
395                                            String axiom1 = "DPDOMAIN(" + getURI2(testName) + ") = " + getURI2("Compound") + ".\n";
396                                            String axiom2 = "DPRANGE(" + getURI2(testName) + ") = BOOLEAN.\n";
397                                            KB kb = KBParser.parseKBFile(axiom1 + axiom2);
398                                            axioms.addAll(kb.getAxioms());
399                                    }
400                                    // create an axiom with the test result
401                                    DatatypePropertyAssertion dpa = getBooleanDatatypePropertyAssertion(compoundName, testName,
402                                                    testResult);
403                                    axioms.add(dpa);
404                            }
405                    // either parse this or ashby_alert - not both - ashby_alert contains
406                    // all information in ind already
407                    } else if (headName.equals("ind") || headName.equals("ring_no")) {
408                            // parse this only if the new groups are not parsed
409    //                      if(!useNewGroups) {
410                            String compoundName = head.getArgument(0).toPLString();
411                            String structureName = head.getArgument(1).toPLString();
412                            int count = Integer.parseInt(head.getArgument(2).toPLString());
413                            // upper case first letter
414                            String structureClass = structureName.substring(0,1).toUpperCase() + structureName.substring(1);;
415                            String structureInstance = structureName + "-" + structureNr;
416                            
417                            addStructureSubclass(axioms, structureClass);   
418                            
419                            for(int i=0; i<count; i++) {
420                                    ObjectPropertyAssertion op = getRoleAssertion("hasStructure", compoundName, structureInstance);
421                                    axioms.add(op);
422                                    // make e.g. halide10-382 instance of Bond-3
423                                    ClassAssertionAxiom ca = getConceptAssertion(structureClass, structureInstance);
424                                    axioms.add(ca);
425                                    structureNr++;
426                            }
427    //                      }
428                    } else if (headName.equals("ashby_alert")) {
429                            // ... currently ignored ...
430                    } else if (newGroups.contains(headName)) {
431                            if(useNewGroups) {
432                            String compoundName = head.getArgument(0).toPLString();
433                            String structureName = headName;
434                            // upper case first letter
435                            String structureClass = structureName.substring(0,1).toUpperCase() + structureName.substring(1);;
436                            String structureInstance = structureName + "-" + structureNr;
437                            
438                            addStructureSubclass(axioms, structureClass);
439                            
440                                    ObjectPropertyAssertion op = getRoleAssertion("hasStructure", compoundName, structureInstance);
441                                    axioms.add(op);
442                                    ClassAssertionAxiom ca = getConceptAssertion(structureClass, structureInstance);
443                                    axioms.add(ca);
444                                    structureNr++;
445                            }
446                    } else {
447                            // print clauses which are not supported yet
448                            System.out.println("unsupported clause");
449                            System.out.println(clause.toPLString());
450                            System.out.println(clause);
451                            System.exit(0);
452                    }
453                    return axioms;
454            }
455    
456            private static void addStructureSubclass(List<Axiom> axioms, String structureClass) {
457                    // build in more fine-grained subclasses e.g. Di+number is subclass of Di
458                    if (!structureTypes.contains(structureClass)) {
459                            NamedClass nc = getAtomicConcept("Structure");
460                            if(structureClass.contains("Di"))
461                                    nc = getAtomicConcept("Di");
462                            else if(structureClass.contains("ring") || structureClass.contains("Ring"))
463                                    nc = getAtomicConcept("Ring");
464                            else if(structureClass.contains("halide") || structureClass.contains("Halide"))
465                                    nc = getAtomicConcept("Halide");
466                            NamedClass subClass = getAtomicConcept(structureClass);
467                            SubClassAxiom sc = new SubClassAxiom(subClass, nc);
468                            axioms.add(sc);
469                            structureTypes.add(structureClass);
470                    }                       
471            }
472            
473            // takes a *.f or *.n file as input and returns the 
474            // contained examples
475            private static List<Individual> getExamples(File file) throws FileNotFoundException, IOException, ParseException {
476                    String content = Files.readFile(file);
477                    PrologParser pp = new PrologParser();
478                    Program programPos = pp.parseProgram(content);
479                    List<Individual> ret = new LinkedList<Individual>();
480                    for(Clause c : programPos.getClauses()) {
481                            String example = c.getHead().getArgument(0).toPLString();
482                            ret.add(getIndividual(example));
483                    }
484                    return ret;
485            }
486            
487            private static void appendPosExamples(File file, List<Individual> examples) {
488                    StringBuffer content = new StringBuffer();
489                    for(Individual example : examples) {
490                            if(learnCarcinogenic)
491                                    content.append("+\""+example.toString()+"\"\n");
492                            else
493                                    content.append("-\""+example.toString()+"\"\n");
494                    }
495                    Files.appendFile(file, content.toString());
496            }
497            
498            private static void appendNegExamples(File file, List<Individual> examples) {
499                    StringBuffer content = new StringBuffer();
500                    for(Individual example : examples) {
501                            if(learnCarcinogenic)
502                                    content.append("-\""+example.toString()+"\"\n");
503                            else
504                                    content.append("+\""+example.toString()+"\"\n");
505                    }
506                    Files.appendFile(file, content.toString());
507            }       
508            
509            private static String getAtomClass(String element, String atomType) {
510                    return getFullElementName(element) + "-" + atomType;
511            }
512    
513            private static ClassAssertionAxiom getConceptAssertion(String concept, String i) {
514                    Individual ind = getIndividual(i);
515                    NamedClass c = getAtomicConcept(concept);
516                    return new ClassAssertionAxiom(c, ind);
517            }
518    
519            private static ObjectPropertyAssertion getRoleAssertion(String role, String i1, String i2) {
520                    Individual ind1 = getIndividual(i1);
521                    Individual ind2 = getIndividual(i2);
522                    ObjectProperty ar = getRole(role);
523                    return new ObjectPropertyAssertion(ar, ind1, ind2);
524            }
525    
526            private static BooleanDatatypePropertyAssertion getBooleanDatatypePropertyAssertion(
527                            String individual, String datatypeProperty, boolean value) {
528                    Individual ind = getIndividual(individual);
529                    DatatypeProperty dp = getDatatypeProperty(datatypeProperty);
530                    return new BooleanDatatypePropertyAssertion(dp, ind, value);
531            }       
532            
533            private static DoubleDatatypePropertyAssertion getDoubleDatatypePropertyAssertion(
534                            String individual, String datatypeProperty, double value) {
535                    Individual ind = getIndividual(individual);
536                    DatatypeProperty dp = getDatatypeProperty(datatypeProperty);
537                    return new DoubleDatatypePropertyAssertion(dp, ind, value);
538            }
539    
540            @SuppressWarnings({"unused"})
541            private static DisjointClassesAxiom getDisjointClassesAxiom(Set<String> classes) {
542                    Set<Description> descriptions = new HashSet<Description>();
543                    for(String namedClass : classes)
544                            descriptions.add(new NamedClass(getURI(namedClass)));
545                    return new DisjointClassesAxiom(descriptions);
546            }
547            
548            @SuppressWarnings({"unused"})
549            private static DifferentIndividualsAxiom getDifferentIndividualsAxiom(Set<String> individuals) {
550                    Set<Individual> inds = new HashSet<Individual>();
551                    for(String i : individuals)
552                            inds.add(new Individual(i));
553                    return new DifferentIndividualsAxiom(inds);
554            }       
555            
556            private static Individual getIndividual(String name) {
557                    return new Individual(ontologyURI + "#" + name);
558            }
559    
560            private static ObjectProperty getRole(String name) {
561                    return new ObjectProperty(ontologyURI + "#" + name);
562            }
563    
564            private static DatatypeProperty getDatatypeProperty(String name) {
565                    return new DatatypeProperty(ontologyURI + "#" + name);
566            }
567    
568            private static NamedClass getAtomicConcept(String name) {
569                    return new NamedClass(ontologyURI + "#" + name);
570            }
571    
572            private static String getURI(String name) {
573                    return ontologyURI + "#" + name;
574            }
575            
576            // returns URI including quotationsmark (need for KBparser)
577            private static String getURI2(String name) {
578                    return "\"" + getURI(name) + "\"";
579            }       
580            
581            private static String getFullElementName(String abbreviation) {
582                    // return corresponding element or throw an error if it
583                    // is not in the list
584                    String result = chemElements.get(abbreviation);
585                    if (result == null)
586                            throw new Error("Unknown element " + abbreviation);
587                    else
588                            return result;
589            }
590    
591            // create chemical element list
592            private static void createChemElementsMapping() {
593                    chemElements = new HashMap<String, String>();
594                    chemElements.put("as", "Arsenic");
595                    chemElements.put("ba", "Barium");
596                    chemElements.put("br", "Bromine");
597                    chemElements.put("c", "Carbon");
598                    chemElements.put("ca", "Calcium");
599                    chemElements.put("cl", "Chlorine");
600                    chemElements.put("cu", "Copper");
601                    chemElements.put("f", "Fluorine");
602                    chemElements.put("ga", "Gallium");
603                    chemElements.put("h", "Hydrogen");
604                    chemElements.put("hg", "Mercury");
605                    chemElements.put("i", "Iodine");
606                    chemElements.put("k", "Krypton");
607                    chemElements.put("mn", "Manganese");
608                    chemElements.put("mo", "Molybdenum");
609                    chemElements.put("n", "Nitrogen");
610                    chemElements.put("na", "Sodium");
611                    chemElements.put("o", "Oxygen");
612                    chemElements.put("p", "Phosphorus");
613                    chemElements.put("pb", "Lead");
614                    chemElements.put("s", "Sulfur");
615                    chemElements.put("se", "Selenium");
616                    chemElements.put("sn", "Tin");
617                    chemElements.put("te", "Tellurium");
618                    chemElements.put("ti", "Titanium");
619                    chemElements.put("v", "Vanadium");
620                    chemElements.put("zn", "Zinc");
621            }
622            
623            private static void createNewGroups() {         
624                    String[] groups = new String[] {"six_ring", "non_ar_6c_ring",
625                                    "ketone", "amine", "alcohol", "ether", "ar_halide",
626                                    "five_ring", "non_ar_5c_ring", "alkyl_halide",
627                                    "methyl", "non_ar_hetero_5_ring", "nitro", "sulfo",
628                                    "methoxy", "amine", "aldehyde", "sulfide",
629                                    "non_ar_hetero_6_ring", "phenol", "carboxylic_acid",
630                                    "ester", "imine", 
631                    };
632                    
633                    List<String> list = Arrays.asList(groups);
634                    newGroups.addAll(list);
635            }
636    
637            /**
638             * <p>To find out whether a substance is carinogenetic go to 
639             * "http://ntp-server.niehs.nih.gov/" and click
640             * on "Testing Status of Agents at NTP".</p>
641             * 
642             * Levels:
643             * <ul>
644             *      <li>CE = clear evidence</li>
645             *  <li>SE = some evidence</li>
646             *  <li>E = equivocal evidence</li>
647             *  <li>NE = no evidence</li>
648             * </ul>
649             * Levels CE and SE are positive examples. E and NE negative examples.
650             * Experiments are performed on rats and mice of both genders, so we
651             * have four evidence values. An example is positive if at least one
652             * value is SE or CE.
653             * 
654             * <p>Some values are taken from the IJCAI-97 paper of Muggleton.</p>
655             * 
656             * <p>Positives (19): <br />
657             * <ul>
658             * <li>t3 (SE+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCACAFD4-123F-7908-7B521E4F665EFBD9</li>
659             * <li>t4 (3CE+NE) - contradicts IJCAI-97 paper and should probably be case 75-52-5 instead of 75-52-8: http://ntp.niehs.nih.gov/index.cfm?objectid=BCE49084-123F-7908-7BE127F7AF1FFBB5</li>
660             * <li>t5: paper</li>
661             * <li>t7: paper</li>
662             * <li>t8: paper</li>
663             * <li>t9 (3CE+SE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD7C6869-123F-7908-7BDEA4CFAA55CEA8</li>
664             * <li>t10: paper</li>
665             * <li>t12 (2SE+E+NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCB0ADE0-123F-7908-7BEC101C7309C4DE</li>
666             * <li>t14 (2CE+2NE) probably 111-42-2 instead of 11-42-2: http://ntp.niehs.nih.gov/index.cfm?objectid=BCC60FF1-123F-7908-7B2D579AA48DE90C</li>
667             * <li>t15: paper</li>
668             * <li>t16 (2CE+SE+E): http://ntp.niehs.nih.gov/index.cfm?objectid=BCC5D9CE-123F-7908-7B959CCE5262468A</li>
669             * <li>t18 (2SE+E+NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCA087AA-123F-7908-7B79FDFDE3CDCF87</li>
670             * <li>t19 (2CE+E+NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCAE5690-123F-7908-7B02E35E2BB57694</li>
671             * <li>t20 (2SE+E+NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCF95607-123F-7908-7B0761D3C515CC12</li>
672             * <li>t21 (CE+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCFCB63C-123F-7908-7BF910C2783AE9FE</li>
673             * <li>t22 (SE+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD8345C2-123F-7908-7BC52FEF80F110E1</li>
674             * <li>t23 (4CE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCADD2D9-123F-7908-7B5C8180FE80B22F</li>
675             * <li>t24 (CE+E): http://ntp.niehs.nih.gov/index.cfm?objectid=BCFB19FF-123F-7908-7B845E176F13E6E1</li>
676             * <li>t25 (3CE+SE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD2D2A62-123F-7908-7B0DA824E782754C</li>
677             * <li>t30 (2CE+SE+E) : http://ntp.niehs.nih.gov/index.cfm?objectid=BCB13734-123F-7908-7BEBA533E35A48B7</li>
678             * </ul>
679             * </p>
680             * 
681             * <p>Negatives (10):
682             * <ul>
683             * <li>t1 (4NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD9FF53C-123F-7908-7B123DAE0A25B122 </li>
684             * <li>t2 (4NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCF8651E-123F-7908-7B21DD5ED83CD0FF </li>
685             * <li><strike>t4: paper</strike></li>
686             * <li>t6: paper</li>
687             * <li>t11: paper</li>
688             * <li>t13 (4NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD136ED6-123F-7908-7B619EE79F2FD062</li>
689             * <li>t17: paper</li>
690             * <li>t26 (2E+2NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD1E6209-123F-7908-7B95EB8BAE662CE7</li>
691             * <li>t27 (E+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BCAC5D00-123F-7908-7BC46ECB72A6C91B</li>
692             * <li>t28 (E+3NE): http://ntp.niehs.nih.gov/index.cfm?objectid=BD34E02A-123F-7908-7BC6791917B591DF</li>
693             * </ul>
694             * </p>
695             * 
696             * <p>Unclear (1):
697             * <ul>
698             * <li>t29: probably a negative (see http://ntp.niehs.nih.gov/index.cfm?objectid=BD855EA1-123F-7908-7B573FC3C08188DC) but
699             * no tests directly for this substance</li>
700             * </ul>
701             * 
702             * <p>The following examples are probably not part of the IJCAI PTE-2 challenge
703             * (reports younger than 1998):
704             * <ul>
705             * <li>pos: t21 (5/99), t25 (9/04), t30(10/01)</li>
706             * <li>neg: t26 (5/99), t27 (05/01), t28 (05/00), t29 (09/02)</li>
707             * </ul>
708             * </p>
709             * </p>
710             * @return A string for all examples as used in the conf file.
711             */
712            public static String getPTE2Examples() {
713                    String[] pos = new String[] {"t3","t4","t5","t7","t8",
714                                    "t9",
715                                    "t10","t12",
716                                    "t14","t15","t16","t18","t19","t20",
717                                    "t21",
718                                    "t22",
719                                    "t23",
720                                    "t24",
721                                    "t25",
722                                    "t30"};
723                    String[] neg = new String[] {"t1", "t2",
724                                    "t6", "t11", "t13",
725                                    "t17","t26","t27",
726                                    "t28","t29"
727                                    };
728    
729                    String ret = "";
730                    for(String posEx : pos) {
731                            if(learnCarcinogenic)
732                                    ret += "+" + getURI2(posEx) + "\n";
733                            else
734                                    ret += "-" + getURI2(posEx) + "\n";
735                    }
736                    for(String negEx : neg) {
737                            if(learnCarcinogenic)
738                                    ret += "-" + getURI2(negEx) + "\n";
739                            else
740                                    ret += "+" + getURI2(negEx) + "\n";
741                    }
742                    
743                    return ret;
744            }
745            
746            private static void addMutagenesis(KB kb) {
747                    String[] mutagenicCompounds = new String[] {
748                            "d101", "d104", "d106", "d107", "d112", "d113", "d117", 
749                            "d121", "d123", "d126", "d128", "d13", "d135", "d137", 
750                            "d139", "d140", "d143", "d144", "d145", "d146", "d147",
751                            "d152", "d153", "d154", "d155", "d156", "d159", "d160",
752                            "d161", "d163", "d164", "d166", "d168", "d171", "d173",
753                            "d174", "d177", "d179", "d18", "d180", "d182", "d183",
754                            "d185", "d186", "d187", "d188", "d189", "d19", "d191",
755                            "d192", "d193", "d195", "d197", "d2", "d201", "d202", 
756                            "d205", "d206", "d207", "d211", "d214", "d215", "d216",
757                            "d224", "d225", "d227", "d228", "d229", "d231", "d235",
758                            "d237", "d239", "d242", "d245", "d246", "d249", "d251",
759                            "d254", "d257", "d258", "d261", "d264", "d266", "d269",
760                            "d27", "d270", "d271", "d28", "d288", "d292", "d297",
761                            "d300", "d308", "d309", "d311", "d313", "d314", "d322",
762                            "d323", "d324", "d329", "d330", "d332", "d334", "d35",
763                            "d36", "d37", "d38", "d41", "d42", "d48", "d50", "d51",
764                            "d54", "d58", "d61", "d62", "d63", "d66", "d69", "d72",
765                            "d76", "d77", "d78", "d84", "d86", "d89", "d92", "d96"};
766                    TreeSet<String> mutagenic = new TreeSet<String>(Arrays.asList(mutagenicCompounds));
767            
768                    for(String compound : compounds) {
769                            if(mutagenic.contains(compound)) {
770                                    BooleanDatatypePropertyAssertion muta = getBooleanDatatypePropertyAssertion(compound, "isMutagenic", true);
771                                    kb.addAxiom(muta);
772                            } else {
773                                    BooleanDatatypePropertyAssertion muta = getBooleanDatatypePropertyAssertion(compound, "isMutagenic", false);
774                                    kb.addAxiom(muta);
775                            }
776                    }
777            }
778    }