001 package org.dllearner.examples.corpus;
002
003 import java.net.URI;
004 import java.util.ArrayList;
005 import java.util.List;
006 import java.util.StringTokenizer;
007
008 import org.dllearner.examples.Corpus;
009 import org.dllearner.utilities.URLencodeUTF8;
010 import org.semanticweb.owl.model.OWLClass;
011 import org.semanticweb.owl.model.OWLDescription;
012 import org.semanticweb.owl.model.OWLIndividual;
013 import org.semanticweb.owl.model.OWLObjectProperty;
014
015 public class Sentence {
016 int id ;
017 OWLIndividual sentenceURI;
018 List<String> sentence;
019 List<String> wordsInOrder;
020 List<String> urisInOrder;
021
022 OWLClass element;
023 OWLClass structElement;
024 OWLClass wordElement;
025 OWLClass sentenceClass;
026
027 OWLClass tagClass;
028 OWLClass morphClass;
029 OWLClass edgeClass;
030
031 OWLObjectProperty hasElement;
032
033 public Sentence(int id, List<String> sentence) {
034 super();
035 this.id = id;
036 this.sentence = sentence;
037 this.sentenceURI = Corpus.factory.getOWLIndividual(URI.create(Corpus.namespace+"#"+"satz"+id));
038
039 this.urisInOrder = new ArrayList<String>();
040 this.wordsInOrder = new ArrayList<String>();
041
042 element = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#Element"));
043 structElement = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#StructureElement"));
044 wordElement = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#WordElement"));
045 sentenceClass = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#Sentence"));
046 tagClass = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#Tag"));
047 morphClass = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#Morph"));
048 edgeClass = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#Edge"));
049
050 hasElement = Corpus.factory.getOWLObjectProperty(URI.create(Corpus.namespace+"#hasElement"));
051
052 Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(this.sentenceURI,sentenceClass ));
053 }
054
055 public void processSentence(){
056
057 int pos=0;
058 for (String line : sentence) {
059
060 processLine(line,pos);
061 pos++;
062 }
063 }
064
065
066 public void processLine(String line, int pos){
067 String elementURL = Corpus.namespace+"#";
068 OWLIndividual lineElement;
069 StringTokenizer st = new StringTokenizer(line);
070
071 //%String %% word lemma tag morph edge parent secedge comment
072 String word = st.nextToken();
073 // String lemma = st.nextToken();
074 String tag = st.nextToken();
075 String morph = st.nextToken();
076 String edge = st.nextToken();
077 // String parent = st.nextToken();
078 //word
079 if(word.startsWith("#")){
080 elementURL+="s_"+id+"_"+word.substring(1);
081 lineElement = Corpus.factory.getOWLIndividual(URI.create(elementURL));
082 Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(lineElement, structElement));
083
084 }else{
085 elementURL+="s_"+id+"_"+pos+"_"+URLencodeUTF8.encode(word);
086 wordsInOrder.add(word);
087 urisInOrder.add(elementURL);
088 lineElement = Corpus.factory.getOWLIndividual(URI.create(elementURL));
089 Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(lineElement, wordElement));
090 Corpus.addAxiom(Corpus.factory.getOWLEntityAnnotationAxiom(lineElement, Corpus.factory.getCommentAnnotation(line)));
091 Corpus.addAxiom(Corpus.factory.getOWLEntityAnnotationAxiom(lineElement, Corpus.factory.getOWLLabelAnnotation(word)));
092 }
093
094 Corpus.addAxiom(Corpus.factory.getOWLObjectPropertyAssertionAxiom(sentenceURI, hasElement, lineElement));
095
096 //tag
097 tag = (tag.equals("$("))?"SentenceBoundary":tag;
098 //morph
099 morph= "m_"+URLencodeUTF8.encode(morph);
100 makeClasses(lineElement, tag,morph,edge);
101
102 }
103
104 void makeClasses(OWLIndividual lineElement, String tag, String morph, String edge){
105 if(!tag.equals("--")){
106 OWLDescription d = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#"+tag));
107 Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(lineElement,d ));
108 Corpus.addAxiom(Corpus.factory.getOWLSubClassAxiom(d, tagClass));
109 }
110 if(!morph.equals("m_--")){
111
112 OWLDescription d = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#"+morph));
113 Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(lineElement,d ));
114 Corpus.addAxiom(Corpus.factory.getOWLSubClassAxiom(d, morphClass));
115 }
116 if(!edge.equals("--")){
117 OWLDescription d = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#"+edge));
118 Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(lineElement,d ));
119 Corpus.addAxiom(Corpus.factory.getOWLSubClassAxiom(d, edgeClass));
120 }
121 }
122 }