001    package org.dllearner.examples.corpus;
002    
003    import java.net.URI;
004    import java.util.ArrayList;
005    import java.util.List;
006    import java.util.StringTokenizer;
007    
008    import org.dllearner.examples.Corpus;
009    import org.dllearner.utilities.URLencodeUTF8;
010    import org.semanticweb.owl.model.OWLClass;
011    import org.semanticweb.owl.model.OWLDescription;
012    import org.semanticweb.owl.model.OWLIndividual;
013    import org.semanticweb.owl.model.OWLObjectProperty;
014    
015    public class Sentence {
016            int id ;
017            OWLIndividual sentenceURI;
018            List<String> sentence;
019            List<String> wordsInOrder;
020            List<String> urisInOrder;
021            
022            OWLClass element;
023            OWLClass structElement;
024            OWLClass wordElement;
025            OWLClass sentenceClass;
026            
027            OWLClass tagClass;
028            OWLClass morphClass;
029            OWLClass edgeClass;
030            
031            OWLObjectProperty hasElement;
032            
033            public Sentence(int id, List<String> sentence) {
034                    super();
035                    this.id = id;
036                    this.sentence = sentence;
037                    this.sentenceURI = Corpus.factory.getOWLIndividual(URI.create(Corpus.namespace+"#"+"satz"+id));
038            
039                    this.urisInOrder = new ArrayList<String>();
040                    this.wordsInOrder = new ArrayList<String>();
041                    
042                    element = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#Element"));
043                    structElement = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#StructureElement"));
044                    wordElement = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#WordElement"));
045                    sentenceClass = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#Sentence"));
046                    tagClass = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#Tag"));
047                    morphClass = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#Morph"));
048                    edgeClass = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#Edge"));
049            
050                    hasElement = Corpus.factory.getOWLObjectProperty(URI.create(Corpus.namespace+"#hasElement"));
051                    
052                    Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(this.sentenceURI,sentenceClass ));
053            }
054            
055            public void processSentence(){
056                    
057                    int pos=0;
058                    for (String line : sentence) {
059                            
060                            processLine(line,pos);
061                            pos++;
062                    }
063            }
064            
065            
066            public void processLine(String line, int pos){
067                    String elementURL = Corpus.namespace+"#";
068                    OWLIndividual lineElement;
069                    StringTokenizer st = new StringTokenizer(line);
070                    
071                    //%String %% word                       lemma                   tag     morph           edge    parent  secedge comment
072                    String word = st.nextToken();
073    //              String lemma =  st.nextToken();
074                    String tag =  st.nextToken();
075                    String morph =  st.nextToken();
076                    String edge =  st.nextToken();
077    //              String parent =  st.nextToken();
078                    //word
079                    if(word.startsWith("#")){
080                            elementURL+="s_"+id+"_"+word.substring(1);
081                            lineElement = Corpus.factory.getOWLIndividual(URI.create(elementURL));
082                            Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(lineElement, structElement));
083                            
084                    }else{
085                            elementURL+="s_"+id+"_"+pos+"_"+URLencodeUTF8.encode(word);
086                            wordsInOrder.add(word);
087                            urisInOrder.add(elementURL);
088                            lineElement = Corpus.factory.getOWLIndividual(URI.create(elementURL));
089                            Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(lineElement, wordElement));
090                            Corpus.addAxiom(Corpus.factory.getOWLEntityAnnotationAxiom(lineElement, Corpus.factory.getCommentAnnotation(line)));
091                            Corpus.addAxiom(Corpus.factory.getOWLEntityAnnotationAxiom(lineElement, Corpus.factory.getOWLLabelAnnotation(word)));
092                    }
093                    
094                    Corpus.addAxiom(Corpus.factory.getOWLObjectPropertyAssertionAxiom(sentenceURI, hasElement, lineElement));
095                    
096                    //tag
097                    tag = (tag.equals("$("))?"SentenceBoundary":tag;
098                    //morph
099                    morph= "m_"+URLencodeUTF8.encode(morph);
100                    makeClasses(lineElement, tag,morph,edge);
101                    
102            }
103            
104            void makeClasses(OWLIndividual lineElement, String tag, String morph, String edge){
105                    if(!tag.equals("--")){
106                            OWLDescription d = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#"+tag));
107                            Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(lineElement,d ));
108                            Corpus.addAxiom(Corpus.factory.getOWLSubClassAxiom(d, tagClass));
109                    }
110                    if(!morph.equals("m_--")){
111                            
112                            OWLDescription d = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#"+morph));
113                            Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(lineElement,d ));
114                            Corpus.addAxiom(Corpus.factory.getOWLSubClassAxiom(d, morphClass));
115                    }
116                    if(!edge.equals("--")){
117                            OWLDescription d = Corpus.factory.getOWLClass(URI.create(Corpus.namespace+"#"+edge));
118                            Corpus.addAxiom(Corpus.factory.getOWLClassAssertionAxiom(lineElement,d ));
119                            Corpus.addAxiom(Corpus.factory.getOWLSubClassAxiom(d, edgeClass));
120                    }
121            }
122    }