001    /**
002     * Copyright (C) 2007-2011, Jens Lehmann
003     *
004     * This file is part of DL-Learner.
005     *
006     * DL-Learner is free software; you can redistribute it and/or modify
007     * it under the terms of the GNU General Public License as published by
008     * the Free Software Foundation; either version 3 of the License, or
009     * (at your option) any later version.
010     *
011     * DL-Learner is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014     * GNU General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018     */
019    
020    package org.dllearner.learningproblems;
021    
022    import java.text.DecimalFormat;
023    import java.util.Set;
024    import java.util.SortedSet;
025    
026    import org.dllearner.core.owl.Individual;
027    import org.dllearner.utilities.Helper;
028    
029    /**
030     * Computes the score (a negative value) by comparing the classification results
031     * with ideal results.
032     * 
033     * TODO: The implementation is not very efficient, because some things are 
034     * only computed to be able to present the score results. This means that
035     * it would be better to compute only the necessary computations and do
036     * the other ones only when they are needed to calculate statistical values.
037     * 
038     * @author Jens Lehmann
039     *
040     */
041    public class ScoreThreeValued extends ScorePosNeg {
042            
043            private static final long serialVersionUID = -1780084688122949685L;
044    
045            public enum ScoreMethod {POSITIVE, FULL};
046            
047            // configuration options
048            private double accuracyPenalty;
049            private double errorPenalty;
050            private boolean penaliseNeutralExamples;
051            private double percentPerLengthUnit;
052            
053            // potential configuration options (not implemented as such, but one
054            // could so)
055            private boolean showCorrectClassifications = false;
056            private static ScoreMethod scoreMethod = ScoreMethod.POSITIVE;
057            
058            private SortedSet<Individual> posClassified;
059            private SortedSet<Individual> neutClassified;
060            private SortedSet<Individual> negClassified;
061            private SortedSet<Individual> posExamples;
062            private SortedSet<Individual> neutExamples;
063            private SortedSet<Individual> negExamples;
064            
065        private Set<Individual> posAsNeg;
066        private Set<Individual> negAsPos;
067        private Set<Individual> posAsNeut;
068        private Set<Individual> neutAsPos;
069        private Set<Individual> neutAsNeg;
070        private Set<Individual> negAsNeut;
071        private Set<Individual> posAsPos;
072        private Set<Individual> negAsNeg;
073        private Set<Individual> neutAsNeut;      
074        
075        private double score;
076        private double accuracy;
077        private double accuracyOnExamples;
078        private double accuracyOnPositiveExamples;
079        private double errorRate;
080        
081        private int nrOfExamples;
082        private int conceptLength;
083        
084        public ScoreThreeValued(int conceptLength,
085                    double accuracyPenalty,
086                    double errorPenalty,
087                    boolean penaliseNeutralExamples,
088                    double percentPerLengthUnit,
089                    SortedSet<Individual> posClassified,
090                    SortedSet<Individual> neutClassified,
091                    SortedSet<Individual> negClassified,
092                    SortedSet<Individual> posExamples,
093                    SortedSet<Individual> neutExamples,
094                    SortedSet<Individual> negExamples) {
095            this.conceptLength = conceptLength;
096            this.accuracyPenalty = accuracyPenalty;
097            this.errorPenalty = errorPenalty;
098            this.penaliseNeutralExamples = penaliseNeutralExamples;
099            this.percentPerLengthUnit = percentPerLengthUnit;
100            this.posClassified = posClassified;
101            this.neutClassified = neutClassified;
102            this.negClassified = negClassified;
103            this.posExamples = posExamples;
104            this.neutExamples = neutExamples;
105            this.negExamples = negExamples;
106            nrOfExamples = posExamples.size()+negExamples.size();
107            computeClassificationMatrix();
108            computeStatistics();
109        }
110        
111        private void computeClassificationMatrix() {
112            posAsNeg = Helper.intersection(posExamples,negClassified);
113            negAsPos = Helper.intersection(negExamples,posClassified);
114            posAsNeut = Helper.intersection(posExamples,neutClassified);
115            neutAsPos = Helper.intersection(neutExamples,posClassified);
116            neutAsNeg = Helper.intersection(neutExamples,negClassified);
117            negAsNeut = Helper.intersection(negExamples,neutClassified);
118            // die 3 Berechnungen sind nicht so wichtig f�r die Punktzahl, d.h. falls
119            // es Performance bringt, dann kann man sie auch ausgliedern
120            posAsPos = Helper.intersection(posExamples,posClassified);
121            negAsNeg = Helper.intersection(negExamples,negClassified);
122            neutAsNeut = Helper.intersection(neutExamples,neutClassified);          
123        }
124        
125        private void computeStatistics() {     
126            score = - posAsNeg.size()*errorPenalty
127            - negAsPos.size()*errorPenalty
128            - posAsNeut.size()*accuracyPenalty;
129            
130            if(scoreMethod==ScoreMethod.FULL)
131                    score -= negAsNeut.size()*accuracyPenalty;
132            
133            if(penaliseNeutralExamples)
134                    score -= (neutAsPos.size()*accuracyPenalty        
135                + neutAsNeg.size()*accuracyPenalty);
136            
137            // TODO: man könnte hier statt error penality auch accuracy penalty
138            // nehmen
139            double worstValue = nrOfExamples * errorPenalty;
140            // ergibt Zahl zwischen -1 und 0
141            score = score / worstValue;
142            score -= percentPerLengthUnit * conceptLength;
143            
144            // die folgenden Berechnungen k�nnten aus Performancegr�nden auch
145            // ausgegliedert werden
146            // int domainSize = abox.domain.size();
147            int numberOfExamples = posExamples.size()+negExamples.size();
148            int domainSize = numberOfExamples + neutExamples.size(); 
149            int correctlyClassified = posAsPos.size() + negAsNeg.size() + neutAsNeut.size();
150            int correctOnExamples = posAsPos.size() + negAsNeg.size();
151            int errors = posAsNeg.size() + negAsPos.size();
152            
153            // Accuracy = Quotient von richtig klassifizierten durch Anzahl Domainelemente
154            accuracy = (double) correctlyClassified/domainSize;
155            
156            // Accuracy on Examples = Quotient von richtig klassifizierten durch Anzahl pos.
157            // und neg. Beispiele
158            accuracyOnExamples = (double) correctOnExamples/numberOfExamples;
159            
160            accuracyOnPositiveExamples = (double) posAsPos.size()/posExamples.size(); 
161            
162            // Error = Quotient von komplett falsch klassifizierten durch Anzahl pos.
163            // und neg. Beispiele 
164            errorRate = (double) errors/numberOfExamples;
165        }
166    
167        @Override
168        public double getScoreValue() {
169            return score;
170        }
171        
172        @Override
173        public String toString() {
174            DecimalFormat df = new DecimalFormat("0.00");
175            String str = "";
176            str += "score method ";
177            if(scoreMethod == ScoreMethod.FULL)
178                    str += "full";
179            else
180                    str += "positive";
181            if(!penaliseNeutralExamples)
182                    str += " (neutral examples not penalized)";
183            str += "\n";
184            if(showCorrectClassifications) {
185                str += "Correctly classified:\n";
186                str += "  positive --> positive: " + posAsPos + "\n";
187                str += "  neutral --> neutral: " + neutAsNeut + "\n";
188                str += "  negative --> negative: " + negAsNeg + "\n";
189            }
190            str += "Inaccurately classified (penalty of " + df.format(accuracyPenalty) + " per instance):\n";
191            str += "  positive --> neutral: " + posAsNeut + "\n";
192            if(penaliseNeutralExamples) {
193                    str += "  neutral --> positive: " + neutAsPos + "\n";  
194                    str += "  neutral --> negative: " + neutAsNeg + "\n";
195            }
196            if(scoreMethod == ScoreMethod.FULL)
197                    str += "  negative --> neutral: " + negAsNeut + "\n"; 
198            str += "Classification errors (penalty of " + df.format(errorPenalty) + " per instance):\n";
199            str += "  positive --> negative: " + posAsNeg + "\n";
200            str += "  negative --> positive: " + negAsPos + "\n";
201            str += "Statistics:\n";
202            str += "  Score: " + df.format(score) + "\n";
203            str += "  Accuracy: " + df.format(accuracy*100) + "%\n";
204            str += "  Accuracy on examples: " + df.format(accuracyOnExamples*100) + "%\n";
205            str += "  Accuracy on positive examples: " + df.format(accuracyOnPositiveExamples*100) + "%\n";        
206            str += "  Error rate: " + df.format(errorRate*100) + "%\n";
207            return str;
208        }
209    
210            public SortedSet<Individual> getNegClassified() {
211                    return negClassified;
212            }
213    
214            public SortedSet<Individual> getPosClassified() {
215                    return posClassified;
216            }
217    
218            @Override
219            public Set<Individual> getCoveredNegatives() {
220                    return negAsPos;
221            }
222    
223            @Override
224            public Set<Individual> getCoveredPositives() {
225                    return posAsPos;
226            }
227            
228            @Override
229            public Set<Individual> getNotCoveredPositives() {
230                    return posAsNeg;
231            }
232    
233            @Override
234            public ScorePosNeg getModifiedLengthScore(int newLength) {
235                    return new ScoreThreeValued(newLength, accuracyPenalty, errorPenalty, penaliseNeutralExamples, percentPerLengthUnit, posClassified, neutClassified, negClassified, posExamples, neutExamples, negExamples);
236            }
237    
238            /* (non-Javadoc)
239             * @see org.dllearner.core.Score#getAccuracy()
240             */
241            @Override
242            public double getAccuracy() {
243                    return accuracy;
244            }
245    
246            /* (non-Javadoc)
247             * @see org.dllearner.core.Score#getNotCoveredNegatives()
248             */
249            @Override
250            public Set<Individual> getNotCoveredNegatives() {
251                    return negAsNeg;
252            }       
253        
254    }