001 /**
002 * Copyright (C) 2007-2011, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program. If not, see <http://www.gnu.org/licenses/>.
018 */
019
020 package org.dllearner.learningproblems;
021
022 import java.text.DecimalFormat;
023 import java.util.Set;
024 import java.util.SortedSet;
025
026 import org.dllearner.core.owl.Individual;
027 import org.dllearner.utilities.Helper;
028
029 /**
030 * Computes the score (a negative value) by comparing the classification results
031 * with ideal results.
032 *
033 * TODO: The implementation is not very efficient, because some things are
034 * only computed to be able to present the score results. This means that
035 * it would be better to compute only the necessary computations and do
036 * the other ones only when they are needed to calculate statistical values.
037 *
038 * @author Jens Lehmann
039 *
040 */
041 public class ScoreThreeValued extends ScorePosNeg {
042
043 private static final long serialVersionUID = -1780084688122949685L;
044
045 public enum ScoreMethod {POSITIVE, FULL};
046
047 // configuration options
048 private double accuracyPenalty;
049 private double errorPenalty;
050 private boolean penaliseNeutralExamples;
051 private double percentPerLengthUnit;
052
053 // potential configuration options (not implemented as such, but one
054 // could so)
055 private boolean showCorrectClassifications = false;
056 private static ScoreMethod scoreMethod = ScoreMethod.POSITIVE;
057
058 private SortedSet<Individual> posClassified;
059 private SortedSet<Individual> neutClassified;
060 private SortedSet<Individual> negClassified;
061 private SortedSet<Individual> posExamples;
062 private SortedSet<Individual> neutExamples;
063 private SortedSet<Individual> negExamples;
064
065 private Set<Individual> posAsNeg;
066 private Set<Individual> negAsPos;
067 private Set<Individual> posAsNeut;
068 private Set<Individual> neutAsPos;
069 private Set<Individual> neutAsNeg;
070 private Set<Individual> negAsNeut;
071 private Set<Individual> posAsPos;
072 private Set<Individual> negAsNeg;
073 private Set<Individual> neutAsNeut;
074
075 private double score;
076 private double accuracy;
077 private double accuracyOnExamples;
078 private double accuracyOnPositiveExamples;
079 private double errorRate;
080
081 private int nrOfExamples;
082 private int conceptLength;
083
084 public ScoreThreeValued(int conceptLength,
085 double accuracyPenalty,
086 double errorPenalty,
087 boolean penaliseNeutralExamples,
088 double percentPerLengthUnit,
089 SortedSet<Individual> posClassified,
090 SortedSet<Individual> neutClassified,
091 SortedSet<Individual> negClassified,
092 SortedSet<Individual> posExamples,
093 SortedSet<Individual> neutExamples,
094 SortedSet<Individual> negExamples) {
095 this.conceptLength = conceptLength;
096 this.accuracyPenalty = accuracyPenalty;
097 this.errorPenalty = errorPenalty;
098 this.penaliseNeutralExamples = penaliseNeutralExamples;
099 this.percentPerLengthUnit = percentPerLengthUnit;
100 this.posClassified = posClassified;
101 this.neutClassified = neutClassified;
102 this.negClassified = negClassified;
103 this.posExamples = posExamples;
104 this.neutExamples = neutExamples;
105 this.negExamples = negExamples;
106 nrOfExamples = posExamples.size()+negExamples.size();
107 computeClassificationMatrix();
108 computeStatistics();
109 }
110
111 private void computeClassificationMatrix() {
112 posAsNeg = Helper.intersection(posExamples,negClassified);
113 negAsPos = Helper.intersection(negExamples,posClassified);
114 posAsNeut = Helper.intersection(posExamples,neutClassified);
115 neutAsPos = Helper.intersection(neutExamples,posClassified);
116 neutAsNeg = Helper.intersection(neutExamples,negClassified);
117 negAsNeut = Helper.intersection(negExamples,neutClassified);
118 // die 3 Berechnungen sind nicht so wichtig f�r die Punktzahl, d.h. falls
119 // es Performance bringt, dann kann man sie auch ausgliedern
120 posAsPos = Helper.intersection(posExamples,posClassified);
121 negAsNeg = Helper.intersection(negExamples,negClassified);
122 neutAsNeut = Helper.intersection(neutExamples,neutClassified);
123 }
124
125 private void computeStatistics() {
126 score = - posAsNeg.size()*errorPenalty
127 - negAsPos.size()*errorPenalty
128 - posAsNeut.size()*accuracyPenalty;
129
130 if(scoreMethod==ScoreMethod.FULL)
131 score -= negAsNeut.size()*accuracyPenalty;
132
133 if(penaliseNeutralExamples)
134 score -= (neutAsPos.size()*accuracyPenalty
135 + neutAsNeg.size()*accuracyPenalty);
136
137 // TODO: man könnte hier statt error penality auch accuracy penalty
138 // nehmen
139 double worstValue = nrOfExamples * errorPenalty;
140 // ergibt Zahl zwischen -1 und 0
141 score = score / worstValue;
142 score -= percentPerLengthUnit * conceptLength;
143
144 // die folgenden Berechnungen k�nnten aus Performancegr�nden auch
145 // ausgegliedert werden
146 // int domainSize = abox.domain.size();
147 int numberOfExamples = posExamples.size()+negExamples.size();
148 int domainSize = numberOfExamples + neutExamples.size();
149 int correctlyClassified = posAsPos.size() + negAsNeg.size() + neutAsNeut.size();
150 int correctOnExamples = posAsPos.size() + negAsNeg.size();
151 int errors = posAsNeg.size() + negAsPos.size();
152
153 // Accuracy = Quotient von richtig klassifizierten durch Anzahl Domainelemente
154 accuracy = (double) correctlyClassified/domainSize;
155
156 // Accuracy on Examples = Quotient von richtig klassifizierten durch Anzahl pos.
157 // und neg. Beispiele
158 accuracyOnExamples = (double) correctOnExamples/numberOfExamples;
159
160 accuracyOnPositiveExamples = (double) posAsPos.size()/posExamples.size();
161
162 // Error = Quotient von komplett falsch klassifizierten durch Anzahl pos.
163 // und neg. Beispiele
164 errorRate = (double) errors/numberOfExamples;
165 }
166
167 @Override
168 public double getScoreValue() {
169 return score;
170 }
171
172 @Override
173 public String toString() {
174 DecimalFormat df = new DecimalFormat("0.00");
175 String str = "";
176 str += "score method ";
177 if(scoreMethod == ScoreMethod.FULL)
178 str += "full";
179 else
180 str += "positive";
181 if(!penaliseNeutralExamples)
182 str += " (neutral examples not penalized)";
183 str += "\n";
184 if(showCorrectClassifications) {
185 str += "Correctly classified:\n";
186 str += " positive --> positive: " + posAsPos + "\n";
187 str += " neutral --> neutral: " + neutAsNeut + "\n";
188 str += " negative --> negative: " + negAsNeg + "\n";
189 }
190 str += "Inaccurately classified (penalty of " + df.format(accuracyPenalty) + " per instance):\n";
191 str += " positive --> neutral: " + posAsNeut + "\n";
192 if(penaliseNeutralExamples) {
193 str += " neutral --> positive: " + neutAsPos + "\n";
194 str += " neutral --> negative: " + neutAsNeg + "\n";
195 }
196 if(scoreMethod == ScoreMethod.FULL)
197 str += " negative --> neutral: " + negAsNeut + "\n";
198 str += "Classification errors (penalty of " + df.format(errorPenalty) + " per instance):\n";
199 str += " positive --> negative: " + posAsNeg + "\n";
200 str += " negative --> positive: " + negAsPos + "\n";
201 str += "Statistics:\n";
202 str += " Score: " + df.format(score) + "\n";
203 str += " Accuracy: " + df.format(accuracy*100) + "%\n";
204 str += " Accuracy on examples: " + df.format(accuracyOnExamples*100) + "%\n";
205 str += " Accuracy on positive examples: " + df.format(accuracyOnPositiveExamples*100) + "%\n";
206 str += " Error rate: " + df.format(errorRate*100) + "%\n";
207 return str;
208 }
209
210 public SortedSet<Individual> getNegClassified() {
211 return negClassified;
212 }
213
214 public SortedSet<Individual> getPosClassified() {
215 return posClassified;
216 }
217
218 @Override
219 public Set<Individual> getCoveredNegatives() {
220 return negAsPos;
221 }
222
223 @Override
224 public Set<Individual> getCoveredPositives() {
225 return posAsPos;
226 }
227
228 @Override
229 public Set<Individual> getNotCoveredPositives() {
230 return posAsNeg;
231 }
232
233 @Override
234 public ScorePosNeg getModifiedLengthScore(int newLength) {
235 return new ScoreThreeValued(newLength, accuracyPenalty, errorPenalty, penaliseNeutralExamples, percentPerLengthUnit, posClassified, neutClassified, negClassified, posExamples, neutExamples, negExamples);
236 }
237
238 /* (non-Javadoc)
239 * @see org.dllearner.core.Score#getAccuracy()
240 */
241 @Override
242 public double getAccuracy() {
243 return accuracy;
244 }
245
246 /* (non-Javadoc)
247 * @see org.dllearner.core.Score#getNotCoveredNegatives()
248 */
249 @Override
250 public Set<Individual> getNotCoveredNegatives() {
251 return negAsNeg;
252 }
253
254 }