001 /**
002 * Copyright (C) 2007-2011, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program. If not, see <http://www.gnu.org/licenses/>.
018 */
019
020 package org.dllearner.learningproblems;
021
022 import java.util.Collection;
023 import java.util.Collections;
024 import java.util.LinkedList;
025 import java.util.List;
026 import java.util.Random;
027 import java.util.Set;
028 import java.util.SortedSet;
029 import java.util.TreeSet;
030
031 import org.dllearner.core.AbstractLearningProblem;
032 import org.dllearner.core.AbstractReasonerComponent;
033 import org.dllearner.core.configurators.PosOnlyLPConfigurator;
034 import org.dllearner.core.options.CommonConfigMappings;
035 import org.dllearner.core.options.ConfigEntry;
036 import org.dllearner.core.options.ConfigOption;
037 import org.dllearner.core.options.InvalidConfigOptionValueException;
038 import org.dllearner.core.options.StringSetConfigOption;
039 import org.dllearner.core.owl.Description;
040 import org.dllearner.core.owl.Individual;
041
042 /**
043 * A learning problem, where we learn from positive examples only.
044 *
045 * @author Jens Lehmann
046 *
047 */
048 public class PosOnlyLP extends AbstractLearningProblem {
049
050 protected SortedSet<Individual> positiveExamples;
051 private List<Individual> positiveExamplesShuffled;
052 // protected SortedSet<Individual> pseudoNegatives;
053 private List<Individual> individuals;
054
055 // approximation of accuracy +- 0.03 %
056 private static final double approx = 0.03;
057
058 // private PosNegLPStandard definitionLP;
059 private PosOnlyLPConfigurator configurator;
060
061 @Override
062 public PosOnlyLPConfigurator getConfigurator(){
063 return configurator;
064 }
065
066 public PosOnlyLP() {
067 super(null);
068 configurator = new PosOnlyLPConfigurator(this);
069 }
070
071 public PosOnlyLP(AbstractReasonerComponent reasoningService) {
072 super(reasoningService);
073 configurator = new PosOnlyLPConfigurator(this);
074 }
075
076 /*
077 * (non-Javadoc)
078 *
079 * @see org.dllearner.core.Component#applyConfigEntry(org.dllearner.core.ConfigEntry)
080 */
081 @Override
082 @SuppressWarnings( { "unchecked" })
083 public <T> void applyConfigEntry(ConfigEntry<T> entry) throws InvalidConfigOptionValueException {
084 String name = entry.getOptionName();
085 if (name.equals("positiveExamples"))
086 positiveExamples = CommonConfigMappings
087 .getIndividualSet((Set<String>) entry.getValue());
088 }
089
090 public static Collection<ConfigOption<?>> createConfigOptions() {
091 Collection<ConfigOption<?>> options = new LinkedList<ConfigOption<?>>();
092 options.add(new StringSetConfigOption("positiveExamples",
093 "positive examples", null, true, false));
094 return options;
095 }
096
097 public static String getName() {
098 return "pos only learning problem";
099 }
100
101 @Override
102 public void init() {
103
104 // old init code, where pos only was realised through pseudo negatives
105
106 // by default we test all other instances of the knowledge base
107 // pseudoNegatives = Helper.difference(reasoner.getIndividuals(), positiveExamples);
108
109 // create an instance of a standard definition learning problem
110 // instanciated with pseudo-negatives
111 // definitionLP = ComponentFactory.getPosNegLPStandard(
112 // reasoner,
113 // SetManipulation.indToString(positiveExamples),
114 // SetManipulation.indToString(pseudoNegatives));
115 //definitionLP = new PosNegDefinitionLP(reasoningService, positiveExamples, pseudoNegatives);
116 // TODO: we must make sure that the problem also gets the same
117 // reasoning options (i.e. options are the same up to reversed example sets)
118 // definitionLP.init();
119
120 individuals = new LinkedList<Individual>(reasoner.getIndividuals());
121 positiveExamplesShuffled = new LinkedList<Individual>(positiveExamples);
122 Random rand = new Random(1);
123 Collections.shuffle(individuals, rand);
124 Collections.shuffle(positiveExamplesShuffled, rand);
125 }
126
127 public SortedSet<Individual> getPositiveExamples() {
128 return positiveExamples;
129 }
130
131 /**
132 * @return the pseudoNegatives
133 */
134 // public SortedSet<Individual> getPseudoNegatives() {
135 // return pseudoNegatives;
136 // }
137
138
139 // public int coveredPseudoNegativeExamplesOrTooWeak(Description concept) {
140 // return definitionLP.coveredNegativeExamplesOrTooWeak(concept);
141 // }
142
143 /* (non-Javadoc)
144 * @see org.dllearner.core.LearningProblem#computeScore(org.dllearner.core.owl.Description)
145 */
146 @Override
147 public ScorePosOnly computeScore(Description description) {
148 Set<Individual> retrieval = reasoner.getIndividuals(description);
149
150 Set<Individual> instancesCovered = new TreeSet<Individual>();
151 Set<Individual> instancesNotCovered = new TreeSet<Individual>();
152 for(Individual ind : positiveExamples) {
153 if(retrieval.contains(ind)) {
154 instancesCovered.add(ind);
155 } else {
156 instancesNotCovered.add(ind);
157 }
158 }
159
160 double coverage = instancesCovered.size()/(double)positiveExamples.size();
161 double protusion = retrieval.size() == 0 ? 0 : instancesCovered.size()/(double)retrieval.size();
162
163 // pass only additional instances to score object
164 retrieval.removeAll(instancesCovered);
165 return new ScorePosOnly(instancesCovered, instancesNotCovered, coverage, retrieval, protusion, getAccuracy(coverage, protusion));
166 }
167
168 /* (non-Javadoc)
169 * @see org.dllearner.core.LearningProblem#evaluate(org.dllearner.core.owl.Description)
170 */
171 @Override
172 public EvaluatedDescriptionPosOnly evaluate(Description description) {
173 ScorePosOnly score = computeScore(description);
174 return new EvaluatedDescriptionPosOnly(description, score);
175 }
176
177 /* (non-Javadoc)
178 * @see org.dllearner.core.LearningProblem#getAccuracy(org.dllearner.core.owl.Description)
179 */
180 @Override
181 public double getAccuracy(Description description) {
182 Set<Individual> retrieval = reasoner.getIndividuals(description);
183
184 int instancesCovered = 0;
185 for(Individual ind : positiveExamples) {
186 if(retrieval.contains(ind)) {
187 instancesCovered++;
188 }
189 }
190
191 double coverage = instancesCovered/(double)positiveExamples.size();
192 double protusion = retrieval.size() == 0 ? 0 : instancesCovered/(double)retrieval.size();
193
194 return getAccuracy(coverage, protusion);
195 }
196
197 /* (non-Javadoc)
198 * @see org.dllearner.core.LearningProblem#getAccuracyOrTooWeak(org.dllearner.core.owl.Description, double)
199 */
200 @Override
201 public double getAccuracyOrTooWeak(Description description, double noise) {
202
203 // instead of using the standard operation, we use optimisation
204 // and approximation here
205
206 // we abort when there are too many uncovered positives
207 int maxNotCovered = (int) Math.ceil(noise*positiveExamples.size());
208 int instancesCovered = 0;
209 int instancesNotCovered = 0;
210 int total = 0;
211 boolean estimatedA = false;
212
213 double lowerBorderA = 0;
214 int lowerEstimateA = 0;
215 double upperBorderA = 1;
216 int upperEstimateA = positiveExamples.size();
217
218 for(Individual ind : positiveExamplesShuffled) {
219 if(reasoner.hasType(description, ind)) {
220 instancesCovered++;
221 } else {
222 instancesNotCovered ++;
223 if(instancesNotCovered > maxNotCovered) {
224 return -1;
225 }
226 }
227
228 // approximation step (starting after 10 tests)
229 total = instancesCovered + instancesNotCovered;
230 if(total > 10) {
231 // compute confidence interval
232 double p1 = p1(instancesCovered, total);
233 double p2 = p3(p1, total);
234 lowerBorderA = Math.max(0, p1 - p2);
235 upperBorderA = Math.min(1, p1 + p2);
236 double size = upperBorderA - lowerBorderA;
237 // if the interval has a size smaller than 10%, we can be confident
238 if(size < 2 * approx) {
239 // we have to distinguish the cases that the accuracy limit is
240 // below, within, or above the limit and that the mean is below
241 // or above the limit
242 double mean = instancesCovered/(double)total;
243
244 // if the mean is greater than the required minimum, we can accept;
245 // we also accept if the interval is small and close to the minimum
246 // (worst case is to accept a few inaccurate descriptions)
247 if(mean > noise || (upperBorderA > mean && size < 0.03)) {
248 instancesCovered = (int) (instancesCovered/(double)total * positiveExamples.size());
249 upperEstimateA = (int) (upperBorderA * positiveExamples.size());
250 lowerEstimateA = (int) (lowerBorderA * positiveExamples.size());
251 estimatedA = true;
252 break;
253 }
254
255 // reject only if the upper border is far away (we are very
256 // certain not to lose a potential solution)
257 if(upperBorderA + 0.1 < noise) {
258 return -1;
259 }
260 }
261 }
262 }
263
264 double coverage = instancesCovered/(double)positiveExamples.size();
265
266 int testsPerformed = 0;
267 int instancesDescription = 0;
268
269 for(Individual ind : individuals) {
270
271 if(reasoner.hasType(description, ind)) {
272 instancesDescription++;
273 }
274
275 testsPerformed++;
276
277 if(testsPerformed > 10) {
278
279 // compute confidence interval
280 double p1 = p1(instancesDescription, testsPerformed);
281 double p2 = p3(p1, testsPerformed);
282 double lowerBorder = Math.max(0, p1 - p2);
283 double upperBorder = Math.min(1, p1 + p2);
284 int lowerEstimate = (int) (lowerBorder * individuals.size());
285 int upperEstimate = (int) (upperBorder * individuals.size());
286
287 double size;
288 if(estimatedA) {
289 // size = 1/(coverageFactor+1) * (coverageFactor * (upperBorderA-lowerBorderA) + Math.sqrt(upperEstimateA/(upperEstimateA+lowerEstimate)) + Math.sqrt(lowerEstimateA/(lowerEstimateA+upperEstimate)));
290 size = getAccuracy(upperBorderA, upperEstimateA/(double)(upperEstimateA+lowerEstimate)) - getAccuracy(lowerBorderA, lowerEstimateA/(double)(lowerEstimateA+upperEstimate));
291 } else {
292 // size = 1/(coverageFactor+1) * (coverageFactor * coverage + Math.sqrt(instancesCovered/(instancesCovered+lowerEstimate)) + Math.sqrt(instancesCovered/(instancesCovered+upperEstimate)));
293 size = getAccuracy(coverage, instancesCovered/(double)(instancesCovered+lowerEstimate)) - getAccuracy(coverage, instancesCovered/(double)(instancesCovered+upperEstimate));
294 }
295
296 if(size < 0.1) {
297 // System.out.println(instancesDescription + " of " + testsPerformed);
298 // System.out.println("interval from " + lowerEstimate + " to " + upperEstimate);
299 // System.out.println("size: " + size);
300
301 // estimatedB = true;
302 // calculate total number of instances
303 instancesDescription = (int) (instancesDescription/(double)testsPerformed * individuals.size());
304 break;
305 }
306 }
307 }
308
309 // since we measured/estimated accuracy only on instances outside A (superClassInstances
310 // does not include instances of A), we need to add it in the denominator
311 double protusion = instancesCovered/(double)(instancesDescription+instancesCovered);
312 if(instancesCovered + instancesDescription == 0) {
313 protusion = 0;
314 }
315
316 return getAccuracy(coverage, protusion);
317 }
318
319 // see paper: expression used in confidence interval estimation
320 private static double p3(double p1, int total) {
321 return 1.96 * Math.sqrt(p1*(1-p1)/(total+4));
322 }
323
324 // see paper: p'
325 private static double p1(int success, int total) {
326 return (success+2)/(double)(total+4);
327 }
328
329 private double getAccuracy(double coverage, double protusion) {
330 return 0.5 * (coverage + Math.sqrt(protusion));
331 }
332 }