001 /**
002 * Copyright (C) 2007-2008, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program. If not, see <http://www.gnu.org/licenses/>.
018 *
019 */
020 package org.dllearner.scripts.improveWikipedia;
021
022 import java.io.File;
023 import java.util.ArrayList;
024 import java.util.List;
025
026 import org.dllearner.learningproblems.EvaluatedDescriptionPosNeg;
027 import org.dllearner.utilities.Files;
028 import org.dllearner.utilities.datastructures.SetManipulation;
029
030 /**
031 * This is a simple class, it might be worked into other classes later. filters
032 * concepts and records some results
033 *
034 * @author Sebastian Hellmann
035 *
036 */
037 public class ConceptSelector {
038
039 private static final long WASH = 1216800000000L;
040
041 // List<EvaluatedDescription> concepts;
042
043 public ConceptSelector() {
044 super();
045 // this.concepts = concepts;
046 // this.recordConceptClasses();
047
048 }
049
050 public List<EvaluatedDescriptionPosNeg> getAllConceptsWithoutOR(
051 List<EvaluatedDescriptionPosNeg> concepts) {
052 return getConceptsNotContainingString(concepts, "OR");
053 }
054
055 @SuppressWarnings("unchecked")
056 public List<EvaluatedDescriptionPosNeg> getConceptsNotContainingString(
057 List<EvaluatedDescriptionPosNeg> concepts, String filterString,
058 int limitSize) {
059 // List<EvaluatedDescription> tmp =
060 // getConceptsNotContainingString(filterString);
061 // List<EvaluatedDescription> result = new
062 // ArrayList<EvaluatedDescription>();
063 return SetManipulation.getFirst(getConceptsNotContainingString(
064 concepts, filterString), limitSize);
065 /*
066 * while ((!tmp.isEmpty()) && (result.size() <= limitSize)) {
067 * result.add(tmp.remove(0)); } return result;
068 */
069 }
070
071 public List<EvaluatedDescriptionPosNeg> getConceptsNotContainingString(
072 List<EvaluatedDescriptionPosNeg> concepts, String filterString) {
073
074 List<EvaluatedDescriptionPosNeg> result = new ArrayList<EvaluatedDescriptionPosNeg>();
075 for (EvaluatedDescriptionPosNeg description : concepts) {
076 if (!description.toString().contains(filterString)) {
077 result.add(description);
078 }
079
080 }
081 return result;
082 }
083
084 public void recordConceptClasses(List<EvaluatedDescriptionPosNeg> concepts) {
085 StringBuffer result = new StringBuffer();
086 StringBuffer result1 = new StringBuffer("\n\n ***********Entity*****\n");
087 StringBuffer result2 = new StringBuffer("\n\n ***********OR*****\n");
088 int result1count = 1;
089 int result2count = 1;
090
091 int x = 0;
092 for (EvaluatedDescriptionPosNeg description : concepts) {
093 if (x < 50) {
094 x++;
095 result.append(description + "\n");
096 }
097
098 if (!description.toString().contains("Entity")) {
099 result1.append(description + "\n");
100 result1count++;
101 }
102 if (!description.toString().contains("OR")) {
103 result2.append(description + "\n");
104 result2count++;
105 }
106 }
107 result.append("full size: " + concepts.size());
108 result.append(result1.toString() + " size: " + result1count + "\n");
109 result.append(result2.toString() + " size: " + result2count + "\n");
110
111 Files.createFile(new File("results/descriptions/concepts" + time()
112 + ".txt"), result.toString());
113 }
114
115 public static String time() {
116 return ("" + (System.currentTimeMillis() - WASH)).substring(0, 7);
117
118 }
119
120 }