001 /**
002 * Copyright (C) 2007-2011, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program. If not, see <http://www.gnu.org/licenses/>.
018 */
019
020 package org.dllearner.kb.sparql;
021
022 import java.io.File;
023 import java.io.FileInputStream;
024 import java.io.FileOutputStream;
025 import java.io.IOException;
026 import java.io.ObjectInputStream;
027 import java.io.ObjectOutputStream;
028 import java.io.Serializable;
029 import java.security.MessageDigest;
030 import java.security.NoSuchAlgorithmException;
031 import java.util.LinkedList;
032
033 import org.apache.log4j.Logger;
034 import org.dllearner.utilities.Files;
035 import org.dllearner.utilities.JamonMonitorLogger;
036
037 import com.jamonapi.Monitor;
038
039 /**
040 * SPARQL query cache to avoid possibly expensive multiple queries. The queries
041 * and their results are written to files. A cache has an associated cache
042 * directory where all files are written.
043 *
044 * Each SPARQL query and its result is written to one file. The name of this
045 * file is a hash of the query. The result of the query is written as JSON
046 * serialisation of the SPARQL XML result, see
047 * http://www.w3.org/TR/rdf-sparql-json-res/.
048 *
049 * Apart from the query and its result, a timestamp of the query is stored.
050 * After a configurable amount of time, query results are considered outdated.
051 * If a cached result of a SPARQL query exists, but is too old, the cache
052 * behaves as if the cached result would not exist.
053 *
054 * TODO: We are doing md5 hashing at the moment, so in rare cases different
055 * SPARQL queries can be mapped to the same file. Support for such scenarios
056 * needs to be included.
057 *
058 * @author Sebastian Hellmann
059 * @author Sebastian Knappe
060 * @author Jens Lehmann
061 */
062 public class Cache implements Serializable {
063
064 private static Logger logger = Logger.getLogger(Cache.class);
065
066 // true = H2 embedded database is used; false = stored in files
067 private boolean useDatabase = false;
068 private ExtractionDBCache h2;
069
070 private static final long serialVersionUID = 843308736471742205L;
071
072 // maps hash of a SPARQL queries to JSON representation
073 // of its results; this
074 // private HashMap<String, String> hm;
075
076 private transient String cacheDir = "";
077 private transient String fileEnding = ".cache";
078 // private long timestamp;
079
080 // specifies after how many seconds a cached result becomes invalid
081 private long freshnessSeconds = 15 * 24 * 60 * 60;
082
083 /**
084 * same ad Cache(String) default is "cache"
085 */
086 /*public Cache() {
087 this("cache");
088 } */
089
090 /**
091 * A Persistant cache is stored in the folder cachePersistant.
092 * It has longer freshness 365 days and is mainly usefull for developing
093 * @return a Cache onject
094 */
095 public static Cache getPersistentCache(){
096 Cache c = new Cache(getPersistantCacheDir());
097 c.setFreshnessInDays(365);
098 return c;
099 }
100
101 /**
102 * @return the default cache object
103 */
104 public static Cache getDefaultCache(){
105 Cache c = new Cache( getDefaultCacheDir());
106 return c;
107 }
108
109 /**
110 * the default cachedir normally is "cache".
111 * @return Default Cache Dir
112 */
113 public static String getDefaultCacheDir(){
114 return "cache";
115 }
116
117 /**
118 * a more persistant cache used for example generation."cachePersistant"
119 * @return persistant Cache Dir
120 */
121 public static String getPersistantCacheDir(){
122 return "cachePersistant";
123 }
124
125 /**
126 * Constructor for the cache itself.
127 *
128 * @param cacheDir
129 * Where the base path to the cache is .
130 */
131 public Cache(String cacheDir) {
132 this(cacheDir, false);
133 }
134
135 public Cache(String cacheDir, boolean useDatabase) {
136 this.cacheDir = cacheDir + File.separator;
137 this.useDatabase = useDatabase;
138 if (!new File(cacheDir).exists()) {
139 Files.mkdir(cacheDir);
140 logger.info("Created directory: " + cacheDir + ".");
141 }
142
143 if(this.useDatabase) {
144 h2 = new ExtractionDBCache(cacheDir);
145 }
146 }
147
148 // compute md5-hash
149 private String getHash(String string) {
150 Monitor hashTime = JamonMonitorLogger.getTimeMonitor(Cache.class, "HashTime").start();
151 // calculate md5 hash of the string (code is somewhat
152 // difficult to read, but there doesn't seem to be a
153 // single function call in Java for md5 hashing)
154 MessageDigest md5 = null;
155 try {
156 md5 = MessageDigest.getInstance("MD5");
157 } catch (NoSuchAlgorithmException e) {
158 e.printStackTrace();
159 }
160 md5.reset();
161 md5.update(string.getBytes());
162 byte[] result = md5.digest();
163
164 StringBuffer hexString = new StringBuffer();
165 for (int i = 0; i < result.length; i++) {
166 hexString.append(Integer.toHexString(0xFF & result[i]));
167 }
168 String str = hexString.toString();
169 hashTime.stop();
170 return str;
171 }
172
173 // return filename where the query result should be saved
174 private String getFilename(String sparqlQuery) {
175 return cacheDir + getHash(sparqlQuery) + fileEnding;
176 }
177
178 /**
179 * Gets a result for a query if it is in the cache.
180 *
181 * @param sparqlQuery
182 * SPARQL query to check.
183 * @return Query result as JSON or null if no result has been found or it is
184 * outdated.
185 */
186 @SuppressWarnings({"unchecked"})
187 private String getCacheEntry(String sparqlQuery) {
188
189 String filename = getFilename(sparqlQuery);
190 File file = new File(filename);
191
192 // return null (indicating no result) if file does not exist
193 if(!file.exists()) {
194 return null;
195 }
196
197
198 LinkedList<Object> entry = null;
199 try {
200 FileInputStream fos = new FileInputStream(filename);
201 ObjectInputStream o = new ObjectInputStream(fos);
202 entry = (LinkedList<Object>) o.readObject();
203 o.close();
204 } catch (IOException e) {
205 e.printStackTrace();
206 if(Files.debug){System.exit(0);}
207 } catch (ClassNotFoundException e) {
208 e.printStackTrace();
209 if(Files.debug){System.exit(0);}
210 }
211
212 // TODO: we need to check whether the query is correct
213 // (may not always be the case due to md5 hashing)
214
215 // determine whether query is outdated
216 long timestamp = (Long) entry.get(0);
217 boolean fresh = checkFreshness(timestamp);
218
219 if(!fresh) {
220 // delete file
221 file.delete();
222 // return null indicating no result
223 return null;
224 }
225
226 return (String) entry.get(2);
227 }
228
229 /**
230 * Adds an entry to the cache.
231 *
232 * @param sparqlQuery
233 * The SPARQL query.
234 * @param result
235 * Result of the SPARQL query.
236 */
237 private void addToCache(String sparqlQuery, String result) {
238 String filename = getFilename(sparqlQuery);
239 long timestamp = System.currentTimeMillis();
240
241 // create the object which will be serialised
242 LinkedList<Object> list = new LinkedList<Object>();
243 list.add(timestamp);
244 list.add(sparqlQuery);
245 list.add(result);
246
247 // create the file we want to use
248 //File file = new File(filename);
249 FileOutputStream fos = null;
250 ObjectOutputStream o = null;
251 try {
252 //file.createNewFile();
253 fos = new FileOutputStream(filename, false);
254 o = new ObjectOutputStream(fos);
255 o.writeObject(list);
256 fos.flush();
257
258 } catch (IOException e) {
259 e.printStackTrace();
260 }finally{
261 try{
262 fos.close();
263 o.close();
264 }catch (Exception e2) {
265 e2.printStackTrace();
266 }
267 }
268 }
269
270 // check whether the given timestamp is fresh
271 private boolean checkFreshness(long timestamp) {
272 return ((System.currentTimeMillis() - timestamp) <= (freshnessSeconds * 1000));
273 }
274
275 /**
276 * Takes a SPARQL query (which has not been evaluated yet) as argument and
277 * returns a JSON result set. The result set is taken from this cache if the
278 * query is stored here. Otherwise the query is send and its result added to
279 * the cache and returned. Convenience method.
280 *
281 * @param query
282 * The SPARQL query.
283 * @return Jena result set in JSON format
284 */
285 public String executeSparqlQuery(SparqlQuery query) {
286 if(useDatabase) {
287 return h2.executeSelectQuery(query.getSparqlEndpoint(), query.getSparqlQueryString());
288 }
289
290 Monitor totaltime =JamonMonitorLogger.getTimeMonitor(Cache.class, "TotalTimeExecuteSparqlQuery").start();
291 JamonMonitorLogger.increaseCount(Cache.class, "TotalQueries");
292
293 Monitor readTime = JamonMonitorLogger.getTimeMonitor(Cache.class, "ReadTime").start();
294 String result = getCacheEntry(query.getSparqlQueryString());
295 readTime.stop();
296
297 if (result != null) {
298 // query.setJson(result);
299 //
300 // query.setRunning(false);
301 // SparqlQuery.writeToSparqlLog("***********\nJSON retrieved from cache");
302 // SparqlQuery.writeToSparqlLog("wget -S -O - '\n"+query.getSparqlEndpoint().getHTTPRequest());
303 // SparqlQuery.writeToSparqlLog(query.getSparqlQueryString());
304
305 //SparqlQuery.writeToSparqlLog("JSON: "+result);
306 JamonMonitorLogger.increaseCount(Cache.class, "SuccessfulHits");
307
308 } else {
309
310 //ResultSet rs= query.send();
311 query.send();
312 String json = query.getJson();
313 if (json!=null){
314 addToCache(query.getSparqlQueryString(), json);
315 // SparqlQuery.writeToSparqlLog("result added to cache: "+json);
316 logger.debug("result added to SPARQL cache: "+json);
317 result=json;
318 //query.setJson(result);
319 } else {
320 json="";
321 result="";
322 logger.warn(Cache.class.getSimpleName()+"empty result: "+query.getSparqlQueryString());
323
324 }
325
326 //return json;
327 }
328 totaltime.stop();
329 return result;
330 }
331
332 public boolean executeSparqlAskQuery(SparqlQuery query) {
333 String str = getCacheEntry(query.getSparqlQueryString());
334 JamonMonitorLogger.increaseCount(Cache.class, "TotalQueries");
335 if(str != null) {
336 JamonMonitorLogger.increaseCount(Cache.class, "SuccessfulHits");
337 return Boolean.parseBoolean(str);
338 } else {
339 Boolean result = query.sendAsk();
340 addToCache(query.getSparqlQueryString(), result.toString());
341 return result;
342 }
343 }
344
345 /**
346 * deletes all Files in the cacheDir, does not delete the cacheDir itself,
347 * and can thus still be used without creating a new Cache Object
348 */
349 public void clearCache() {
350
351 File f = new File(cacheDir);
352 String[] files = f.list();
353 for (int i = 0; i < files.length; i++) {
354 Files.deleteFile(new File(cacheDir+"/"+files[i]));
355 }
356 }
357
358 /**
359 * Changes how long cached results will stay fresh (default 15 days).
360 * @param days number of days
361 */
362 public void setFreshnessInDays(int days){
363 freshnessSeconds = days * 24 * 60 * 60;
364 }
365
366 }