001    /**
002     * Copyright (C) 2007-2011, Jens Lehmann
003     *
004     * This file is part of DL-Learner.
005     *
006     * DL-Learner is free software; you can redistribute it and/or modify
007     * it under the terms of the GNU General Public License as published by
008     * the Free Software Foundation; either version 3 of the License, or
009     * (at your option) any later version.
010     *
011     * DL-Learner is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014     * GNU General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018     */
019    
020    package org.dllearner.kb.sparql;
021    
022    import java.io.File;
023    import java.io.FileInputStream;
024    import java.io.FileOutputStream;
025    import java.io.IOException;
026    import java.io.ObjectInputStream;
027    import java.io.ObjectOutputStream;
028    import java.io.Serializable;
029    import java.security.MessageDigest;
030    import java.security.NoSuchAlgorithmException;
031    import java.util.LinkedList;
032    
033    import org.apache.log4j.Logger;
034    import org.dllearner.utilities.Files;
035    import org.dllearner.utilities.JamonMonitorLogger;
036    
037    import com.jamonapi.Monitor;
038    
039    /**
040     * SPARQL query cache to avoid possibly expensive multiple queries. The queries
041     * and their results are written to files. A cache has an associated cache
042     * directory where all files are written.
043     * 
044     * Each SPARQL query and its result is written to one file. The name of this
045     * file is a hash of the query. The result of the query is written as JSON
046     * serialisation of the SPARQL XML result, see
047     * http://www.w3.org/TR/rdf-sparql-json-res/.
048     * 
049     * Apart from the query and its result, a timestamp of the query is stored.
050     * After a configurable amount of time, query results are considered outdated.
051     * If a cached result of a SPARQL query exists, but is too old, the cache
052     * behaves as if the cached result would not exist.
053     * 
054     * TODO: We are doing md5 hashing at the moment, so in rare cases different
055     * SPARQL queries can be mapped to the same file. Support for such scenarios
056     * needs to be included.
057     * 
058     * @author Sebastian Hellmann
059     * @author Sebastian Knappe
060     * @author Jens Lehmann
061     */
062    public class Cache implements Serializable {
063    
064            private static Logger logger = Logger.getLogger(Cache.class);
065            
066            // true = H2 embedded database is used; false = stored in files
067            private boolean useDatabase = false;
068            private ExtractionDBCache h2;
069    
070            private static final long serialVersionUID = 843308736471742205L;
071    
072            // maps hash of a SPARQL queries to JSON representation
073            // of its results; this
074            // private HashMap<String, String> hm;
075    
076            private transient String cacheDir = "";
077            private transient String fileEnding = ".cache";
078            // private long timestamp;
079    
080            // specifies after how many seconds a cached result becomes invalid
081            private long freshnessSeconds = 15 * 24 * 60 * 60;
082    
083            /**
084             *  same ad Cache(String) default is "cache"
085             */
086            /*public Cache() {
087                    this("cache");
088            } */
089            
090            /**
091             * A Persistant cache is stored in the folder cachePersistant.
092             * It has longer freshness 365 days and is mainly usefull for developing
093             * @return a Cache onject
094             */
095            public static Cache getPersistentCache(){
096                    Cache c = new Cache(getPersistantCacheDir()); 
097                    c.setFreshnessInDays(365);
098                    return c;
099            }
100            
101            /**
102             * @return the default cache object
103             */
104            public static Cache getDefaultCache(){
105                    Cache c = new Cache( getDefaultCacheDir()); 
106                    return c;
107            }
108            
109            /**
110             * the default cachedir normally is "cache".
111             * @return Default Cache Dir
112             */
113            public static String getDefaultCacheDir(){
114                    return "cache";
115            }
116            
117            /**
118             * a more persistant cache used for example generation."cachePersistant"
119             * @return persistant Cache Dir
120             */
121            public static String getPersistantCacheDir(){
122                    return "cachePersistant";
123            }
124            
125            /**
126             * Constructor for the cache itself.
127             * 
128             * @param cacheDir
129             *            Where the base path to the cache is .
130             */
131            public Cache(String cacheDir) {
132                    this(cacheDir, false);
133            }
134    
135            public Cache(String cacheDir, boolean useDatabase) {
136                    this.cacheDir = cacheDir + File.separator;
137                    this.useDatabase = useDatabase;
138                    if (!new File(cacheDir).exists()) {
139                            Files.mkdir(cacheDir);
140                            logger.info("Created directory: " + cacheDir + ".");
141                    }
142                    
143                    if(this.useDatabase) {
144                            h2 = new ExtractionDBCache(cacheDir);
145                    }               
146            }
147            
148            // compute md5-hash
149            private String getHash(String string) {
150                    Monitor hashTime = JamonMonitorLogger.getTimeMonitor(Cache.class, "HashTime").start();
151                    // calculate md5 hash of the string (code is somewhat
152                    // difficult to read, but there doesn't seem to be a
153                    // single function call in Java for md5 hashing)
154                    MessageDigest md5 = null;
155                    try {
156                            md5 = MessageDigest.getInstance("MD5");
157                    } catch (NoSuchAlgorithmException e) {
158                            e.printStackTrace();
159                    }
160                    md5.reset();
161                    md5.update(string.getBytes());
162                    byte[] result = md5.digest();
163    
164                    StringBuffer hexString = new StringBuffer();
165                    for (int i = 0; i < result.length; i++) {
166                            hexString.append(Integer.toHexString(0xFF & result[i]));
167                    }
168                    String str = hexString.toString();
169                    hashTime.stop();
170                    return str;
171            }
172    
173            // return filename where the query result should be saved
174            private String getFilename(String sparqlQuery) {
175                    return cacheDir + getHash(sparqlQuery) + fileEnding;
176            }
177    
178            /**
179             * Gets a result for a query if it is in the cache.
180             * 
181             * @param sparqlQuery
182             *            SPARQL query to check.
183             * @return Query result as JSON or null if no result has been found or it is
184             *         outdated.
185             */
186            @SuppressWarnings({"unchecked"})
187            private String getCacheEntry(String sparqlQuery) {
188                    
189                    String filename = getFilename(sparqlQuery);
190                    File file = new File(filename);
191                    
192                    // return null (indicating no result) if file does not exist
193                    if(!file.exists()) {
194                            return null;
195                    }
196                            
197                    
198                    LinkedList<Object> entry = null;
199                    try {
200                            FileInputStream fos = new FileInputStream(filename);
201                            ObjectInputStream o = new ObjectInputStream(fos);
202                            entry = (LinkedList<Object>) o.readObject();
203                            o.close();
204                    } catch (IOException e) {
205                            e.printStackTrace();
206                            if(Files.debug){System.exit(0);}
207                    } catch (ClassNotFoundException e) {
208                            e.printStackTrace();
209                            if(Files.debug){System.exit(0);}
210                    }
211                    
212                    // TODO: we need to check whether the query is correct
213                    // (may not always be the case due to md5 hashing)
214                    
215                    // determine whether query is outdated
216                    long timestamp = (Long) entry.get(0);
217                    boolean fresh = checkFreshness(timestamp);
218                    
219                    if(!fresh) {
220                            // delete file
221                            file.delete();
222                            // return null indicating no result
223                            return null;
224                    }
225                    
226                    return (String) entry.get(2);
227            }
228    
229            /**
230             * Adds an entry to the cache.
231             * 
232             * @param sparqlQuery
233             *            The SPARQL query.
234             * @param result
235             *            Result of the SPARQL query.
236             */
237            private void addToCache(String sparqlQuery, String result) {
238                    String filename = getFilename(sparqlQuery);
239                    long timestamp = System.currentTimeMillis();
240    
241                    // create the object which will be serialised
242                    LinkedList<Object> list = new LinkedList<Object>();
243                    list.add(timestamp);
244                    list.add(sparqlQuery);
245                    list.add(result);
246    
247                    // create the file we want to use
248                    //File file = new File(filename);
249                    FileOutputStream fos = null;
250                    ObjectOutputStream o = null;
251                    try {
252                            //file.createNewFile();
253                            fos = new FileOutputStream(filename, false);
254                            o = new ObjectOutputStream(fos);
255                            o.writeObject(list);
256                            fos.flush();
257                            
258                    } catch (IOException e) {
259                            e.printStackTrace();
260                    }finally{
261                            try{
262                                    fos.close();
263                                    o.close();
264                            }catch (Exception e2) {
265                                     e2.printStackTrace();
266                            }
267                    }
268            }
269    
270            // check whether the given timestamp is fresh
271            private boolean checkFreshness(long timestamp) {
272                    return ((System.currentTimeMillis() - timestamp) <= (freshnessSeconds * 1000));
273            }
274    
275            /**
276             * Takes a SPARQL query (which has not been evaluated yet) as argument and
277             * returns a JSON result set. The result set is taken from this cache if the
278             * query is stored here. Otherwise the query is send and its result added to
279             * the cache and returned. Convenience method.
280             * 
281             * @param query
282             *            The SPARQL query.
283             * @return Jena result set in JSON format
284             */
285            public String executeSparqlQuery(SparqlQuery query) {
286                    if(useDatabase) {
287                            return h2.executeSelectQuery(query.getSparqlEndpoint(), query.getSparqlQueryString());
288                    }
289                    
290                    Monitor totaltime =JamonMonitorLogger.getTimeMonitor(Cache.class, "TotalTimeExecuteSparqlQuery").start();
291                    JamonMonitorLogger.increaseCount(Cache.class, "TotalQueries");
292            
293                    Monitor readTime = JamonMonitorLogger.getTimeMonitor(Cache.class, "ReadTime").start();
294                    String result = getCacheEntry(query.getSparqlQueryString());
295                    readTime.stop();
296                    
297                    if (result != null) {
298    //                      query.setJson(result);
299    //                      
300    //                  query.setRunning(false);
301    //                      SparqlQuery.writeToSparqlLog("***********\nJSON retrieved from cache");
302    //                      SparqlQuery.writeToSparqlLog("wget -S -O - '\n"+query.getSparqlEndpoint().getHTTPRequest());
303    //                      SparqlQuery.writeToSparqlLog(query.getSparqlQueryString());
304                            
305                            //SparqlQuery.writeToSparqlLog("JSON: "+result);
306                            JamonMonitorLogger.increaseCount(Cache.class, "SuccessfulHits");
307                            
308                    } else {
309                            
310                            //ResultSet rs= query.send();
311                            query.send();
312                            String json = query.getJson();
313                            if (json!=null){
314                                    addToCache(query.getSparqlQueryString(), json);
315    //                              SparqlQuery.writeToSparqlLog("result added to cache: "+json);
316                                    logger.debug("result added to SPARQL cache: "+json);
317                                    result=json;
318                                    //query.setJson(result);
319                            } else {
320                                    json="";
321                                    result="";
322                                    logger.warn(Cache.class.getSimpleName()+"empty result: "+query.getSparqlQueryString());
323                                    
324                            }
325                            
326                            //return json;
327                    }
328                    totaltime.stop();
329                    return result;
330            }
331            
332            public boolean executeSparqlAskQuery(SparqlQuery query) {
333                    String str = getCacheEntry(query.getSparqlQueryString());
334                    JamonMonitorLogger.increaseCount(Cache.class, "TotalQueries");
335                    if(str != null) {
336                            JamonMonitorLogger.increaseCount(Cache.class, "SuccessfulHits");
337                            return Boolean.parseBoolean(str);
338                    } else {
339                            Boolean result = query.sendAsk();
340                            addToCache(query.getSparqlQueryString(), result.toString());
341                            return result;
342                    }
343            }
344            
345            /**
346             * deletes all Files in the cacheDir, does not delete the cacheDir itself, 
347             * and can thus still be used without creating a new Cache Object
348             */
349            public void clearCache() {
350                    
351                            File f = new File(cacheDir);
352                        String[] files = f.list();
353                        for (int i = 0; i < files.length; i++) {
354                            Files.deleteFile(new File(cacheDir+"/"+files[i]));
355                        }     
356            }
357            
358            /**
359             * Changes how long cached results will stay fresh (default 15 days).
360             * @param days number of days
361             */
362            public void setFreshnessInDays(int days){
363                    freshnessSeconds = days * 24 * 60 * 60;
364            }
365    
366    }