001 /**
002 * Copyright (C) 2007-2008, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program. If not, see <http://www.gnu.org/licenses/>.
018 *
019 */
020 package org.dllearner.scripts;
021
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.prefs.BackingStoreException;
import java.util.prefs.Preferences;

import org.ini4j.IniFile;
036
037 /**
038 * Fills that database needed for running DBpedia Navigator.
039 * First move the mentioned DBpedia files to the specified
040 * directory, then execute this script. Database settings are
041 * taken from the settings.ini file of DBpedia Navigator.
042 *
043 * @author Sebastian Knappe
044 * @author Jens Lehmann
045 *
046 */
047 @SuppressWarnings("unused")
048 public class CalculatePageRank {
049
050 private static String datasetDir;
051 private static String dbServer;
052 private static String dbName;
053 private static String dbUser;
054 private static String dbPass;
055
056 private String wikilinks;
057 private String labels;
058 private String categories;
059 private String categoriesNewOntology;
060 private String categoriesNewOntology2;
061
062 private static Connection con;
063
064 public CalculatePageRank() throws BackingStoreException
065 {
066 // reading values from ini file
067 String iniFile = "../src/dbpedia-navigator/settings.ini";
068 Preferences prefs = new IniFile(new File(iniFile));
069 dbServer = prefs.node("database").get("server", null);
070 dbName = prefs.node("database").get("name", null);
071 dbUser = prefs.node("database").get("user", null);
072 dbPass = prefs.node("database").get("pass", null);
073 datasetDir = prefs.node("database").get("datasetDir", null);
074
075 wikilinks = datasetDir + "pagelinks_en.nt";
076 labels = datasetDir + "articles_label_en.nt";
077 categories = datasetDir + "yago_en.nt";
078 categoriesNewOntology = datasetDir + "dbpedia-ontology-schema.nt";
079 categoriesNewOntology2 = datasetDir + "dbpedia-ontology-types.nt";
080 }
081
082 private void calculateLinks()
083 {
084 try{
085 Statement stmt;
086 ResultSet rs;
087 int number;
088
089 stmt = con.createStatement();
090 BufferedReader in = new BufferedReader(new FileReader(wikilinks));
091
092 String line;
093 String[] split;
094 String name;
095 int i=0;
096 while ((line=in.readLine())!=null)
097 {
098 split=line.split(" ");
099 name=split[2].substring(1, split[2].length()-1);
100 rs=stmt.executeQuery("SELECT number FROM rank WHERE name='"+name+"'");
101 if (rs.next()){
102 number=rs.getInt(1);
103 number++;
104 stmt.executeUpdate("UPDATE rank SET number="+number+" WHERE name='"+name+"'");
105 }
106 else{
107 try{
108 stmt.executeUpdate("INSERT INTO rank (name,number) VALUES ('"+name+"',1)");
109 }catch(Exception e)
110 {}
111 }
112 if (i%100000==0) System.out.println(i);
113 i++;
114 }
115
116 in.close();
117 } catch (FileNotFoundException e)
118 {
119 System.out.println("File not found");
120 } catch (IOException e)
121 {
122 System.out.println("IOException");
123 } catch (Exception e)
124 {
125 e.printStackTrace();
126 }
127 }
128
129 private void addLabels()
130 {
131 try{
132 Statement stmt;
133 ResultSet rs;
134
135 stmt = con.createStatement();
136 BufferedReader in = new BufferedReader(new FileReader(labels));
137
138 String line;
139 String[] split;
140 String name;
141 String label;
142 int i=0;
143 while ((line=in.readLine())!=null)
144 {
145 split=line.split(">");
146 name=split[0].substring(1);
147 label=split[2].substring(split[2].indexOf("\"")+1, split[2].lastIndexOf("\""));
148 rs=stmt.executeQuery("SELECT number FROM rank WHERE name='"+name+"'");
149 if (rs.next()){
150 stmt.executeUpdate("UPDATE rank SET label=\""+label+"\" WHERE name='"+name+"'");
151 }
152 else{
153 try{
154 stmt.executeUpdate("INSERT INTO rank (name,label) VALUES ('"+name+"',\""+label+"\")");
155 }catch(Exception e)
156 {}
157 }
158 if (i%100000==0) System.out.println(i);
159 i++;
160 }
161
162 in.close();
163 } catch (FileNotFoundException e)
164 {
165 System.out.println("File not found");
166 } catch (IOException e)
167 {
168 System.out.println("IOException");
169 } catch (Exception e)
170 {
171 e.printStackTrace();
172 }
173 }
174
175 private void calculateCategories()
176 {
177 try{
178 Statement stmt;
179
180 stmt = con.createStatement();
181
182 BufferedReader in = new BufferedReader(new FileReader(categories));
183
184 String line;
185 String[] split;
186 String name;
187 String label;
188 String pred;
189 int i=0;
190 while ((line=in.readLine())!=null)
191 {
192 split=line.split(">");
193 name=split[0].substring(1);
194 pred=split[1].substring(2);
195 if (pred.equals("http://www.w3.org/2000/01/rdf-schema#label"))
196 label=split[2].substring(split[2].indexOf("\"")+1, split[2].lastIndexOf("\""));
197 else
198 label=split[2].substring(2);
199 if (pred.equals("http://www.w3.org/2000/01/rdf-schema#label")){
200 try{
201 stmt.executeUpdate("INSERT INTO categories (category,label) VALUES (\""+name+"\",\""+label+"\")");
202 }catch(Exception e)
203 {}
204 }
205 else{
206 if (name.startsWith("http://dbpedia.org/resource")){
207 try{
208 stmt.executeUpdate("INSERT INTO articlecategories (name,category) VALUES ('"+name+"','"+label+"')");
209 }catch(Exception e)
210 {}
211 }else{
212 try{
213 stmt.executeUpdate("INSERT INTO classhierarchy (father,child) VALUES ('"+label+"','"+name+"')");
214 }catch(Exception e)
215 {}
216 }
217 }
218 if (i%100000==0) System.out.println(i);
219 i++;
220 }
221
222 in.close();
223 } catch (FileNotFoundException e)
224 {
225 System.out.println("File not found");
226 } catch (IOException e)
227 {
228 System.out.println("IOException");
229 } catch (Exception e)
230 {
231 e.printStackTrace();
232 }
233 }
234
235 private void calculateCategoriesNewOntology()
236 {
237 try{
238 Statement stmt;
239
240 stmt = con.createStatement();
241
242 BufferedReader in = new BufferedReader(new FileReader(categoriesNewOntology));
243
244 String line;
245 String[] split;
246 String name;
247 String label;
248 String pred;
249 int i=0;
250 boolean isClassLabel;
251 String className;
252 while ((line=in.readLine())!=null)
253 {
254 split=line.split(">");
255 if (split.length<3) continue;
256 name=split[0].substring(1);
257 pred=split[1].substring(2);
258 if (pred.equals("http://www.w3.org/2000/01/rdf-schema#label")){
259 label=split[2].substring(split[2].indexOf("\"")+1, split[2].lastIndexOf("\""));
260 if (name.length()>name.lastIndexOf("/")+1) className=name.substring(name.lastIndexOf("/")+1,name.lastIndexOf("/")+2);
261 else className="t";
262 if (className.toLowerCase().equals(className))
263 isClassLabel=false;
264 else
265 isClassLabel=true;
266 }
267 else{
268 label=split[2].substring(2);
269 isClassLabel=false;
270 }
271 if (pred.equals("http://www.w3.org/2000/01/rdf-schema#label")&&isClassLabel){
272 try{
273 stmt.executeUpdate("INSERT INTO categories (category,label) VALUES (\""+name+"\",\""+label+"\")");
274 }catch(Exception e)
275 {}
276 }
277 else{
278 if (pred.equals("http://www.w3.org/2000/01/rdf-schema#subClassOf")){
279 try{
280 stmt.executeUpdate("INSERT INTO classhierarchy (father,child) VALUES ('"+label+"','"+name+"')");
281 }catch(Exception e)
282 {}
283 }
284 }
285 if (i%100000==0) System.out.println(i);
286 i++;
287 }
288
289 in.close();
290
291 /*in = new BufferedReader(new FileReader(categoriesNewOntology2));
292
293 i=0;
294 while ((line=in.readLine())!=null)
295 {
296 split=line.split(">");
297 name=split[0].substring(1);
298 label=split[2].substring(2);
299 try{
300 stmt.executeUpdate("INSERT INTO articlecategories (name,category) VALUES ('"+name+"','"+label+"')");
301 }catch(Exception e)
302 {}
303 if (i%100000==0) System.out.println(i);
304 i++;
305 }
306
307 in.close();*/
308 } catch (FileNotFoundException e)
309 {
310 System.out.println("File not found");
311 } catch (IOException e)
312 {
313 System.out.println("IOException");
314 } catch (Exception e)
315 {
316 e.printStackTrace();
317 }
318 }
319
320 private void copyNumbers()
321 {
322 try{
323 Statement stmt;
324
325 stmt = con.createStatement();
326
327 stmt.executeUpdate("UPDATE articlecategories SET number=(SELECT number FROM rank WHERE articlecategories.name=rank.name)");
328
329 } catch (Exception e)
330 {
331 e.printStackTrace();
332 }
333 }
334
335 public static void main(String[] args) throws ClassNotFoundException,SQLException,BackingStoreException{
336 CalculatePageRank cal=new CalculatePageRank();
337 Class.forName("com.mysql.jdbc.Driver");
338 String url =
339 "jdbc:mysql://"+dbServer+":3306/"+dbName;
340
341 con = DriverManager.getConnection(
342 url, dbUser, dbPass);
343 cal.calculateLinks();
344 cal.addLabels();
345 //cal.calculateCategories();
346 cal.calculateCategoriesNewOntology();
347 cal.copyNumbers();
348
349 con.close();
350 }
351 }