001    /**
002     * Copyright (C) 2007-2008, Jens Lehmann
003     *
004     * This file is part of DL-Learner.
005     * 
006     * DL-Learner is free software; you can redistribute it and/or modify
007     * it under the terms of the GNU General Public License as published by
008     * the Free Software Foundation; either version 3 of the License, or
009     * (at your option) any later version.
010     *
011     * DL-Learner is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014     * GNU General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018     *
019     */
020    package org.dllearner.scripts;
021    
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.prefs.BackingStoreException;
import java.util.prefs.Preferences;

import org.ini4j.IniFile;
036    
037    /**
038     * Fills that database needed for running DBpedia Navigator.
039     * First move the mentioned DBpedia files to the specified
040     * directory, then execute this script. Database settings are
041     * taken from the settings.ini file of DBpedia Navigator.
042     * 
043     * @author Sebastian Knappe
044     * @author Jens Lehmann
045     *
046     */
047    @SuppressWarnings("unused")
048    public class CalculatePageRank {
049            
050            private static String datasetDir;
051            private static String dbServer;
052            private static String dbName;
053            private static String dbUser;
054            private static String dbPass;
055            
056            private String wikilinks;
057            private String labels;
058            private String categories;
059            private String categoriesNewOntology;
060            private String categoriesNewOntology2;
061            
062            private static Connection con;
063            
064            public CalculatePageRank() throws BackingStoreException
065            {
066                    // reading values from ini file
067                    String iniFile = "../src/dbpedia-navigator/settings.ini";
068                    Preferences prefs = new IniFile(new File(iniFile));
069                    dbServer = prefs.node("database").get("server", null);
070                    dbName = prefs.node("database").get("name", null);
071                    dbUser = prefs.node("database").get("user", null);
072                    dbPass = prefs.node("database").get("pass", null);
073                    datasetDir = prefs.node("database").get("datasetDir", null);
074                    
075                    wikilinks = datasetDir + "pagelinks_en.nt";
076                    labels = datasetDir + "articles_label_en.nt";
077                    categories = datasetDir + "yago_en.nt";
078                    categoriesNewOntology = datasetDir + "dbpedia-ontology-schema.nt";
079                    categoriesNewOntology2 = datasetDir + "dbpedia-ontology-types.nt";
080            }
081            
082            private void calculateLinks()
083            {
084                    try{
085                            Statement stmt;
086                            ResultSet rs;
087                            int number;
088    
089                            stmt = con.createStatement();
090                            BufferedReader in = new BufferedReader(new FileReader(wikilinks));
091                            
092                            String line;
093                            String[] split;
094                            String name;
095                            int i=0;
096                            while ((line=in.readLine())!=null)
097                            {
098                                    split=line.split(" ");
099                                    name=split[2].substring(1, split[2].length()-1);
100                                    rs=stmt.executeQuery("SELECT number FROM rank WHERE name='"+name+"'");
101                                    if (rs.next()){
102                                            number=rs.getInt(1);
103                                            number++;
104                                            stmt.executeUpdate("UPDATE rank SET number="+number+" WHERE name='"+name+"'");
105                                    }
106                                    else{
107                                            try{
108                                                    stmt.executeUpdate("INSERT INTO rank (name,number) VALUES ('"+name+"',1)");
109                                            }catch(Exception e)
110                                            {}
111                                    }
112                                    if (i%100000==0) System.out.println(i);
113                                    i++;
114                            }
115                            
116                            in.close();
117                    } catch (FileNotFoundException e)
118                    {
119                            System.out.println("File not found");
120                    } catch (IOException e)
121                    {
122                            System.out.println("IOException");
123                    } catch (Exception e)
124                    {
125                            e.printStackTrace();
126                    }
127            }
128            
129            private void addLabels()
130            {
131                    try{
132                            Statement stmt;
133                            ResultSet rs;
134                            
135                            stmt = con.createStatement();
136                            BufferedReader in = new BufferedReader(new FileReader(labels));
137                            
138                            String line;
139                            String[] split;
140                            String name;
141                            String label;
142                            int i=0;
143                            while ((line=in.readLine())!=null)
144                            {
145                                    split=line.split(">");
146                                    name=split[0].substring(1);
147                                    label=split[2].substring(split[2].indexOf("\"")+1, split[2].lastIndexOf("\""));
148                                    rs=stmt.executeQuery("SELECT number FROM rank WHERE name='"+name+"'");
149                                    if (rs.next()){
150                                            stmt.executeUpdate("UPDATE rank SET label=\""+label+"\" WHERE name='"+name+"'");
151                                    }
152                                    else{
153                                            try{
154                                                    stmt.executeUpdate("INSERT INTO rank (name,label) VALUES ('"+name+"',\""+label+"\")");
155                                            }catch(Exception e)
156                                            {}
157                                    }
158                                    if (i%100000==0) System.out.println(i);
159                                    i++;
160                            }
161                            
162                            in.close();
163                    } catch (FileNotFoundException e)
164                    {
165                            System.out.println("File not found");
166                    } catch (IOException e)
167                    {
168                            System.out.println("IOException");
169                    } catch (Exception e)
170                    {
171                            e.printStackTrace();
172                    }
173            }
174            
175            private void calculateCategories()
176            {
177                    try{
178                            Statement stmt;
179                                                    
180                            stmt = con.createStatement();
181                            
182                            BufferedReader in = new BufferedReader(new FileReader(categories));
183                            
184                            String line;
185                            String[] split;
186                            String name;
187                            String label;
188                            String pred;
189                            int i=0;
190                            while ((line=in.readLine())!=null)
191                            {
192                                    split=line.split(">");
193                                    name=split[0].substring(1);
194                                    pred=split[1].substring(2);
195                                    if (pred.equals("http://www.w3.org/2000/01/rdf-schema#label"))
196                                            label=split[2].substring(split[2].indexOf("\"")+1, split[2].lastIndexOf("\""));
197                                    else
198                                            label=split[2].substring(2);
199                                    if (pred.equals("http://www.w3.org/2000/01/rdf-schema#label")){
200                                            try{
201                                                    stmt.executeUpdate("INSERT INTO categories (category,label) VALUES (\""+name+"\",\""+label+"\")");
202                                            }catch(Exception e)
203                                            {}
204                                    }
205                                    else{
206                                            if (name.startsWith("http://dbpedia.org/resource")){
207                                                    try{
208                                                            stmt.executeUpdate("INSERT INTO articlecategories (name,category) VALUES ('"+name+"','"+label+"')");
209                                                    }catch(Exception e)
210                                                    {}
211                                            }else{
212                                                    try{
213                                                            stmt.executeUpdate("INSERT INTO classhierarchy (father,child) VALUES ('"+label+"','"+name+"')");
214                                                    }catch(Exception e)
215                                                    {}
216                                            }
217                                    }
218                                    if (i%100000==0) System.out.println(i);
219                                    i++;
220                            }
221                            
222                            in.close();
223                    } catch (FileNotFoundException e)
224                    {
225                            System.out.println("File not found");
226                    } catch (IOException e)
227                    {
228                            System.out.println("IOException");
229                    } catch (Exception e)
230                    {
231                            e.printStackTrace();
232                    }
233            }
234            
235            private void calculateCategoriesNewOntology()
236            {
237                    try{
238                            Statement stmt;
239                                                    
240                            stmt = con.createStatement();
241                            
242                            BufferedReader in = new BufferedReader(new FileReader(categoriesNewOntology));
243                            
244                            String line;
245                            String[] split;
246                            String name;
247                            String label;
248                            String pred;
249                            int i=0;
250                            boolean isClassLabel;
251                            String className;
252                            while ((line=in.readLine())!=null)
253                            {
254                                    split=line.split(">");
255                                    if (split.length<3) continue;
256                                    name=split[0].substring(1);
257                                    pred=split[1].substring(2);
258                                    if (pred.equals("http://www.w3.org/2000/01/rdf-schema#label")){
259                                            label=split[2].substring(split[2].indexOf("\"")+1, split[2].lastIndexOf("\""));
260                                            if (name.length()>name.lastIndexOf("/")+1) className=name.substring(name.lastIndexOf("/")+1,name.lastIndexOf("/")+2);
261                                            else className="t";
262                                            if (className.toLowerCase().equals(className))
263                                                    isClassLabel=false;
264                                            else
265                                                    isClassLabel=true;
266                                    }
267                                    else{
268                                            label=split[2].substring(2);
269                                            isClassLabel=false;
270                                    }
271                                    if (pred.equals("http://www.w3.org/2000/01/rdf-schema#label")&&isClassLabel){
272                                            try{
273                                                    stmt.executeUpdate("INSERT INTO categories (category,label) VALUES (\""+name+"\",\""+label+"\")");
274                                            }catch(Exception e)
275                                            {}
276                                    }
277                                    else{
278                                            if (pred.equals("http://www.w3.org/2000/01/rdf-schema#subClassOf")){
279                                                    try{
280                                                            stmt.executeUpdate("INSERT INTO classhierarchy (father,child) VALUES ('"+label+"','"+name+"')");
281                                                    }catch(Exception e)
282                                                    {}
283                                            }
284                                    }
285                                    if (i%100000==0) System.out.println(i);
286                                    i++;
287                            }
288                            
289                            in.close();
290                            
291                            /*in = new BufferedReader(new FileReader(categoriesNewOntology2));
292                            
293                            i=0;
294                            while ((line=in.readLine())!=null)
295                            {
296                                    split=line.split(">");
297                                    name=split[0].substring(1);
298                                    label=split[2].substring(2);
299                                    try{
300                                            stmt.executeUpdate("INSERT INTO articlecategories (name,category) VALUES ('"+name+"','"+label+"')");
301                                    }catch(Exception e)
302                                    {}
303                                    if (i%100000==0) System.out.println(i);
304                                    i++;
305                            }
306                            
307                            in.close();*/
308                    } catch (FileNotFoundException e)
309                    {
310                            System.out.println("File not found");
311                    } catch (IOException e)
312                    {
313                            System.out.println("IOException");
314                    } catch (Exception e)
315                    {
316                            e.printStackTrace();
317                    }
318            }
319            
320            private void copyNumbers()
321            {
322                    try{
323                            Statement stmt;
324                                                    
325                            stmt = con.createStatement();
326                            
327                            stmt.executeUpdate("UPDATE articlecategories SET number=(SELECT number FROM rank WHERE articlecategories.name=rank.name)");
328                                    
329                    } catch (Exception e)
330                    {
331                            e.printStackTrace();
332                    }
333            }
334            
335            public static void main(String[] args) throws ClassNotFoundException,SQLException,BackingStoreException{
336                    CalculatePageRank cal=new CalculatePageRank();
337                    Class.forName("com.mysql.jdbc.Driver");
338                    String url =
339                "jdbc:mysql://"+dbServer+":3306/"+dbName;
340    
341                    con = DriverManager.getConnection(
342                                     url, dbUser, dbPass);
343                    cal.calculateLinks();
344                    cal.addLabels();
345                    //cal.calculateCategories();
346                    cal.calculateCategoriesNewOntology();
347                    cal.copyNumbers();
348                    
349                    con.close();
350            }
351    }