001    /**
002     * Copyright (C) 2007-2011, Jens Lehmann
003     *
004     * This file is part of DL-Learner.
005     *
006     * DL-Learner is free software; you can redistribute it and/or modify
007     * it under the terms of the GNU General Public License as published by
008     * the Free Software Foundation; either version 3 of the License, or
009     * (at your option) any later version.
010     *
011     * DL-Learner is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
014     * GNU General Public License for more details.
015     *
016     * You should have received a copy of the GNU General Public License
017     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
018     */
019    
020    package org.dllearner.utilities.statistics;
021    
022    import java.text.DecimalFormat;
023    import java.util.Set;
024    
025    /**
026     * Utility class for calculating the mean and standard deviation of a given set
027     * of numbers. The class also contains convenience methods for printing values.
028     * 
029     * @author Jens Lehmann
030     * 
031     */
032    public class Stat {
033    
034        private int count = 0;
035        private double sum = 0;
036        private double squareSum = 0;
037        private double min = Double.MAX_VALUE;
038        private double max = Double.MIN_NORMAL;
039        //used to give a good percentage output
040        private DecimalFormat df = new DecimalFormat( ".00%" ); 
041    
042        public Stat() {
043            
044        }
045        
046        /**
047         * Creates a new stat object by merging two stat objects. The result is the same as if
048         * the numbers, which have been added to stat1 and stat2 would have been added to this
049         * stat object.
050         * @param stat1 Statistical object 1.
051         * @param stat2 Statistical object 2.
052         */
053        public Stat(Stat stat1, Stat stat2) {
054            count = stat1.count + stat2.count;
055            sum = stat1.sum + stat2.sum;
056            squareSum = stat1.squareSum + stat2.squareSum;
057            min = Math.min(stat1.min, stat2.min);
058            max = Math.max(stat1.max, stat2.max);
059        }
060        
061        /**
062         * Creates a new stat object by merging several stat objects. The result is the same as if
063         * the numbers, which have been added to each stat would have been added to this
064         * stat object.
065         * @param stat1 Statistical object 1.
066         * @param stat2 Statistical object 2.
067         */
068        public Stat(Set<Stat> stats) {
069            for(Stat stat : stats){
070                    count += stat.count;
071                    sum += stat.sum;
072                    squareSum += stat.squareSum;
073                    min = Math.min(min, stat.min);
074                    max = Math.max(max, stat.max);
075            }
076        }
077        
078        /**
079         * Add a number to this object.
080         * 
081         * @param number
082         *            The new number.
083         */
084        public void addNumber(double number) {
085            count++;
086            sum += number;
087            squareSum += number * number;
088            if(number<min)
089                    min=number;
090            if(number>max)
091                    max=number;
092        }
093    
094        /**
095         * Gets the number of numbers.
096         * 
097         * @return The number of numbers.
098         */
099        public int getCount() {
100            return count;
101        }
102    
103        /**
104         * Gets the sum of all numbers.
105         * 
106         * @return The sum of all numbers.
107         */
108        public double getSum() {
109            return sum;
110        }
111    
112        /**
113         * Gets the mean of all numbers.
114         * 
115         * @return The mean of all numbers.
116         */
117        public double getMean() {
118            return sum / count;
119        }
120        
121        /**
122         * Gets the mean of all numbers as percentage 
123         * *100 so 0.5678 -> "56.78%"
124         * @return The mean as formatted string.
125         */
126        public String getMeanAsPercentage(){
127            return df.format(getMean());
128        }
129    
130        /**
131         * Gets the standard deviation of all numbers.
132         * 
133         * @return The standard deviation of all numbers.
134         */
135        public double getStandardDeviation() {      
136            if(count <= 1)
137                    return 0.0;
138            
139            // formula from http://de.wikipedia.org/wiki/Standardabweichung
140            double val = (count*squareSum-sum*sum)/(count*(count-1));
141            double root = Math.sqrt(val);
142            
143            // due to rounding errors it can happen that "val" is negative
144            // this means that the real value is 0 (or very close to it), so
145            // we return 0
146            if(Double.isNaN(root)) 
147                    return 0.0;
148            else
149                    return root;
150        }
151    
152            /**
153             * @return the min
154             */
155            public double getMin() {
156                    return min;
157            }
158    
159            /**
160             * @return the max
161             */
162            public double getMax() {
163                    return max;
164            }
165    
166            public String prettyPrint() {
167                    return prettyPrint("");
168            }
169            
170            public String prettyPrint(String unit) {
171                    if(count > 0) {
172                            DecimalFormat df = new DecimalFormat();
173                            String str = "av. " + df.format(getMean()) + unit;
174                            str += " (deviation " + df.format(getStandardDeviation()) + unit + "; ";
175                            str += "min " + df.format(getMin()) + unit + "; ";
176                            str += "max " + df.format(getMax()) + unit + "; ";
177                            str += "count " + count + ")";
178                            return str;
179                    } else {
180                            return "no data collected";
181                    }
182            }       
183            
184            public String prettyPrint(String unit, DecimalFormat df) {
185                    String str = "av. " + df.format(getMean()) + unit;
186                    str += " (deviation " + df.format(getStandardDeviation()) + unit + "; ";
187                    str += "min " + df.format(getMin()) + unit + "; ";
188                    str += "max " + df.format(getMax()) + unit + ")";               
189                    return str;
190            }       
191            
192            /**
193             * Pretty prints the results under the assumption that the input
194             * values are time spans measured in nano seconds.
195             * 
196             * @see System#nanoTime()
197             * @return A string summarising statistical values.
198             */
199    //      public String prettyPrintNanoSeconds() {
200    //              DecimalFormat df = new DecimalFormat();
201    //              String str = "av. " + df.format(getMean()) + unit;
202    //              str += " (deviation " + df.format(getStandardDeviation()) + unit + "; ";
203    //              str += "min " + df.format(getMin()) + unit + "; ";
204    //              str += "max " + df.format(getMax()) + unit + ")";               
205    //              return str;             
206    //      }
207            
208    }