001 /**
002 * Copyright (C) 2007-2011, Jens Lehmann
003 *
004 * This file is part of DL-Learner.
005 *
006 * DL-Learner is free software; you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation; either version 3 of the License, or
009 * (at your option) any later version.
010 *
011 * DL-Learner is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program. If not, see <http://www.gnu.org/licenses/>.
018 */
019
020 package org.dllearner.utilities.statistics;
021
022 import java.text.DecimalFormat;
023 import java.util.Set;
024
025 /**
026 * Utility class for calculating the mean and standard deviation of a given set
027 * of numbers. The class also contains convenience methods for printing values.
028 *
029 * @author Jens Lehmann
030 *
031 */
032 public class Stat {
033
034 private int count = 0;
035 private double sum = 0;
036 private double squareSum = 0;
037 private double min = Double.MAX_VALUE;
038 private double max = Double.MIN_NORMAL;
039 //used to give a good percentage output
040 private DecimalFormat df = new DecimalFormat( ".00%" );
041
042 public Stat() {
043
044 }
045
046 /**
047 * Creates a new stat object by merging two stat objects. The result is the same as if
048 * the numbers, which have been added to stat1 and stat2 would have been added to this
049 * stat object.
050 * @param stat1 Statistical object 1.
051 * @param stat2 Statistical object 2.
052 */
053 public Stat(Stat stat1, Stat stat2) {
054 count = stat1.count + stat2.count;
055 sum = stat1.sum + stat2.sum;
056 squareSum = stat1.squareSum + stat2.squareSum;
057 min = Math.min(stat1.min, stat2.min);
058 max = Math.max(stat1.max, stat2.max);
059 }
060
061 /**
062 * Creates a new stat object by merging several stat objects. The result is the same as if
063 * the numbers, which have been added to each stat would have been added to this
064 * stat object.
065 * @param stat1 Statistical object 1.
066 * @param stat2 Statistical object 2.
067 */
068 public Stat(Set<Stat> stats) {
069 for(Stat stat : stats){
070 count += stat.count;
071 sum += stat.sum;
072 squareSum += stat.squareSum;
073 min = Math.min(min, stat.min);
074 max = Math.max(max, stat.max);
075 }
076 }
077
078 /**
079 * Add a number to this object.
080 *
081 * @param number
082 * The new number.
083 */
084 public void addNumber(double number) {
085 count++;
086 sum += number;
087 squareSum += number * number;
088 if(number<min)
089 min=number;
090 if(number>max)
091 max=number;
092 }
093
094 /**
095 * Gets the number of numbers.
096 *
097 * @return The number of numbers.
098 */
099 public int getCount() {
100 return count;
101 }
102
103 /**
104 * Gets the sum of all numbers.
105 *
106 * @return The sum of all numbers.
107 */
108 public double getSum() {
109 return sum;
110 }
111
112 /**
113 * Gets the mean of all numbers.
114 *
115 * @return The mean of all numbers.
116 */
117 public double getMean() {
118 return sum / count;
119 }
120
121 /**
122 * Gets the mean of all numbers as percentage
123 * *100 so 0.5678 -> "56.78%"
124 * @return The mean as formatted string.
125 */
126 public String getMeanAsPercentage(){
127 return df.format(getMean());
128 }
129
130 /**
131 * Gets the standard deviation of all numbers.
132 *
133 * @return The standard deviation of all numbers.
134 */
135 public double getStandardDeviation() {
136 if(count <= 1)
137 return 0.0;
138
139 // formula from http://de.wikipedia.org/wiki/Standardabweichung
140 double val = (count*squareSum-sum*sum)/(count*(count-1));
141 double root = Math.sqrt(val);
142
143 // due to rounding errors it can happen that "val" is negative
144 // this means that the real value is 0 (or very close to it), so
145 // we return 0
146 if(Double.isNaN(root))
147 return 0.0;
148 else
149 return root;
150 }
151
152 /**
153 * @return the min
154 */
155 public double getMin() {
156 return min;
157 }
158
159 /**
160 * @return the max
161 */
162 public double getMax() {
163 return max;
164 }
165
166 public String prettyPrint() {
167 return prettyPrint("");
168 }
169
170 public String prettyPrint(String unit) {
171 if(count > 0) {
172 DecimalFormat df = new DecimalFormat();
173 String str = "av. " + df.format(getMean()) + unit;
174 str += " (deviation " + df.format(getStandardDeviation()) + unit + "; ";
175 str += "min " + df.format(getMin()) + unit + "; ";
176 str += "max " + df.format(getMax()) + unit + "; ";
177 str += "count " + count + ")";
178 return str;
179 } else {
180 return "no data collected";
181 }
182 }
183
184 public String prettyPrint(String unit, DecimalFormat df) {
185 String str = "av. " + df.format(getMean()) + unit;
186 str += " (deviation " + df.format(getStandardDeviation()) + unit + "; ";
187 str += "min " + df.format(getMin()) + unit + "; ";
188 str += "max " + df.format(getMax()) + unit + ")";
189 return str;
190 }
191
192 /**
193 * Pretty prints the results under the assumption that the input
194 * values are time spans measured in nano seconds.
195 *
196 * @see System#nanoTime()
197 * @return A string summarising statistical values.
198 */
199 // public String prettyPrintNanoSeconds() {
200 // DecimalFormat df = new DecimalFormat();
201 // String str = "av. " + df.format(getMean()) + unit;
202 // str += " (deviation " + df.format(getStandardDeviation()) + unit + "; ";
203 // str += "min " + df.format(getMin()) + unit + "; ";
204 // str += "max " + df.format(getMax()) + unit + ")";
205 // return str;
206 // }
207
208 }