FireSTARR
Loading...
Searching...
No Matches
Statistics.h
1/* Copyright (c) Queen's Printer for Ontario, 2020. */
2/* Copyright (c) His Majesty the King in Right of Canada as represented by the Minister of Natural Resources, 2021-2025. */
3
4/* SPDX-License-Identifier: AGPL-3.0-or-later */
5
6#pragma once
7#include <algorithm>
8#include <numeric>
9#include <vector>
10#include "Log.h"
11#include "Settings.h"
12#include "Util.h"
13namespace fs
14{
15namespace util
16{
20static constexpr array<MathSize, 100> T_VALUES{
21 3.078,
22 1.886,
23 1.638,
24 1.533,
25 1.476,
26 1.440,
27 1.415,
28 1.397,
29 1.383,
30 1.372,
31 1.363,
32 1.356,
33 1.350,
34 1.345,
35 1.341,
36 1.337,
37 1.333,
38 1.330,
39 1.328,
40 1.325,
41 1.323,
42 1.321,
43 1.319,
44 1.318,
45 1.316,
46 1.315,
47 1.314,
48 1.313,
49 1.311,
50 1.310,
51 1.309,
52 1.309,
53 1.308,
54 1.307,
55 1.306,
56 1.306,
57 1.305,
58 1.304,
59 1.304,
60 1.303,
61 1.303,
62 1.302,
63 1.302,
64 1.301,
65 1.301,
66 1.300,
67 1.300,
68 1.299,
69 1.299,
70 1.299,
71 1.298,
72 1.298,
73 1.298,
74 1.297,
75 1.297,
76 1.297,
77 1.297,
78 1.296,
79 1.296,
80 1.296,
81 1.296,
82 1.295,
83 1.295,
84 1.295,
85 1.295,
86 1.295,
87 1.294,
88 1.294,
89 1.294,
90 1.294,
91 1.294,
92 1.293,
93 1.293,
94 1.293,
95 1.293,
96 1.293,
97 1.293,
98 1.292,
99 1.292,
100 1.292,
101 1.292,
102 1.292,
103 1.292,
104 1.292,
105 1.292,
106 1.291,
107 1.291,
108 1.291,
109 1.291,
110 1.291,
111 1.291,
112 1.291,
113 1.291,
114 1.291,
115 1.291,
116 1.290,
117 1.290,
118 1.290,
119 1.290,
120 1.290};
125{
126public:
131 [[nodiscard]] MathSize min() const noexcept
132 {
133 return percentiles_[0];
134 }
139 [[nodiscard]] MathSize max() const noexcept
140 {
141 return percentiles_[100];
142 }
147 [[nodiscard]] MathSize median() const noexcept
148 {
149 return percentiles_[50];
150 }
155 [[nodiscard]] MathSize mean() const noexcept
156 {
157 return mean_;
158 }
163 [[nodiscard]] MathSize standardDeviation() const noexcept
164 {
165 return standard_deviation_;
166 }
171 [[nodiscard]] MathSize sampleVariance() const noexcept
172 {
173 return sample_variance_;
174 }
179 [[nodiscard]] size_t n() const noexcept
180 {
181 return n_;
182 }
188 [[nodiscard]] MathSize percentile(const uint8_t i) const noexcept
189 {
190#ifdef DEBUG_STATISTICS
191 logging::check_fatal(static_cast<size_t>(i) >= percentiles_.size(),
192 "Invalid percentile %d requested",
193 i);
194#endif
195 return percentiles_.at(i);
196 }
201 [[nodiscard]] MathSize confidenceInterval80() const
202 {
203 return confidenceInterval(1.28);
204 }
209 [[nodiscard]] MathSize confidenceInterval90() const
210 {
211 return confidenceInterval(1.645);
212 }
217 [[nodiscard]] MathSize confidenceInterval95() const
218 {
219 return confidenceInterval(1.96);
220 }
225 [[nodiscard]] MathSize confidenceInterval98() const
226 {
227 return confidenceInterval(2.33);
228 }
233 [[nodiscard]] MathSize confidenceInterval99() const
234 {
235 return confidenceInterval(2.58);
236 }
241 explicit Statistics(vector<MathSize> values)
242 {
243 // values should already be sorted
244 // std::sort(values.begin(), values.end());
245 n_ = values.size();
246 min_ = values[0];
247 max_ = values[n_ - 1];
248 median_ = values[n_ / 2];
249 const auto total_sum = std::accumulate(values.begin(),
250 values.end(),
251 0.0,
252 [](const MathSize t, const MathSize x) { return t + x; });
253 mean_ = total_sum / n_;
254 for (size_t i = 0; i < percentiles_.size(); ++i)
255 {
256 const auto pos = std::min(n_ - 1,
257 static_cast<size_t>(truncl(
258 (static_cast<MathSize>(i) / (percentiles_.size() - 1)) * n_)));
259 // note("For %d values %dth percentile is at %d", n_, i, pos);
260 percentiles_[i] = values[pos];
261 }
262 const auto total = std::accumulate(values.begin(),
263 values.end(),
264 0.0,
265 [this](const MathSize t, const MathSize x) { return t + pow_int<2>(x - mean_); });
266 standard_deviation_ = sqrt(total / n_);
267 sample_variance_ = total / (n_ - 1);
268#ifdef DEBUG_STATISTICS
269 logging::check_equal(min_, percentiles_[0], "min");
270 logging::check_equal(max_, percentiles_[100], "max");
271 logging::check_equal(median_, percentiles_[50], "median");
272#endif
273 }
278 [[nodiscard]] MathSize studentsT() const noexcept
279 {
280 const auto result = T_VALUES[std::min(T_VALUES.size(), n()) - 1]
281 * sqrt(sampleVariance() / n()) / abs(mean());
282 // printf("%ld %f %f %f\n", n(), mean(), sampleVariance(), result);
283 return result;
284 }
290 [[nodiscard]] bool isConfident(const MathSize relative_error) const noexcept
291 {
292 const auto st = studentsT();
293 const auto re = relative_error / (1 + relative_error);
294 // printf("%f <= %f is %s\n", st, re, ((st <= re) ? "true" : "false"));
295 return st <= re;
296 }
303 [[nodiscard]] size_t runsRequired(
304 // const size_t cur_runs,
305 const MathSize relative_error) const
306 {
307 const auto re = relative_error / (1 + relative_error);
308 const std::function<MathSize(size_t)> fct = [this](const size_t i) noexcept {
309 return T_VALUES[std::min(T_VALUES.size(), i) - 1]
310 * sqrt(sampleVariance() / i) / abs(mean());
311 };
312 const auto cur_runs = n();
313 return binary_find_checked(cur_runs, 10 * cur_runs, re, fct) - cur_runs;
314 }
315private:
321 [[nodiscard]] MathSize confidenceInterval(const MathSize z) const
322 {
323 return z * mean_ / sqrt(n_);
324 }
// NOTE(review): standard_deviation_ and sample_variance_ are assigned by the
// constructor and read by the accessors, but their declarations are not
// visible in this listing - confirm they are declared alongside these members.
/// Number of values
328 size_t n_;
/// Minimum value
332 MathSize min_;
/// Maximum value
336 MathSize max_;
/// Mean (average) value
340 MathSize mean_;
/// Median value
344 MathSize median_;
/// Array of all integer percentile values (indices 0..100)
356 array<MathSize, 101> percentiles_{};
357};
358}
359}
Provides statistics calculation for vectors of values.
Definition Statistics.h:125
MathSize confidenceInterval90() const
90% Confidence Interval
Definition Statistics.h:209
MathSize studentsT() const noexcept
Calculate Student's T value.
Definition Statistics.h:278
MathSize sample_variance_
Sample variance.
Definition Statistics.h:352
MathSize confidenceInterval80() const
80% Confidence Interval
Definition Statistics.h:201
MathSize percentile(const uint8_t i) const noexcept
Value for given percentile.
Definition Statistics.h:188
MathSize max_
Maximum value.
Definition Statistics.h:336
Statistics(vector< MathSize > values)
Calculates statistics on a vector of values.
Definition Statistics.h:241
array< MathSize, 101 > percentiles_
Array of all integer percentile values.
Definition Statistics.h:356
MathSize confidenceInterval98() const
98% Confidence Interval
Definition Statistics.h:225
MathSize max() const noexcept
Maximum value.
Definition Statistics.h:139
bool isConfident(const MathSize relative_error) const noexcept
Whether or not we have less than the relative error and can be confident in the results.
Definition Statistics.h:290
MathSize confidenceInterval99() const
99% Confidence Interval
Definition Statistics.h:233
MathSize min_
Minimum value.
Definition Statistics.h:332
size_t n_
Number of values.
Definition Statistics.h:328
MathSize confidenceInterval(const MathSize z) const
Calculate Confidence Interval for given z value.
Definition Statistics.h:321
MathSize confidenceInterval95() const
95% Confidence Interval
Definition Statistics.h:217
MathSize median() const noexcept
Median value.
Definition Statistics.h:147
MathSize standard_deviation_
Standard Deviation.
Definition Statistics.h:348
MathSize mean_
Mean (average) value.
Definition Statistics.h:340
MathSize sampleVariance() const noexcept
Sample Variance.
Definition Statistics.h:171
MathSize median_
Median value.
Definition Statistics.h:344
MathSize min() const noexcept
Minimum value.
Definition Statistics.h:131
size_t n() const noexcept
Number of data points in the set.
Definition Statistics.h:179
MathSize mean() const noexcept
Mean (average) value.
Definition Statistics.h:155
MathSize standardDeviation() const noexcept
Standard Deviation.
Definition Statistics.h:163
size_t runsRequired(const MathSize relative_error) const
Estimate how many more runs are required to achieve desired confidence.
Definition Statistics.h:303
Definition util.py:1