FireSTARR/Statistics_8h_source.html

/* Copyright (c) Queen's Printer for Ontario, 2020. */

/* Copyright (c) His Majesty the King in Right of Canada as represented by the Minister of Natural Resources, 2021-2025. */


/* SPDX-License-Identifier: AGPL-3.0-or-later */


#pragma once

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

#include "Log.h"
#include "Settings.h"
#include "Util.h"

namespace fs

{

namespace util

{

/**
 * \brief Lookup table of critical values used when estimating relative error.
 *
 * Statistics::studentsT() and Statistics::runsRequired() index this table with
 * `min(T_VALUES.size(), n) - 1`, so entry [k] is the value used for k + 1
 * samples and the final entry is reused for any sample count beyond the table.
 *
 * NOTE(review): the values look like the one-tailed alpha = 0.10 (two-sided
 * 80%) column of a Student's t table for 1..100 degrees of freedom - TODO
 * confirm against the table they were taken from.
 */
static constexpr array<MathSize, 100> T_VALUES{

  3.078,

  1.886,

  1.638,

  1.533,

  1.476,

  1.440,

  1.415,

  1.397,

  1.383,

  1.372,

  1.363,

  1.356,

  1.350,

  1.345,

  1.341,

  1.337,

  1.333,

  1.330,

  1.328,

  1.325,

  1.323,

  1.321,

  1.319,

  1.318,

  1.316,

  1.315,

  1.314,

  1.313,

  1.311,

  1.310,

  1.309,

  1.309,

  1.308,

  1.307,

  1.306,

  1.306,

  1.305,

  1.304,

  1.304,

  1.303,

  1.303,

  1.302,

  1.302,

  1.301,

  1.301,

  1.300,

  1.300,

  1.299,

  1.299,

  1.299,

  1.298,

  1.298,

  1.298,

  1.297,

  1.297,

  1.297,

  1.297,

  1.296,

  1.296,

  1.296,

  1.296,

  1.295,

  1.295,

  1.295,

  1.295,

  1.295,

  1.294,

  1.294,

  1.294,

  1.294,

  1.294,

  1.293,

  1.293,

  1.293,

  1.293,

  1.293,

  1.293,

  1.292,

  1.292,

  1.292,

  1.292,

  1.292,

  1.292,

  1.292,

  1.292,

  1.291,

  1.291,

  1.291,

  1.291,

  1.291,

  1.291,

  1.291,

  1.291,

  1.291,

  1.291,

  1.290,

  1.290,

  1.290,

  1.290,

  1.290};


class Statistics

{

public:


  [[nodiscard]] MathSize min() const noexcept

  {

    return percentiles_[0];

  }


  [[nodiscard]] MathSize max() const noexcept

  {

    return percentiles_[100];

  }


  [[nodiscard]] MathSize median() const noexcept

  {

    return percentiles_[50];

  }


  [[nodiscard]] MathSize mean() const noexcept

  {

    return mean_;

  }


  [[nodiscard]] MathSize standardDeviation() const noexcept

  {

    return standard_deviation_;

  }


  [[nodiscard]] MathSize sampleVariance() const noexcept

  {

    return sample_variance_;

  }


  [[nodiscard]] size_t n() const noexcept

  {

    return n_;

  }


  [[nodiscard]] MathSize percentile(const uint8_t i) const noexcept

  {

#ifdef DEBUG_STATISTICS

    logging::check_fatal(static_cast<size_t>(i) >= percentiles_.size(),

                         "Invalid percentile %d requested",

                         i);

#endif

    return percentiles_.at(i);

  }


  [[nodiscard]] MathSize confidenceInterval80() const

  {

    return confidenceInterval(1.28);

  }


  [[nodiscard]] MathSize confidenceInterval90() const

  {

    return confidenceInterval(1.645);

  }


  [[nodiscard]] MathSize confidenceInterval95() const

  {

    return confidenceInterval(1.96);

  }


  [[nodiscard]] MathSize confidenceInterval98() const

  {

    return confidenceInterval(2.33);

  }


  [[nodiscard]] MathSize confidenceInterval99() const

  {

    return confidenceInterval(2.58);

  }


  explicit Statistics(vector<MathSize> values)

  {

    // values should already be sorted

    //  std::sort(values.begin(), values.end());

    n_ = values.size();

    min_ = values[0];

    max_ = values[n_ - 1];

    median_ = values[n_ / 2];

    const auto total_sum = std::accumulate(values.begin(),

                                           values.end(),

                                           0.0,

                                           [](const MathSize t, const MathSize x) { return t + x; });

    mean_ = total_sum / n_;

    for (size_t i = 0; i < percentiles_.size(); ++i)

    {

      const auto pos = std::min(n_ - 1,

                                static_cast<size_t>(truncl(

                                  (static_cast<MathSize>(i) / (percentiles_.size() - 1)) * n_)));

      // note("For %d values %dth percentile is at %d", n_, i, pos);

      percentiles_[i] = values[pos];

    }

    const auto total = std::accumulate(values.begin(),

                                       values.end(),

                                       0.0,

                                       [this](const MathSize t, const MathSize x) { return t + pow_int<2>(x - mean_); });

    standard_deviation_ = sqrt(total / n_);

    sample_variance_ = total / (n_ - 1);

#ifdef DEBUG_STATISTICS

    logging::check_equal(min_, percentiles_[0], "min");

    logging::check_equal(max_, percentiles_[100], "max");

    logging::check_equal(median_, percentiles_[50], "median");

#endif

  }


  [[nodiscard]] MathSize studentsT() const noexcept

  {

    const auto result = T_VALUES[std::min(T_VALUES.size(), n()) - 1]

                      * sqrt(sampleVariance() / n()) / abs(mean());

    // printf("%ld %f %f %f\n", n(), mean(), sampleVariance(), result);

    return result;

  }


  [[nodiscard]] bool isConfident(const MathSize relative_error) const noexcept

  {

    const auto st = studentsT();

    const auto re = relative_error / (1 + relative_error);

    // printf("%f <= %f is %s\n", st, re, ((st <= re) ? "true" : "false"));

    return st <= re;

  }


  [[nodiscard]] size_t runsRequired(

    // const size_t cur_runs,

    const MathSize relative_error) const

  {

    const auto re = relative_error / (1 + relative_error);

    const std::function<MathSize(size_t)> fct = [this](const size_t i) noexcept {

      return T_VALUES[std::min(T_VALUES.size(), i) - 1]

           * sqrt(sampleVariance() / i) / abs(mean());

    };

    const auto cur_runs = n();

    return binary_find_checked(cur_runs, 10 * cur_runs, re, fct) - cur_runs;

  }


private:


  [[nodiscard]] MathSize confidenceInterval(const MathSize z) const

  {

    return z * mean_ / sqrt(n_);

  }


  size_t n_;

  MathSize min_;

  MathSize max_;

  MathSize mean_;

  MathSize median_;

  MathSize standard_deviation_;

  MathSize sample_variance_;

  array<MathSize, 101> percentiles_{};

};


}

}

fs::util::Statistics
Provides statistics calculation for vectors of values.
Definition Statistics.h:125

fs::util::Statistics::confidenceInterval90
MathSize confidenceInterval90() const
90% Confidence Interval
Definition Statistics.h:209

fs::util::Statistics::studentsT
MathSize studentsT() const noexcept
Calculate Student's T value.
Definition Statistics.h:278

fs::util::Statistics::sample_variance_
MathSize sample_variance_
Sample variance.
Definition Statistics.h:352

fs::util::Statistics::confidenceInterval80
MathSize confidenceInterval80() const
80% Confidence Interval
Definition Statistics.h:201

fs::util::Statistics::percentile
MathSize percentile(const uint8_t i) const noexcept
Value for given percentile.
Definition Statistics.h:188

fs::util::Statistics::max_
MathSize max_
Maximum value.
Definition Statistics.h:336

fs::util::Statistics::Statistics
Statistics(vector< MathSize > values)
Calculates statistics on a vector of values.
Definition Statistics.h:241

fs::util::Statistics::percentiles_
array< MathSize, 101 > percentiles_
Array of all integer percentile values.
Definition Statistics.h:356

fs::util::Statistics::confidenceInterval98
MathSize confidenceInterval98() const
98% Confidence Interval
Definition Statistics.h:225

fs::util::Statistics::max
MathSize max() const noexcept
Maximum value.
Definition Statistics.h:139

fs::util::Statistics::isConfident
bool isConfident(const MathSize relative_error) const noexcept
Whether or not we have less than the relative error and can be confident in the results.
Definition Statistics.h:290

fs::util::Statistics::confidenceInterval99
MathSize confidenceInterval99() const
99% Confidence Interval
Definition Statistics.h:233

fs::util::Statistics::min_
MathSize min_
Minimum value.
Definition Statistics.h:332

fs::util::Statistics::n_
size_t n_
Number of values.
Definition Statistics.h:328

fs::util::Statistics::confidenceInterval
MathSize confidenceInterval(const MathSize z) const
Calculate Confidence Interval for given z value.
Definition Statistics.h:321

fs::util::Statistics::confidenceInterval95
MathSize confidenceInterval95() const
95% Confidence Interval
Definition Statistics.h:217

fs::util::Statistics::median
MathSize median() const noexcept
Median value.
Definition Statistics.h:147

fs::util::Statistics::standard_deviation_
MathSize standard_deviation_
Standard Deviation.
Definition Statistics.h:348

fs::util::Statistics::mean_
MathSize mean_
Mean (average) value.
Definition Statistics.h:340

fs::util::Statistics::sampleVariance
MathSize sampleVariance() const noexcept
Sample Variance.
Definition Statistics.h:171

fs::util::Statistics::median_
MathSize median_
Median value.
Definition Statistics.h:344

fs::util::Statistics::min
MathSize min() const noexcept
Minimum value.
Definition Statistics.h:131

fs::util::Statistics::n
size_t n() const noexcept
Number of data points in the set.
Definition Statistics.h:179

fs::util::Statistics::mean
MathSize mean() const noexcept
Mean (average) value.
Definition Statistics.h:155

fs::util::Statistics::standardDeviation
MathSize standardDeviation() const noexcept
Standard Deviation.
Definition Statistics.h:163

fs::util::Statistics::runsRequired
size_t runsRequired(const MathSize relative_error) const
Estimate how many more runs are required to achieve desired confidence.
Definition Statistics.h:303

util
Definition util.py:1