Module redvox.common.stats_helper

Support for computing statistics Requires numpy

Expand source code
"""
Support for computing statistics
Requires numpy
"""

from typing import List, Union

import numpy as np


class StatsContainer:
    """
    Helper class to compute statistics for a set of objects.
    Stores the mean, std dev, number of data points (count), and best value per set object.
    Calculates mean of means, mean of variance, variance of means, total variance,
    and total std dev for the set of objects

    Properties:
        mean_array: the mean of each object in the set

        std_dev_array: the std_dev of each object in the set

        count_array: the number of elements in each object in the set

        best_value: the best value to represent the set

        container_id: a string that identifies the StatsContainer
    """

    def __init__(self, container_id: str) -> None:
        """
        Initialize the StatsContainer

        :param container_id: a string describing the container
        """
        self.mean_array: List[Union[float, int]] = []
        self.std_dev_array: List[Union[float, int]] = []
        self.count_array: List[Union[float, int]] = []
        self.best_value: float = 0.0
        self.container_id: str = container_id

    def mean_of_means(self) -> float:
        """
        :return: mean of all means
        """
        # convert non-numbers to 0s
        counts: np.ndarray = np.nan_to_num(self.count_array)
        if np.sum(counts) == 0:
            return np.nan
        # weight each mean by the number of elements in it
        total_means: np.ndarray = np.prod([np.nan_to_num(self.mean_array), counts], axis=0)
        # if sum(counts) is 0, change sum(counts) to 1 to avoid divide by 0 errors
        return np.sum(total_means) / np.sum(counts)

    def mean_of_variance(self) -> float:
        """
        :return: mean of the variances
        """
        # convert non-numbers to 0s
        counts: np.ndarray = np.nan_to_num(self.count_array)
        if np.sum(counts) == 0:
            return np.nan
        std_devs: np.ndarray = np.nan_to_num(self.std_dev_array)
        # variance is std dev squared, which is then weighted by the number of elements for that variance
        total_vars: np.ndarray = np.prod([counts, std_devs, std_devs], axis=0)
        # if sum(counts) is 0, change sum(counts) to 1 to avoid divide by 0 errors
        return np.sum(total_vars) / np.sum(counts)

    def variance_of_means(self) -> float:
        """
        :return: variance of the means
        """
        counts: np.ndarray = np.nan_to_num(self.count_array)
        if np.sum(counts) == 0:
            return np.nan
        # get the difference of individual means and total mean
        mean_vars: np.ndarray = np.subtract(np.nan_to_num(self.mean_array), self.mean_of_means())
        # square the differences then weight them by number of elements
        total: np.ndarray = np.prod([mean_vars, mean_vars, counts], axis=0)
        # if sum(counts) is 0, change sum(counts) to 1 to avoid divide by 0 errors
        return np.sum(total) / np.sum(counts)

    def total_variance(self) -> float:
        """
        :return: total variance of all elements
        """
        # mean of variances + variance of means = total variance
        return self.mean_of_variance() + self.variance_of_means()

    def total_std_dev(self) -> float:
        """
        :return: std dev of all elements (sqrt of total variance)
        """
        return np.sqrt(self.total_variance())  # std dev is square root of variance

    def add(
        self,
        mean: Union[float, int],
        std_dev: Union[float, int],
        count: Union[float, int],
    ) -> None:
        """
        Put an element into the arrays

        :param mean: a mean
        :param std_dev: the std dev for the mean
        :param count: how many values were used to calculate the mean
        """
        self.mean_array.append(mean)
        self.std_dev_array.append(std_dev)
        self.count_array.append(count)

Classes

class StatsContainer (container_id: str)

Helper class to compute statistics for a set of objects. Stores the mean, std dev, number of data points (count), and best value per set object. Calculates mean of means, mean of variance, variance of means, total variance, and total std dev for the set of objects

Properties

mean_array: the mean of each object in the set

std_dev_array: the std_dev of each object in the set

count_array: the number of elements in each object in the set

best_value: the best value to represent the set

container_id: a string that identifies the StatsContainer

Initialize the StatsContainer

:param container_id: a string describing the container

Expand source code
class StatsContainer:
    """
    Helper class to compute statistics for a set of objects.
    Stores the mean, std dev, number of data points (count), and best value per set object.
    Calculates mean of means, mean of variance, variance of means, total variance,
    and total std dev for the set of objects

    Properties:
        mean_array: the mean of each object in the set

        std_dev_array: the std_dev of each object in the set

        count_array: the number of elements in each object in the set

        best_value: the best value to represent the set

        container_id: a string that identifies the StatsContainer
    """

    def __init__(self, container_id: str) -> None:
        """
        Initialize the StatsContainer

        :param container_id: a string describing the container
        """
        self.mean_array: List[Union[float, int]] = []
        self.std_dev_array: List[Union[float, int]] = []
        self.count_array: List[Union[float, int]] = []
        self.best_value: float = 0.0
        self.container_id: str = container_id

    def mean_of_means(self) -> float:
        """
        :return: mean of all means
        """
        # convert non-numbers to 0s
        counts: np.ndarray = np.nan_to_num(self.count_array)
        if np.sum(counts) == 0:
            return np.nan
        # weight each mean by the number of elements in it
        total_means: np.ndarray = np.prod([np.nan_to_num(self.mean_array), counts], axis=0)
        # if sum(counts) is 0, change sum(counts) to 1 to avoid divide by 0 errors
        return np.sum(total_means) / np.sum(counts)

    def mean_of_variance(self) -> float:
        """
        :return: mean of the variances
        """
        # convert non-numbers to 0s
        counts: np.ndarray = np.nan_to_num(self.count_array)
        if np.sum(counts) == 0:
            return np.nan
        std_devs: np.ndarray = np.nan_to_num(self.std_dev_array)
        # variance is std dev squared, which is then weighted by the number of elements for that variance
        total_vars: np.ndarray = np.prod([counts, std_devs, std_devs], axis=0)
        # if sum(counts) is 0, change sum(counts) to 1 to avoid divide by 0 errors
        return np.sum(total_vars) / np.sum(counts)

    def variance_of_means(self) -> float:
        """
        :return: variance of the means
        """
        counts: np.ndarray = np.nan_to_num(self.count_array)
        if np.sum(counts) == 0:
            return np.nan
        # get the difference of individual means and total mean
        mean_vars: np.ndarray = np.subtract(np.nan_to_num(self.mean_array), self.mean_of_means())
        # square the differences then weight them by number of elements
        total: np.ndarray = np.prod([mean_vars, mean_vars, counts], axis=0)
        # if sum(counts) is 0, change sum(counts) to 1 to avoid divide by 0 errors
        return np.sum(total) / np.sum(counts)

    def total_variance(self) -> float:
        """
        :return: total variance of all elements
        """
        # mean of variances + variance of means = total variance
        return self.mean_of_variance() + self.variance_of_means()

    def total_std_dev(self) -> float:
        """
        :return: std dev of all elements (sqrt of total variance)
        """
        return np.sqrt(self.total_variance())  # std dev is square root of variance

    def add(
        self,
        mean: Union[float, int],
        std_dev: Union[float, int],
        count: Union[float, int],
    ) -> None:
        """
        Put an element into the arrays

        :param mean: a mean
        :param std_dev: the std dev for the mean
        :param count: how many values were used to calculate the mean
        """
        self.mean_array.append(mean)
        self.std_dev_array.append(std_dev)
        self.count_array.append(count)

Methods

def add(self, mean: Union[float, int], std_dev: Union[float, int], count: Union[float, int]) ‑> None

Put an element into the arrays

:param mean: a mean :param std_dev: the std dev for the mean :param count: how many values were used to calculate the mean

Expand source code
def add(
    self,
    mean: Union[float, int],
    std_dev: Union[float, int],
    count: Union[float, int],
) -> None:
    """
    Put an element into the arrays

    :param mean: a mean
    :param std_dev: the std dev for the mean
    :param count: how many values were used to calculate the mean
    """
    self.mean_array.append(mean)
    self.std_dev_array.append(std_dev)
    self.count_array.append(count)
def mean_of_means(self) ‑> float

:return: mean of all means

Expand source code
def mean_of_means(self) -> float:
    """
    :return: mean of all means
    """
    # convert non-numbers to 0s
    counts: np.ndarray = np.nan_to_num(self.count_array)
    if np.sum(counts) == 0:
        return np.nan
    # weight each mean by the number of elements in it
    total_means: np.ndarray = np.prod([np.nan_to_num(self.mean_array), counts], axis=0)
    # if sum(counts) is 0, change sum(counts) to 1 to avoid divide by 0 errors
    return np.sum(total_means) / np.sum(counts)
def mean_of_variance(self) ‑> float

:return: mean of the variances

Expand source code
def mean_of_variance(self) -> float:
    """
    :return: mean of the variances
    """
    # convert non-numbers to 0s
    counts: np.ndarray = np.nan_to_num(self.count_array)
    if np.sum(counts) == 0:
        return np.nan
    std_devs: np.ndarray = np.nan_to_num(self.std_dev_array)
    # variance is std dev squared, which is then weighted by the number of elements for that variance
    total_vars: np.ndarray = np.prod([counts, std_devs, std_devs], axis=0)
    # if sum(counts) is 0, change sum(counts) to 1 to avoid divide by 0 errors
    return np.sum(total_vars) / np.sum(counts)
def total_std_dev(self) ‑> float

:return: std dev of all elements (sqrt of total variance)

Expand source code
def total_std_dev(self) -> float:
    """
    :return: std dev of all elements (sqrt of total variance)
    """
    return np.sqrt(self.total_variance())  # std dev is square root of variance
def total_variance(self) ‑> float

:return: total variance of all elements

Expand source code
def total_variance(self) -> float:
    """
    :return: total variance of all elements
    """
    # mean of variances + variance of means = total variance
    return self.mean_of_variance() + self.variance_of_means()
def variance_of_means(self) ‑> float

:return: variance of the means

Expand source code
def variance_of_means(self) -> float:
    """
    :return: variance of the means
    """
    counts: np.ndarray = np.nan_to_num(self.count_array)
    if np.sum(counts) == 0:
        return np.nan
    # get the difference of individual means and total mean
    mean_vars: np.ndarray = np.subtract(np.nan_to_num(self.mean_array), self.mean_of_means())
    # square the differences then weight them by number of elements
    total: np.ndarray = np.prod([mean_vars, mean_vars, counts], axis=0)
    # if sum(counts) is 0, change sum(counts) to 1 to avoid divide by 0 errors
    return np.sum(total) / np.sum(counts)