# Source code for rerandomstats.hypothesis_tests

"""
┌──────────────────────────────────────────────────────────────────────┐
│     hypothesis_tests.py « Classical Hypothesis Test Wrapper »        │
│                                                                      │
│  Thin dispatcher around scipy.stats for two-sample hypothesis       │
│  tests.  Supported tests: Mann-Whitney U, Kruskal-Wallis,          │
│  Chi-square, Kolmogorov-Smirnov, Mood's Median, Wilcoxon           │
│  Rank-Sum, and the independent t-test.                              │
│                                                                      │
│  Author : Bart R.H. Geurten                                         │
│  Licence: MIT                                                        │
└──────────────────────────────────────────────────────────────────────┘
"""

from __future__ import annotations

from typing import Literal, Sequence

from scipy.stats import (
    chisquare,
    kruskal,
    kstest,
    mannwhitneyu,
    mood,
    ranksums,
    ttest_ind,
)

# Accepted test names (for documentation / validation)
_VALID_TESTS = {
    "MannWhitneyU",
    "KruskalWallis",
    "ChiSquare",
    "Kolmogorov",
    "MoodMedian",
    "WilcoxonRankSum",
    "IndependentT",
}


class HypothesisTests:
    """Unified interface for two-sample hypothesis tests.

    This class wraps several :mod:`scipy.stats` tests behind a single
    ``func`` string selector.  It is consumed primarily by
    :class:`~rerandomstats.multi_group_test.MultiGroupTest` when the test
    family is ``'hypo'``.

    Supported *func* values:

    ===============  ======================================================
    Name             Description
    ===============  ======================================================
    MannWhitneyU     Non-parametric comparison of two independent groups.
    KruskalWallis    Kruskal-Wallis H-test applied to the two samples.
    ChiSquare        One-way chi-square test: ``data_a`` holds the observed
                     frequencies, ``data_b`` the expected frequencies.
    Kolmogorov       Two-sample Kolmogorov-Smirnov distribution test.
    MoodMedian       Mood's median test (non-parametric median comparison).
    WilcoxonRankSum  Wilcoxon rank-sum test (see also MannWhitneyU).
    IndependentT     Parametric t-test for independent samples.
    ===============  ======================================================

    Args:
        data_a: First sample.
        data_b: Second sample.
        func: Name of the test to perform.
        alternative: ``'two-sided'``, ``'less'``, or ``'greater'``.
            Forwarded to ``MannWhitneyU``, ``WilcoxonRankSum``,
            ``Kolmogorov`` and ``IndependentT``.  ``KruskalWallis``,
            ``ChiSquare`` and ``MoodMedian`` have no one-sided variant in
            :mod:`scipy.stats`, so the value is not used for them.

    Example:
        >>> ht = HypothesisTests([1, 2, 3], [5, 6, 7], 'MannWhitneyU')
        >>> p = ht.main()
    """

    def __init__(
        self,
        data_a: Sequence[float],
        data_b: Sequence[float],
        func: str,
        alternative: Literal["two-sided", "less", "greater"] = "two-sided",
    ) -> None:
        self.data_a = data_a
        self.data_b = data_b
        self.func = func
        self.alternative = alternative

    # ── main entry point ─────────────────────────────────────────────
    def main(self) -> float:
        """Run the selected hypothesis test and return its p-value.

        Returns:
            The p-value produced by the chosen test, as a plain ``float``.

        Raises:
            ValueError: If :attr:`func` is not a recognised test name.
                The underlying SciPy routine may also raise for input it
                rejects (see the SciPy documentation of each test).
        """
        a, b, alt = self.data_a, self.data_b, self.alternative

        # Every SciPy result used here is tuple-like with the p-value at
        # index 1, so indexing works no matter how many fields it has.
        if self.func == "MannWhitneyU":
            p_value = mannwhitneyu(a, b, alternative=alt)[1]
        elif self.func == "KruskalWallis":
            p_value = kruskal(a, b)[1]
        elif self.func == "ChiSquare":
            # chisquare() returns (statistic, pvalue); the previous
            # four-way unpacking always raised ValueError.
            p_value = chisquare(a, b)[1]
        elif self.func == "WilcoxonRankSum":
            p_value = ranksums(a, b, alternative=alt)[1]
        elif self.func == "Kolmogorov":
            # An array-like second argument makes kstest() run the
            # two-sample test.
            p_value = kstest(a, b, alternative=alt)[1]
        elif self.func == "MoodMedian":
            # scipy.stats.mood() tests for equal *scale*; Mood's median
            # test is scipy.stats.median_test().  Imported locally so the
            # module-level import block does not have to change.
            from scipy.stats import median_test

            p_value = median_test(a, b)[1]
        elif self.func == "IndependentT":
            p_value = ttest_ind(a, b, alternative=alt)[1]
        else:
            raise ValueError(
                f"HypothesisTests: unknown test function '{self.func}'. "
                f"Choose from: {', '.join(sorted(_VALID_TESTS))}"
            )
        return float(p_value)