# Source code for rerandomstats.binomial_stats

"""
┌──────────────────────────────────────────────────────────────────────┐
│        binomial_stats.py « Binomial Proportion Tests »               │
│                                                                      │
│  Single-sample binomial test with Wilson confidence intervals,      │
│  and a two-sample proportions test (z-test or chi-square) for       │
│  comparing success rates across groups.                             │
│                                                                      │
│  Author : Bart R.H. Geurten                                         │
│  Licence: MIT                                                        │
└──────────────────────────────────────────────────────────────────────┘
"""

from __future__ import annotations

from typing import Dict, Literal, Tuple

import numpy as np
from scipy.stats import binomtest
from statsmodels.stats.proportion import (
    proportion_confint,
    proportions_chisquare,
    proportions_ztest,
)



class BinomialStats:
    """Single-sample binomial test with confidence intervals.

    Wraps :func:`scipy.stats.binomtest` and
    :func:`statsmodels.stats.proportion.proportion_confint` (Wilson
    method) for quick proportion analysis.

    The constructor only stores its arguments; validation happens when
    :meth:`binomial_test` or :meth:`exact_ci` is called.

    Args:
        heads: Number of successes observed.
        total_flips: Total number of trials.
        alpha: Significance level; passed to
            :func:`~statsmodels.stats.proportion.proportion_confint`
            by :meth:`exact_ci`.
        alternative: ``'two-sided'``, ``'greater'``, or ``'less'``;
            forwarded to :func:`scipy.stats.binomtest`.

    Example:
        >>> bs = BinomialStats(heads=80, total_flips=100)
        >>> result = bs.binomial_test(base_rate=0.5)
        >>> result.pvalue < 0.05
        True
    """

    def __init__(
        self,
        heads: int,
        total_flips: int,
        alpha: float = 0.05,
        alternative: Literal["two-sided", "greater", "less"] = "two-sided",
    ) -> None:
        self.heads = heads
        self.total_flips = total_flips
        self.alpha = alpha
        self.alternative = alternative

    # ── binomial test ────────────────────────────────────────────────

    def binomial_test(self, base_rate: float = 0.5):
        """Perform a binomial test against *base_rate*.

        Args:
            base_rate: Expected success probability under H₀.

        Returns:
            :class:`scipy.stats.BinomTestResult` object (access
            ``.pvalue`` for the p-value).
        """
        return binomtest(
            self.heads,
            self.total_flips,
            p=base_rate,
            alternative=self.alternative,
        )

    # ── Wilson confidence interval ───────────────────────────────────

    def exact_ci(self) -> Dict[str, float]:
        """Compute a Wilson confidence interval for the proportion.

        Despite the method name, the interval is the Wilson score
        interval (``method="wilson"``), not the exact Clopper-Pearson
        interval.

        Returns:
            Dictionary with keys ``'Proportion'``, ``'Lower CI'``,
            and ``'Upper CI'`` (all as percentages).  The proportion is
            rounded to 2 decimals, the bounds to 4 decimals.

        Raises:
            ValueError: If ``total_flips`` is not positive, or if
                ``heads`` lies outside ``[0, total_flips]``.
        """
        x = float(self.heads)
        n = float(self.total_flips)

        # Fail with a clear message instead of a bare ZeroDivisionError
        # (n == 0) or a meaningless proportion/interval (x outside
        # [0, n]).  The chained comparison is also False for NaN inputs,
        # so those are rejected here too.
        if n <= 0:
            raise ValueError(
                f"BinomialStats: total_flips must be positive, got "
                f"{self.total_flips!r}"
            )
        if not 0 <= x <= n:
            raise ValueError(
                f"BinomialStats: heads must lie in [0, total_flips], got "
                f"heads={self.heads!r}, total_flips={self.total_flips!r}"
            )

        proportion = round((x / n) * 100, 2)

        lower, upper = proportion_confint(
            count=x, nobs=n, alpha=self.alpha, method="wilson"
        )
        # Clamp to [0, 100] so the reported percentages never leave the
        # valid range.
        return {
            "Proportion": proportion,
            "Lower CI": max(0.0, round(lower * 100, 4)),
            "Upper CI": min(100.0, round(upper * 100, 4)),
        }





class MultipleBinomialTests:
    """Two-sample proportions test (z-test or chi-square).

    Compares the success proportions in two groups.  Each group is
    specified as a tuple ``(successes, failures)``.

    Args:
        data_a: ``(successes, failures)`` for group A.
        data_b: ``(successes, failures)`` for group B.
        func: ``'ztest'`` or ``'chi2'``.
        alternative: ``'two-sided'``, ``'smaller'``, or ``'larger'``.
            Only forwarded to the z-test; the chi-square call does not
            receive it.

    Example:
        >>> mbt = MultipleBinomialTests((30, 70), (50, 50), 'ztest')
        >>> p = mbt.main()
    """

    def __init__(
        self,
        data_a: Tuple[int, int],
        data_b: Tuple[int, int],
        func: Literal["ztest", "chi2"],
        alternative: Literal["two-sided", "smaller", "larger"] = "two-sided",
    ) -> None:
        self.data_a = data_a
        self.data_b = data_b
        self.func = func
        self.alternative = alternative

    # ── main entry point ─────────────────────────────────────────────

    def main(self) -> float:
        """Run the proportions comparison and return the p-value.

        Returns:
            The p-value.  Returns ``1.0`` if the result is *NaN*
            (identical samples or missing data); a notice is printed
            to stdout in that case.

        Raises:
            ValueError: If :attr:`func` is unrecognised.
        """
        # Reject an unknown test selector before touching the data.
        if self.func not in ("ztest", "chi2"):
            raise ValueError(
                f"MultipleBinomialTests: unknown test function '{self.func}'"
            )

        # Successes per group, and trials per group (successes + failures).
        counts = np.array((self.data_a[0], self.data_b[0]))
        observations = np.array((np.sum(self.data_a), np.sum(self.data_b)))

        # Both statsmodels functions return the p-value at index 1.
        if self.func == "ztest":
            p_value = proportions_ztest(
                count=counts, nobs=observations, alternative=self.alternative
            )[1]
        else:
            # `alternative` is not passed on to the chi-square test.
            p_value = proportions_chisquare(count=counts, nobs=observations)[1]

        if np.isnan(p_value):
            p_value = 1.0
            print(
                "MultipleBinomialTests: p-value is NaN (identical samples "
                "or NaN in data) — set to 1.0"
            )
        return float(p_value)