Source code for wings.analysis.stats

import pandas as pd
import numpy as np
import scipy.stats as stats
from tqdm import tqdm
import itertools

[docs] def read_data(file_path): """Read a simulation CSV into a pandas DataFrame. Args: file_path (str): Path to the CSV file. Returns: pandas.DataFrame: Raw simulation data. """ return pd.read_csv(file_path)
[docs] def bootstrap_ci(data, n_bootstrap=1000, ci=0.95): """Compute a bootstrap confidence interval for the median. Args: data (array-like): Observed values. n_bootstrap (int): Number of bootstrap resamples. Defaults to ``10000``. ci (float): Confidence level (0–1). Defaults to ``0.95``. Returns: tuple[float, float]: Lower and upper bounds of the confidence interval. """ bootstrapped_medians = [] for _ in range(n_bootstrap): sample = np.random.choice(data, size=len(data), replace=True) bootstrapped_medians.append(np.median(sample)) lower_bound = np.percentile(bootstrapped_medians, (1 - ci) / 2 * 100) upper_bound = np.percentile(bootstrapped_medians, (1 + ci) / 2 * 100) return lower_bound, upper_bound
[docs] def calculate_statistics(data): """ Calculate required statistics for population size and infection rate. """ stats_dict = { 'mean': data.mean(), 'median': data.median(), 'sem': data.sem(), # Standard Error of the Mean } # 95% Confidence Interval of the Median stats_dict['ci_median'] = bootstrap_ci(data) return stats_dict
[docs] def get_wolbachia_effects(): return {'cytoplasmic_incompatibility': 'ci', 'male_killing': 'mk', 'increased_exploration_rate': 'er', 'increased_eggs': 'eg', 'reduced_eggs': 're' }
[docs] def combination_to_string(effects, combination): return ''.join(effects[e] for e, c in zip(effects, combination) if c) or 'no_effects'
[docs] def main(file_path,output_file): # Read the data df = read_data(file_path) effects = get_wolbachia_effects() # Generate all combinations of Wolbachia effects all_combinations = list(itertools.product([True, False], repeat=len(effects))) # List to store statistics for each combination and day stats_list = [] with tqdm(total=len(all_combinations), desc="Simulating") as pbar: for combination in all_combinations: combination_str = combination_to_string(effects,combination) # Filter data for the current combination combination_data = df[(df['cytoplasmic_incompatibility'] == combination[0]) & (df['male_killing'] == combination[1]) & (df['increased_exploration_rate'] == combination[2]) & (df['increased_eggs'] == combination[3])] for day in combination_data['days'].unique(): day_data = combination_data[combination_data['days'] == day] pop_size_stats = calculate_statistics(day_data['Population Size']) infection_rate_stats = calculate_statistics(day_data['Infection Rate']) stats_entry = { 'combination': combination_str, 'day': day, 'pop_size_mean': pop_size_stats['mean'], 'pop_size_median': pop_size_stats['median'], 'pop_size_ci_lower': pop_size_stats['ci_median'][0], 'pop_size_ci_upper': pop_size_stats['ci_median'][1], 'pop_size_sem': pop_size_stats['sem'], 'infection_rate_mean': infection_rate_stats['mean'], 'infection_rate_median': infection_rate_stats['median'], 'infection_rate_ci_lower': infection_rate_stats['ci_median'][0], 'infection_rate_ci_upper': infection_rate_stats['ci_median'][1], 'infection_rate_sem': infection_rate_stats['sem'] } stats_list.append(stats_entry) pbar.update(1) # Convert the list of statistics to a DataFrame stats_df = pd.DataFrame(stats_list) # Save the DataFrame to CSV stats_df.to_csv(output_file, index=False) print(f"Statistics saved to {output_file}")
if __name__ == "__main__": file_path = 'wolbachia_data.csv' # Update with the path to your CSV file main(file_path,"Wolbachia_analysis.csv")