# Source code for stag.analysis.nan_handler

# ╔══════════════════════════════════════════════════════════════════╗
# ║  STAG — analysis.nan_handler                                     ║
# ║  « interpolate or drop NaN gaps »                                ║
# ╠══════════════════════════════════════════════════════════════════╣
# ║  Utility for repairing the feature matrix prior to               ║
# ║  clustering.  No gap-size cutoff is applied: NaN runs of any     ║
# ║  length are handed to the interpolator.                          ║
# ╚══════════════════════════════════════════════════════════════════╝
"""NaN detection and linear interpolation for sensor data."""

import numpy as np
import pandas as pd


def load_data(filename):
    """Read a matrix from *filename*, preferring NumPy's native loader.

    ``np.load`` is attempted first.  If it raises ``ValueError`` the file
    is re-read as header-less CSV through pandas and converted to a NumPy
    array.  Any other exception (for example a missing file) propagates
    to the caller unchanged.

    Args:
        filename: Path of the file to read.

    Returns:
        Whatever ``np.load`` returns on success, otherwise the CSV
        contents as a NumPy array.
    """
    try:
        loaded = np.load(filename)
    except ValueError:
        # Not something np.load accepts; fall back to a header-less CSV read.
        frame = pd.read_csv(filename, header=None)
        loaded = frame.to_numpy()
    return loaded
def find_nan_sequences(arr):
    """Locate every maximal run of consecutive NaN values, column by column.

    Runs that touch the first or the last row are reported as well; a run
    reaching the final row used to be silently dropped because it is never
    followed by a non-NaN value.

    Args:
        arr: 2-D floating-point NumPy array (rows x columns).

    Returns:
        A list of ``(col_idx, start, end)`` tuples of plain ints, where
        ``start`` and ``end`` are the inclusive row indices of one NaN run.
        Tuples are ordered by column, then by ``start``.
    """
    nan_sequences = []
    for col_idx in range(arr.shape[1]):
        is_nan = np.isnan(arr[:, col_idx])
        if not is_nan.any():
            continue

        # Pad with False on both sides so that a run touching either edge of
        # the column still produces both a rising and a falling transition.
        padded = np.concatenate(([False], is_nan, [False])).astype(np.int8)
        transitions = np.diff(padded)

        # +1 at index k: row k is the first NaN of a run.
        # -1 at index k: row k - 1 is the last NaN of a run.
        starts = np.flatnonzero(transitions == 1)
        ends = np.flatnonzero(transitions == -1) - 1

        nan_sequences.extend(
            (col_idx, int(first), int(last))
            for first, last in zip(starts, ends)
        )
    return nan_sequences
def interpolate_nan_sequences(arr, nan_sequences):
    """Fill interior NaN runs of *arr* in place by linear interpolation.

    Each run is replaced by values evenly spaced strictly between the
    valid sample just before it and the valid sample just after it, so
    ``[0, nan, nan, 3]`` becomes ``[0, 1, 2, 3]``.

    A run that touches the first or last row has only one neighbour and
    cannot be interpolated; it is left as NaN.

    Args:
        arr: 2-D floating-point NumPy array, modified in place.
        nan_sequences: Iterable of ``(col_idx, start, end)`` tuples with
            inclusive row indices, as produced by ``find_nan_sequences``.

    Returns:
        None.  ``arr`` is updated in place.
    """
    last_row = arr.shape[0] - 1
    for col_idx, start, end in nan_sequences:
        # No valid sample on one side: nothing to interpolate between.
        if start <= 0 or end >= last_row:
            continue

        col = arr[:, col_idx]  # a view, so writes land in ``arr``
        left = col[start - 1]
        right = col[end + 1]
        gap_len = end - start + 1

        # linspace includes both endpoints; ask for two extra points and
        # discard them so that only the interior values fill the gap.
        col[start:end + 1] = np.linspace(left, right, gap_len + 2)[1:-1]
if __name__ == "__main__":
    # Command-line entry point: read a matrix, interpolate its NaN runs in
    # place, and write the result back out with np.save.
    import argparse

    cli = argparse.ArgumentParser(
        description="Interpolate NaN runs in a raw accelerometer .npy array.",
    )
    positional_args = (
        ("infile", "Input .npy (or CSV) feature matrix."),
        ("outfile", "Output .npy with NaN runs interpolated."),
    )
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, help=arg_help)
    options = cli.parse_args()

    matrix = load_data(options.infile)
    runs = find_nan_sequences(matrix)
    interpolate_nan_sequences(matrix, runs)
    np.save(options.outfile, matrix)
    print("NaN sequences interpolated.")