# ╔══════════════════════════════════════════════════════════════════╗
# ║ arista — db.seeds ║
# ║ « dimension-table seeds: researchers, cell_types, ║
# ║ stimulus_protocols, strains » ║
# ╠══════════════════════════════════════════════════════════════════╣
# ║ Inserts every entry that's known at design time into the four ║
# ║ controlled-vocabulary tables. Researchers and cell-types are ║
# ║ fixed; stimulus_protocols and strains will grow as new ║
# ║ experiments are added, but every entry shipped here is one we ║
# ║ expect ingest to see. ║
# ╚══════════════════════════════════════════════════════════════════╝
"""Seed the controlled-vocabulary dimension tables."""
from __future__ import annotations
import json
import sqlite3
from arista.constants import (
CANONICAL_STRAINS,
CELL_TYPES,
STIMULUS_PROTOCOLS,
)
# ─────────────────────────────────────────────────────────────────
# Researcher catalogue « four students contributing Ca²⁺ data »
# ─────────────────────────────────────────────────────────────────
# Anne Oepen contributed behaviour-only data (see [[Data Sources]] §
# Anne Oepen) and is intentionally NOT in this list — the v1.0.0 DB is
# Ca²⁺-only.
# One entry per researcher, laid out as (name, role, period) to match the
# column order of the INSERT issued by ``seed_researchers``.
RESEARCHERS: tuple[tuple[str, str, str], ...] = (
    # name,            role,       period
    ("Robert Kossen", "PhD", "2016–2019"),
    ("Niko", "MSc", "2016"),
    ("Laurin Büld", "MSc", "2020–2021"),
    ("Alexander Busch", "post-BSc", "2021–2022"),
)
[docs]
def seed_researchers(conn: sqlite3.Connection) -> None:
    """Populate the ``researchers`` table from :data:`RESEARCHERS`.

    Every ``(name, role, period)`` tuple is offered as one row through a
    single ``executemany`` call. ``INSERT OR IGNORE`` silently skips rows
    that would violate a constraint, so a repeated call adds nothing new
    as long as the table enforces uniqueness on these values (the schema
    is not visible from this module -- confirm).

    This function does not call ``commit``.

    :param conn: open SQLite connection whose schema already contains the
        ``researchers`` table.
    """
    insert_sql = (
        "INSERT OR IGNORE INTO researchers (name, role, period) VALUES (?, ?, ?)"
    )
    conn.executemany(insert_sql, RESEARCHERS)
[docs]
def seed_cell_types(conn: sqlite3.Connection) -> None:
    """Populate the ``cell_types`` table from :data:`arista.constants.CELL_TYPES`.

    One ``(code, name, description)`` row is built for every value of the
    ``CELL_TYPES`` mapping (the CC / HC / WC entries) and offered with
    ``INSERT OR IGNORE``, so rows rejected by a table constraint are
    skipped rather than raising.

    This function does not call ``commit``.

    :param conn: open SQLite connection whose schema already contains the
        ``cell_types`` table.
    """
    insert_sql = (
        "INSERT OR IGNORE INTO cell_types (code, name, description) VALUES (?, ?, ?)"
    )
    # Materialise all rows up front so attribute access on every entry
    # happens before the first INSERT is issued.
    cell_type_rows = [
        (entry.code, entry.name, entry.description)
        for entry in CELL_TYPES.values()
    ]
    conn.executemany(insert_sql, cell_type_rows)
[docs]
def seed_stimulus_protocols(conn: sqlite3.Connection) -> None:
    """Populate ``stimulus_protocols`` from :data:`arista.constants.STIMULUS_PROTOCOLS`.

    Each protocol contributes one ``(name, family, description,
    target_sequence_json)`` row. A non-empty ``target_sequence`` is stored
    as a JSON array string so that ``json_each()`` can unroll it later
    inside SQL queries; a falsy one (``None`` or empty) is stored as SQL
    ``NULL``.

    Rows are offered with ``INSERT OR IGNORE``; this function does not
    call ``commit``.

    :param conn: open SQLite connection whose schema already contains the
        ``stimulus_protocols`` table.
    """
    insert_sql = (
        "INSERT OR IGNORE INTO stimulus_protocols "
        "(name, family, description, target_sequence_json) VALUES (?, ?, ?, ?)"
    )

    protocol_rows = []
    for protocol in STIMULUS_PROTOCOLS.values():
        name = protocol.name
        family = protocol.family
        description = protocol.description
        sequence = protocol.target_sequence
        if sequence:
            # json.dumps is handed a list, so the stored text is always a
            # JSON array regardless of the sequence's own container type.
            sequence_json = json.dumps(list(sequence))
        else:
            sequence_json = None
        protocol_rows.append((name, family, description, sequence_json))

    conn.executemany(insert_sql, protocol_rows)
[docs]
def seed_strains(conn: sqlite3.Connection) -> None:
    """Populate the ``strains`` table with the canonical starter strains.

    Each item of :data:`arista.constants.CANONICAL_STRAINS` is wrapped in
    a one-element tuple and offered as ``strain_name`` with
    ``INSERT OR IGNORE``.

    The strain list keeps growing over time; this seed only guarantees
    that the documented strains exist on first DB build. Any further
    strain is inserted by the ingester via :func:`ensure_strain`.

    This function does not call ``commit``.

    :param conn: open SQLite connection whose schema already contains the
        ``strains`` table.
    """
    insert_sql = "INSERT OR IGNORE INTO strains (strain_name) VALUES (?)"
    # executemany expects one parameter sequence per row, hence the 1-tuples.
    strain_rows = [(strain_name,) for strain_name in CANONICAL_STRAINS]
    conn.executemany(insert_sql, strain_rows)
[docs]
def seed_dimensions(conn: sqlite3.Connection) -> None:
    """Run all four dimension seeders, then commit.

    The seeders run in a fixed order -- researchers, cell types, stimulus
    protocols, strains -- and ``conn.commit()`` is called once after the
    last one returns. If a seeder raises, the exception propagates and no
    commit is issued by this function.

    :param conn: open SQLite connection whose schema already contains the
        four dimension tables.
    """
    seeders = (
        seed_researchers,
        seed_cell_types,
        seed_stimulus_protocols,
        seed_strains,
    )
    for run_seed in seeders:
        run_seed(conn)
    conn.commit()