"""Pattern → motif expansion utilities."""
from __future__ import annotations
from itertools import product
from typing import Dict, List
from tqdm import tqdm
# Names exported by ``from <module> import *``.
__all__ = ["generate_motifs", "build_pattern_to_motifs"]
# Letters substituted for each "*" wildcard; this order is the order in which
# itertools.product enumerates the expansions.
_BASES = ("G", "A", "T", "C")
[docs]
def generate_motifs(patterns: list[str]):
    """Enumerate every concrete motif described by the wildcard patterns.

    Each pattern is split at its first "CG". The number of "*" characters
    on each side decides how many base letters are placed before and after
    the "CG" core; only those counts are used when building a motif.

    Returns the distinct motifs across all patterns as a sorted list.
    """
    motifs: set[str] = set()
    for pat in tqdm(patterns, desc="Generating motifs"):
        upstream, _, downstream = pat.partition("CG")
        n_up = upstream.count("*")
        n_down = downstream.count("*")
        # Enumerate all wildcard slots in one product: the first n_up letters
        # form the prefix, the remaining n_down letters form the suffix.
        for letters in product(_BASES, repeat=n_up + n_down):
            prefix = "".join(letters[:n_up])
            suffix = "".join(letters[n_up:])
            motifs.add(prefix + "CG" + suffix)
    return sorted(motifs)
[docs]
def build_pattern_to_motifs(patterns: list[str]) -> Dict[str, List[str]]:
    """Return mapping *pattern→[motifs]* using N as wildcard stand-in.

    Each key is the pattern with every "*" replaced by "N". Each value lists
    all expansions of that pattern: the pattern is split at its first "CG",
    and the number of "*" characters on each side decides how many base
    letters are placed before and after the "CG" core. Motifs are ordered
    prefix-major, following the order of ``_BASES``.

    If two patterns produce the same key, the later one overwrites the
    earlier entry.
    """
    mapping: Dict[str, List[str]] = {}
    for pattern in tqdm(patterns, desc="Building pattern→motifs map"):
        key = pattern.replace("*", "N")
        pre, _, post = pattern.partition("CG")
        pre_cg, post_cg = pre.count("*"), post.count("*")
        # The suffixes are iterated once per prefix, so they must be a real
        # list: a generator would be exhausted after the first prefix and
        # every later prefix would silently contribute no motifs.
        suffixes = ["".join(p) for p in product(_BASES, repeat=post_cg)]
        mapping[key] = [
            f"{''.join(prefix)}CG{suffix}"
            for prefix in product(_BASES, repeat=pre_cg)
            for suffix in suffixes
        ]
    return mapping