# Source code for mesofield.datakit.sources.session.notes
"""Session notes data source.
Parses free-form ``*_notes.txt`` files where each line begins with a timestamp
followed by a colon and a short message. The result is a simple dataframe with
an index-aligned ``time_elapsed_s`` axis so that manual observations can be
plotted alongside other timeseries.
"""
import pandas as pd
import numpy as np
from pathlib import Path
import re
from datetime import datetime
from mesofield.datakit.sources.register import LoadContext, TimeseriesSource
[docs]
class SessionNotesSource(TimeseriesSource):
    """Load timestamped notes recorded by the experimenter.

    Each non-blank line of a ``*_notes.txt`` file is expected to look like
    ``YYYY-MM-DD HH:MM:SS: free text``. Lines that do not match
    ``line_pattern``, or whose timestamp cannot be parsed with
    ``timestamp_format``, are skipped.
    """

    tag = "notes"
    patterns = ("**/*_notes.txt",)
    camera_tag = None
    flatten_payload = False
    # Group 1 captures the timestamp, group 2 the note text after the colon.
    line_pattern = r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}):\s*(.*)"
    # ``strptime`` format used to parse group 1 of ``line_pattern``.
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    # Single time value emitted when a file contains no parseable notes.
    empty_time_value = 0.0

    def build_timeseries(
        self,
        path: Path,
        *,
        context: LoadContext | None = None,
    ) -> tuple[np.ndarray, pd.DataFrame, dict]:
        """Parse ``*_notes.txt`` into a timeline-aware dataframe.

        Args:
            path: Location of the notes file to read.
            context: Accepted for interface compatibility; not used by this
                method.

        Returns:
            A ``(t, df, meta)`` tuple:

            * ``t`` -- float64 array of seconds elapsed since the first
              parsed note, one entry per note. When no note could be parsed
              it is a one-element array holding ``empty_time_value`` (and
              ``df`` has zero rows).
            * ``df`` -- dataframe with ``timestamp`` and ``note`` columns, in
              file order.
            * ``meta`` -- dict with ``source_file`` (the path as a string) and
              ``n_notes`` (number of parsed notes).
        """
        line_re = re.compile(self.line_pattern)
        timestamps = []
        note_texts = []

        # "utf-8-sig" transparently drops a leading byte-order mark; without it
        # the BOM stays glued to the first line, the anchored regex fails, and
        # the first note is lost. Undecodable bytes are replaced rather than
        # raised so one stray character cannot abort loading free-form notes.
        with open(path, "r", encoding="utf-8-sig", errors="replace") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue
                match = line_re.match(line)
                if match is None:
                    continue
                timestamp_str, note_text = match.groups()
                try:
                    timestamp = datetime.strptime(timestamp_str, self.timestamp_format)
                except ValueError:
                    # Looks like a timestamp but is not a valid date/time
                    # (e.g. month 13); treat the line as malformed and skip it.
                    continue
                timestamps.append(timestamp)
                note_texts.append(note_text)

        if timestamps:
            # Convert to seconds relative to first note
            first = timestamps[0]
            t = np.array([(ts - first).total_seconds() for ts in timestamps])
            df = pd.DataFrame({'timestamp': timestamps, 'note': note_texts})
        else:
            t = np.array([self.empty_time_value])
            df = pd.DataFrame({'timestamp': [], 'note': []})

        meta = {"source_file": str(path), "n_notes": len(note_texts)}
        return t.astype(np.float64), df, meta