Source code for codestral_ros2_gen.utils.metrics_handler

import pandas as pd
from pathlib import Path
import logging
import json
from io import StringIO

from codestral_ros2_gen import logger_main
from codestral_ros2_gen.generators.generation_attempt import AttemptMetrics

logger = logging.getLogger(f"{logger_main}.{__name__.split('.')[-1]}")


[docs] class MetricsHandler: """ Handler for collecting, recording, and analyzing code generation metrics. This class manages per-attempt metrics and overall aggregated metrics for the code generation process. It provides functionalities to record metrics, save and load metrics from a JSONL file, generate summary statistics, analyze error patterns, and produce plots. """
[docs] def __init__(self, metrics_file: str | Path): """ Initialize the MetricsHandler. Args: metrics_file (str | Path): The path to the JSONL file where metrics will be stored. """ self.metrics_file = Path(metrics_file).absolute().resolve() logger.info(f"Metrics file: {self.metrics_file}") self.metrics_df = pd.DataFrame() # Ensure the metrics directory exists. self.metrics_file.parent.mkdir(parents=True, exist_ok=True)
[docs] def record_attempt( self, iteration_number: int, attempt_number: int, attempt_metrics: AttemptMetrics, ) -> None: """ Record the metrics for a single generation attempt. Args: iteration_number (int): The sequential number of the iteration. attempt_number (int): The sequential number of the attempt. attempt_metrics (AttemptMetrics): An AttemptMetrics instance containing metrics for the attempt. """ logger.debug( f"Recording Attempt #{attempt_number}, Iteration #{iteration_number}:\n" + f"{json.dumps(attempt_metrics.as_dict, indent=2)}" ) record = attempt_metrics.as_series record["iteration"] = iteration_number record["attempt"] = attempt_number self.metrics_df = pd.concat( [self.metrics_df, record.to_frame().T], ignore_index=True ) self._save_metrics()
[docs] def _save_metrics(self) -> None: """ Save the current metrics DataFrame to the JSONL file. Raises: RuntimeError: If saving fails. """ try: self.metrics_df.to_json( self.metrics_file, orient="records", lines=True, date_format="iso" ) logger.debug(f"Metrics saved to {self.metrics_file}") except Exception as e: raise RuntimeError(f"Error saving metrics: {str(e)}")
[docs] def load_metrics(self) -> None: """ Load metrics from an existing JSONL file into the internal DataFrame. Raises: RuntimeError: If the file is not found or an error occurs during loading. """ try: data = self.metrics_file.read_text(encoding="utf-8") self.metrics_df = pd.read_json( StringIO(data), orient="records", lines=True, ) logger.info(f"Metrics loaded from {self.metrics_file}") logger.debug(f"Loaded metrics: {self.metrics_df}") except FileNotFoundError: raise RuntimeError(f"Metrics file {self.metrics_file} not found.") except Exception as e: raise RuntimeError(f"Error loading metrics: {str(e)}")
[docs] def get_report(self) -> str: """ Generate a report based on the collected metrics. Returns: str: The generated report. """ if self.metrics_df.empty: return "No metrics collected." rep_1 = self.metrics_df.groupby("iteration")[ ["success", "attempt", "tests_passed"] ].apply("max") rep_1["success"] = rep_1["success"].map({True: "✅", False: "🚫"}) rep_1.columns = ("success", "attempts", "tests_passed") rep_2 = ( self.metrics_df.groupby("iteration")[["attempt_time", "total_tokens"]] .apply("mean") .astype(int) ) rep_2.columns = ("avg attempt time, s", "avg total tokens") return rep_1.merge(rep_2, left_index=True, right_index=True).to_markdown( tablefmt="pretty" )