import pandas as pd
from pathlib import Path
import logging
import json
from io import StringIO
from codestral_ros2_gen import logger_main
from codestral_ros2_gen.generators.generation_attempt import AttemptMetrics
logger = logging.getLogger(f"{logger_main}.{__name__.split('.')[-1]}")
class MetricsHandler:
"""
Handler for collecting, recording, and analyzing code generation metrics.
This class manages per-attempt metrics and overall aggregated metrics for
the code generation process. It provides functionalities to record metrics,
save and load metrics from a JSONL file, generate summary statistics,
analyze error patterns, and produce plots.
"""
def __init__(self, metrics_file: str | Path):
    """
    Initialize the MetricsHandler.

    Args:
        metrics_file (str | Path): The path to the JSONL file where metrics will be stored.
    """
    # Path.resolve() already returns an absolute, normalized path, so the
    # former absolute().resolve() chain was redundant.
    self.metrics_file = Path(metrics_file).resolve()
    logger.info(f"Metrics file: {self.metrics_file}")
    # In-memory store of per-attempt metrics; persisted to JSONL on each record.
    self.metrics_df = pd.DataFrame()
    # Ensure the metrics directory exists.
    self.metrics_file.parent.mkdir(parents=True, exist_ok=True)
def record_attempt(
    self,
    iteration_number: int,
    attempt_number: int,
    attempt_metrics: AttemptMetrics,
) -> None:
    """
    Record the metrics for a single generation attempt.

    Args:
        iteration_number (int): The sequential number of the iteration.
        attempt_number (int): The sequential number of the attempt.
        attempt_metrics (AttemptMetrics): An AttemptMetrics instance containing metrics for the attempt.
    """
    pretty_metrics = json.dumps(attempt_metrics.as_dict, indent=2)
    logger.debug(
        f"Recording Attempt #{attempt_number}, Iteration #{iteration_number}:\n{pretty_metrics}"
    )
    # Tag the per-attempt series with its iteration/attempt indices.
    row = attempt_metrics.as_series
    row["iteration"] = iteration_number
    row["attempt"] = attempt_number
    # Append as a single-row frame, then persist immediately so no attempt is lost.
    single_row = row.to_frame().T
    self.metrics_df = pd.concat([self.metrics_df, single_row], ignore_index=True)
    self._save_metrics()
def _save_metrics(self) -> None:
    """
    Save the current metrics DataFrame to the JSONL file.

    Raises:
        RuntimeError: If saving fails.
    """
    try:
        # JSONL (one record per line) with ISO-formatted dates for portability.
        self.metrics_df.to_json(
            self.metrics_file, orient="records", lines=True, date_format="iso"
        )
    except Exception as e:
        # Chain the original exception so the underlying cause is preserved
        # in the traceback instead of being flattened into the message only.
        raise RuntimeError(f"Error saving metrics: {str(e)}") from e
    else:
        logger.debug(f"Metrics saved to {self.metrics_file}")
def load_metrics(self) -> None:
    """
    Load metrics from an existing JSONL file into the internal DataFrame.

    Raises:
        RuntimeError: If the file is not found or an error occurs during loading.
    """
    try:
        data = self.metrics_file.read_text(encoding="utf-8")
        # Wrap in StringIO: passing a raw string to read_json is deprecated.
        self.metrics_df = pd.read_json(
            StringIO(data),
            orient="records",
            lines=True,
        )
    except FileNotFoundError as e:
        # Chain the cause so the original traceback is not discarded.
        raise RuntimeError(f"Metrics file {self.metrics_file} not found.") from e
    except Exception as e:
        raise RuntimeError(f"Error loading metrics: {str(e)}") from e
    else:
        logger.info(f"Metrics loaded from {self.metrics_file}")
        logger.debug(f"Loaded metrics: {self.metrics_df}")
def get_report(self) -> str:
"""
Generate a report based on the collected metrics.
Returns:
str: The generated report.
"""
if self.metrics_df.empty:
return "No metrics collected."
rep_1 = self.metrics_df.groupby("iteration")[
["success", "attempt", "tests_passed"]
].apply("max")
rep_1["success"] = rep_1["success"].map({True: "✅", False: "🚫"})
rep_1.columns = ("success", "attempts", "tests_passed")
rep_2 = (
self.metrics_df.groupby("iteration")[["attempt_time", "total_tokens"]]
.apply("mean")
.astype(int)
)
rep_2.columns = ("avg attempt time, s", "avg total tokens")
return rep_1.merge(rep_2, left_index=True, right_index=True).to_markdown(
tablefmt="pretty"
)