RAGEvaluator Class

Computes retrieval metrics (Precision@k and Recall@k) and a simple faithfulness score for generation quality.
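For example, if three of the top five retrieved documents are relevant and four relevant documents exist in total, Precision@5 = 3/5 = 0.6 and Recall@5 = 3/4 = 0.75.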


Source Code

rag_evaluator.py

class RAGEvaluator:
    def compute_retrieval_metrics(self, retrieved_ids, relevant_ids, k=5):
        """Compute Precision@k and Recall@k for a single query."""
        retrieved_at_k = retrieved_ids[:k]
        relevant_set = set(relevant_ids)

        # Deduplicate so a repeated document ID is only counted once;
        # otherwise duplicates could push recall above 1.0.
        hits = set(retrieved_at_k) & relevant_set

        # Precision@k: fraction of the top-k slots filled by relevant docs.
        precision = len(hits) / k

        # Recall@k: fraction of all relevant docs found in the top k.
        recall = len(hits) / len(relevant_set) if relevant_set else 0.0

        return {"precision": precision, "recall": recall}

    def compute_faithfulness(self, answer, context):
        """Crude faithfulness proxy: is the answer grounded in the context?"""
        # Placeholder: a real implementation would call an LLM to verify
        # each claim in the answer against the retrieved context.
        # Note the direction of the check: the answer should be contained
        # in the context, not the context in the answer.
        return 1.0 if answer in context else 0.0
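
A minimal usage sketch; the document IDs, answer, and context strings below are illustrative, not part of any dataset:

evaluator = RAGEvaluator()

# Three of the top five retrieved documents are relevant; four relevant exist.
metrics = evaluator.compute_retrieval_metrics(
    retrieved_ids=["d1", "d7", "d3", "d9", "d4"],
    relevant_ids=["d1", "d3", "d4", "d8"],
    k=5,
)
print(metrics)  # {'precision': 0.6, 'recall': 0.75}

score = evaluator.compute_faithfulness(
    answer="The capital of France is Paris.",
    context="France is a country in Europe. The capital of France is Paris.",
)
print(score)  # 1.0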
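
The faithfulness placeholder hints at an LLM-based check. Below is a hedged sketch of what such a check might look like; llm_judge is a hypothetical callable (prompt string in, text out) standing in for whatever LLM client you use, and is an assumption rather than part of this class:

def compute_faithfulness_llm(answer, context, llm_judge):
    """Sketch of an LLM-judged faithfulness check.

    llm_judge is a hypothetical callable: prompt string in, text out.
    """
    prompt = (
        "Context:\n" + context + "\n\n"
        "Answer:\n" + answer + "\n\n"
        "Is every claim in the answer supported by the context? "
        "Reply with only YES or NO."
    )
    verdict = llm_judge(prompt).strip().upper()
    return 1.0 if verdict.startswith("YES") else 0.0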