Run evaluation over a dataset

In this example, we show how to run evaluation over a dataset with continuous-eval: we download an example retrieval dataset, define a single-module pipeline with retrieval metrics, and aggregate the results.
# Standard library
from pathlib import Path

# continuous-eval: example-data downloader, evaluation primitives, and retrieval metrics
from continuous_eval.data_downloader import example_data_downloader
from continuous_eval.eval import Dataset, EvaluationRunner, SingleModulePipeline
from continuous_eval.metrics.retrieval import PrecisionRecallF1, RankedRetrievalMetrics
# Let's download the retrieval dataset example.
# 300 samples in the retrieval dataset.
dataset_jsonl = example_data_downloader("retrieval")
dataset = Dataset(dataset_jsonl)
# Build a single-module pipeline over the dataset, scoring retrieval quality
# with two metric suites: deterministic precision/recall/F1 and rank-aware
# retrieval metrics. Both compare the retrieved contexts against the
# ground-truth contexts recorded in the dataset.
pipeline = SingleModulePipeline(
    dataset=dataset,
    eval=[
        PrecisionRecallF1().use(
            retrieved_context=dataset.retrieved_contexts,
            ground_truth_context=dataset.ground_truth_contexts,
        ),
        RankedRetrievalMetrics().use(
            retrieved_context=dataset.retrieved_contexts,
            ground_truth_context=dataset.ground_truth_contexts,
        ),
    ],
)
# We start the evaluation runner and run the metrics over the downloaded
# dataset, then print the aggregated (per-metric) results.
evalrunner = EvaluationRunner(pipeline)
metrics = evalrunner.evaluate(dataset)
print(metrics.aggregate())