Source code for gec_metrics.metrics.bertscore

from .base import MetricBaseForSourceFree
from dataclasses import dataclass
from bert_score import BERTScorer
import os
import torch


[docs]
class BertScore(MetricBaseForSourceFree):
    '''Metric that scores hypotheses against references with BERTScore.

    A ``bert_score.BERTScorer`` is built once in ``__init__`` from the
    configuration and reused for every call to ``score_sentence``.
    '''

    # Position of each score type in the tuple returned by
    # ``BERTScorer.score`` (precision, recall, F1).
    _SCORE_INDEX = {'p': 0, 'r': 1, 'f': 2}

    @dataclass
    class Config(MetricBaseForSourceFree.Config):
        '''BERTScore configuration.

        All fields except ``score_type`` are forwarded unchanged to
        ``bert_score.BERTScorer``.

        - model_type (str): Embedding model.
        - num_layers (int): The layer of representation to use.
            If None, the pre-defined one is used. (See bert_score.utils.model2layers.)
        - batch_size (int): Batch size given to the scorer.
        - nthreads (int): Number of threads.
        - all_layers (bool): Forwarded to the scorer as ``all_layers``.
        - idf (bool): Whether to use idf or not.
        - idf_sents (list[str]): Sentences to compute idf weights.
        - lang (str): Language code given to the scorer.
        - rescale_with_baseline (bool): Whether to rescale scores.
        - baseline_path (str): Path to .tsv file.
            If None, the pre-defined one is used. (See bert_score.rescale_baseline.*.tsv)
        - use_fast_tokenizer (bool): Whether to use fast tokenizer.
        - score_type (str): "p" (precision) or "r" (recall) or "f" (F1) score.
        '''
        model_type: str = 'bert-base-uncased'
        num_layers: int = None
        batch_size: int = 64
        nthreads: int = 4
        all_layers: bool = False
        idf: bool = False
        idf_sents: list[str] = None
        lang: str = 'en'
        rescale_with_baseline: bool = True
        baseline_path: str = None
        use_fast_tokenizer: bool = False
        score_type: str = 'f'

    def __init__(self, config: Config = None):
        '''Validate the configuration and build the underlying scorer.

        Args:
            config (Config): Metric configuration. It is handed to the base
                class, which exposes it as ``self.config``.

        Raises:
            ValueError: If ``config.score_type`` is not "p", "r" or "f".
        '''
        super().__init__(config)
        # Validate before loading the model. An explicit raise is used
        # because ``assert`` statements are removed under ``python -O``.
        if self.config.score_type not in self._SCORE_INDEX:
            raise ValueError(
                'score_type must be one of '
                f'{sorted(self._SCORE_INDEX)}, '
                f'got {self.config.score_type!r}.'
            )
        self.scorer = BERTScorer(
            model_type=self.config.model_type,
            num_layers=self.config.num_layers,
            batch_size=self.config.batch_size,
            nthreads=self.config.nthreads,
            all_layers=self.config.all_layers,
            idf=self.config.idf,
            idf_sents=self.config.idf_sents,
            lang=self.config.lang,
            rescale_with_baseline=self.config.rescale_with_baseline,
            baseline_path=self.config.baseline_path,
            use_fast_tokenizer=self.config.use_fast_tokenizer
        )
        # The model is only used for inference: switch to eval mode and
        # move it to the GPU when one is available.
        self.scorer._model.eval()
        if torch.cuda.is_available():
            self.scorer._model.cuda()

    def score_sentence(
        self,
        hypotheses: list[str],
        references: list[list[str]]
    ) -> list[float]:
        '''Calculate sentence-level scores.

        Args:
            hypotheses (list[str]): Corrected sentences.
                The shape is (num_sentences, )
            references (list[list[str]]): Reference sentences.
                The shape is (num_references, num_sentences).

        Returns:
            list[float]: The sentence-level scores, one per hypothesis.
                Which of precision / recall / F1 is returned is chosen
                by ``config.score_type``. An empty list is returned when
                ``hypotheses`` is empty.

        Raises:
            ValueError: If no reference list is given, or a reference list
                does not contain exactly one sentence per hypothesis.
        '''
        if not hypotheses:
            # Nothing to score; do not hand empty input to the scorer.
            return []

        num_sents = len(hypotheses)
        if not references:
            raise ValueError('At least one list of references is required.')
        for ref_id, ref_sents in enumerate(references):
            # zip() below would silently drop sentences on a mismatch and
            # the result would no longer line up with the hypotheses.
            if len(ref_sents) != num_sents:
                raise ValueError(
                    f'references[{ref_id}] has {len(ref_sents)} sentences '
                    f'but there are {num_sents} hypotheses.'
                )

        # (num_refs, num_sents) -> (num_sents, num_refs)
        refs_per_sentence = [list(group) for group in zip(*references)]
        output = self.scorer.score(
            cands=hypotheses,
            refs=refs_per_sentence
        )
        selected = output[self._SCORE_INDEX[self.config.score_type]]
        return selected.view(-1).tolist()