Source code for gec_metrics.metrics.gotoscorer

from .base import MetricBaseForReferenceBased
from .errant import ERRANT
import errant 
from dataclasses import dataclass
from collections import Counter
import json

class GoToScorer(ERRANT):
    '''Chunk-level, optionally weighted, reference-based scorer.

    The source sentence is split into chunks (edited spans, untouched
    tokens and zero-width insertion slots). Every chunk of the chosen
    reference is compared with the hypothesis chunks and counted as
    TP / FP / FN / TN, scaled by the weight of the chunk.
    '''

    @dataclass
    class Config(ERRANT.Config):
        '''GoToScorer configuration.

        - weight_file (str): The JSON file containing the pre-computed
            weight. This can be generated by the
            `gecmetrics-gen-gotoscorer-weight` script.
        - ref_id (int): The reference id.
        - no_weight (bool): If True, the weight of all chunks are 1.0.
        '''
        weight_file: str = ''
        ref_id: int = 0  # GoToScorer uses only one reference.
        no_weight: bool = True

    @dataclass
    class Chunk:
        '''Class to represent a chunk.

        - o_start (int): Start index of the span for the source words.
        - o_end (int): End index of the span for the source words.
        - c_str (str): Corrected version of the span.
        - type (str): Error type.
        - weight (float): Weight for the chunk.
        - is_edited (bool): Flag whether the GEC systems tried to edit
            this span. This is used to distinguish FP from FN,
            and TP and TN.
        '''
        o_start: int = 0
        o_end: int = 0
        c_str: str = ''
        type: str = ''
        weight: float = 1.0
        is_edited: bool = False

    def __init__(self, config: Config = None):
        '''Initialize the scorer and load the chunk weights if requested.

        Args:
            config (Config): The configuration. It is forwarded unchanged
                to the parent constructor.
        '''
        super().__init__(config)
        if not self.config.no_weight:
            # Use a context manager so that the file handle is always
            # closed, even when the JSON is malformed.
            with open(self.config.weight_file, encoding='utf-8') as f:
                data = json.load(f)
            # One list of chunk weights per sentence (indexed by the
            # sentence id in score_base()).
            self.weights = [d['weights'] for d in data]

    def generate_chunks(
        self,
        edits: list[errant.edit.Edit],
        tokens: list[str]
    ) -> list[Chunk]:
        '''Generate a chunk sequence given an edit sequence.

        - Tokens included in each edit become a single chunk.
        - Each token outside of the edits becomes a chunk respectively.
        - Dummy chunks will be inserted between all tokens to account
            for possible insertions.

        Args:
            edits (list[errant.edit.Edit]): The edit sequence that can be
                obtained via errant.annotate()
            tokens (list[str]): The source tokens.

        Returns:
            list[Chunk]: The chunk sequence. For empty `tokens` without
                edits this is a single dummy insertion chunk at index 0.
        '''
        num_tokens = len(tokens)
        num_edits = len(edits)

        # We first convert edits into chunks.
        chunks = []
        edit_idx = 0
        word_id = 0
        while word_id < num_tokens:
            if edit_idx < num_edits \
                    and edits[edit_idx].o_start == word_id:
                e = edits[edit_idx]
                chunks.append(self.Chunk(
                    o_start=e.o_start,
                    o_end=e.o_end,
                    c_str=e.c_str,
                    type=e.type,
                    is_edited=True
                ))
                edit_idx += 1
                # Jump behind the edited span. For an insertion edit
                # (o_start == o_end) the position does not move, so the
                # token at this index is handled by the next iteration.
                word_id = e.o_end
            else:
                chunks.append(self.Chunk(
                    o_start=word_id,
                    o_end=word_id + 1,
                    c_str=tokens[word_id],
                    type='DUMMY:DUMMY',
                    is_edited=False
                ))
                word_id += 1
        # An edit starting behind the last token (an insertion at the end
        # of the sentence) can never be reached by the loop above.
        if edits and edits[-1].o_start == num_tokens:
            last = edits[-1]
            chunks.append(self.Chunk(
                o_start=last.o_start,
                o_end=last.o_end,
                c_str=last.c_str,
                type=last.type,
                is_edited=True
            ))

        # We then insert dummy chunk for the potential insertion.
        is_previous_chunk_insertion = False
        new_chunks = []
        for chunk in chunks:
            if chunk.o_start == chunk.o_end:
                # A zero-width chunk already occupies this insertion slot.
                is_previous_chunk_insertion = True
                new_chunks.append(chunk)
                continue
            if not is_previous_chunk_insertion:
                # Insert a dummy chunk.
                new_chunks.append(self.Chunk(
                    o_start=chunk.o_start,
                    o_end=chunk.o_start,
                    c_str='',
                    type='DUMMY:DUMMY',
                    is_edited=False
                ))
            new_chunks.append(chunk)
            is_previous_chunk_insertion = False
        if not is_previous_chunk_insertion:
            # The last chunk to handle the insertion edit at the end.
            # Guard against an empty chunk list (no tokens and no edits).
            tail = chunks[-1].o_end if chunks else 0
            new_chunks.append(self.Chunk(
                o_start=tail,
                o_end=tail,
                c_str='',
                type='DUMMY:DUMMY',
                is_edited=False
            ))
        return new_chunks

    def score_base(
        self,
        sources: list[str],
        hypotheses: list[str],
        references: list[list[str]]
    ) -> list[list[dict[str, "Score"]]]:
        '''Calculate scores while retaining sentence boundaries.

        The results can be aggregated according to the purpose,
        e.g., at sentence-level or corpus-level.

        Args:
            sources (list[str]): Source sentence.
            hypotheses (list[str]): Corrected sentences.
            references (list[list[str]]): Reference sentences.
                The shape is (the number of references,
                the number of sentences). Only the reference selected
                by `config.ref_id` is used.

        Returns:
            list[list[dict[str, "Score"]]]: The verbose scores.
                - The list shape is (num_sents, 1) because a single
                    reference is evaluated.
                - The dict contains error type-wise scores.
        '''
        num_refs = len(references)
        assert 0 <= self.config.ref_id < num_refs
        reference = references[self.config.ref_id]
        use_weight = not self.config.no_weight

        scores = []  # The shape will be (num_sents, 1)
        for sent_id, source in enumerate(sources):
            hyp_edits = self.edit_extraction(source, hypotheses[sent_id])
            ref_edits = self.edit_extraction(source, reference[sent_id])
            # Both chunk sequences are built over the same source tokens.
            tokens = source.split(' ')
            hyp_chunks = self.generate_chunks(hyp_edits, tokens=tokens)
            ref_chunks = self.generate_chunks(ref_edits, tokens=tokens)
            if use_weight:
                weights = self.weights[sent_id]
                assert len(ref_chunks) == len(weights), f"{sent_id=} {len(ref_chunks)=} {len(weights)=}"

            this_score = {}
            for i, r_chunk in enumerate(ref_chunks):
                r_chunk.weight = weights[i] if use_weight else 1.0
                is_correct, try_edit = self.annotate(r_chunk, hyp_chunks)
                if r_chunk.type not in this_score:
                    this_score[r_chunk.type] = self.Score(
                        beta=self.config.beta
                    )
                s = this_score[r_chunk.type]
                if try_edit:
                    if is_correct:
                        s.tp += r_chunk.weight
                    else:
                        s.fp += r_chunk.weight
                        # A wrong correction of a span the reference
                        # also edits is counted as a miss as well.
                        if r_chunk.is_edited:
                            s.fn += r_chunk.weight
                else:
                    if is_correct:
                        s.tn += r_chunk.weight
                    else:
                        s.fn += r_chunk.weight
            scores.append([this_score])
        return scores

    def annotate(
        self,
        r_chunk: Chunk,
        hyp_chunks: list[Chunk],
    ) -> tuple[bool, bool]:
        '''Annotate whether the reference chunk is correct and whether
        the system attempted to edit it.

        Args:
            r_chunk (Chunk): The chunk to be evaluated.
            hyp_chunks (list[Chunk]): The chunk sequence for one GEC
                systems.

        Returns:
            tuple[bool, bool]: This contains two elements.
                The first one represents correctness.
                The second one represents whether the system tried
                to edit or not.
        '''
        is_correct = False
        try_edit = False
        r_span = (r_chunk.o_start, r_chunk.o_end)
        for h_chunk in hyp_chunks:
            if r_span == (h_chunk.o_start, h_chunk.o_end):
                # Same span: the decision is final, so stop searching.
                is_correct = r_chunk.c_str == h_chunk.c_str
                # To distinguish TP or TN.
                try_edit = h_chunk.is_edited
                break
            elif r_chunk.o_start == r_chunk.o_end:
                # Zero-width reference chunk without an exact match so
                # far: treated as correct unless a later hypothesis
                # chunk with the identical span says otherwise.
                # NOTE(review): for zero-width chunks the overlap branch
                # below is never reached, so a hypothesis edit spanning
                # this position does not set try_edit - confirm that
                # this is intended.
                is_correct = True
            elif (r_chunk.o_start <= h_chunk.o_start < r_chunk.o_end) \
                    or (h_chunk.o_start <= r_chunk.o_start < h_chunk.o_end):
                # Partially overlapping spans: remember whether any of
                # the overlapping hypothesis chunks is an edit.
                try_edit |= h_chunk.is_edited
        return is_correct, try_edit

    def visualize_chunk(
        self,
        chunks: list[Chunk],
        tokens: list[str]
    ) -> None:
        '''The visualizer.

        Args:
            chunks (list[Chunk]): The chunk sequence.
            tokens (list[str]): The source tokens.

        Returns:
            None

        Example:
            ```
            from gec_metrics.metrics.gotoscorer import GoToScorer
            scorer = GoToScorer(GoToScorer.Config(no_weight=True))
            src = 'This sentences contain gramamtical error .'
            trg = 'This sentence contains a grammatical error .'
            edits = scorer.edit_extraction(src, trg)
            chunks = scorer.generate_chunks(edits, src.split(' '))
            scorer.visualize_chunk(chunks, src.split(' '))
            ```
            This prints four rows whose cells are separated by `|` and
            padded to a common width per chunk, followed by an empty
            line:
            - `orig`: the source tokens covered by the chunk,
            - `gold`: the `c_str` of the chunk,
            - `weight`: the weight rounded to two decimals,
            - `cat`: the error type (blank for dummy chunks).
        '''
        def insert_space(w: str, n: int):
            '''Insert space for the visualization.
            Put spaces evenly on both sides of the string; an odd
            remainder goes to the right side.

            Args:
                w (str): strings
                n (int): max length

            Returns:
                str: The string after inserting the spaces.
            '''
            pad = n - len(w)
            if pad <= 0:
                return w
            left = pad // 2
            return ' ' * left + w + ' ' * (pad - left)

        # Row key -> printed label. The labels are padded to the same
        # width so that the `|` separators of all rows line up.
        labels = {
            'orig': 'orig',
            'cor': 'gold',
            'weight': 'weight',
            'type': 'cat',
        }
        label_width = max(len(label) for label in labels.values())
        vis = {
            key: [f'{label:<{label_width}}: ']
            for key, label in labels.items()
        }
        for c in chunks:
            cells = {
                'orig': ' '.join(tokens[c.o_start:c.o_end]),
                'cor': c.c_str,
                'weight': str(round(c.weight, 2)),
                # Dummy chunks carry no real error type; show a blank.
                'type': '' if c.type == 'DUMMY:DUMMY' else c.type,
            }
            max_len = max(len(text) for text in cells.values())
            for key, text in cells.items():
                vis[key].append(insert_space(text, max_len))
        # dicts keep insertion order: orig, gold, weight, cat.
        for row in vis.values():
            print('|'.join(row) + '|')
        print()