Module pysimt.metrics.multibleu
Tokenized BLEU through sacreBLEU API.
Expand source code
"""Tokenized BLEU through sacreBLEU API."""
from typing import Union, Iterable, TextIO
from sacrebleu import corpus_bleu
from ..utils.misc import listify
from ..utils.io import read_reference_files, read_hypothesis_file
from .metric import Metric
class BLEUScorer:
    """Computes the multi-bleu equivalent using SacreBLEU, with tokenization
    option disabled.

    Args:
        refs: List of reference text files
        hyps: A file path, a list of hypothesis strings, or an open file
            handle (any other iterable of lines is accepted as well)
        language: unused
    """

    def compute(self, refs: Iterable[str],
                hyps: Union[str, Iterable[str], TextIO],
                language=None) -> Metric:
        """Score `hyps` against the reference files and wrap the result.

        Args:
            refs: Reference text files; normalised with `listify` and then
                loaded through `read_reference_files`.
            hyps: A path (loaded through `read_hypothesis_file`), a list of
                hypothesis strings (used as-is), or another iterable of
                lines such as an open file handle.
            language: Unused.

        Returns:
            A `Metric` named ``'BLEU'`` carrying the numeric score and a
            verbose description string.

        Raises:
            TypeError: If `hyps` is neither a string nor an iterable.
        """
        if isinstance(hyps, str):
            hyps = read_hypothesis_file(hyps)
        elif not isinstance(hyps, list):
            # The signature promises support for file handles and generic
            # iterables, so materialise them instead of rejecting them with
            # an `assert` (which disappears entirely under `python -O`).
            try:
                lines = iter(hyps)
            except TypeError as exc:
                raise TypeError(
                    'hyps must be a file path, an iterable of strings or an '
                    'open file handle, got {}'.format(type(hyps).__name__)
                ) from exc
            # Drop trailing line terminators so lines read from a handle do
            # not carry their newline into scoring; strings without one are
            # left unchanged.
            # NOTE(review): read_hypothesis_file is defined elsewhere --
            # confirm its line normalisation matches this.
            hyps = [line.rstrip('\r\n') for line in lines]

        refs = read_reference_files(*listify(refs))

        # tokenize='none': sacreBLEU scores the text exactly as provided.
        score = corpus_bleu(hyps, refs, tokenize='none')
        # The first two whitespace-separated tokens of the formatted score
        # are dropped; the remainder is kept as the verbose description.
        verbose_score = ' '.join(score.format().split()[2:])
        float_score = score.score
        return Metric('BLEU', float_score, verbose_score)
Classes
class BLEUScorer
-
Computes the multi-bleu equivalent using SacreBLEU, with tokenization option disabled.
Args
refs
- List of reference text files
hyps
- A file path, a list of hypothesis strings, or an open file handle
language
- unused
Expand source code
class BLEUScorer:
    """Computes the multi-bleu equivalent using SacreBLEU, with tokenization
    option disabled.

    Args:
        refs: List of reference text files
        hyps: A file path, a list of hypothesis strings, or an open file
            handle (any other iterable of lines is accepted as well)
        language: unused
    """

    def compute(self, refs: Iterable[str],
                hyps: Union[str, Iterable[str], TextIO],
                language=None) -> Metric:
        """Compute corpus-level BLEU for `hyps` against `refs`.

        Args:
            refs: Reference text files; passed through `listify` and
                unpacked into `read_reference_files`.
            hyps: Either a path handed to `read_hypothesis_file`, a list of
                hypothesis strings taken verbatim, or any other iterable of
                lines (for example an open file handle).
            language: Unused.

        Returns:
            ``Metric('BLEU', score, verbose)`` where `score` is the numeric
            sacreBLEU score and `verbose` is its formatted description.

        Raises:
            TypeError: If `hyps` is not a string and cannot be iterated.
        """
        if isinstance(hyps, str):
            hypotheses = read_hypothesis_file(hyps)
        elif isinstance(hyps, list):
            hypotheses = hyps
        else:
            # Validation used to be an `assert`, which both vanished under
            # `python -O` and refused the file handles / iterables that the
            # signature documents. Accept them explicitly instead.
            try:
                line_iter = iter(hyps)
            except TypeError as exc:
                raise TypeError(
                    'hyps must be a file path, an iterable of strings or an '
                    'open file handle, got {}'.format(type(hyps).__name__)
                ) from exc
            # Only trailing line terminators are removed, so plain strings
            # pass through untouched.
            # NOTE(review): confirm this matches what read_hypothesis_file
            # does to each line; that helper is not visible here.
            hypotheses = [line.rstrip('\r\n') for line in line_iter]

        references = read_reference_files(*listify(refs))

        # Tokenization is switched off so the inputs are scored as given.
        score = corpus_bleu(hypotheses, references, tokenize='none')
        # Keep everything after the first two tokens of the formatted score.
        verbose_score = ' '.join(score.format().split()[2:])
        return Metric('BLEU', score.score, verbose_score)
Methods
def compute(self, refs: Iterable[str], hyps: Union[str, Iterable[str], TextIO], language=None) ‑> Metric
-
Expand source code
def compute(self, refs: Iterable[str],
            hyps: Union[str, Iterable[str], TextIO],
            language=None) -> Metric:
    """Compute tokenization-free corpus BLEU through sacreBLEU.

    Args:
        refs: Reference text files; normalised with `listify` before being
            loaded by `read_reference_files`.
        hyps: A file path (read with `read_hypothesis_file`), a list of
            hypothesis strings, or any other iterable of lines such as an
            open file handle.
        language: Unused.

    Returns:
        A `Metric` named ``'BLEU'`` with the numeric score and a verbose
        description string.

    Raises:
        TypeError: If `hyps` is neither a string nor an iterable.
    """
    if isinstance(hyps, str):
        hyps = read_hypothesis_file(hyps)
    elif not isinstance(hyps, list):
        # An `assert` is not a reliable input check (it is removed under
        # `python -O`) and it rejected the handles/iterables the annotation
        # allows, so convert those to a list and fail loudly otherwise.
        try:
            lines = iter(hyps)
        except TypeError as exc:
            raise TypeError(
                'hyps must be a file path, an iterable of strings or an '
                'open file handle, got {}'.format(type(hyps).__name__)
            ) from exc
        # Strip trailing line terminators only; other content is preserved.
        # NOTE(review): verify against read_hypothesis_file's behaviour.
        hyps = [line.rstrip('\r\n') for line in lines]

    refs = read_reference_files(*listify(refs))

    # sacreBLEU's internal tokenizer is disabled on purpose.
    score = corpus_bleu(hyps, refs, tokenize='none')
    # Discard the first two tokens of the formatted score string.
    verbose_score = ' '.join(score.format().split()[2:])
    float_score = score.score
    return Metric('BLEU', float_score, verbose_score)