mbrs.metrics.bleu module

mbrs.metrics.bleu module#

class mbrs.metrics.bleu.MetricBLEU(cfg: Config)[source]#

Bases: MetricAggregatable

BLEU metric class.

class AggregatedReference(ngrams: Counter[tuple[str, ...]], length: float)[source]#

Bases: object

Aggregated reference representation.

ngrams (Counter[tuple[str, ...]]): Bags of expected n-gram counts.
length (float): Expected length of references.

length: float#

ngrams: Counter[tuple[str, ...]]#

class Config(lowercase: bool = False, force: bool = False, tokenize: str | None = None, smooth_method: str = 'exp', smooth_value: float | None = None, max_ngram_order: int = 4, effective_order: bool = True, trg_lang: str = '', num_workers: int = 8)[source]#

Bases: Config

BLEU metric configuration.

lowercase (bool): If True, lowercased BLEU is computed.
force (bool): Ignore data that looks already tokenized.
tokenize (str, optional): The tokenizer to use. If None, defaults to language-specific tokenizers with ‘13a’ as the fallback default.
smooth_method (str): The smoothing method to use (‘floor’, ‘add-k’, ‘exp’ or ‘none’).
smooth_value (float, optional): The smoothing value for the ‘floor’ and ‘add-k’ methods. If None, falls back to the default value.
max_ngram_order (int): If given, it overrides the maximum n-gram order (default: 4) when computing precisions.
effective_order (bool): If True, stop including n-gram orders for which precision is 0. This should be True if sentence-level BLEU is to be computed. (default: True)
trg_lang (str): An optional language code to raise potential tokenizer warnings.
num_workers (int): Number of workers for multiprocessing.

effective_order: bool = True#

force: bool = False#

lowercase: bool = False#

max_ngram_order: int = 4#

num_workers: int = 8#

smooth_method: str = 'exp'#

smooth_value: float | None = None#

tokenize: str | None = None#

trg_lang: str = ''#

cfg: Config#

corpus_score(hypotheses: list[str], references_lists: list[list[str]], sources: list[str] | None = None) → float[source]#

Calculate the corpus-level score.

Parameters:

hypotheses (list[str]) – Hypotheses.
references_lists (list[list[str]]) – Lists of references.
sources (list[str], optional) – Sources.

Returns:

The corpus score.

Return type:

float

expected_scores_reference_aggregation(hypotheses: list[str], references: list[str], source: str | None = None, reference_lprobs: Tensor | None = None) → Tensor[source]#

Calculate the expected scores for each hypothesis.

Parameters:

hypotheses (list[str]) – Hypotheses.
references (list[str]) – References.
source (str, optional) – A source.
reference_lprobs (Tensor, optional) – Log-probabilities for each reference sample. The shape must be (len(references),). See https://arxiv.org/abs/2311.05263.

Returns:

The expected scores for each hypothesis.

Return type:

Tensor

pairwise_scores(hypotheses: list[str], references: list[str], *_, **__) → Tensor[source]#

Calculate the pairwise scores.

Parameters:

hypotheses (list[str]) – Hypotheses.
references (list[str]) – References.

Returns:

Score matrix of shape (H, R), where H is the number of hypotheses and R is the number of references.

Return type:

Tensor

score(hypothesis: str, reference: str, *_, **__) → float[source]#

Calculate the score of the given hypothesis.

Parameters:

hypothesis (str) – Hypothesis.
reference (str) – Reference.

Returns:

The score of the given hypothesis.

Return type:

float

scores(hypotheses: list[str], references: list[str], *_, **__) → Tensor[source]#

Calculate the scores of the given hypotheses.

Parameters:

hypotheses (list[str]) – N hypotheses.
references (list[str]) – N references.

Returns:

The N scores of the given hypotheses.

Return type:

Tensor

mbrs.metrics.bleu module

Contents

mbrs.metrics.bleu module#