mbrs.metrics.bertscore module

mbrs.metrics.bertscore module#

class mbrs.metrics.bertscore.BERTScoreScoreType(value)[source]#

Bases: int, Enum

An enumeration.

f1 = 2#

precision = 0#

recall = 1#

class mbrs.metrics.bertscore.MetricBERTScore(cfg: Config)[source]#

Bases: MetricCacheable

BERTScore metric class.

class Cache(embeddings: list[Tensor], idf_weights: list[Tensor])[source]#

Bases: Cache

Intermediate representations of sentences.

embeddings (list[Tensor]): A list of token embeddings of shape (T, D),
where T is the length of the sequence, and D is the size of the embedding.
idf_weights (list[Tensor]): A list of IDF weights of shape (T,).

embeddings: list[Tensor]#

idf_weights: list[Tensor]#

repeat(n: int) → Cache[source]#

Repeat the representations by n times.

Parameters: n (int) – The number of repetitions.
Returns: The repeated cache.
Return type: Cache

class Config(score_type: BERTScoreScoreType = BERTScoreScoreType.f1, model_type: str | None = None, num_layers: int | None = None, batch_size: int = 64, nthreads: int = 4, idf: bool = False, idf_sents: list[str] | None = None, lang: str | None = None, rescale_with_baseline: bool = False, baseline_path: str | None = None, use_fast_tokenizer: bool = False, fp16: bool = False, bf16: bool = False, cpu: bool = False)[source]#

Bases: Config

BERTScore metric configuration.

score_type (BERTScoreScoreType): The output score type, i.e.,
precision, recall, or f1.
model_type (str): Contextual embedding model specification; defaults to the
suggested model for the target language. At least one of model_type or lang must be specified.
num_layers (int): The layer of representation to use. Defaults to the number
of layers tuned on WMT16 correlation data.
idf (bool): A boolean specifying whether to use idf or not. (This should be
True even if idf_sents is given.)
idf_sents (list[str]): List of sentences used to compute the idf weights.
batch_size (int): BERTScore processing batch size.
nthreads (int): Number of threads.
lang (str): Language of the sentences. At least one of
model_type or lang must be specified. lang needs to be specified when rescale_with_baseline is True.
rescale_with_baseline (bool): Rescale bertscore with pre-computed baseline.
baseline_path (str): Customized baseline file.
use_fast_tokenizer (bool): use_fast parameter passed to HF tokenizer.
fp16 (bool): Use float16 for the forward computation.
bf16 (bool): Use bfloat16 for the forward computation.
cpu (bool): Use CPU for the forward computation.

baseline_path: str | None = None#

batch_size: int = 64#

bf16: bool = False#

cpu: bool = False#

fp16: bool = False#

idf: bool = False#

idf_sents: list[str] | None = None#

lang: str | None = None#

model_type: str | None = None#

nthreads: int = 4#

num_layers: int | None = None#

rescale_with_baseline: bool = False#

score_type: BERTScoreScoreType = 2#

use_fast_tokenizer: bool = False#

cfg: Config#

corpus_score(hypotheses: list[str], references_lists: list[list[str]], sources: list[str] | None = None) → float[source]#

Calculate the corpus-level score.

Parameters:

hypotheses (list[str]) – Hypotheses.
references_lists (list[list[str]]) – Lists of references.
sources (list[str], optional) – Sources.

Returns:

The corpus score.

Return type:

float

property device: device#: Returns the device of the model.

property embed_dim: int#: Returns the size of the embedding dimension.

encode(sentences: list[str]) → Cache[source]#

Encode the given sentences into their intermediate representations.

Parameters:

sentences (list[str]) – Input sentences.

Returns:

Intermediate representations of shape (N, D), where N is the number of input sentences and D is the size of the embedding dimension.

Return type:

Cache

idf_dict: dict[int, float]#

out_proj(hypotheses_ir: Cache, references_ir: Cache, sources_ir: Cache | None = None) → Tensor[source]#

Forward the output projection layer.

Parameters:

hypotheses_ir (Cache) – N intermediate representations of hypotheses.
references_ir (Cache) – N intermediate representations of references.
sources_ir (Cache, optional) – N intermediate representations of sources.

Returns:

N scores.

Return type:

Tensor

pad_sequence(tensors: list[Tensor]) → Tensor[source]#

pairwise_scores(hypotheses: list[str], references: list[str], *_, **__) → Tensor[source]#

Calculate the pairwise scores.

Parameters:

hypotheses (list[str]) – Hypotheses.
references (list[str]) – References.

Returns:

Score matrix of shape (H, R), where H is the number of hypotheses and R is the number of references.

Return type:

Tensor

scorer: BERTScorer#

scores(hypotheses: list[str], references: list[str], *_, **__) → Tensor[source]#

Calculate the scores of the given hypotheses.

Parameters:

hypotheses (list[str]) – N hypotheses.
references (list[str]) – N references.

Returns:

The N scores of the given hypotheses.

Return type:

Tensor

tokenizer: PreTrainedTokenizerBase#

mbrs.metrics.bertscore module

Contents

mbrs.metrics.bertscore module#