mbrs.metrics.base module#

class mbrs.metrics.base.Metric(cfg: Config)[source]#

Bases: MetricBase

Base metric class.

corpus_score(hypotheses: list[str], references_lists: list[list[str]], sources: list[str] | None = None) float[source]#

Calculate the corpus-level score.

Parameters:
  • hypotheses (list[str]) – Hypotheses.

  • references_lists (list[list[str]]) – Lists of references.

  • sources (list[str], optional) – Sources.

Returns:

The corpus score.

Return type:

float

expected_scores(hypotheses: list[str], references: list[str], source: str | None = None, reference_lprobs: Tensor | None = None) Tensor[source]#

Calculate the expected scores for each hypothesis.

Parameters:
  • hypotheses (list[str]) – Hypotheses.

  • references (list[str]) – References.

  • source (str, optional) – A source.

  • reference_lprobs (Tensor, optional) – Log-probabilities for each reference sample. The shape must be (len(references),). See https://arxiv.org/abs/2311.05263.

Returns:

The expected scores for each hypothesis.

Return type:

Tensor

pairwise_scores(hypotheses: list[str], references: list[str], source: str | None = None) Tensor[source]#

Calculate the pairwise scores.

Parameters:
  • hypotheses (list[str]) – Hypotheses.

  • references (list[str]) – References.

  • source (str, optional) – A source.

Returns:

Score matrix of shape (H, R), where H is the number of hypotheses and R is the number of references.

Return type:

Tensor

abstract score(hypothesis: str, reference: str, source: str | None = None) float[source]#

Calculate the score of the given hypothesis.

Parameters:
  • hypothesis (str) – A hypothesis.

  • reference (str) – A reference.

  • source (str, optional) – A source.

Returns:

The score of the given hypothesis.

Return type:

float

scores(hypotheses: list[str], references: list[str], sources: list[str] | None = None) Tensor[source]#

Calculate the scores of the given hypotheses.

Parameters:
  • hypotheses (list[str]) – N hypotheses.

  • references (list[str]) – N references.

  • sources (list[str], optional) – N sources.

Returns:

The N scores of the given hypotheses.

Return type:

Tensor

class mbrs.metrics.base.MetricAggregatable(cfg: Config)[source]#

Bases: Metric

Base class for aggregatable metrics.

This class supports reference aggregation.

abstract expected_scores_reference_aggregation(hypotheses: list[str], references: list[str], source: str | None = None, reference_lprobs: Tensor | None = None) Tensor[source]#

Calculate the expected scores for each hypothesis.

Parameters:
  • hypotheses (list[str]) – Hypotheses.

  • references (list[str]) – References.

  • source (str, optional) – A source.

  • reference_lprobs (Tensor, optional) – Log-probabilities for each reference sample. The shape must be (len(references),). See https://arxiv.org/abs/2311.05263.

Returns:

The expected scores for each hypothesis.

Return type:

Tensor

class mbrs.metrics.base.MetricAggregatableCache(cfg: Config)[source]#

Bases: MetricAggregatable, MetricCacheable

Base class for metrics that can aggregate the cache.

This class supports aggregating intermediate representations of sentences.

class Cache[source]#

Bases: Cache

Intermediate representations of sentences.

abstract aggregate(reference_lprobs: Tensor | None = None) Cache[source]#

Aggregate the cached representations.

Parameters:

reference_lprobs (Tensor, optional) – Log-probabilities for each reference sample. The shape must be (len(references),). See https://arxiv.org/abs/2311.05263.

Returns:

An aggregated representation.

Return type:

Cache

cluster(kmeans: Kmeans) tuple[Cache, Tensor][source]#

Cluster the cached representations.

Parameters:

kmeans (Kmeans) – k-means class to perform clustering.

Returns:

  • Cache: Centroid representations.

  • Tensor: N assigned IDs.

Return type:

tuple[Cache, Tensor]

expected_scores_reference_aggregation(hypotheses: list[str], references: list[str], source: str | None = None, reference_lprobs: Tensor | None = None) Tensor[source]#

Calculate the expected scores for each hypothesis.

Parameters:
  • hypotheses (list[str]) – Hypotheses.

  • references (list[str]) – References.

  • source (str, optional) – A source.

  • reference_lprobs (Tensor, optional) – Log-probabilities for each reference sample. The shape must be (len(references),). See https://arxiv.org/abs/2311.05263.

Returns:

The expected scores for each hypothesis.

Return type:

Tensor

class mbrs.metrics.base.MetricBase(cfg: Config)[source]#

Bases: ABC

Base metric class.

class Config[source]#

Bases: object

HIGHER_IS_BETTER: bool = True#
property device: device#

Returns the device of the metric object.

class mbrs.metrics.base.MetricCacheable(cfg: Config)[source]#

Bases: Metric

Base class for cacheable metrics.

This class supports caching intermediate representations of sentences.

class Cache[source]#

Bases: object

Intermediate representations of sentences.

abstract repeat(n: int) Cache[source]#

Repeat the representations n times.

Parameters:

n (int) – The number of repetitions.

Returns:

The repeated cache.

Return type:

Cache

abstract property embed_dim: int#

Return the size of embedding dimension.

abstract encode(sentences: list[str]) Cache[source]#

Encode the given sentences into their intermediate representations.

Parameters:

sentences (list[str]) – Input sentences.

Returns:

Intermediate representations.

Return type:

MetricCacheable.Cache

abstract out_proj(hypotheses_ir: Cache, references_ir: Cache, sources_ir: Cache | None = None) Tensor[source]#

Forward the output projection layer.

Parameters:
  • hypotheses_ir (Cache) – N intermediate representations of hypotheses.

  • references_ir (Cache) – N intermediate representations of references.

  • sources_ir (Cache, optional) – N intermediate representations of sources.

Returns:

N scores.

Return type:

Tensor

pairwise_scores(hypotheses: list[str], references: list[str], source: str | None = None) Tensor[source]#

Calculate the pairwise scores.

Parameters:
  • hypotheses (list[str]) – Hypotheses.

  • references (list[str]) – References.

  • source (str, optional) – A source.

Returns:

Score matrix of shape (H, R), where H is the number of hypotheses and R is the number of references.

Return type:

Tensor

pairwise_scores_from_ir(hypotheses_ir: Cache, references_ir: Cache, source_ir: Cache | None = None) Tensor[source]#

Calculate the pairwise scores from the intermediate representations.

Parameters:
  • hypotheses_ir (Cache) – Hypotheses.

  • references_ir (Cache) – References.

  • source_ir (Cache, optional) – A source.

Returns:

Score matrix of shape (H, R), where H is the number of hypotheses and R is the number of references.

Return type:

Tensor

score(hypothesis: str, reference: str, source: str | None = None) float[source]#

Calculate the score of the given hypothesis.

Parameters:
  • hypothesis (str) – A hypothesis.

  • reference (str) – A reference.

  • source (str, optional) – A source.

Returns:

The score of the given hypothesis.

Return type:

float

scores(hypotheses: list[str], references: list[str], sources: list[str] | None = None) Tensor[source]#

Calculate the scores of the given hypotheses.

Parameters:
  • hypotheses (list[str]) – N hypotheses.

  • references (list[str]) – N references.

  • sources (list[str], optional) – N sources.

Returns:

The N scores of the given hypotheses.

Return type:

Tensor

scores_from_ir(hypotheses_ir: Cache, references_ir: Cache, sources_ir: Cache | None = None) Tensor[source]#

Calculate the scores of the given hypotheses from the intermediate representations.

Parameters:
  • hypotheses_ir (Cache) – N hypotheses.

  • references_ir (Cache) – N references.

  • sources_ir (Cache, optional) – N sources.

Returns:

The N scores of the given hypotheses.

Return type:

Tensor

class mbrs.metrics.base.MetricReferenceless(cfg: Config)[source]#

Bases: MetricBase

Base class for reference-less metrics like quality estimation.

corpus_score(hypotheses: list[str], sources: list[str]) float[source]#

Calculate the corpus-level score.

Parameters:
  • hypotheses (list[str]) – Hypotheses.

  • sources (list[str]) – Sources.

Returns:

The corpus score.

Return type:

float

abstract score(hypothesis: str, source: str) float[source]#

Calculate the score of the given hypothesis.

Parameters:
  • hypothesis (str) – A hypothesis.

  • source (str) – A source.

Returns:

The score of the given hypothesis.

Return type:

float

scores(hypotheses: list[str], sources: list[str]) Tensor[source]#

Calculate the scores of hypotheses.

Parameters:
  • hypotheses (list[str]) – N hypotheses.

  • sources (list[str]) – N sources.

Returns:

The scores of hypotheses.

Return type:

Tensor