Source code for mbrs.metrics.xcomet

  1from __future__ import annotations
  2
  3import os
  4from dataclasses import dataclass
  5from typing import Optional
  6
  7import comet.encoders
  8import torch
  9from comet import download_model, load_from_checkpoint
 10from comet.encoders.base import Encoder
 11from comet.encoders.bert import BERTEncoder
 12from comet.models import XCOMETMetric
 13from huggingface_hub import PyTorchModelHubMixin
 14from torch import Tensor, nn
 15from transformers import AutoConfig, AutoModel, AutoTokenizer
 16from transformers.models.deberta_v2 import modeling_deberta_v2
 17
 18from mbrs import timer, utils
 19
 20from . import Metric, register
 21
 22

[docs]
 23class DeBERTaEncoder(BERTEncoder):
 24    """DeBERTa encoder.
 25
 26    Args:
 27        pretrained_model (str): Pretrained model from hugging face.
 28        load_pretrained_weights (bool): If set to True loads the pretrained weights
 29            from Hugging Face
 30        local_files_only (bool): Whether or not to only look at local files.
 31    """
 32
 33    def __init__(
 34        self,
 35        pretrained_model: str,
 36        load_pretrained_weights: bool = True,
 37        local_files_only: bool = False,
 38    ) -> None:
 39        super(Encoder, self).__init__()
 40        os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
 41        self.tokenizer = AutoTokenizer.from_pretrained(
 42            pretrained_model, local_files_only=local_files_only
 43        )
 44        if load_pretrained_weights:
 45            self.model = AutoModel.from_pretrained(pretrained_model)
 46        else:
 47            self.model = AutoModel.from_config(
 48                AutoConfig.from_pretrained(
 49                    pretrained_model, local_files_only=local_files_only
 50                ),
 51            )
 52        self.model.encoder.output_hidden_states = True
 53
 54        self.model.encoder.layer = nn.ModuleList(
 55            [
 56                modeling_deberta_v2.DebertaV2Layer(
 57                    AutoConfig.from_pretrained(pretrained_model)
 58                )
 59                for _ in range(self.model.config.num_hidden_layers)
 60            ]
 61        )
 62

[docs]
 63    @classmethod
 64    def from_pretrained(
 65        cls,
 66        pretrained_model: str,
 67        load_pretrained_weights: bool = True,
 68        local_files_only: bool = False,
 69    ) -> Encoder:
 70        """Function that loads a pretrained encoder from Hugging Face.
 71
 72        Args:
 73            pretrained_model (str):Name of the pretrain model to be loaded.
 74            load_pretrained_weights (bool): If set to True loads the pretrained weights
 75                from Hugging Face
 76            local_files_only (bool): Whether or not to only look at local files.
 77
 78        Returns:
 79            DeBERTaEncoder: DeBERTaEncoder object.
 80        """
 81        return DeBERTaEncoder(
 82            pretrained_model, load_pretrained_weights, local_files_only=local_files_only
 83        )

 84

[docs]
 85    def forward(
 86        self,
 87        input_ids: torch.Tensor,
 88        attention_mask: Optional[torch.Tensor] = None,
 89        token_type_ids: Optional[torch.Tensor] = None,
 90        **kwargs,
 91    ) -> dict[str, torch.Tensor]:
 92        if attention_mask is None:
 93            attention_mask = torch.ones_like(input_ids)
 94
 95        model_output = self.model(
 96            input_ids=input_ids,
 97            attention_mask=attention_mask,
 98            token_type_ids=token_type_ids,
 99            output_hidden_states=True,
100        )
101        return {
102            "sentemb": model_output.last_hidden_state[:, 0, :],
103            "wordemb": model_output.last_hidden_state,
104            "all_layers": model_output.hidden_states,
105            "attention_mask": attention_mask,
106        }


107
108

[docs]
class XCOMETLiteMetric(XCOMETMetric, PyTorchModelHubMixin):
    """xCOMET-Lite model.

    The listed arguments are forwarded to ``XCOMETMetric.__init__`` together
    with ``layer_transformation="softmax"``.

    Args:
        encoder_model: Encoder name forwarded to the parent class.
        pretrained_model: Pretrained model name forwarded to the parent class.
        word_layer: Forwarded to the parent class.
        validation_data: Forwarded to the parent class. A list is copied
            before it is forwarded.
        word_level_training: Forwarded to the parent class.
        hidden_sizes: Forwarded to the parent class.
        load_pretrained_weights: Forwarded to the parent class.
        *args: Accepted but ignored.
        **kwargs: Accepted but ignored.
    """

    def __init__(
        self,
        encoder_model="DeBERTa",
        pretrained_model="microsoft/mdeberta-v3-base",
        word_layer=8,
        validation_data=[],
        word_level_training=True,
        hidden_sizes=(3072, 1024),
        load_pretrained_weights=False,
        *args,
        **kwargs,
    ):
        # Make the "DeBERTa" encoder name resolvable by COMET before the parent
        # constructor looks it up.
        comet.encoders.str2encoder["DeBERTa"] = DeBERTaEncoder

        # The signature keeps its historical ``[]`` default for compatibility,
        # but the parent receives a private copy so that the single default
        # list object is never shared between instances.
        if isinstance(validation_data, list):
            validation_data = list(validation_data)

        # NOTE(review): ``*args`` and ``**kwargs`` are deliberately not
        # forwarded; presumably they only absorb extra configuration entries
        # supplied when loading from the Hub -- confirm.
        super().__init__(
            encoder_model=encoder_model,
            pretrained_model=pretrained_model,
            word_layer=word_layer,
            layer_transformation="softmax",
            validation_data=validation_data,
            word_level_training=word_level_training,
            hidden_sizes=hidden_sizes,
            load_pretrained_weights=load_pretrained_weights,
        )

135
136

[docs]
@register("xcomet")
class MetricXCOMET(Metric):
    """XCOMET metric class.

    Both XCOMET (Guerreiro et al., 2024) and XCOMET-lite (Larionov et al., 2024) are supported.

    Supported models:
        - Unbabel/XCOMET-XL
        - Unbabel/XCOMET-XXL
        - myyycroft/XCOMET-lite
    """

    scorer: XCOMETMetric

    @dataclass
    class Config(Metric.Config):
        """XCOMET metric configuration.

        - model (str): Model name or path.
        - batch_size (int): Batch size.
        - fp16 (bool): Use float16 for the forward computation.
        - bf16 (bool): Use bfloat16 for the forward computation.
        - cpu (bool): Use CPU for the forward computation.
        """

        model: str = "Unbabel/XCOMET-XL"
        batch_size: int = 8
        fp16: bool = False
        bf16: bool = False
        cpu: bool = False

    def __init__(self, cfg: MetricXCOMET.Config):
        """Load the scorer, freeze it and place it on the target device.

        Args:
            cfg (MetricXCOMET.Config): Metric configuration. ``fp16`` / ``bf16``
                are only applied when the model is moved to CUDA, and ``fp16``
                takes precedence when both are set.
        """
        super().__init__(cfg)
        if cfg.model == "myyycroft/XCOMET-lite":
            # XCOMET-lite is loaded through its own Hub-mixin class.
            self.scorer = XCOMETLiteMetric.from_pretrained(cfg.model)
        else:
            self.scorer = load_from_checkpoint(download_model(cfg.model))
        self.scorer.eval()
        for param in self.scorer.parameters():
            param.requires_grad = False

        if not cfg.cpu and torch.cuda.is_available():
            if cfg.fp16:
                self.scorer = self.scorer.half()
            elif cfg.bf16:
                self.scorer = self.scorer.bfloat16()
            self.scorer = self.scorer.cuda()

    @property
    def device(self) -> torch.device:
        """Returns the device of the model."""
        return self.scorer.device

    def _batched_scores(self, inputs: list[dict[str, str]]) -> Tensor:
        """Score prepared samples in mini-batches of ``cfg.batch_size``.

        Args:
            inputs (list[dict[str, str]]): Samples with the ``mt`` key and,
              optionally, the ``ref`` and ``src`` keys.

        Returns:
            Tensor: The concatenated per-batch scores, in input order. An empty
              tensor is returned when `inputs` is empty.
        """
        batch_size = self.cfg.batch_size
        chunks: list[Tensor] = []
        with timer.measure("score") as t:
            t.set_delta_ncalls(len(inputs))
            for start in range(0, len(inputs), batch_size):
                batch = self.scorer.prepare_for_inference(
                    inputs[start : start + batch_size]
                )
                batch = utils.to_device(batch, self.device)
                chunks.append(self.scorer.predict_step(batch).scores)
        if not chunks:
            # ``torch.cat`` rejects an empty list, so empty input is mapped to
            # an empty score tensor instead of an error.
            return torch.empty(0, device=self.device)
        return torch.cat(chunks)

    def score(
        self,
        hypothesis: str,
        reference: Optional[str] = None,
        source: Optional[str] = None,
    ) -> float:
        """Calculate the score of the given hypothesis.

        Args:
            hypothesis (str): A hypothesis.
            reference (str, optional): A reference.
            source (str, optional): A source.

        Returns:
            float: The score of the given hypothesis.
        """
        # Only the fields that were actually given are passed to the scorer.
        inputs = {"mt": hypothesis}
        if reference is not None:
            inputs["ref"] = reference
        if source is not None:
            inputs["src"] = source

        batch = self.scorer.prepare_for_inference([inputs])
        batch = utils.to_device(batch, self.device)
        model_output = self.scorer.predict_step(batch)
        return model_output.scores.item()

    def scores(
        self,
        hypotheses: list[str],
        references: Optional[list[str]] = None,
        sources: Optional[list[str]] = None,
    ) -> Tensor:
        """Calculate the scores of the given hypothesis.

        Args:
            hypotheses (list[str]): N hypotheses.
            references (list[str], optional): N references.
            sources (list[str], optional): N sources.

        Returns:
            Tensor: The N scores of the given hypotheses.
        """
        inputs = [{"mt": hyp} for hyp in hypotheses]
        if references is not None:
            for sample, ref in zip(inputs, references):
                sample["ref"] = ref
        if sources is not None:
            for sample, src in zip(inputs, sources):
                sample["src"] = src

        return self._batched_scores(inputs).view(len(hypotheses))

    def pairwise_scores(
        self, hypotheses: list[str], references: list[str], source: Optional[str] = None
    ) -> Tensor:
        """Calculate the pairwise scores.

        Args:
            hypotheses (list[str]): Hypotheses.
            references (list[str]): References.
            source (str, optional): A source.

        Returns:
            Tensor: Score matrix of shape `(H, R)`, where `H` is the number
              of hypotheses and `R` is the number of references.
        """
        # As in `score()` and `scores()`, the ``src`` field is only included
        # when a source is given; a ``None`` source is never passed on.
        if source is None:
            data = [
                {"mt": hyp, "ref": ref} for hyp in hypotheses for ref in references
            ]
        else:
            data = [
                {"src": source, "mt": hyp, "ref": ref}
                for hyp in hypotheses
                for ref in references
            ]

        # Samples are laid out hypothesis-major, so the flat scores reshape
        # directly into an `(H, R)` matrix.
        return self._batched_scores(data).view(len(hypotheses), len(references))

    def corpus_score(
        self,
        hypotheses: list[str],
        references_lists: Optional[list[list[str]]] = None,
        sources: Optional[list[str]] = None,
    ) -> float:
        """Calculate the corpus-level score.

        The corpus score is the mean of the segment-level scores. When several
        reference lists are given, the segment-level scores obtained with every
        list are pooled before averaging.

        Args:
            hypotheses (list[str]): Hypotheses.
            references_lists (list[list[str]], optional): Lists of references.
            sources (list[str], optional): Sources.

        Returns:
            float: The corpus score.

        Raises:
            ValueError: If both `references_lists` and `sources` are None.
        """
        batch_size = self.cfg.batch_size
        scores: list[Tensor] = []
        if references_lists is None:
            if sources is None:
                raise ValueError(
                    "`sources` must be given when `references_lists` is None."
                )

            for i in range(0, len(hypotheses), batch_size):
                scores.append(
                    self.scores(
                        hypotheses[i : i + batch_size],
                        None,
                        sources[i : i + batch_size],
                    )
                    .float()
                    .cpu()
                )
        else:
            for references in references_lists:
                for i in range(0, len(hypotheses), batch_size):
                    scores.append(
                        self.scores(
                            hypotheses[i : i + batch_size],
                            references[i : i + batch_size],
                            sources[i : i + batch_size]
                            if sources is not None
                            else None,
                        )
                        .float()
                        .cpu()
                    )
        return torch.cat(scores).mean().item()