from abc import ABC

import torch


class ABCBloomConstraint(ABC):
    """
    Base class for all kinds of decoding constraints. It can be used to implement a new constraint.
    """

    def __init__(self) -> None:
        pass

    def __call__(self, tokens_id: torch.Tensor, logits: torch.Tensor, hypo_ids: torch.Tensor) -> torch.Tensor:
        """
        This method is called by the decoding algorithm to apply the constraint. It changes and returns new logits.

        :param tokens_id: The token id of the last chosen token.
        :param logits: The logits from the Bloom model.
        :param hypo_ids: The hypothesis ids of the last tokens.
        """
        pass
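

# A minimal sketch of a custom constraint built on this interface; the class name
# and its banned_token_ids parameter are illustrative assumptions, not part of
# this module. It permanently assigns very low logits to a fixed set of token ids.
class BannedTokensConstraint(ABCBloomConstraint):
    def __init__(self, banned_token_ids: list, min_logits: float = -1e8) -> None:
        self.banned_token_ids = banned_token_ids
        self.min_logits = min_logits

    def __call__(self, tokens_id: torch.Tensor, logits: torch.Tensor, hypo_ids: torch.Tensor) -> torch.Tensor:
        # Suppress every banned token on every decoding step
        logits[:, self.banned_token_ids] = self.min_logits
        return logits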


class MaxNewTokensConstraint(ABCBloomConstraint):
    """
    Constraint that forbids generating more than max_new_tokens tokens after the prefix.

    Args:
        prefix: The prefix of the sequence.
        max_new_tokens: The maximum number of tokens that can be generated after the prefix.
        eos_token_id: The id of the end-of-sentence token.
        pad_token_id: The id of the padding token.
        min_logits: The minimum logits that can be generated. Default: -1e8.
    """

    def __init__(
        self, prefix: torch.Tensor, max_new_tokens: int, eos_token_id: int, pad_token_id: int, min_logits: float = -1e8
    ) -> None:
        self.max_new_tokens = max_new_tokens
        self.eos_token_id = eos_token_id
        self.min_logits = min_logits

        # Offset each row's counter by its amount of left padding, relative to the most-padded row
        max_pad_size = (prefix == pad_token_id).sum(1).unsqueeze(1).max()
        self.current_generated_tokens = (prefix == pad_token_id).sum(1).unsqueeze(1) - max_pad_size

    def __call__(self, tokens_id: torch.Tensor, logits: torch.Tensor, hypo_ids: torch.Tensor) -> torch.Tensor:
        if tokens_id is not None:
            self.current_generated_tokens += 1

        # Once a row's budget is exhausted, suppress every token except EOS
        mask = self.current_generated_tokens >= self.max_new_tokens
        logits += self.min_logits * mask
        logits[mask[:, 0], self.eos_token_id] = 0
        return logits
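
# A worked example of the padding-offset arithmetic above, using an invented toy
# batch (pad_token_id = 3). The most-padded row starts its counter at zero, while
# less-padded rows start below zero and thus hit the max_new_tokens cap later:
#
#   prefix = torch.tensor([[3, 3, 5],
#                          [3, 7, 8]])
#   (prefix == 3).sum(1).unsqueeze(1)   # -> tensor([[2], [1]])
#   max_pad_size                        # -> tensor(2)
#   current_generated_tokens            # -> tensor([[0], [-1]])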


class EosConstraint(ABCBloomConstraint):
    """
    This constraint repeats the EOS token if it was generated on the previous step.

    Args:
        prefix: The prefix of the sequence.
        eos_token_id: The id of the end-of-sentence token.
        pad_token_id: The id of the padding token.
        min_logits: The minimum logits that can be generated. Default: -1e8.
    """

    def __init__(self, prefix: torch.Tensor, eos_token_id: int, pad_token_id: int, min_logits: float = -1e8) -> None:
        self.eos_token_id = eos_token_id
        self.min_logits = min_logits
        self.past_tokens = None

        # Left-padded rows have not started generating yet; wait until their padding is consumed
        self.wait_until_starting = (prefix == pad_token_id).sum(1).unsqueeze(1)

    def __call__(self, tokens_id: torch.Tensor, logits: torch.Tensor, hypo_ids: torch.Tensor) -> torch.Tensor:
        if self.past_tokens is not None:
            # For rows that already emitted EOS, force EOS again on this step
            mask = (self.wait_until_starting < 0) & (self.past_tokens == self.eos_token_id)
            logits += self.min_logits * mask
            logits[mask[:, 0], self.eos_token_id] = 0

        if tokens_id is not None:
            self.past_tokens = tokens_id
            self.wait_until_starting -= 1

        return logits
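

# A minimal, self-contained sketch of how a decoding loop might chain these
# constraints. Everything below (the toy vocabulary, random logits standing in
# for model outputs, and the greedy argmax loop) is an illustrative assumption,
# not an API defined by this module.
def _demo_constraints() -> None:
    eos_token_id, pad_token_id, vocab_size = 2, 3, 10
    prefix = torch.tensor([[pad_token_id, 5, 6], [7, 8, 9]])  # batch of 2 left-padded prefixes

    constraints = [
        MaxNewTokensConstraint(prefix, max_new_tokens=2, eos_token_id=eos_token_id, pad_token_id=pad_token_id),
        EosConstraint(prefix, eos_token_id=eos_token_id, pad_token_id=pad_token_id),
    ]

    last_token_id = None
    for _ in range(4):
        logits = torch.randn(2, vocab_size)  # stand-in for the model's next-token logits
        for constraint in constraints:
            logits = constraint(last_token_id, logits, hypo_ids=None)
        last_token_id = logits.argmax(-1, keepdim=True)
    # After max_new_tokens steps, every row is forced to keep emitting eos_token_id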