def __call__(self, batch) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
    """Tokenize and collate a batch of (sentence, label) pairs.

    Each sentence and its label text are encoded separately. A pair is kept
    only when both encodings contain the same number of tokens; any other
    pair is silently left out of the batch. For a kept pair, the target
    holds the label's token id at every position where the sentence has the
    tokenizer's mask token, and -100 at every other position.

    Args:
        batch: Iterable of (sentence, label) pairs.

    Returns:
        A tuple ``(batch_x, batch_y)``. ``batch_x`` is the tokenizer output
        for the kept sentences, padded and requested as ``"pt"`` tensors.
        ``batch_y`` holds the per-token targets, batch-first and padded
        with -100.
    """
    kept_sentences = []
    targets = []
    for text, target_text in batch:
        text_ids = self.tokenizer.encode(text, return_tensors="pt")
        target_ids = self.tokenizer.encode(target_text, return_tensors="pt")

        # Targets are aligned position-by-position with the sentence, so a
        # pair whose encodings differ in length cannot be used and is dropped.
        if len(text_ids[0]) != len(target_ids[0]):
            continue

        # Keep the label id only at masked positions; everything else gets
        # -100, the same filler used for padding below (presumably the
        # loss's ignore index -- confirm against the training code).
        is_masked = text_ids == self.tokenizer.mask_token_id
        per_token_target = torch.where(is_masked, target_ids, -100)

        kept_sentences.append(text)
        targets.append(per_token_target[0])

    # The sentences are re-tokenized together so the tokenizer pads them
    # to a common length.
    encoded = self.tokenizer(kept_sentences, padding=True, return_tensors="pt")
    padded_targets = pad_sequence(targets, batch_first=True, padding_value=-100)
    return encoded, padded_targets
[docs]classBERTNOTTextualEntailmentCollator:""" Collator for BERTNOTTextualEntailment dataset. Args: | pretrained_tokenizer (str): Pretrained tokenizer name or path. Returns: | Tuple[Dict[str, torch.Tensor], torch.Tensor]: Batch of tokenized premises, hypotheses, and labels. """def__init__(self,pretrained_tokenizer:str)->None:
def __call__(self, batch) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
    """Tokenize and collate a batch of (premise, hypothesis, label) triples.

    Args:
        batch: Iterable of (premise, hypothesis, label) triples.

    Returns:
        A tuple ``(batch_x, batch_y)``. ``batch_x`` is the tokenizer output
        for the premises paired with the hypotheses, padded and requested
        as ``"pt"`` tensors. ``batch_y`` is a tensor built from the labels,
        in batch order.
    """
    premises, hypotheses, targets = [], [], []
    for sample in batch:
        premise, hypothesis, target = sample
        premises.append(premise)
        hypotheses.append(hypothesis)
        targets.append(target)

    # Passing both lists lets the tokenizer encode each premise together
    # with its hypothesis as a single paired input.
    encoded_pairs = self.tokenizer(
        premises,
        hypotheses,
        padding=True,
        return_tensors="pt",
    )
    return encoded_pairs, torch.tensor(targets)