Make applying embeddings more efficient.
Adding new tokens no longer makes a full copy of the embedding weights, which can be massive on certain models.
@@ -239,8 +239,11 @@ class T5(torch.nn.Module):
     def set_input_embeddings(self, embeddings):
         self.shared = embeddings
 
-    def forward(self, input_ids, *args, **kwargs):
-        x = self.shared(input_ids, out_dtype=kwargs.get("dtype", torch.float32))
+    def forward(self, input_ids, attention_mask, embeds=None, num_tokens=None, **kwargs):
+        if input_ids is None:
+            x = embeds
+        else:
+            x = self.shared(input_ids, out_dtype=kwargs.get("dtype", torch.float32))
         if self.dtype not in [torch.float32, torch.float16, torch.bfloat16]:
             x = torch.nan_to_num(x) #Fix for fp8 T5 base
-        return self.encoder(x, *args, **kwargs)
+        return self.encoder(x, attention_mask=attention_mask, **kwargs)
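The new embeds argument lets callers that add custom tokens skip set_input_embeddings entirely: instead of resizing and copying self.shared, they can look up the base vocabulary as usual and splice the extra vectors in before calling forward with input_ids=None. A minimal caller-side sketch; build_embeds and custom_token_start are hypothetical names for illustration and are not part of this commit:

import torch

def build_embeds(model, token_ids, custom_vectors, custom_token_start):
    # token_ids: LongTensor [batch, seq]; custom_vectors: Tensor [n_custom, d_model].
    # Ids below custom_token_start come from the model's own embedding table;
    # ids at or above it index into custom_vectors instead.
    is_custom = token_ids >= custom_token_start
    base_ids = token_ids.clamp(max=custom_token_start - 1)
    x = model.shared(base_ids, out_dtype=torch.float32)  # normal lookup, no weight copy
    x[is_custom] = custom_vectors[token_ids[is_custom] - custom_token_start].to(
        device=x.device, dtype=x.dtype
    )
    return x

# Passing embeds bypasses the lookup inside forward (input_ids=None takes the new branch):
# out = model(None, attention_mask, embeds=build_embeds(model, ids, vecs, start), num_tokens=ids.shape[1])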