warnings.warn(
Traceback (most recent call last):
File "test_train.small.gemma.infini.py", line 150, in
trainer.train()
File "/transformers/src/transformers/trainer.py", line 1885, in train
return inner_training_loop(
File "/transformers/src/transformers/trainer.py", line 2216, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/transformers/src/transformers/trainer.py", line 3238, in training_step
loss = self.compute_loss(model, inputs)
File "/transformers/src/transformers/trainer.py", line 3264, in compute_loss
outputs = model(**inputs)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/accelerate/utils/operations.py", line 822, in forward
return model_forward(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/accelerate/utils/operations.py", line 810, in call
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/usr/local/lib/python3.8/dist-packages/torch/amp/autocast_mode.py", line 16, in decorate_autocast
return func(*args, **kwargs)
File "/jupyter_workspace/Raghav/InfiniTransformer/infini_gemma/modeling_infini_gemma.py", line 1613, in forward
outputs = self.model(
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/jupyter_workspace/Raghav/InfiniTransformer/infini_gemma/modeling_infini_gemma.py", line 1371, in forward
layer_outputs = self._gradient_checkpointing_func(
File "/usr/local/lib/python3.8/dist-packages/torch/_compile.py", line 24, in inner
return torch._dynamo.disable(fn, recursive)(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/eval_frame.py", line 451, in _fn
return fn(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/external_utils.py", line 36, in inner
return fn(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/utils/checkpoint.py", line 487, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/usr/local/lib/python3.8/dist-packages/torch/autograd/function.py", line 598, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/usr/local/lib/python3.8/dist-packages/torch/utils/checkpoint.py", line 262, in forward
outputs = run_function(*args)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/jupyter_workspace/Raghav/InfiniTransformer/infini_gemma/modeling_infini_gemma.py", line 1056, in forward
_attended = self.self_attn(
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/jupyter_workspace/Raghav/InfiniTransformer/infini_gemma/modeling_infini_gemma.py", line 868, in forward
query_states, key_states = apply_rotary_pos_emb(
File "/jupyter_workspace/Raghav/InfiniTransformer/infini_gemma/modeling_infini_gemma.py", line 272, in apply_rotary_pos_emb
q_embed = (q * cos) + (rotate_half(q) * sin)
RuntimeError: The size of tensor a (65536) must match the size of tensor b (8192) at non-singleton dimension 2
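For context on what the error is saying: with the usual (batch, num_heads, seq_len, head_dim) layout in Gemma-style attention, non-singleton dimension 2 is the sequence length, so the query tensor covers 65536 positions while the rotary cos/sin tensors only cover 8192. Below is a minimal sketch of the failing broadcast; the shapes and the rotate_half helper are illustrative, inferred from the error message rather than taken from the repo's code.

```python
import torch

def rotate_half(x):
    # Standard RoPE helper: negate the second half and swap the halves.
    x1, x2 = x.chunk(2, dim=-1)
    return torch.cat((-x2, x1), dim=-1)

# Toy sizes everywhere except the two sequence lengths from the error message.
batch, heads, head_dim = 1, 2, 8
q   = torch.randn(batch, heads, 65536, head_dim)  # full input sequence
cos = torch.randn(batch, 1,     8192,  head_dim)  # rotary cache covering 8192 positions
sin = torch.randn(batch, 1,     8192,  head_dim)

try:
    q_embed = (q * cos) + (rotate_half(q) * sin)
except RuntimeError as e:
    print(e)  # The size of tensor a (65536) must match the size of
              # tensor b (8192) at non-singleton dimension 2

# The broadcast works once both tensors describe the same number of
# positions, e.g. one 8192-token segment at a time:
q_seg = q[:, :, :8192, :]
q_embed = (q_seg * cos) + (rotate_half(q_seg) * sin)
```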
Hey @Beomi, any clarity on this? It would be deeply appreciated.
I ran the custom (Type I method) training script to pre-train on my dataset, and I also wanted to fine-tune using the HF Trainer framework, but I am getting this error.
Has anyone else faced this issue?
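In case it helps with a repro: in the HF fine-tuning path the per-example length is fixed at tokenization time, so one thing worth checking is whether the tokenized max_length matches the length the rotary cache appears to cover (8192, going by the error) rather than the full 65536-token context, which the model's own segmenting logic would otherwise have to handle. A rough sketch follows; the checkpoint and field names are placeholders, not the ones from the actual script.

```python
from transformers import AutoTokenizer

# Placeholder checkpoint; the actual tokenizer depends on the setup.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")

def tokenize(example, max_length=8192):
    # 8192 mirrors the cos/sin length in the error; feeding 65536 tokens
    # straight through apply_rotary_pos_emb is what triggers the mismatch.
    return tokenizer(
        example["text"],   # placeholder field name
        truncation=True,
        max_length=max_length,
    )
```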