You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
When I train the BEiT network on my own dataset, training runs without problems, but the following error is reported during validation. What is the reason?
#3812
Open
vGuiGui6 opened this issue
Nov 12, 2024
· 1 comment
11/12 04:39:27 - mmengine - INFO - Exp name: beit-base_upernet_8xb2-160k_parcel20k-640x640_20241111_235427
11/12 04:39:27 - mmengine - INFO - Iter(train) [ 2000/20000] base_lr: 2.9906e-05 lr: 7.6016e-06 eta: 1 day, 18:42:25 time: 8.5668 data_time: 0.0020 memory: 11904 loss: 0.4703 decode.loss_ce: 0.3251 decode.acc_seg: 75.8877 aux.loss_ce: 0.1451 aux.acc_seg: 76.0485
11/12 04:39:27 - mmengine - INFO - Saving checkpoint at 2000 iterations
Traceback (most recent call last):
File "E:\Net\mmsegmentation\tools\train.py", line 106, in <module>
main()
File "E:\Net\mmsegmentation\tools\train.py", line 102, in main
runner.train()
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\mmengine\runner\runner.py", line 1777, in train
model = self.train_loop.run() # type: ignore
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\mmengine\runner\loops.py", line 294, in run
self.runner.val_loop.run()
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\mmengine\runner\loops.py", line 373, in run
self.run_iter(idx, data_batch)
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\torch\autograd\grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\mmengine\runner\loops.py", line 393, in run_iter
outputs = self.runner.model.val_step(data_batch)
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\mmengine\model\base_model\base_model.py", line 133, in val_step
return self._run_forward(data, mode='predict') # type: ignore
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\mmengine\model\base_model\base_model.py", line 361, in _run_forward
results = self(**data, mode=mode)
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\torch\nn\modules\module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "E:\Net\mmsegmentation\mmseg\models\segmentors\base.py", line 96, in forward
return self.predict(inputs, data_samples)
File "E:\Net\mmsegmentation\mmseg\models\segmentors\encoder_decoder.py", line 220, in predict
seg_logits = self.inference(inputs, batch_img_metas)
File "E:\Net\mmsegmentation\mmseg\models\segmentors\encoder_decoder.py", line 341, in inference
seg_logit = self.slide_inference(inputs, batch_img_metas)
File "E:\Net\mmsegmentation\mmseg\models\segmentors\encoder_decoder.py", line 283, in slide_inference
crop_seg_logit = self.encode_decode(crop_img, batch_img_metas)
File "E:\Net\mmsegmentation\mmseg\models\segmentors\encoder_decoder.py", line 128, in encode_decode
x = self.extract_feat(inputs)
File "E:\Net\mmsegmentation\mmseg\models\segmentors\encoder_decoder.py", line 119, in extract_feat
x = self.backbone(inputs)
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\torch\nn\modules\module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "E:\Net\mmsegmentation\mmseg\models\backbones\beit.py", line 535, in forward
x = layer(x)
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\torch\nn\modules\module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "E:\Net\mmsegmentation\mmseg\models\backbones\beit.py", line 221, in forward
x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x)))
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\torch\nn\modules\module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "E:\Net\mmsegmentation\mmseg\models\backbones\beit.py", line 142, in forward
attn = attn + relative_position_bias.unsqueeze(0)
RuntimeError: The size of tensor a (1025) must match the size of tensor b (1601) at non-singleton dimension 3
Process finished with exit code 1
The text was updated successfully, but these errors were encountered:
Because the training and test sets are processed separately, their crop resolutions differ. As a result, the convolutions cannot evenly divide the images in the test set, which leads to tensor size-mismatch errors like the one above. Try adding `size_divisor=32` to the `data_preprocessor` section of your configuration file for the test set; that might solve your problem. I ran into a similar issue before.
11/12 04:39:27 - mmengine - INFO - Exp name: beit-base_upernet_8xb2-160k_parcel20k-640x640_20241111_235427
11/12 04:39:27 - mmengine - INFO - Iter(train) [ 2000/20000] base_lr: 2.9906e-05 lr: 7.6016e-06 eta: 1 day, 18:42:25 time: 8.5668 data_time: 0.0020 memory: 11904 loss: 0.4703 decode.loss_ce: 0.3251 decode.acc_seg: 75.8877 aux.loss_ce: 0.1451 aux.acc_seg: 76.0485
11/12 04:39:27 - mmengine - INFO - Saving checkpoint at 2000 iterations
Traceback (most recent call last):
File "E:\Net\mmsegmentation\tools\train.py", line 106, in <module>
main()
File "E:\Net\mmsegmentation\tools\train.py", line 102, in main
runner.train()
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\mmengine\runner\runner.py", line 1777, in train
model = self.train_loop.run() # type: ignore
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\mmengine\runner\loops.py", line 294, in run
self.runner.val_loop.run()
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\mmengine\runner\loops.py", line 373, in run
self.run_iter(idx, data_batch)
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\torch\autograd\grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\mmengine\runner\loops.py", line 393, in run_iter
outputs = self.runner.model.val_step(data_batch)
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\mmengine\model\base_model\base_model.py", line 133, in val_step
return self._run_forward(data, mode='predict') # type: ignore
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\mmengine\model\base_model\base_model.py", line 361, in _run_forward
results = self(**data, mode=mode)
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\torch\nn\modules\module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "E:\Net\mmsegmentation\mmseg\models\segmentors\base.py", line 96, in forward
return self.predict(inputs, data_samples)
File "E:\Net\mmsegmentation\mmseg\models\segmentors\encoder_decoder.py", line 220, in predict
seg_logits = self.inference(inputs, batch_img_metas)
File "E:\Net\mmsegmentation\mmseg\models\segmentors\encoder_decoder.py", line 341, in inference
seg_logit = self.slide_inference(inputs, batch_img_metas)
File "E:\Net\mmsegmentation\mmseg\models\segmentors\encoder_decoder.py", line 283, in slide_inference
crop_seg_logit = self.encode_decode(crop_img, batch_img_metas)
File "E:\Net\mmsegmentation\mmseg\models\segmentors\encoder_decoder.py", line 128, in encode_decode
x = self.extract_feat(inputs)
File "E:\Net\mmsegmentation\mmseg\models\segmentors\encoder_decoder.py", line 119, in extract_feat
x = self.backbone(inputs)
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\torch\nn\modules\module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "E:\Net\mmsegmentation\mmseg\models\backbones\beit.py", line 535, in forward
x = layer(x)
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\torch\nn\modules\module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "E:\Net\mmsegmentation\mmseg\models\backbones\beit.py", line 221, in forward
x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x)))
File "D:\ProgramData\Anaconda3\envs\open-mmlab\lib\site-packages\torch\nn\modules\module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "E:\Net\mmsegmentation\mmseg\models\backbones\beit.py", line 142, in forward
attn = attn + relative_position_bias.unsqueeze(0)
RuntimeError: The size of tensor a (1025) must match the size of tensor b (1601) at non-singleton dimension 3
Process finished with exit code 1
The text was updated successfully, but these errors were encountered: