From 152c0fb257db11bd3b682b532538dc4145aa9166 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EC=9E=A5=EC=8A=B9=EB=AF=BC?=
Date: Tue, 14 Mar 2023 08:24:29 +0000
Subject: [PATCH 1/4] Add padding direction

---
 torchtext/transforms.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/torchtext/transforms.py b/torchtext/transforms.py
index 4684d58080..4f6b67dcd9 100644
--- a/torchtext/transforms.py
+++ b/torchtext/transforms.py
@@ -240,10 +240,11 @@ class PadTransform(Module):
     :type pad_value: bool
     """
 
-    def __init__(self, max_length: int, pad_value: int) -> None:
+    def __init__(self, max_length: int, pad_value: int, right_pad: bool=True) -> None:
         super().__init__()
         self.max_length = max_length
         self.pad_value = float(pad_value)
+        self.right_pad = right_pad
 
     def forward(self, x: Tensor) -> Tensor:
         """
@@ -255,7 +256,10 @@ def forward(self, x: Tensor) -> Tensor:
         max_encoded_length = x.size(-1)
         if max_encoded_length < self.max_length:
             pad_amount = self.max_length - max_encoded_length
-            x = torch.nn.functional.pad(x, (0, pad_amount), value=self.pad_value)
+            if self.right_pad:
+                x = torch.nn.functional.pad(x, (0, pad_amount), value=self.pad_value)
+            else:
+                x = torch.nn.functional.pad(x, (pad_amount, 0), value=self.pad_value)
         return x

From 16edbd13251c3294e5c8dccbdc067e68a52b4d4f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EC=9E=A5=EC=8A=B9=EB=AF=BC?=
Date: Tue, 14 Mar 2023 08:47:51 +0000
Subject: [PATCH 2/4] Add padding direction

---
 torchtext/transforms.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/torchtext/transforms.py b/torchtext/transforms.py
index 4f6b67dcd9..b44a0f76af 100644
--- a/torchtext/transforms.py
+++ b/torchtext/transforms.py
@@ -237,7 +237,9 @@ class PadTransform(Module):
     :param max_length: Maximum length to pad to
     :type max_length: int
     :param pad_value: Value to pad the tensor with
-    :type pad_value: bool
+    :type pad_value: int
+    :param right_pad: Whether to insert pad at right or left
+    :type pad_value: bool, defaults to True(right)
     """
 
     def __init__(self, max_length: int, pad_value: int, right_pad: bool=True) -> None:

From bb1f1c0418bdab3a987cdf4adc726bf14c00ee69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EC=9E=A5=EC=8A=B9=EB=AF=BC?=
Date: Tue, 21 Mar 2023 01:27:00 +0000
Subject: [PATCH 3/4] follow flake8 and name

---
 torchtext/transforms.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/torchtext/transforms.py b/torchtext/transforms.py
index b44a0f76af..eb2b3de7bf 100644
--- a/torchtext/transforms.py
+++ b/torchtext/transforms.py
@@ -238,15 +238,15 @@ class PadTransform(Module):
     :type max_length: int
     :param pad_value: Value to pad the tensor with
     :type pad_value: int
-    :param right_pad: Whether to insert pad at right or left
-    :type pad_value: bool, defaults to True(right)
+    :param begin: Whether to insert pad_value at start or end, defaults to True
+    :type begin: bool
     """
 
-    def __init__(self, max_length: int, pad_value: int, right_pad: bool=True) -> None:
+    def __init__(self, max_length: int, pad_value: int, begin: bool = True) -> None:
         super().__init__()
         self.max_length = max_length
         self.pad_value = float(pad_value)
-        self.right_pad = right_pad
+        self.begin = begin
 
     def forward(self, x: Tensor) -> Tensor:
         """
@@ -258,10 +258,10 @@ def forward(self, x: Tensor) -> Tensor:
         max_encoded_length = x.size(-1)
         if max_encoded_length < self.max_length:
             pad_amount = self.max_length - max_encoded_length
-            if self.right_pad:
-                x = torch.nn.functional.pad(x, (0, pad_amount), value=self.pad_value)
-            else:
+            if self.begin:
                 x = torch.nn.functional.pad(x, (pad_amount, 0), value=self.pad_value)
+            else:
+                x = torch.nn.functional.pad(x, (0, pad_amount), value=self.pad_value)
         return x
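
Note on the padding direction handled by patches 1-3: torch.nn.functional.pad takes the pad widths for the last dimension as a (left, right) pair, so (0, pad_amount) appends pad_value after the sequence while (pad_amount, 0) inserts it in front. A minimal standalone sketch of the two calls (illustration only, not part of the patches; the sizes mirror the 1-D case used in the unit tests):

    import torch
    import torch.nn.functional as F

    x = torch.ones(5)
    pad_amount = 7 - x.size(-1)  # pad a length-5 sequence up to max_length=7

    # Right padding (right_pad=True in patch 1, begin=False after patch 3):
    # the padding value is appended after the existing values.
    print(F.pad(x, (0, pad_amount), value=0.0))  # tensor([1., 1., 1., 1., 1., 0., 0.])

    # Left padding (right_pad=False / begin=True):
    # the padding value is inserted in front of the existing values.
    print(F.pad(x, (pad_amount, 0), value=0.0))  # tensor([0., 0., 1., 1., 1., 1., 1.])
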
From 4846f9b12c20b8748584e5e4bb329a7cb23729c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EC=9E=A5=EC=8A=B9=EB=AF=BC?=
Date: Tue, 21 Mar 2023 02:43:14 +0000
Subject: [PATCH 4/4] Add PadTransform Unittest - begin, end

---
 test/torchtext_unittest/test_transforms.py | 80 +++++++++++++++-------
 torchtext/transforms.py                    |  4 +-
 2 files changed, 58 insertions(+), 26 deletions(-)

diff --git a/test/torchtext_unittest/test_transforms.py b/test/torchtext_unittest/test_transforms.py
index 618cbca38f..c0cf8171c8 100644
--- a/test/torchtext_unittest/test_transforms.py
+++ b/test/torchtext_unittest/test_transforms.py
@@ -218,42 +218,74 @@ def _pad_transform(self, test_scripting):
         input_1d_tensor = torch.ones(5)
         input_2d_tensor = torch.ones((8, 5))
 
-        pad_long = transforms.PadTransform(max_length=7, pad_value=0)
+        pad_long_end = PadTransform(max_length=7, pad_value=0, begin=False)
+        pad_long_begin = PadTransform(max_length=7, pad_value=0, begin=True)
         if test_scripting:
-            pad_long = torch.jit.script(pad_long)
-        padded_1d_tensor_actual = pad_long(input_1d_tensor)
-        padded_1d_tensor_expected = torch.cat([torch.ones(5), torch.zeros(2)])
+            pad_long_end = torch.jit.script(pad_long_end)
+            pad_long_begin = torch.jit.script(pad_long_begin)
+        padded_1d_tensor_actual_end = pad_long_end(input_1d_tensor)
+        padded_1d_tensor_expected_end = torch.cat([torch.ones(5), torch.zeros(2)])
         torch.testing.assert_close(
-            padded_1d_tensor_actual,
-            padded_1d_tensor_expected,
-            msg=f"actual: {padded_1d_tensor_actual}, expected: {padded_1d_tensor_expected}",
+            padded_1d_tensor_actual_end,
+            padded_1d_tensor_expected_end,
+            msg=f"actual: {padded_1d_tensor_actual_end}, expected: {padded_1d_tensor_expected_end}",
+        )
+        padded_1d_tensor_actual_begin = pad_long_begin(input_1d_tensor)
+        padded_1d_tensor_expected_begin = torch.cat([torch.zeros(2), torch.ones(5)])
+        torch.testing.assert_close(
+            padded_1d_tensor_actual_begin,
+            padded_1d_tensor_expected_begin,
+            msg=f"actual: {padded_1d_tensor_actual_begin}, expected: {padded_1d_tensor_expected_begin}",
         )
 
-        padded_2d_tensor_actual = pad_long(input_2d_tensor)
-        padded_2d_tensor_expected = torch.cat([torch.ones(8, 5), torch.zeros(8, 2)], axis=-1)
+        padded_2d_tensor_actual_end = pad_long_end(input_2d_tensor)
+        padded_2d_tensor_expected_end = torch.cat([torch.ones(8, 5), torch.zeros(8, 2)], axis=-1)
+        torch.testing.assert_close(
+            padded_2d_tensor_actual_end,
+            padded_2d_tensor_expected_end,
+            msg=f"actual: {padded_2d_tensor_actual_end}, expected: {padded_2d_tensor_expected_end}",
+        )
+        padded_2d_tensor_actual_begin = pad_long_begin(input_2d_tensor)
+        padded_2d_tensor_expected_begin = torch.cat([torch.zeros(8, 2), torch.ones(8, 5),], axis=-1)
         torch.testing.assert_close(
-            padded_2d_tensor_actual,
-            padded_2d_tensor_expected,
-            msg=f"actual: {padded_2d_tensor_actual}, expected: {padded_2d_tensor_expected}",
+            padded_2d_tensor_actual_begin,
+            padded_2d_tensor_expected_begin,
+            msg=f"actual: {padded_2d_tensor_actual_begin}, expected: {padded_2d_tensor_expected_begin}",
         )
 
-        pad_short = transforms.PadTransform(max_length=3, pad_value=0)
+        pad_short_end = PadTransform(max_length=3, pad_value=0)
+        pad_short_begin = PadTransform(max_length=3, pad_value=0, begin=True)
         if test_scripting:
-            pad_short = torch.jit.script(pad_short)
-        padded_1d_tensor_actual = pad_short(input_1d_tensor)
-        padded_1d_tensor_expected = input_1d_tensor
+            pad_short_end = torch.jit.script(pad_short_end)
+            pad_short_begin = torch.jit.script(pad_short_begin)
+        padded_1d_tensor_actual_end = pad_short_end(input_1d_tensor)
+        padded_1d_tensor_expected_end = input_1d_tensor
         torch.testing.assert_close(
-            padded_1d_tensor_actual,
-            padded_1d_tensor_expected,
-            msg=f"actual: {padded_1d_tensor_actual}, expected: {padded_1d_tensor_expected}",
+            padded_1d_tensor_actual_end,
+            padded_1d_tensor_expected_end,
+            msg=f"actual: {padded_1d_tensor_actual_end}, expected: {padded_1d_tensor_expected_end}",
+        )
+        padded_1d_tensor_actual_begin = pad_short_begin(input_1d_tensor)
+        padded_1d_tensor_expected_begin = input_1d_tensor
+        torch.testing.assert_close(
+            padded_1d_tensor_actual_begin,
+            padded_1d_tensor_expected_begin,
+            msg=f"actual: {padded_1d_tensor_actual_begin}, expected: {padded_1d_tensor_expected_begin}",
         )
 
-        padded_2d_tensor_actual = pad_short(input_2d_tensor)
-        padded_2d_tensor_expected = input_2d_tensor
+        padded_2d_tensor_actual_end = pad_short_end(input_2d_tensor)
+        padded_2d_tensor_expected_end = input_2d_tensor
+        torch.testing.assert_close(
+            padded_2d_tensor_actual_end,
+            padded_2d_tensor_expected_end,
+            msg=f"actual: {padded_2d_tensor_actual_end}, expected: {padded_2d_tensor_expected_end}",
+        )
+        padded_2d_tensor_actual_begin = pad_short_begin(input_2d_tensor)
+        padded_2d_tensor_expected_begin = input_2d_tensor
         torch.testing.assert_close(
-            padded_2d_tensor_actual,
-            padded_2d_tensor_expected,
-            msg=f"actual: {padded_2d_tensor_actual}, expected: {padded_2d_tensor_expected}",
+            padded_2d_tensor_actual_begin,
+            padded_2d_tensor_expected_begin,
+            msg=f"actual: {padded_2d_tensor_actual_begin}, expected: {padded_2d_tensor_expected_begin}",
         )
 
     def test_pad_transform(self) -> None:
diff --git a/torchtext/transforms.py b/torchtext/transforms.py
index eb2b3de7bf..e57f643431 100644
--- a/torchtext/transforms.py
+++ b/torchtext/transforms.py
@@ -238,11 +238,11 @@ class PadTransform(Module):
     :type max_length: int
     :param pad_value: Value to pad the tensor with
     :type pad_value: int
-    :param begin: Whether to insert pad_value at start or end, defaults to True
+    :param begin: Whether to insert pad_value at start or end, defaults to False
     :type begin: bool
     """
 
-    def __init__(self, max_length: int, pad_value: int, begin: bool = True) -> None:
+    def __init__(self, max_length: int, pad_value: int, begin: bool = False) -> None:
         super().__init__()
         self.max_length = max_length
         self.pad_value = float(pad_value)
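
With all four patches applied, begin defaults to False, so existing callers keep the original pad-at-the-end behavior and left padding is strictly opt-in. A short usage sketch of the resulting API (illustration only; it assumes a torchtext build that includes these patches):

    import torch
    from torchtext.transforms import PadTransform

    x = torch.ones(5)

    # Default (begin=False): pad_value is appended, same as before this series.
    pad_end = PadTransform(max_length=7, pad_value=0)
    print(pad_end(x))    # tensor([1., 1., 1., 1., 1., 0., 0.])

    # begin=True: pad_value is inserted at the start of the sequence.
    pad_begin = PadTransform(max_length=7, pad_value=0, begin=True)
    print(pad_begin(x))  # tensor([0., 0., 1., 1., 1., 1., 1.])

    # Inputs already at or beyond max_length are returned unchanged.
    print(pad_end(torch.ones(7)))  # tensor([1., 1., 1., 1., 1., 1., 1.])
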