Merge pull request #71 from pytorch-labs/wrapper
consistent wrapper for ET & AOTI
mikekgfb authored Apr 7, 2024
2 parents 301bf31 + c78662f commit 06c9bb6
Showing 3 changed files with 28 additions and 24 deletions.
export.py (22 additions, 1 deletion)
@@ -37,6 +37,26 @@ def device_sync(device):
     else:
         print(f"device={device} is not yet suppported")
 
 
+class model_wrapper(nn.Module):
+    def __init__(self, model, device):
+        super().__init__()
+
+        max_seq_length = 350
+        with torch.device(device):
+            model.setup_caches(max_batch_size=1, max_seq_length=max_seq_length)
+
+        self.model = model
+        # init model here if necessary
+
+    def forward(self, idx, input_pos):
+        # input_pos: [B, 1]
+        # assert failed on symbolic shape during aot_compile?!
+        # but not for ET?
+        # assert input_pos.shape[-1] == 1
+        logits = self.model(idx, input_pos)
+        return logits  # sample(logits, **sampling_kwargs)
+
+
 def main(checkpoint_path, device, quantize = "{ }", args = None):
     assert checkpoint_path.is_file(), checkpoint_path
@@ -53,7 +73,8 @@ def main(checkpoint_path, device, quantize = "{ }", args = None):
     print(f"Time to load model: {time.time() - t0:.02f} seconds")
 
     quantize_model(model, args.quantize)
-
+
+    model = model_wrapper(model, device=device)
 
     output_pte_path = args.output_pte_path
     output_dso_path = args.output_dso_path
export_aoti.py (2 additions, 2 deletions)
@@ -33,8 +33,8 @@ def device_sync(device):
 
 def export_model(model: nn.Module, device, output_path, args=None):
     max_seq_length = 350
-    with torch.device(device):
-        model.setup_caches(max_batch_size=1, max_seq_length=max_seq_length)
+    # with torch.device(device):
+    #     model.setup_caches(max_batch_size=1, max_seq_length=max_seq_length)
 
     input = (
         torch.tensor([[1, 9038, 2501, 263, 931]], dtype=torch.int, device=device),
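
With cache setup hoisted into the shared wrapper, export_aoti.py now assumes it receives an already-wrapped module. For context, the AOTI flow around this hunk looks roughly like the sketch below; it is an assumption based on the early-2024 torch._export.aot_compile API, not code from this commit, and export_model_sketch plus its second input tensor are illustrative:

import torch

def export_model_sketch(wrapped_model, device, output_path):
    # Example inputs mirroring the tensors built in export_aoti.py; the
    # input_pos companion tensor is assumed, since the diff truncates it.
    example_inputs = (
        torch.tensor([[1, 9038, 2501, 263, 931]], dtype=torch.int, device=device),
        torch.arange(0, 5, device=device),
    )
    # Ahead-of-time Inductor compile to a shared library; the options key
    # pins the .so output location in that era's API.
    so_path = torch._export.aot_compile(
        wrapped_model,
        example_inputs,
        options={"aot_inductor.output_path": output_path},
    )
    return so_path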
export_et.py (4 additions, 21 deletions)
@@ -70,32 +70,15 @@ def materialze_broadcast_of_rope_freq_cis(
     return module
 
 
-class model_wrapper(nn.Module):
-    def __init__(self, model, device):
-        super().__init__()
-
-        max_seq_length = 350
-        with torch.device(device):
-            model.setup_caches(max_batch_size=1, max_seq_length=max_seq_length)
-
-        self.model = model
-        # init model here if necessary
-
-    def forward(self, x, input_pos):
-        # input_pos: [B, 1]
-        assert input_pos.shape[-1] == 1
-        logits = self.model(x, input_pos)
-        return logits  # sample(logits, **sampling_kwargs)
-
-
 def canonical_path(path):
     return path
 
 ## align AOTI and ET export
 # def export_model(model: nn.Module, device, output_path):
 
 def export_model(model, device, output_path, args=None) -> str:  # noqa: C901
-
-    export_model = model_wrapper(model, device=device)
+    # applied wrapper already in export.
+    # export_model = model_wrapper(model, device=device)
+    export_model = model
     print(export_model)
 
     input = (
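
On the ExecuTorch side, the wrapped module then feeds the usual export-to-.pte pipeline. A minimal sketch under stated assumptions (standard executorch.exir entry points; the repo's actual export_et.py also applies quantization and backend passes not shown here, and export_to_pte_sketch is an illustrative name):

import torch
from executorch.exir import to_edge  # assumes the ExecuTorch pip package

def export_to_pte_sketch(wrapped_model, example_inputs, output_path):
    # 1. Capture the wrapper's forward as an ExportedProgram.
    exported = torch.export.export(wrapped_model, example_inputs)
    # 2. Lower to the edge dialect, then to an ExecuTorch program.
    program = to_edge(exported).to_executorch()
    # 3. Serialize the flatbuffer as a .pte file.
    with open(output_path, "wb") as f:
        f.write(program.buffer)
    return output_path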
