Skip to content

Commit

Permalink
Merge pull request #604 from mlcommons/mlperf-inference
Browse files Browse the repository at this point in the history
Sync Mlperf inference
  • Loading branch information
anandhu-eng authored Nov 24, 2024
2 parents 2a13797 + b0740a9 commit 1ad4bca
Show file tree
Hide file tree
Showing 386 changed files with 12,267 additions and 8,342 deletions.
17 changes: 14 additions & 3 deletions .github/workflows/format.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Automatic code formatting
name: "format"
name: "Code formatting"
on:
push:
branches: [ "main", "mlperf-inference" ]
Expand All @@ -10,6 +10,7 @@ env:
jobs:
format-code:
runs-on: ubuntu-latest
if: github.repository_owner == 'mlcommons'
steps:
- uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -37,12 +38,22 @@ jobs:
git add $FILE
done
- name: Commit
- name: Commit and create PR
run: |
HAS_CHANGES=$(git diff --staged --name-only)
if [ ${#HAS_CHANGES} -gt 0 ]; then
git config --global user.name mlcommons-bot
git config --global user.email "[email protected]"
# Commit changes
git commit -m '[Automated Commit] Format Codebase'
git push
# Push changes to a new branch
BRANCH_NAME="auto/code-format"
git branch $BRANCH_NAME
git push origin $BRANCH_NAME --force

# Create a pull request to the "code-format" branch
gh pr create --base code-format --head $BRANCH_NAME --title "[Automated PR] Format Codebase" --body "This pull request contains automated code formatting changes."
fi
env:
GH_TOKEN: ${{ secrets.ACCESS_TOKEN }}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations

on:
schedule:
- cron: "54 22 * * *" #to be adjusted
- cron: "55 01 * * *" #to be adjusted

jobs:
run_nvidia:
Expand Down
11 changes: 9 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,9 @@
You can find the guidelines to contribute to the MLCommons CM project
at the [CM GitHub page](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md).
## Contributing

The best way to contribute to the MLCommons is to get involved with one of our many project communities. You find more information about getting involved with MLCommons [here](https://mlcommons.org/en/get-involved/#getting-started).

Generally we encourage people to become an MLCommons member if they wish to contribute to MLCommons projects, but outside pull requests are very welcome too.

To get started contributing code, you or your organization needs to sign the MLCommons CLA found at the [MLC policies page](https://mlcommons.org/en/policies/). Once you or your organization has signed the corporate CLA, please fill out this [CLA sign up form](https://forms.gle/Ew1KkBVpyeJDuRw67) to get your specific GitHub handle authorized so that you can start contributing code under the proper license.

MLCommons project work is tracked with issue trackers and pull requests. Modify the project in your own fork and issue a pull request once you want other developers to take a look at what you have done and discuss the proposed changes. Ensure that cla-bot and other checks pass for your Pull requests.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.5.0
0.6.3
97 changes: 58 additions & 39 deletions automation/cache/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from cmind.automation import Automation
from cmind import utils


class CAutomation(Automation):
"""
Automation actions
Expand Down Expand Up @@ -47,9 +48,9 @@ def test(self, i):
"""

import json
print (json.dumps(i, indent=2))
print(json.dumps(i, indent=2))

return {'return':0}
return {'return': 0}

############################################################
def show(self, i):
Expand Down Expand Up @@ -77,7 +78,7 @@ def show(self, i):

# Check parsed automation
if 'parsed_automation' not in i:
return {'return':1, 'error':'automation is not specified'}
return {'return': 1, 'error': 'automation is not specified'}

console = i.get('out') == 'con'

Expand All @@ -101,54 +102,65 @@ def show(self, i):
i['out'] = None
r = self.search(i)

if r['return']>0: return r
if r['return'] > 0:
return r

lst = r['list']
for artifact in sorted(lst, key = lambda x: sorted(x.meta['tags'])):
# for artifact in lst:
for artifact in sorted(lst, key=lambda x: sorted(x.meta['tags'])):
# for artifact in lst:
path = artifact.path
meta = artifact.meta
original_meta = artifact.original_meta

alias = meta.get('alias','')
uid = meta.get('uid','')
alias = meta.get('alias', '')
uid = meta.get('uid', '')

tags = meta.get('tags',[])
tags = meta.get('tags', [])
tags1 = sorted([x for x in tags if not x.startswith('_')])
tags2 = sorted([x for x in tags if x.startswith('_')])
tags = tags1 + tags2

version = meta.get('version','')
version = meta.get('version', '')

if console:
print ('')
print('')
# print ('* UID: {}'.format(uid))
print ('* Tags: {}'.format(','.join(tags)))
print (' Path: {}'.format(path))
if version!='':
print (' Version: {}'.format(version))
print('* Tags: {}'.format(','.join(tags)))
print(' Path: {}'.format(path))
if version != '':
print(' Version: {}'.format(version))

if show_env and console:
path_to_cached_state_file = os.path.join(path, 'cm-cached-state.json')
path_to_cached_state_file = os.path.join(
path, 'cm-cached-state.json')

if os.path.isfile(path_to_cached_state_file):
r = utils.load_json(file_name = path_to_cached_state_file)
if r['return']>0: return r
r = utils.load_json(file_name=path_to_cached_state_file)
if r['return'] > 0:
return r

# Update env and state from cache!
cached_state = r['meta']

new_env = cached_state.get('new_env', {})
if len(new_env)>0:
print (' New env:')
print (json.dumps(new_env, indent=6, sort_keys=True).replace('{','').replace('}',''))
if len(new_env) > 0:
print(' New env:')
print(
json.dumps(
new_env,
indent=6,
sort_keys=True).replace(
'{',
'').replace(
'}',
''))

new_state = cached_state.get('new_state', {})
if len(new_state)>0:
print (' New state:')
print (json.dumps(new_env, indent=6, sort_keys=True))
if len(new_state) > 0:
print(' New state:')
print(json.dumps(new_env, indent=6, sort_keys=True))

return {'return':0, 'list': lst}
return {'return': 0, 'list': lst}

############################################################
def search(self, i):
Expand All @@ -159,30 +171,36 @@ def search(self, i):
"""
# Check simplified CMD: cm show cache "get python"
# If artifact has spaces, treat them as tags!
artifact = i.get('artifact','')
tags = i.get('tags','')
# Tags may be a list (if comes internally from CM scripts) or string if comes from CMD
if type(tags)!=list:
artifact = i.get('artifact', '')
tags = i.get('tags', '')

        # Tags may be a list (if it comes internally from CM scripts) or a
        # string (if it comes from the CMD)
if not isinstance(tags, list):
tags = tags.strip()
if ' ' in artifact:# or ',' in artifact:
del(i['artifact'])
if 'parsed_artifact' in i: del(i['parsed_artifact'])

new_tags = artifact.replace(' ',',')
tags = new_tags if tags=='' else new_tags+','+tags
if ' ' in artifact: # or ',' in artifact:
del (i['artifact'])
if 'parsed_artifact' in i:
del (i['parsed_artifact'])

new_tags = artifact.replace(' ', ',')
tags = new_tags if tags == '' else new_tags + ',' + tags

i['tags'] = tags

        # Force automation when rerunning access with processed input
i['automation']='cache,541d6f712a6b464e'
i['action']='search'
i['common'] = True # Avoid recursion - use internal CM add function to add the script artifact
i['automation'] = 'cache,541d6f712a6b464e'
i['action'] = 'search'
# Avoid recursion - use internal CM add function to add the script
# artifact
i['common'] = True

# Find CM artifact(s)
return self.cmind.access(i)


############################################################

def copy_to_remote(self, i):
"""
Add CM automation.
Expand All @@ -208,4 +226,5 @@ def copy_to_remote(self, i):
"""

return utils.call_internal_module(self, __file__, 'module_misc', 'copy_to_remote', i)
return utils.call_internal_module(
self, __file__, 'module_misc', 'copy_to_remote', i)
47 changes: 29 additions & 18 deletions automation/cache/module_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,17 @@ def copy_to_remote(i):

self_module = i['self_module']

remote_host = i.get('remote_host')
remote_host = i.get('remote_host')
if not remote_host:
return {'return':1, 'error': 'Please input remote host_name/IP via --remote_host'}
remote_cm_repos_location = i.get('remote_cm_repos_location', os.path.join("/home", os.getlogin(), "CM", "repos"))
remote_cm_cache_location = os.path.join(remote_cm_repos_location, "local", "cache")

remote_port = i.get('remote_port', '22')
return {'return': 1,
'error': 'Please input remote host_name/IP via --remote_host'}
remote_cm_repos_location = i.get(
'remote_cm_repos_location', os.path.join(
"/home", os.getlogin(), "CM", "repos"))
remote_cm_cache_location = os.path.join(
remote_cm_repos_location, "local", "cache")

remote_port = i.get('remote_port', '22')
remote_user = i.get('remote_user', os.getlogin())

tag_string = i['tags']
Expand All @@ -52,18 +56,18 @@ def copy_to_remote(i):
return r

if len(r['list']) == 0:
pass #fixme
pass # fixme
elif len(r['list']) > 1:
print("Multiple cache entries found: ")
for k in sorted(r['list'], key = lambda x: x.meta.get('alias','')):
for k in sorted(r['list'], key=lambda x: x.meta.get('alias', '')):
print(k.path)
x = input("Would you like to copy them all? Y/n: ")
if x.lower() == 'n':
return {'return': 0}

import json

for k in sorted(r['list'], key = lambda x: x.meta.get('alias','')):
for k in sorted(r['list'], key=lambda x: x.meta.get('alias', '')):
path = k.path
cacheid = os.path.basename(path)

Expand All @@ -73,26 +77,33 @@ def copy_to_remote(i):

cm_cached_state_json_file = os.path.join(path, "cm-cached-state.json")
if not os.path.exists(cm_cached_state_json_file):
return {'return':1, 'error': f'cm-cached-state.json file missing in {path}'}
return {'return': 1,
'error': f'cm-cached-state.json file missing in {path}'}

with open(cm_cached_state_json_file, "r") as f:
cm_cached_state = json.load(f)

new_env = cm_cached_state['new_env']
new_state = cm_cached_state['new_state'] # Todo fix new state
cm_repos_path = os.environ.get('CM_REPOS', os.path.join(os.path.expanduser("~"), "CM", "repos"))
cm_cache_path = os.path.realpath(os.path.join(cm_repos_path, "local", "cache"))
new_state = cm_cached_state['new_state'] # Todo fix new state
cm_repos_path = os.environ.get(
'CM_REPOS', os.path.join(
os.path.expanduser("~"), "CM", "repos"))
cm_cache_path = os.path.realpath(
os.path.join(cm_repos_path, "local", "cache"))

for key, val in new_env.items():


for key,val in new_env.items():
if type(val) == str and cm_cache_path in val:
new_env[key] = val.replace(cm_cache_path, remote_cm_cache_location)
            if isinstance(val, str) and cm_cache_path in val:
                new_env[key] = val.replace(
                    cm_cache_path, remote_cm_cache_location)

with open("tmp_remote_cached_state.json", "w") as f:
json.dump(cm_cached_state, f, indent=2)

remote_cached_state_file_location = os.path.join(remote_cm_cache_location, cacheid, "cm-cached-state.json")
remote_cached_state_file_location = os.path.join(
remote_cm_cache_location, cacheid, "cm-cached-state.json")
copy_cmd = f"rsync -avz -e 'ssh -p {remote_port}' tmp_remote_cached_state.json {remote_user}@{remote_host}:{remote_cached_state_file_location}"
print(copy_cmd)
os.system(copy_cmd)

return {'return':0}
return {'return': 0}
Loading

0 comments on commit 1ad4bca

Please sign in to comment.