From b9365a3a56080c1da780de6ad0b84735dda993be Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sun, 31 Dec 2023 16:15:00 +0530 Subject: [PATCH] Release v0.0.1 --- .flake8 | 5 + .gitattributes | 131 + .gitignore | 279 + .pre-commit-config.yaml | 174 + LICENSE | 21 + README.md | 1 + pyproject.toml | 387 + requirements/constraints.fine_tuning.txt | 8 + requirements/constraints.txt | 11 + requirements/requirements.fine_tuning.txt | 8 + requirements/requirements.txt | 11 + src/cli.py | 88 + src/generative_ai/__init__.py | 23 + .../dataset_generation/__init__.py | 32 + .../orchestrate_generation.py | 106 + .../dataset_generation/step_1_generation.py | 291 + .../dataset_generation/step_2_generation.py | 2787 ++++++ .../dataset_generation/utils_generation.py | 211 + .../fine_tuning/step_1_tuning.ipynb | 7589 +++++++++++++++++ .../fine_tuning/step_2_tuning.ipynb | 756 ++ .../information_retrieval/__init__.py | 53 + .../orchestrate_retrieval.py | 127 + .../information_retrieval/step_1_retrieval.py | 45 + .../information_retrieval/step_2_retrieval.py | 105 + .../information_retrieval/step_3_retrieval.py | 53 + .../information_retrieval/utils_retrieval.py | 57 + src/generative_ai/metadata.json | 21 + src/generative_ai/py.typed | 0 src/generative_ai/top_level.py | 130 + src/generative_ai/utils_top_level.py | 12 + src/gui.py | 303 + 31 files changed, 13825 insertions(+) create mode 100644 .flake8 create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 pyproject.toml create mode 100644 requirements/constraints.fine_tuning.txt create mode 100644 requirements/constraints.txt create mode 100644 requirements/requirements.fine_tuning.txt create mode 100644 requirements/requirements.txt create mode 100644 src/cli.py create mode 100644 src/generative_ai/__init__.py create mode 100644 
src/generative_ai/dataset_generation/__init__.py create mode 100644 src/generative_ai/dataset_generation/orchestrate_generation.py create mode 100644 src/generative_ai/dataset_generation/step_1_generation.py create mode 100644 src/generative_ai/dataset_generation/step_2_generation.py create mode 100644 src/generative_ai/dataset_generation/utils_generation.py create mode 100644 src/generative_ai/fine_tuning/step_1_tuning.ipynb create mode 100644 src/generative_ai/fine_tuning/step_2_tuning.ipynb create mode 100644 src/generative_ai/information_retrieval/__init__.py create mode 100644 src/generative_ai/information_retrieval/orchestrate_retrieval.py create mode 100644 src/generative_ai/information_retrieval/step_1_retrieval.py create mode 100644 src/generative_ai/information_retrieval/step_2_retrieval.py create mode 100644 src/generative_ai/information_retrieval/step_3_retrieval.py create mode 100644 src/generative_ai/information_retrieval/utils_retrieval.py create mode 100644 src/generative_ai/metadata.json create mode 100644 src/generative_ai/py.typed create mode 100644 src/generative_ai/top_level.py create mode 100644 src/generative_ai/utils_top_level.py create mode 100644 src/gui.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..e7ad425 --- /dev/null +++ b/.flake8 @@ -0,0 +1,5 @@ +[flake8] +extend-ignore = E203 +per-file-ignores = + __init__.py: F401 +max-line-length = 99 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..ab9328b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,131 @@ +## Reference: https://github.com/alexkaratarakis/gitattributes/blob/7beed92a802062af247243d6c06a65fbbc7a35de/Common.gitattributes + +# Common settings that generally should always be used with your language specific settings + +# Auto detect text files and perform LF normalization +* text=auto + +# +# The above will handle all files NOT found below +# + +# Documents +*.bibtex text diff=bibtex +*.doc diff=astextplain +*.DOC 
diff=astextplain +*.docx diff=astextplain +*.DOCX diff=astextplain +*.dot diff=astextplain +*.DOT diff=astextplain +*.pdf diff=astextplain +*.PDF diff=astextplain +*.rtf diff=astextplain +*.RTF diff=astextplain +*.md text diff=markdown +*.mdx text diff=markdown +*.tex text diff=tex +*.adoc text +*.textile text +*.mustache text +*.csv text eol=crlf +*.tab text +*.tsv text +*.txt text +*.sql text +*.epub diff=astextplain + +# Graphics +*.png binary +*.jpg binary +*.jpeg binary +*.gif binary +*.tif binary +*.tiff binary +*.ico binary +# SVG treated as text by default. +*.svg text +# If you want to treat it as binary, +# use the following line instead. +# *.svg binary +*.eps binary + +# Scripts +*.bash text eol=lf +*.fish text eol=lf +*.sh text eol=lf +*.zsh text eol=lf +# These are explicitly windows files and should use crlf +*.bat text eol=crlf +*.cmd text eol=crlf +*.ps1 text eol=crlf + +# Serialisation +*.json text +*.toml text +*.xml text +*.yaml text +*.yml text + +# Archives +*.7z binary +*.gz binary +*.tar binary +*.tgz binary +*.zip binary + +# Text files where line endings should be preserved +*.patch -text + +# +# Exclude files from exporting +# + +.gitattributes export-ignore +.gitignore export-ignore +.gitkeep export-ignore + +## Reference: https://github.com/alexkaratarakis/gitattributes/blob/7beed92a802062af247243d6c06a65fbbc7a35de/Python.gitattributes + +# Basic .gitattributes for a python repo. 
+ +# Source files +# ============ +*.pxd text diff=python +*.py text diff=python +*.py3 text diff=python +*.pyw text diff=python +*.pyx text diff=python +*.pyz text diff=python +*.pyi text diff=python + +# Binary files +# ============ +*.db binary +*.p binary +*.pkl binary +*.pickle binary +*.pyc binary export-ignore +*.pyo binary export-ignore +*.pyd binary + +# Jupyter notebook +*.ipynb text eol=lf + +# Note: .db, .p, and .pkl files are associated +# with the python modules ``pickle``, ``dbm.*``, +# ``shelve``, ``marshal``, ``anydbm``, & ``bsddb`` +# (among others). + +## Reference: https://github.com/alexkaratarakis/gitattributes/blob/7beed92a802062af247243d6c06a65fbbc7a35de/Markdown.gitattributes + +# Apply override to all files in the directory +*.md linguist-detectable + +## Custom + +*.gitattributes text linguist-detectable linguist-language=gitattributes +*.gitignore text linguist-detectable linguist-language=gitignore + +*.py linguist-detectable +*.toml diff=toml linguist-detectable +*.yaml diff=yaml linguist-detectable diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e9175f6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,279 @@ +## Reference: https://github.com/github/gitignore/blob/4488915eec0b3a45b5c63ead28f286819c0917de/Global/Linux.gitignore + +*~ + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +## Reference: https://github.com/github/gitignore/blob/4488915eec0b3a45b5c63ead28f286819c0917de/Global/macOS.gitignore + +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes 
+.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +## Reference: https://github.com/github/gitignore/blob/4488915eec0b3a45b5c63ead28f286819c0917de/Global/Windows.gitignore + +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +## Reference: https://github.com/github/gitignore/blob/4488915eec0b3a45b5c63ead28f286819c0917de/Global/Vim.gitignore + +# Swap +[._]*.s[a-v][a-z] +!*.svg # comment out if you don't need vector files +[._]*.sw[a-p] +[._]s[a-rt-v][a-z] +[._]ss[a-gi-z] +[._]sw[a-p] + +# Session +Session.vim +Sessionx.vim + +# Temporary +.netrwhist +*~ +# Auto-generated tag files +tags +# Persistent undo +[._]*.un~ + +## Reference: https://github.com/github/gitignore/blob/4488915eec0b3a45b5c63ead28f286819c0917de/Global/VisualStudioCode.gitignore + +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +## Reference: https://github.com/github/gitignore/blob/4488915eec0b3a45b5c63ead28f286819c0917de/Python.gitignore + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +## Custom + +coverage_data +coverage_html_report/ +coverage_xml_report.xml +pytest_junit_report.xml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..528ff65 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,174 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-ast + - id: check-case-conflict + - id: check-executables-have-shebangs + - id: check-json + - id: check-merge-conflict + - id: check-shebang-scripts-are-executable + - id: check-symlinks + - id: check-toml + - id: check-yaml + args: + - --allow-multiple-documents + - id: detect-private-key + - id: end-of-file-fixer + - id: mixed-line-ending + - id: name-tests-test + args: + - --pytest-test-first + - id: no-commit-to-branch + - id: pretty-format-json + args: + - --autofix + - --indent + - "4" + - id: requirements-txt-fixer + - id: trailing-whitespace + - repo: https://github.com/asottile/pyupgrade + rev: v3.15.0 + hooks: + - id: pyupgrade + args: + - --keep-runtime-typing + - --py311-plus + - 
repo: https://github.com/pycqa/autoflake + rev: v2.2.1 + hooks: + - id: autoflake + args: + - src + pass_filenames: false + - repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + args: + - src + pass_filenames: false + - repo: https://github.com/psf/black + rev: 23.12.1 + hooks: + - id: black-jupyter + args: + - src + pass_filenames: false + - repo: https://github.com/pycqa/bandit + rev: 1.7.6 + hooks: + - id: bandit + args: + - --recursive + - --severity-level + - high + - --confidence-level + - high + - src + pass_filenames: false + - repo: https://github.com/pycqa/flake8 + rev: 6.1.0 + hooks: + - id: flake8 + args: + - src + pass_filenames: false + # - repo: https://github.com/pre-commit/mirrors-mypy + # rev: v1.7.1 + # hooks: + # - id: mypy + # additional_dependencies: + # - pydantic + # args: + # - --ignore-missing-imports + # - --scripts-are-modules + # pass_filenames: false + # stages: + # - manual + - repo: https://github.com/PyCQA/pylint + rev: v3.0.3 + hooks: + - id: pylint + args: + - --disable + - import-error + - src + pass_filenames: false + stages: + - manual + # - repo: https://github.com/RobertCraigie/pyright-python + # rev: v1.1.337 + # hooks: + # - id: pyright + # pass_filenames: false + # stages: + # - manual + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.9 + hooks: + - id: ruff + args: + - src + pass_filenames: false + - repo: https://github.com/jendrikseipp/vulture + rev: v2.10 + hooks: + - id: vulture + pass_filenames: false + # - repo: https://github.com/PyCQA/docformatter + # rev: v1.7.5 + # hooks: + # - id: docformatter + # additional_dependencies: + # - tomli + # args: + # - --in-place + # - src + # pass_filenames: false + # - repo: https://github.com/adamchainz/blacken-docs + # rev: 1.16.0 + # hooks: + # - id: blacken-docs + # args: + # - --line-length + # - "87" + # - --target-version + # - py311 + # - repo: https://github.com/econchick/interrogate + # rev: 1.5.0 + # hooks: + # - id: 
interrogate + # args: + # - src + # pass_filenames: false + # - repo: https://github.com/pycqa/pydocstyle + # rev: 6.3.0 + # hooks: + # - id: pydocstyle + # additional_dependencies: + # - tomli + # args: + # - src + # pass_filenames: false + - repo: https://github.com/tox-dev/pyproject-fmt + rev: 1.5.3 + hooks: + - id: pyproject-fmt + - repo: https://github.com/abravalheri/validate-pyproject + rev: v0.15 + hooks: + - id: validate-pyproject + - repo: https://github.com/codespell-project/codespell + rev: v2.2.6 + hooks: + - id: codespell + additional_dependencies: + - tomli + args: + - --write-changes + stages: + - manual +default_language_version: + python: python3.11 +fail_fast: false diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..985bd07 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Anirban Ray + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..10ffb23 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# Query Package Documentation diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..99cdfe8 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,387 @@ +[build-system] +build-backend = "setuptools.build_meta" +requires = [ + "setuptools>=65.5.1", +] + +[project] +name = "query-package-documentation" +version = "0.0.1" +description = "A package to explore documentations" +keywords = [ + "documentation", + "generative-ai", +] +license = { file = "LICENSE" } +maintainers = [ + { name = "Anirban Ray", email = "39331844+yarnabrina@users.noreply.github.com" }, +] +authors = [ + { name = "Anirban Ray", email = "39331844+yarnabrina@users.noreply.github.com" }, +] +requires-python = "==3.11.*" +classifiers = [ + "Development Status :: 3 - Alpha", + "Framework :: Flake8", + "Framework :: Pydantic", + "Framework :: Pytest", + "Framework :: Sphinx", + "Intended Audience :: Developers", + "License :: OSI Approved", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.11", + "Topic :: Software Development", + "Topic :: Software Development :: Build Tools", + "Topic :: Software Development :: Libraries", + "Topic :: Utilities", + "Typing :: Typed", +] +dynamic=[ + "readme", +] +dependencies = [ + "chromadb<0.5,>=0.4.15", + "ctransformers<0.3,>=0.2.27", + "gradio<4.13,>=4.12", + "jq<1.7,>=1.6", + "langchain==0.0.353", + "numpydoc<1.7,>=1.6", + "pydantic<2.6,>=2.4.2", + "sentence-transformers<2.3,>=2.2.2", + "transformers<4.37,>=4.35", + "typer<0.10,>=0.9", + "typing-extensions<4.10,>=4.9", +] +[project.optional-dependencies] +all = [ + "autoflake", + "bandit", + "black", + "blacken-docs", + "build", + "codespell", + "coverage[toml]", + "docformatter[tomli]", + "flake8", + "furo", + 
"hypothesis[pytest]", + "interrogate", + "isort", + "mypy", + "nox", + "pre-commit", + "pydocstyle[toml]", + "pylint", + "pyproject-fmt", + "pyright", + "pytest", + "pyupgrade", + "Sphinx", + "sphinx-copybutton", + "twine", + "validate-pyproject", + "vulture", +] +dev = [ + "codespell", + "nox", + "pre-commit", +] +doc = [ + "furo", + "Sphinx", + "sphinx-copybutton", +] +fine-tuning = [ + "accelerate<0.26,>=0.24.1", + "bitsandbytes<0.42,>=0.41.2", + "datasets<2.17,>=2.15", + "peft<0.8,>=0.6.2", + "safetensors<0.5,>=0.4", + "torch<2.2,>=2.1.1", + "transformers<4.37,>=4.35.2", + "trl<0.8,>=0.7.4", +] +format = [ + "autoflake", + "black", + "blacken-docs", + "docformatter[tomli]", + "isort", + "pyproject-fmt", + "pyupgrade", +] +lint = [ + "bandit", + "flake8", + "interrogate", + "mypy", + "pydocstyle[toml]", + "pylint", + "pyright", + "validate-pyproject", + "vulture", +] +release = [ + "build", + "twine", +] +test = [ + "coverage[toml]", + "hypothesis[pytest]", + "pytest", +] +[project.urls] +"Bug Tracker" = "https://github.com/yarnabrina/query-package-documentation/issues" +"Documentation" = "https://query-package-documentation.readthedocs.io" +"Source Code" = "https://github.com/yarnabrina/query-package-documentation" +[project.scripts] +docs-cli = "cli:CLI_APPLICATION" +[project.gui-scripts] +docs-gui = "gui:main" + +[tool.setuptools] +py-modules = [ + "cli", + "gui", +] + +[tool.setuptools.dynamic] +readme = { file = "README.md", content-type = "text/markdown" } + +[tool.setuptools.packages.find] +where = [ + "src", +] +include = [ + "generative_ai*", +] +exclude = [ + "*tests*", +] +namespaces = false + +[tool.setuptools.package-data] +"generative_ai" = [ + "metadata.json", + "py.typed", +] + +[tool.setuptools.exclude-package-data] +"*" = [ + ".gitattributes", + ".gitignore", +] + +[tool.black] +line-length = 99 +target-version = [ + "py311", +] +safe = true + +[tool.ruff] +fix = true +ignore = [ + "COM", + "D", + "D203", + "D213", + "DTZ", + "EM", + "FBT", + 
"FIX", + "G", + "ICN", + "PD", + "RET501", + "RET502", + "RET503", + "RET504", + "SLF", + "TRY003", +] +ignore-init-module-imports = true +line-length = 99 +output-format = "grouped" +select = [ + "ALL", +] +src = [ + "src", +] +target-version = "py311" + +[tool.ruff.flake8-annotations] +allow-star-arg-any = true +ignore-fully-untyped = true + +[tool.ruff.flake8-bugbear] +extend-immutable-calls = [ + "fastapi.Depends", + "fastapi.Query", + "pydantic.Field", +] + +[tool.ruff.flake8-type-checking] +exempt-modules = [ + "typing", + "typing_extensions", +] +runtime-evaluated-base-classes = [ + "pydantic.BaseModel", +] + +[tool.ruff.pep8-naming] +classmethod-decorators = [ + "pydantic.field_validator", +] + +[tool.ruff.per-file-ignores] +"**/__init__.py" = [ + "F401", +] +"**/test_*.py" = [ + "S101", +] + +[tool.ruff.pycodestyle] +max-doc-length = 99 + +[tool.ruff.pydocstyle] +convention = "numpy" +ignore-decorators = [ + "typing.overload", +] + +[tool.ruff.pyupgrade] +keep-runtime-typing = true + +[tool.isort] +overwrite_in_place = true +profile = "black" +atomic = true +float_to_top = true +line_length = 99 +remove_redundant_aliases = true +src_paths = [ + "src", +] +py_version = 311 + +[tool.pylint.main] +extension-pkg-allow-list = [ + "pydantic", +] +fail-under = 8.5 +jobs = 0 +recursive = true + +[tool.pylint.basic] +include-naming-hint = true + +[tool.pylint.format] +max-line-length = 99 + +[tool.pylint.logging] +logging-format-style = "new" + +[tool.pylint."messages control"] +enable = [ + "all", +] +disable = [ + "logging-fstring-interpolation", + "missing-class-docstring", + "missing-function-docstring", + "missing-module-docstring", +] + +[tool.pylint.reports] +output-format = "colorized" + +[tool.docformatter] +in-place = true +recursive = true +wrap-summaries = 99 +wrap-descriptions = 99 + +[tool.pytest.ini_options] +addopts = "--junit-xml=pytest_junit_report.xml --doctest-modules --doctest-ignore-import-errors --doctest-continue-on-failure" 
+console_output_style = "count" + +[tool.coverage.run] +branch = true +command_line = "--module pytest" +data_file = "coverage_data" +include = [ + "src/**/*.py", +] +omit = [ + "**/tests/*.py", +] + +[tool.coverage.report] +fail_under = 85 +include = [ + "src/**/*.py", +] +omit = [ + "**/tests/*.py", +] +precision = 2 +exclude_lines = [ + "pragma: no cover", + "if __name__ == .__main__.:", + "if typing.TYPE_CHECKING:", +] + +[tool.coverage.html] +directory = "coverage_html_report" + +[tool.coverage.xml] +output = "coverage_xml_report.xml" + +[tool.mypy] +files = [ + "src", +] +exclude = [ + "conftest", + "test_", +] +strict = true + +[tool.pyright] +include = [ + "src", +] +exclude = [ + "**/tests/*.py", +] +pythonVersion = "3.11" + +[tool.autoflake] +in-place = true +remove-all-unused-imports = true +recursive = true +expand-star-imports = true +ignore-init-module-imports = true +remove-duplicate-keys = true +remove-unused-variables = true + +[tool.interrogate] +fail-under = 85 +ignore-init-method = true + +[tool.pydocstyle] +convention = "numpy" + +[tool.vulture] +min_confidence = 100 +paths = [ + "src", +] diff --git a/requirements/constraints.fine_tuning.txt b/requirements/constraints.fine_tuning.txt new file mode 100644 index 0000000..3370671 --- /dev/null +++ b/requirements/constraints.fine_tuning.txt @@ -0,0 +1,8 @@ +accelerate<0.26,>=0.24.1 +bitsandbytes<0.42,>=0.41.2 +datasets<2.17,>=2.15.0 +peft<0.8,>=0.6.2 +safetensors<0.5,>=0.4.0 +torch<2.2,>=2.1.1 +transformers<4.37,>=4.35.2 +trl<0.8,>=0.7.4 diff --git a/requirements/constraints.txt b/requirements/constraints.txt new file mode 100644 index 0000000..1f4c64d --- /dev/null +++ b/requirements/constraints.txt @@ -0,0 +1,11 @@ +chromadb<0.5,>=0.4.15 +ctransformers<0.3,>=0.2.27 +gradio<4.13,>=4.12 +jq<1.7,>=1.6 +langchain==0.0.353 +numpydoc<1.7,>=1.6 +pydantic<2.6,>=2.4.2 +sentence-transformers<2.3,>=2.2.2 +transformers<4.37,>=4.35 +typer<0.10,>=0.9 +typing-extensions<4.10,>=4.9 diff --git 
a/requirements/requirements.fine_tuning.txt b/requirements/requirements.fine_tuning.txt new file mode 100644 index 0000000..50dea65 --- /dev/null +++ b/requirements/requirements.fine_tuning.txt @@ -0,0 +1,8 @@ +accelerate +bitsandbytes +datasets +peft +safetensors +torch +transformers +trl diff --git a/requirements/requirements.txt b/requirements/requirements.txt new file mode 100644 index 0000000..a4e2e3a --- /dev/null +++ b/requirements/requirements.txt @@ -0,0 +1,11 @@ +chromadb +ctransformers +gradio +jq +langchain +numpydoc +pydantic +sentence-transformers +transformers +typer +typing-extensions diff --git a/src/cli.py b/src/cli.py new file mode 100644 index 0000000..3221e9d --- /dev/null +++ b/src/cli.py @@ -0,0 +1,88 @@ +import pathlib +import sys + +import typer + +from generative_ai.information_retrieval import PipelineType, RetrievalType, TransformerType +from generative_ai.top_level import create_database, create_dataset, get_response + +CLI_APPLICATION = typer.Typer(name="CLI for Generative AI aaplication") + + +@CLI_APPLICATION.command() +def generate_dataset( + package_name: str, + dataset_file: pathlib.Path = pathlib.Path("json_documents.json"), + force: bool = False, +) -> None: + try: + dataset_path = create_dataset(package_name, dataset_file, force) + except FileExistsError as error: + typer.echo(message=str(error), err=True) + sys.exit(1) + else: + typer.echo(f"Dataset generation complete: '{dataset_path}'.") + + +@CLI_APPLICATION.command() +def generate_database( + dataset_file: pathlib.Path = pathlib.Path("json_documents.json"), + embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2", + database_directory: pathlib.Path = pathlib.Path("embeddings_database"), + force: bool = False, +) -> None: + try: + database_path = create_database(dataset_file, embedding_model, database_directory, force) + except (FileExistsError, FileNotFoundError) as error: + typer.echo(message=str(error), err=True) + sys.exit(1) + else: + typer.echo(f"Database 
generation complete: '{database_path}'.") + + +@CLI_APPLICATION.command() +def answer_query( # noqa: PLR0913 + query: str, + embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2", + database_directory: pathlib.Path = pathlib.Path("embeddings_database"), + search_type: RetrievalType = RetrievalType.MMR, + number_of_documents: int = 3, + initial_number_of_documents: int = 5, + diversity_level: float = 0.5, + language_model_type: TransformerType = TransformerType.STANDARD_TRANSFORMERS, + standard_pipeline_type: PipelineType = PipelineType.TEXT2TEXT_GENERATION, + standard_model_name: str = "google/flan-t5-large", + quantised_model_name: str = "TheBloke/zephyr-7B-beta-GGUF", + quantised_model_file: str = "zephyr-7b-beta.Q4_K_M.gguf", + quantised_model_type: str = "mistral", +) -> None: + try: + response = get_response( + query, + embedding_model, + database_directory, + search_type, + number_of_documents, + initial_number_of_documents, + diversity_level, + language_model_type, + standard_pipeline_type, + standard_model_name, + quantised_model_name, + quantised_model_file, + quantised_model_type, + ) + except FileNotFoundError as error: + typer.echo(message=str(error), err=True) + sys.exit(1) + else: + typer.echo(f"Query: {response.query}") + typer.echo(f"Answer: {response.answer}") + typer.echo(f"Duration: {response.llm_duration:.2f} seconds") + + for counter, source_document in enumerate(response.source_documents): + typer.echo(f"Source {counter + 1}: {source_document}") + + +if __name__ == "__main__": + CLI_APPLICATION() diff --git a/src/generative_ai/__init__.py b/src/generative_ai/__init__.py new file mode 100644 index 0000000..e2361cb --- /dev/null +++ b/src/generative_ai/__init__.py @@ -0,0 +1,23 @@ +import importlib.resources +import json +import typing + + +class PackageMetadata(typing.TypedDict): + Name: str + Version: str + Description: str + Keywords: list[str] + License: str + Maintainers: list[str] + Authors: list[str] + Links: dict[str, str] + + 
+METADATA_CONTENTS: str = ( + importlib.resources.files("generative_ai").joinpath("metadata.json").read_text() +) +METADATA: PackageMetadata = json.loads(METADATA_CONTENTS) + +__version__: str = METADATA["Version"] +__all__: list[str] = ["METADATA", "__version__"] diff --git a/src/generative_ai/dataset_generation/__init__.py b/src/generative_ai/dataset_generation/__init__.py new file mode 100644 index 0000000..ff9a691 --- /dev/null +++ b/src/generative_ai/dataset_generation/__init__.py @@ -0,0 +1,32 @@ +from .orchestrate_generation import ( + generate_json_dataset, + generate_raw_datasets, + load_json_dataset, + store_json_dataset, +) +from .step_1_generation import ( + get_all_member_details, + get_all_module_contents, + get_all_package_contents, +) +from .step_2_generation import ( + generate_member_dataset, + generate_module_dataset, + generate_package_dataset, +) +from .utils_generation import JSONDataset, JSONDocument + +__all__ = [ + "JSONDataset", + "JSONDocument", + "generate_json_dataset", + "generate_member_dataset", + "generate_module_dataset", + "generate_package_dataset", + "generate_raw_datasets", + "get_all_member_details", + "get_all_module_contents", + "get_all_package_contents", + "load_json_dataset", + "store_json_dataset", +] diff --git a/src/generative_ai/dataset_generation/orchestrate_generation.py b/src/generative_ai/dataset_generation/orchestrate_generation.py new file mode 100644 index 0000000..9b098a8 --- /dev/null +++ b/src/generative_ai/dataset_generation/orchestrate_generation.py @@ -0,0 +1,106 @@ +import itertools +import json +import logging +import pathlib + +import pydantic + +from .step_1_generation import ( + get_all_member_details, + get_all_module_contents, + get_all_package_contents, +) +from .step_2_generation import ( + generate_member_dataset, + generate_module_dataset, + generate_package_dataset, +) +from .utils_generation import Dataset, JSONDataset, JSONDocument, MemberDetails, Module + +LOGGER = 
logging.getLogger(__name__)
+
+
+@pydantic.validate_call(validate_return=True)
+def generate_raw_datasets(package_name: str) -> list[Dataset]:
+    """Generate raw datasets for a package, its modules and their members.
+
+    Parameters
+    ----------
+    package_name : str
+        Name of the root package to inspect.
+
+    Returns
+    -------
+    list[Dataset]
+        Package datasets first, then module datasets, then member datasets.
+    """
+    all_package_contents = get_all_package_contents(package_name)
+    LOGGER.info(f"Enlisted total {len(all_package_contents)} packages recursively.")
+
+    all_module_contents: list[Module] = []
+    for package_contents in all_package_contents:
+        for module in package_contents.children_modules_names:
+            try:
+                module_contents = get_all_module_contents(
+                    f"{package_contents.package_qualified_name}.{module}"
+                )
+            except ImportError:
+                LOGGER.warning(f"Failed to import {module=}.")
+
+                continue
+
+            all_module_contents.append(module_contents)
+
+    LOGGER.info(f"Enlisted total {len(all_module_contents)} modules recursively.")
+
+    all_member_details: list[MemberDetails] = []
+    for module_contents in all_module_contents:
+        for member in module_contents.module_members:
+            try:
+                member_details = get_all_member_details(
+                    module_contents.module_qualified_name, member.member_name, member.member_object
+                )
+            except (TypeError, ValueError) as error:
+                # Report the skipped member; dropping it silently hides inspection failures.
+                LOGGER.warning(
+                    f"Failed to inspect {member.member_name=}: {type(error).__name__}."
+                )
+
+                continue
+
+            all_member_details.append(member_details)
+
+    LOGGER.info(f"Enlisted total {len(all_member_details)} members recursively.")
+
+    package_datasets = map(generate_package_dataset, all_package_contents)
+    module_datasets = map(generate_module_dataset, all_module_contents)
+    member_datasets = map(generate_member_dataset, all_member_details)
+
+    # Each ``generate_member_dataset`` result is unpacked as its own iterable of datasets.
+    combined_datasets = itertools.chain(package_datasets, module_datasets, *member_datasets)
+
+    return list(combined_datasets)
+
+
+@pydantic.validate_call(validate_return=True)
+def generate_json_dataset(raw_datasets: list[Dataset]) -> JSONDataset:
+    """Merge raw datasets into a single JSON dataset.
+
+    Parameters
+    ----------
+    raw_datasets : list[Dataset]
+        Datasets whose retrieval chunks and tuning documents are concatenated in order.
+
+    Returns
+    -------
+    JSONDataset
+        Dataset holding all retrieval documents and all tuning documents.
+    """
+    retrieval_documents: list[str] = []
+    tuning_documents: list[JSONDocument] = []
+
+    for dataset in raw_datasets:
+        retrieval_documents.extend(dataset.retrieval_chunks)
+
+        # Each tuning document is dumped and re-validated as a ``JSONDocument``.
+        tuning_documents.extend(
+            JSONDocument.model_validate(document.model_dump())
+            for document in dataset.tuning_documents
+        )
+
+    return JSONDataset.model_validate(
+        {"retrieval_documents": retrieval_documents, "tuning_documents": tuning_documents}
+    )
+
+
+@pydantic.validate_call
+def store_json_dataset(json_dataset: JSONDataset, file_path: pathlib.Path) -> None:
+    """Write a JSON dataset to ``file_path`` as UTF-8 JSON indented by four spaces.
+
+    Parameters
+    ----------
+    json_dataset : JSONDataset
+        Dataset to serialise.
+    file_path : pathlib.Path
+        Destination file; it is opened in write mode, replacing existing content.
+    """
+    with pathlib.Path(file_path).open(mode="w", encoding="utf-8") as file_object:
+        json.dump(json_dataset.model_dump(), file_object, indent=4)
+
+
+@pydantic.validate_call(validate_return=True)
+def load_json_dataset(file_path: pathlib.Path) -> JSONDataset:
+    """Read a JSON dataset from a UTF-8 encoded JSON file.
+
+    Parameters
+    ----------
+    file_path : pathlib.Path
+        File to read.
+
+    Returns
+    -------
+    JSONDataset
+        Dataset validated from the parsed JSON content.
+    """
+    with pathlib.Path(file_path).open(mode="r", encoding="utf-8") as file_object:
+        json_dataset = json.load(file_object)
+
+    return JSONDataset.model_validate(json_dataset)
+
+
+__all__ = [
+    "generate_json_dataset",
+    "generate_raw_datasets",
+    "load_json_dataset",
+    "store_json_dataset",
+]
diff --git a/src/generative_ai/dataset_generation/step_1_generation.py b/src/generative_ai/dataset_generation/step_1_generation.py
new file mode 100644
index 0000000..0cbfc51
--- /dev/null
+++ b/src/generative_ai/dataset_generation/step_1_generation.py
@@ -0,0 +1,291 @@
+import enum
+import importlib
+import importlib.util
+import inspect
+import logging
+import pkgutil
+import types
+import typing
+
+import pydantic
+from numpydoc.docscrape import NumpyDocString
+
+from .utils_generation import (
+    Attribute,
+    ClassDetails,
+    EnumDetails,
+    EnumMember,
+    FunctionDetails,
+    MemberDetails,
+    MemberType,
+    Method,
+    Module,
+    ModuleMember,
+    Package,
+    Parameter,
+    Raises,
+    Returns,
+    Warns,
+)
+
+LOGGER = logging.getLogger(__name__)
+
+
+@pydantic.validate_call(validate_return=True)
+def import_package(package_name: str) -> pydantic.InstanceOf[types.ModuleType]:
+    """Create a module object for a package from its import spec.
+
+    The module is only built with ``importlib.util.module_from_spec``; its code is not
+    executed by this function.
+
+    Parameters
+    ----------
+    package_name : str
+        Name of the package to locate.
+
+    Returns
+    -------
+    types.ModuleType
+        Module object created from the located spec.
+
+    Raises
+    ------
+    ValueError
+        If no spec is found for ``package_name``.
+    """
+    package_spec = importlib.util.find_spec(package_name)
+
+    if package_spec is None:
+        LOGGER.error(f"spec for {package_name=} could not be found")
+
+        raise ValueError(f"{package_name=} is not found")
+
+    return importlib.util.module_from_spec(package_spec)
+
+
+@pydantic.validate_call(validate_return=True)
+def get_all_package_contents(package_name: str) -> list[Package]:
+    """Collect details of a package and of every sub-package reachable from it.
+
+    Packages are visited depth-first through an explicit stack. A package that fails to
+    import is logged and skipped. Children whose own name contains ``tests`` are ignored.
+
+    Parameters
+    ----------
+    package_name : str
+        Name of the root package.
+
+    Returns
+    -------
+    list[Package]
+        One entry per visited package, starting with the root package.
+
+    Raises
+    ------
+    ValueError
+        Propagated from ``import_package`` when no spec is found for a package.
+    """
+    package_contents = []
+
+    sub_packages_stack: list[tuple[str, str | None]] = [(package_name, None)]
+
+    while sub_packages_stack:
+        current_package_name, parent_package_name = sub_packages_stack.pop()
+
+        current_package_hierarchy = current_package_name.split(".")
+
+        try:
+            current_package_loader = import_package(current_package_name)
+        except ImportError:
+            LOGGER.warning(f"{current_package_name=} could not be imported")
+
+            continue
+
+        try:
+            current_package = importlib.import_module(current_package_name)
+        except ImportError:
+            LOGGER.warning(f"{current_package_name=} could not be imported")
+
+            continue
+
+        children_prefix = f"{current_package_name}."
+
+        current_package_sub_packages = []
+        current_package_modules = []
+
+        # ``iter_modules`` yields direct children only; deeper levels are reached through
+        # the stack, so walking (and importing) the whole subtree here is unnecessary.
+        for _, name, ispkg in pkgutil.iter_modules(
+            path=current_package_loader.__path__, prefix=children_prefix
+        ):
+            # Test only the child's own name, so that a root package whose name happens to
+            # contain "tests" does not lose all of its children.
+            if "tests" in name.removeprefix(children_prefix):
+                continue
+
+            if ispkg:
+                current_package_sub_packages.append(name)
+            else:
+                current_package_modules.append(name)
+
+        package_contents.append(
+            Package(
+                package_name=current_package_hierarchy[-1],
+                package_qualified_name=current_package_name,
+                package_hierarchy=current_package_hierarchy,
+                parent_package_name=parent_package_name,
+                children_sub_packages_names=[
+                    sub_package.removeprefix(children_prefix)
+                    for sub_package in current_package_sub_packages
+                ],
+                children_modules_names=[
+                    module.removeprefix(children_prefix) for module in current_package_modules
+                ],
+                package_summary=getattr(current_package, "__doc__", None),
+                package_all_exports=getattr(current_package, "__all__", None),
+            )
+        )
+
+        sub_packages_stack.extend(
+            (sub_package_name, current_package_name)
+            for sub_package_name in current_package_sub_packages
+        )
+
+    return package_contents
+
+
+@pydantic.validate_call(validate_return=True)
+def get_all_module_contents(module_name: str) -> Module:
+    """Import a module and collect the members that originate from it.
+
+    A member is kept only when ``inspect.getmodule`` reports the imported module itself
+    as its origin.
+
+    Parameters
+    ----------
+    module_name : str
+        Fully qualified name of the module.
+
+    Returns
+    -------
+    Module
+        Names, hierarchy, members, docstring and ``__all__`` of the module.
+
+    Raises
+    ------
+    ImportError
+        If the module cannot be imported.
+    """
+    module_hierarchy = module_name.split(".")
+
+    module = importlib.import_module(module_name)
+
+    module_contents = inspect.getmembers(
+        module, predicate=lambda member: inspect.getmodule(member) == module
+    )
+
+    return Module(
+        module_name=module_hierarchy[-1],
+        module_qualified_name=module_name,
+        module_hierarchy=module_hierarchy,
+        package_name=".".join(module_hierarchy[:-1]),
+        module_members=[
+            ModuleMember(member_name=member_name, member_object=member_object)
+            for member_name, member_object in module_contents
+        ],
+        module_summary=inspect.getdoc(module),
+        # Reuse the module imported above instead of importing it a second time.
+        module_all_exports=getattr(module, "__all__", None),
+    )
+
+
+@pydantic.validate_call(validate_return=True)
+def get_all_parameters_details(
+    signature: pydantic.InstanceOf[inspect.Signature],
+    docstring: pydantic.InstanceOf[NumpyDocString],
+) -> list[Parameter]:
+    """Combine signature and docstring information for every parameter of a signature.
+
+    Parameters
+    ----------
+    signature : inspect.Signature
+        Signature providing names, defaults, annotations and kinds.
+    docstring : NumpyDocString
+        Parsed docstring whose "Parameters" section provides types and descriptions.
+
+    Returns
+    -------
+    list[Parameter]
+        One entry per signature parameter, in signature order. A non-empty type from the
+        docstring takes precedence over the signature annotation.
+    """
+    parameter_docstring = {
+        parameter.name: {
+            "parameter_annotation": parameter.type,
+            "parameter_summary": " ".join(parameter.desc),
+        }
+        for parameter in docstring["Parameters"]
+    }
+
+    parameter_details = []
+    for parameter in signature.parameters.values():
+        # Parameters missing from the docstring fall back to signature information only.
+        documented_details = parameter_docstring.get(parameter.name, {})
+
+        parameter_details.append(
+            Parameter.model_validate(
+                {
+                    "parameter_name": parameter.name,
+                    "parameter_default": parameter.default,
+                    "parameter_annotation": documented_details.get("parameter_annotation")
+                    or parameter.annotation,
+                    "parameter_kind": parameter.kind.description,
+                    "parameter_summary": documented_details.get("parameter_summary"),
+                }
+            )
+        )
+
+    return parameter_details
+
+
+@pydantic.validate_call(validate_return=True)
+def get_all_returns_details(
+    signature: pydantic.InstanceOf[inspect.Signature],
+    docstring: pydantic.InstanceOf[NumpyDocString],
+) -> Returns:
+    """Combine signature and docstring information about a return value.
+
+    Parameters
+    ----------
+    signature : inspect.Signature
+        Signature providing the return annotation.
+    docstring : NumpyDocString
+        Parsed docstring; only the first entry of its "Returns" section is used.
+
+    Returns
+    -------
+    Returns
+        Return annotation and, when documented, the return description. A non-empty type
+        from the docstring takes precedence over the signature annotation.
+    """
+    returns_signature = signature.return_annotation
+
+    documented_returns = docstring["Returns"]
+    if not documented_returns:
+        return Returns(returns_annotation=returns_signature)
+
+    first_documented_return = documented_returns[0]
+
+    return Returns(
+        returns_annotation=first_documented_return.type or returns_signature,
+        returns_summary=" ".join(first_documented_return.desc),
+    )
+
+
+def _is_python_method(member: typing.Any) -> bool:  # noqa: ANN401
+    """Tell whether a class member is a function or a bound method written in Python."""
+    # Looked up on a class, ordinary methods are plain functions; only class methods are
+    # bound methods. Both must be accepted to list the methods of a class.
+    return inspect.isfunction(member) or inspect.ismethod(member)
+
+
+def _flatten_see_also(see_also_entries: list) -> list[str]:
+    """Convert parsed "See Also" entries into plain strings.
+
+    NOTE(review): assumes numpydoc yields ``(references, description)`` pairs, where
+    ``references`` is a list of ``(name, role)`` tuples and ``description`` a list of
+    lines - confirm against the pinned numpydoc version.
+    """
+    flattened_entries = []
+    for references, description in see_also_entries:
+        names = ", ".join(name for name, _ in references)
+        summary = " ".join(description)
+        flattened_entries.append(f"{names}: {summary}" if summary else names)
+
+    return flattened_entries
+
+
+@pydantic.validate_call(validate_return=True)
+def get_all_member_details(
+    module_name: str, member_name: str, member_object: typing.Any  # noqa: ANN401
+) -> MemberDetails:
+    """Collect details of a module member that is an enum, a class or a callable.
+
+    Parameters
+    ----------
+    module_name : str
+        Fully qualified name of the module containing the member.
+    member_name : str
+        Name of the member inside the module.
+    member_object : typing.Any
+        The member itself.
+
+    Returns
+    -------
+    MemberDetails
+        Names, hierarchy, docstring and type specific details of the member.
+
+    Notes
+    -----
+    ``inspect.signature`` is documented to raise ``TypeError`` or ``ValueError`` for objects
+    without a retrievable signature; such errors are not handled here.
+    """
+    member_hierarchy = [*module_name.split("."), member_name]
+
+    member_details: dict[str, typing.Any] = {
+        "member_name": member_name,
+        "member_qualified_name": ".".join(member_hierarchy),
+        "member_hierarchy": member_hierarchy,
+        "member_module": member_hierarchy[-2],
+    }
+
+    member_details["member_docstring"] = inspect.getdoc(member_object) or ""
+    parsed_docstring = NumpyDocString(member_details["member_docstring"])
+
+    # Enums are classes as well, hence they are checked before the generic class branch.
+    if isinstance(member_object, enum.EnumType):
+        member_details["member_type_details"] = EnumDetails(
+            member_type=MemberType.ENUM,
+            enum_members=[
+                EnumMember(enum_member_name=enum_member.name, enum_member_value=enum_member.value)
+                for enum_member in member_object
+            ],
+        )
+    elif inspect.isclass(member_object):
+        member_details["member_type_details"] = ClassDetails(
+            member_type=MemberType.CLASS,
+            class_parameters=get_all_parameters_details(
+                inspect.signature(member_object), parsed_docstring
+            ),
+            class_methods=[
+                Method(
+                    method_name=method_name,
+                    method_parameters=list(inspect.signature(method_object).parameters),
+                    method_summary=inspect.getdoc(method_object),
+                )
+                for method_name, method_object in inspect.getmembers(
+                    member_object, predicate=_is_python_method
+                )
+                if not method_name.startswith("_")
+            ],
+            class_attributes=[
+                Attribute(attribute_name=attribute_name)
+                for attribute_name, _ in inspect.getmembers(
+                    member_object, predicate=lambda member: not callable(member)
+                )
+                if not attribute_name.startswith("_")
+            ],
+            class_summary=" ".join(
+                parsed_docstring["Summary"] + parsed_docstring["Extended Summary"]
+            ),
+            # "See Also" entries are not plain strings and must be flattened before joining.
+            class_notes=" ".join(
+                _flatten_see_also(parsed_docstring["See Also"]) + parsed_docstring["Notes"]
+            ),
+        )
+    elif callable(member_object):
+        # Resolve the signature once; it feeds both parameter and return details.
+        member_signature = inspect.signature(member_object)
+
+        member_details["member_type_details"] = FunctionDetails(
+            member_type=MemberType.FUNCTION,
+            function_parameters=get_all_parameters_details(member_signature, parsed_docstring),
+            function_returns=get_all_returns_details(member_signature, parsed_docstring),
+            function_summary=" ".join(
+                parsed_docstring["Summary"] + parsed_docstring["Extended Summary"]
+            ),
+            function_raises=[
+                Raises(raises_type=raises.type, raises_summary=" ".join(raises.desc))
+                for raises in parsed_docstring["Raises"]
+            ],
+            function_warns=[
+                Warns(warns_type=warns.type, warns_summary=" ".join(warns.desc))
+                for warns in parsed_docstring["Warns"]
+            ],
+            # These sections are lists of lines: join prose with spaces so that words on
+            # adjacent lines do not run together, and keep example lines separate.
+            function_notes=" ".join(parsed_docstring["Notes"]),
+            function_references=" ".join(parsed_docstring["References"]),
+            function_examples="\n".join(parsed_docstring["Examples"]),
+        )
+
+    return MemberDetails.model_validate(member_details)
+
+
+__all__ = [
+    "get_all_member_details",
+    "get_all_module_contents",
+    "get_all_package_contents",
+    "get_all_parameters_details",
+    "get_all_returns_details",
+    "import_package",
+]
diff --git a/src/generative_ai/dataset_generation/step_2_generation.py b/src/generative_ai/dataset_generation/step_2_generation.py
new file mode 100644
index 0000000..99debe8
--- /dev/null
+++ b/src/generative_ai/dataset_generation/step_2_generation.py
@@ -0,0 +1,2787 @@
+import inspect
+import logging
+import random
+
+import pydantic
+
+from .utils_generation import (
+    ClassDetails,
+    Dataset,
+    EnumDetails,
+
FunctionDetails, + MemberDetails, + MemberType, + Module, + Package, +) + +random.seed(a=0) + +LOGGER = logging.getLogger(__name__) + + +@pydantic.validate_call(validate_return=True) +def enumerate_array_elements(array: list, attribute: str | None = None) -> str: + elements = [] + for element in array: + if isinstance(element, str): + elements.append(element) + elif attribute is not None: + elements.append(getattr(element, attribute)) + else: + LOGGER.error(f"Received {attribute=} along with {array=}") + + raise ValueError("attribute must be non-null if array elements are not string") + + return " ".join(f"{counter + 1}. {element}" for counter, element in enumerate(elements)) + + +@pydantic.validate_call(validate_return=True) +def generate_package_dataset(package_contents: Package) -> Dataset: # noqa: PLR0915 + package_name = package_contents.package_name + package_full_name = package_contents.package_qualified_name + + package = f"'{package_name}' package" + + package_retrieval_chunks: list[str] = [f"'{package_name}' is a Python package."] + package_tuning_pairs: list[tuple[str, str]] = [] + + if (parent_package := package_contents.parent_package_name) is None: + root_package_pairs = [ + ("What is the root package?", f"'{package_name}' is the root package."), + ( + "Can you tell me what the root package is?", + f"Sure, the root package is '{package_name}'.", + ), + ( + "I'm trying to find out the root package. 
Can you help?", + f"Of course, the root package is '{package_name}'.", + ), + ( + "Do you know what the root package is?", + f"Yes, the root package is '{package_name}'.", + ), + ( + "I'd like to know the root package.", + f"The root package you're asking about is '{package_name}'.", + ), + ( + "Could you identify the root package?", + f"Certainly, '{package_name}' is the root package.", + ), + ] + package_retrieval_chunks.append(f"'{package_name}' is the root package.") + package_tuning_pairs.extend(root_package_pairs) + + parent_package_pairs = [ + ( + f"Name parent package of '{package_name}'.", + f"Being the root package, '{package_name}' has no parent package.", + ), + ( + f"What is the parent package of '{package_name}'?", + f"The root package '{package_name}' does not have a parent package.", + ), + ( + f"Can you tell me the parent package of '{package_name}'?", + f"'{package_name}' is a root package and therefore," + " it does not have a parent package.", + ), + ( + f"Could you identify the parent package of '{package_name}'?", + f"As a root package, '{package_name}' does not possess a parent package.", + ), + ( + f"I'm looking for the parent package of '{package_name}'. 
Can you help?", + f"Sure, '{package_name}' is a root package, so it doesn't have a parent package.", + ), + ( + f"Do you know the parent package of '{package_name}'?", + f"Yes, '{package_name}' is a root package and hence," + " it doesn't have a parent package.", + ), + ] + package_retrieval_chunks.append(f"'{package_name}' has no parent package.") + package_tuning_pairs.extend(parent_package_pairs) + else: + parent_package_pairs = [ + ( + f"Name parent package of '{package_name}' sub-package.", + f"'{parent_package}' is the full name of its parent package.", + ), + ( + f"What is the parent package of the '{package_name}' sub-package?", + f"The parent package of '{package_name}' is '{parent_package}'.", + ), + ( + f"Could you tell me the parent package of '{package_name}'?", + f"Sure, the parent package of '{package_name}' is '{parent_package}'.", + ), + ( + f"I need to know the parent package of '{package_name}'.", + f"The parent package of '{package_name}' is '{parent_package}'.", + ), + ( + f"Identify the parent package for the '{package_name}' sub-package.", + f"The parent package for '{package_name}' is identified as '{parent_package}'.", + ), + ( + f"Can you name the parent package of the '{package_name}' sub-package?", + f"Yes, the parent package of '{package_name}' is '{parent_package}'.", + ), + ] + package_retrieval_chunks.append( + f"'{package_name}' is part of parent package '{parent_package}'." 
+ ) + package_tuning_pairs.extend(parent_package_pairs) + + package_full_name_pairs = [ + ( + f"Tell the full name of '{package_name}' sub-package.", + f"'{package_full_name}' is the fully qualified name of '{package_name}'.", + ), + ( + f"What is the fully qualified name of the '{package_name}' sub-package?", + f"Fully qualified name of '{package_name}' sub-package is '{package_full_name}'.", + ), + ( + f"Could you provide the full name of the '{package_name}' sub-package?", + f"Sure, the full name of '{package_name}' sub-package is '{package_full_name}'.", + ), + ( + f"I need the full name of the '{package_name}' sub-package. Can you tell me?", + f"Of course, full name of '{package_name}' sub-package is '{package_full_name}'.", + ), + ( + f"Can you inform me about the full name of the '{package_name}' sub-package?", + f"Certainly, full name of '{package_name}' sub-package is '{package_full_name}'.", + ), + ( + f"Please, reveal the full name of the '{package_name}' sub-package.", + f"Absolutely, full name of '{package_name}' sub-package is '{package_full_name}'.", + ), + ] + package_retrieval_chunks.append( + f"Full name of '{package_name}' sub-package is '{package_full_name}'." + ) + package_tuning_pairs.extend(package_full_name_pairs) + + package_hierarchy = enumerate_array_elements(package_contents.package_hierarchy) + package_hierarchy_pairs = [ + ( + f"What is the hierarchy of {package}?", + f"The hierarchy of {package} is as follows: {package_hierarchy}.", + ), + ( + f"Can you explain the hierarchy of the {package}?", + f"Sure, the hierarchy of the {package} is: {package_hierarchy}.", + ), + ( + f"Could you describe the structure of the {package}?", + f"Of course, the structure of {package} is: {package_hierarchy}.", + ), + ( + f"I need to understand the hierarchy of {package}. 
Can you help?", + f"Absolutely, the hierarchy of {package} is: {package_hierarchy}.", + ), + ( + f"Please provide the hierarchy of the {package}.", + f"The hierarchy of the {package} is: {package_hierarchy}.", + ), + ( + f"I'm interested in the structure of the {package}. What is it?", + f"The structure of {package} is as follows: {package_hierarchy}.", + ), + ] + package_retrieval_chunks.append( + f"Hierarchy of {package} is as follows: {package_hierarchy}." + ) + package_tuning_pairs.extend(package_hierarchy_pairs) + + if not (children_sub_packages := package_contents.children_sub_packages_names): + package_sub_package_pairs = [ + ( + f"List the sub-packages of {package}.", + f"{package} does not have any further sub-packages.", + ), + ( + f"What are the sub-packages of the {package}?", + f"The {package} does not contain any sub-packages.", + ), + ( + f"Could you tell me the sub-packages of {package}?", + f"I'm sorry, but the {package} doesn't have any sub-packages.", + ), + ( + f"I need to know the sub-packages of {package}. 
Can you list them?", + f"Unfortunately, {package} doesn't include any sub-packages.", + ), + ( + f"Can you provide a list of sub-packages for the {package}?", + f"There are no sub-packages in the {package}.", + ), + ( + f"Identify the sub-packages of {package}.", + f"No sub-packages are present in the {package}.", + ), + ] + package_retrieval_chunks.append(f"{package} does not have any further sub-packages.") + package_tuning_pairs.extend(package_sub_package_pairs) + else: + children_sub_packages_count = len(children_sub_packages) + children_sub_packages_count_pairs = [ + ( + f"How many sub-packages are there in {package}?", + f"{package} has {children_sub_packages_count} many sub-packages.", + ), + ( + f"What is the count of sub-packages in {package}?", + f"The count of sub-packages in {package} is {children_sub_packages_count}.", + ), + ( + f"Could you tell me the number of sub-packages available in {package}?", + f"{package} has {children_sub_packages_count} sub-packages.", + ), + ( + f"Please provide the count of sub-packages for {package}.", + f"Number of sub-packages in {package} is {children_sub_packages_count}.", + ), + ( + f"Tell me the quantity of sub-packages present in {package}.", + f"{package} has {children_sub_packages_count} sub-packages.", + ), + ( + f"Would you mind letting me know how many sub-packages {package} contains?", + f"{package} contains {children_sub_packages_count} sub-packages.", + ), + ] + package_retrieval_chunks.append( + f"{package} has {children_sub_packages_count} many sub-packages." 
+ ) + package_tuning_pairs.extend(children_sub_packages_count_pairs) + + package_sub_packages = enumerate_array_elements(children_sub_packages) + package_sub_package_pairs = [ + ( + f"List the sub-packages of {package}.", + f"Sub-packages of {package} are as follows: {package_sub_packages}.", + ), + ( + f"What are the sub-packages of the {package}?", + f"The {package} has the following sub-packages: {package_sub_packages}.", + ), + ( + f"Could you tell me the sub-packages of {package}?", + f"Sure, the sub-packages of {package} are: {package_sub_packages}.", + ), + ( + f"I need to know the sub-packages of {package}. Can you list them?", + f"Of course, the sub-packages of {package} are: {package_sub_packages}.", + ), + ( + f"Please provide the sub-packages of {package}.", + f"The sub-packages of {package} are: {package_sub_packages}.", + ), + ( + f"Can you enumerate the sub-packages of {package}?", + f"Certainly, the sub-packages of {package} are: {package_sub_packages}.", + ), + ] + package_retrieval_chunks.append( + f"Sub-packages of {package} are as follows: {package_sub_packages}." + ) + package_tuning_pairs.extend(package_sub_package_pairs) + + if not (children_modules := package_contents.children_modules_names): + package_module_pairs = [ + ( + f"What are the modules of {package}?", + f"{package} does not have any direct modules under itself.", + ), + ( + f"Can you list the modules under the {package}?", + f"There are no direct modules under the {package}.", + ), + ( + f"Does the {package} contain any modules?", + f"No, the {package} does not contain any direct modules.", + ), + ( + f"I'm looking for the modules of {package}. 
Can you help?", + f"I'm sorry, but {package} does not have any direct modules.", + ), + ( + f"Tell me about the modules of {package}.", + f"Actually, the {package} does not have any direct modules.", + ), + ( + f"Are there any modules under the {package}?", + f"No, there aren't any direct modules under the {package}.", + ), + ] + package_retrieval_chunks.append(f"{package} does not have any further modules.") + package_tuning_pairs.extend(package_module_pairs) + else: + children_modules_count = len(children_modules) + children_modules_count_pairs = [ + ( + f"How many modules are there in {package}?", + f"{package} has {children_modules_count} many modules.", + ), + ( + f"What is the count of modules in {package}?", + f"The count of modules in {package} is {children_modules_count}.", + ), + ( + f"Could you tell me the number of modules available in {package}?", + f"{package} has {children_modules_count} modules.", + ), + ( + f"Please provide the count of modules for {package}.", + f"The number of modules in {package} is {children_modules_count}.", + ), + ( + f"Tell me the quantity of modules present in {package}.", + f"{package} has {children_modules_count} modules.", + ), + ( + f"Would you mind letting me know how many modules {package} contains?", + f"{package} contains {children_modules_count} modules.", + ), + ] + package_retrieval_chunks.append(f"{package} has {children_modules_count} many modules.") + package_tuning_pairs.extend(children_modules_count_pairs) + + package_modules = enumerate_array_elements(children_modules) + package_module_pairs = [ + ( + f"What are the modules of {package}?", + f"Direct modules under {package} are as follows: {package_modules}.", + ), + ( + f"Can you list the modules of the {package}?", + f"Sure, the direct modules under {package} are: {package_modules}.", + ), + ( + f"I need to know the modules of the {package}.", + f"The modules you're looking for in {package} are: {package_modules}.", + ), + ( + f"Could you tell me what the 
modules of the {package} are?", + f"Of course, the modules under {package} are: {package_modules}.", + ), + ( + f"I'm interested in the modules of the {package}.", + f"The modules in {package} are: {package_modules}.", + ), + ( + f"What modules does the {package} contain?", + f"The {package} contains these modules: {package_modules}.", + ), + ] + package_retrieval_chunks.append(f"Modules of {package} are as follows: {package_modules}.") + package_tuning_pairs.extend(package_module_pairs) + + if not (package_summary := package_contents.package_summary): + package_summary_pairs = [ + (f"What does {package} do?", f"{package} does not have any documentation."), + ( + f"Can you tell me the functionality of the {package}?", + f"Unfortunately, the {package} provides no documentation.", + ), + ( + f"I'm curious about what the {package} does. Can you enlighten me?", + f"I'm sorry, but the {package} does not come with any documentation.", + ), + ( + f"Could you explain the purpose of the {package}?", + f"Regrettably, the {package} lacks any form of documentation.", + ), + ( + f"What's the role of the {package}?", + f"The {package} does not offer any documentation.", + ), + ( + f"What functionality does the {package} provide?", + f"The {package} does not have any available documentation.", + ), + ] + package_retrieval_chunks.append( + f"Unfortunately, {package} currently does not have any documentation." 
+ ) + package_tuning_pairs.extend(package_summary_pairs) + else: + package_summary_pairs = [ + (f"What does {package} do?", f"Its documentation is as follows: '{package_summary}'."), + ( + f"Can you tell me about the {package}?", + f"Sure, here is its documentation: '{package_summary}'.", + ), + ( + f"I'd like to know what the {package} does.", + f"Of course, here's the documentation for it: '{package_summary}'.", + ), + ( + f"Could you explain the functionality of the {package}?", + f"Absolutely, the documentation states: '{package_summary}'.", + ), + ( + f"What's the purpose of the {package}?", + f"The purpose is described in its documentation: '{package_summary}'.", + ), + ( + f"I'm curious about the {package}, what does it do?", + f"Good question, its documentation reads: '{package_summary}'.", + ), + ] + package_retrieval_chunks.append( + f"The following is the documentation of {package}: '{package_summary}'." + ) + package_tuning_pairs.extend(package_summary_pairs) + + if not (package_exports := package_contents.package_all_exports): + package_members_pairs = [ + ( + f"What are the public members of the {package}?", + f"{package} does not have any public member exported through '__all__'.", + ), + ( + f"Can you list the public members of the {package}?", + f"The {package} does not export any public members through '__all__'.", + ), + ( + f"Are there any public members in the {package}?", + f"No, the {package} does not have any public members exported through '__all__'.", + ), + ( + f"I'm looking for public members of {package}. Can you help?", + f"Sure, but the {package} does not export any public members through '__all__'.", + ), + ( + f"Could you tell me the public members of the {package}?", + f"Unfortunately, the {package} does not have any public members" + " exported through '__all__'.", + ), + ( + f"I'd like to know the public members of the {package}." 
+ " Can you provide that information?", + f"I'm sorry, but the {package} does not have any public members" + " exported through '__all__'.", + ), + ] + package_retrieval_chunks.append( + f"{package} does not export anything publicly using __all__ variable." + ) + package_tuning_pairs.extend(package_members_pairs) + else: + package_exports_count = len(package_exports) + package_exports_count_pairs = [ + ( + f"How many objects does {package} export publicly?", + f"{package} exports {package_exports_count} many objects using __all__.", + ), + ( + f"What is the count of publicly exported objects in {package}?", + f"Count of publicly exported objects in {package} is {package_exports_count}.", + ), + ( + f"Could you tell me the number of objects publicly exported by {package}?", + f"{package} exports {package_exports_count} objects using __all__.", + ), + ( + f"Please provide the count of objects publicly exported by {package}.", + f"Number of objects publicly exported by {package} is {package_exports_count}.", + ), + ( + f"Tell me the quantity of objects that {package} exports publicly.", + f"{package} exports {package_exports_count} objects using __all__.", + ), + ( + f"Would you mind letting me know how many objects {package} publicly exports?", + f"{package} publicly exports {package_exports_count} objects.", + ), + ] + package_retrieval_chunks.append( + f"{package} has {package_exports_count} many public exports." + ) + package_tuning_pairs.extend(package_exports_count_pairs) + + package_public_members = enumerate_array_elements(package_exports) + package_members_pairs = [ + ( + f"What are the public members of the {package}?", + f"{package} publicly exports the following members using '__all__':" + f" {package_public_members}.", + ), + ( + f"Can you list the public members of the {package}?", + f"Sure, the {package} publicly exports these members using '__all__':" + f" {package_public_members}.", + ), + ( + f"I need to know the public members of the {package}. 
Can you tell me?", + f"Of course, the {package} publicly exports these members using '__all__':" + f" {package_public_members}.", + ), + ( + f"Could you tell me what the {package} publicly exports?", + f"The {package} publicly exports the following members using '__all__':" + f" {package_public_members}.", + ), + ( + f"I'm interested in the public members of the {package}. What are they?", + f"The {package} publicly exports these members using '__all__':" + f" {package_public_members}.", + ), + ] + package_retrieval_chunks.append( + f"{package} exports following public members using __all__: {package_public_members}." + ) + package_tuning_pairs.extend(package_members_pairs) + + package_dataset = Dataset( + retrieval_chunks=package_retrieval_chunks, tuning_pairs=package_tuning_pairs + ) + + return package_dataset + + +@pydantic.validate_call(validate_return=True) +def generate_module_dataset(module_members: Module) -> Dataset: + module_name = module_members.module_name + module_full_name = module_members.module_qualified_name + module = f"'{module_name}' module" + + module_retrieval_chunks: list[str] = [f"'{module_name}' is a Python module."] + module_tuning_pairs: list[tuple[str, str]] = [] + + module_package_pairs = [ + ( + f"Can you tell the the parent package of {module}?", + f"'{module_members.package_name}' is the parent package of {module}.", + ), + ( + f"What is the parent package of the {module}?", + f"The parent package of {module} is '{module_members.package_name}'.", + ), + ( + f"I'm trying to find the parent package of the {module}. Can you help?", + f"Sure, parent package of {module} is '{module_members.package_name}'.", + ), + ( + f"Could you inform me about the parent package of the {module}?", + f"Certainly, '{module_members.package_name}' is the parent package of the {module}.", + ), + ( + f"I need to know the parent package of {module}. 
Can you provide that information?", + f"Absolutely, the parent package of the {module} is '{module_members.package_name}'.", + ), + ( + f"Can you identify the parent package for the {module}?", + f"Yes, parent package for {module} is '{module_members.package_name}'.", + ), + ] + module_retrieval_chunks.append( + f"{module} is part of parent package '{module_members.package_name}'." + ) + module_tuning_pairs.extend(module_package_pairs) + + module_full_name_pairs = [ + ( + f"Specify the full name of {module}?", + f"'{module_full_name}' is fully qualified name for {module}.", + ), + ( + f"What is the fully qualified name for the {module}?", + f"The fully qualified name for the {module} is '{module_full_name}'.", + ), + ( + f"Could you tell me the full name of the {module}?", + f"Sure, the full name of the {module} is '{module_full_name}'.", + ), + ( + f"I need the full name of the {module}. Can you provide it?", + f"Of course, the full name of the {module} is '{module_full_name}'.", + ), + ( + f"Can you specify the fully qualified name of the {module}?", + f"Yes, fully qualified name of the {module} is '{module_full_name}'.", + ), + ( + f"I'm looking for the full name of the {module}. What is it?", + f"Full name of the {module} you're looking for is '{module_full_name}'.", + ), + ] + module_retrieval_chunks.append(f"Full name of {module} is '{module_full_name}'.") + module_tuning_pairs.extend(module_full_name_pairs) + + module_hierarchy = enumerate_array_elements(module_members.module_hierarchy) + module_hierarchy_pairs = [ + ( + f"What is the hierarchy of {module}?", + f"The hierarchy of {module} is as follows: {module_hierarchy}.", + ), + ( + f"Can you explain the hierarchy of the {module}?", + f"Sure, the hierarchy of the {module} is: {module_hierarchy}.", + ), + ( + f"Could you describe the structure of the {module}?", + f"Of course, the structure of the {module} is: {module_hierarchy}.", + ), + ( + f"I need to understand the hierarchy of the {module}. 
Can you help?", + f"Absolutely, the hierarchy of the {module} is: {module_hierarchy}.", + ), + ( + f"Please provide the hierarchy of the {module}.", + f"The hierarchy of the {module} is: {module_hierarchy}.", + ), + ( + f"What does the hierarchy of the {module} look like?", + f"The hierarchy of the {module} looks like this: {module_hierarchy}.", + ), + ] + module_retrieval_chunks.append(f"Hierarchy of {module} is as follows: {module_hierarchy}.") + module_tuning_pairs.extend(module_hierarchy_pairs) + + module_members_count = len(module_members.module_members) + module_members_count_pairs = [ + ( + f"How many members does {module} have?", + f"{module} has {module_members_count} many members.", + ), + ( + f"What is the count of members in {module}?", + f"The count of members in {module} is {module_members_count}.", + ), + ( + f"Could you tell me the number of members in {module}?", + f"{module} has {module_members_count} members.", + ), + ( + f"Please provide the count of members for {module}.", + f"The number of members in {module} is {module_members_count}.", + ), + ( + f"Tell me the quantity of members present in {module}.", + f"{module} has {module_members_count} members.", + ), + ( + f"Would you mind letting me know how many members {module} contains?", + f"{module} contains {module_members_count} members.", + ), + ] + module_retrieval_chunks.append(f"{module} has {module_members_count} many members.") + module_tuning_pairs.extend(module_members_count_pairs) + + module_member_names = enumerate_array_elements( + module_members.module_members, attribute="member_name" + ) + module_members_pairs = [ + ( + f"List the members of {module}.", + f"Members of {module} are as follows: {module_member_names}.", + ), + ( + f"What are the members of the {module}?", + f"The {module} has the following members: {module_member_names}.", + ), + ( + f"Can you tell me the members of the {module}?", + f"Sure, the members of the {module} are: {module_member_names}.", + ), + ( + f"I 
need to know the members of the {module}.", + f"Members of {module} you asked for are: {module_member_names}.", + ), + ( + f"Could you list the members of the {module}?", + f"Of course, members of the {module} are: {module_member_names}.", + ), + ( + f"Please provide the members of the {module}.", + f"Members of {module} you requested are: {module_member_names}.", + ), + ] + module_retrieval_chunks.append(f"Members of {module} are as follows: {module_member_names}.") + module_tuning_pairs.extend(module_members_pairs) + + if not (module_summary := module_members.module_summary): + module_summary_pairs = [ + (f"What is the {module} for?", f"{module} does not have any documentation."), + ( + f"Can you tell me the purpose of the {module}?", + f"The {module} lacks any documentation.", + ), + ( + f"I'd like to know what the {module} is used for.", + f"Unfortunately, there is no documentation for the {module}.", + ), + ( + f"Could you explain the function of the {module}?", + f"Regrettably, the {module} doesn't come with any documentation.", + ), + (f"What does the {module} do?", f"The {module} is without any documentation."), + ] + module_retrieval_chunks.append( + f"Unfortunately, {module} currently does not have any documentation." + ) + module_tuning_pairs.extend(module_summary_pairs) + else: + module_summary_pairs = [ + ( + f"What is the '{module_name}' module for?", + f"{module} documents itself as follows: '{module_summary}'.", + ), + ( + f"Can you tell me the purpose of the '{module_name}' module?", + f"Purpose of {module} is documented as: '{module_summary}'.", + ), + ( + f"I'm curious about the '{module_name}' module. What does it do?", + f"The {module} is described as: '{module_summary}'.", + ), + ( + f"Could you explain the functionality of the '{module_name}' module?", + f"The functionality of the {module} is described as: '{module_summary}'.", + ), + ( + f"I'd like to know more about the '{module_name}' module. 
What's its role?", + f"The role of the {module} is: '{module_summary}'.", + ), + ( + f"What's the use of the '{module_name}' module?", + f"Use of the {module} is documented as: '{module_summary}'.", + ), + ] + module_retrieval_chunks.append( + f"The following is the documentation of {module}: {module_summary}." + ) + module_tuning_pairs.extend(module_summary_pairs) + + if not (module_exports := module_members.module_all_exports): + module_exports_pairs = [ + ( + f"Tell me the public members of the {module}.", + f"{module} lacks any public member exported through '__all__'.", + ), + ( + f"What are the public members of the {module}?", + "There are no public members exported through '__all__' in the {module}.", + ), + ( + f"Could you list the public members of the {module}?", + f"Unfortunately, {module} does not export any public members through '__all__'.", + ), + ( + f"I need to know the public members of the {module}.", + f"The {module} does not have any public members exported through '__all__'.", + ), + ( + f"Can you show me the public members of the {module}?", + f"The {module} does not contain any public members exported through '__all__'.", + ), + ( + f"I'm interested in the public members of the {module}. What are they?", + f"{module} does not export any public members through '__all__'.", + ), + ] + module_retrieval_chunks.append( + f"{module} does not export anything publicly using __all__ variable." 
+ ) + module_tuning_pairs.extend(module_exports_pairs) + else: + module_exports_count = len(module_exports) + module_exports_count_pairs = [ + ( + f"How many objects does {module} export publicly?", + f"{module} exports {module_exports_count} many objects using __all__.", + ), + ( + f"What is the count of publicly exported objects in {module}?", + f"The count of publicly exported objects in {module} is {module_exports_count}.", + ), + ( + f"Could you tell me the number of objects publicly exported by {module}?", + f"{module} exports {module_exports_count} objects using __all__.", + ), + ( + f"Please provide the count of objects publicly exported by {module}.", + f"The number of objects publicly exported by {module} is {module_exports_count}.", + ), + ( + f"Tell me the quantity of objects that {module} exports publicly.", + f"{module} exports {module_exports_count} objects using __all__.", + ), + ( + f"Would you mind letting me know how many objects {module} publicly exports?", + f"{module} publicly exports {module_exports_count} objects.", + ), + ] + module_retrieval_chunks.append(f"{module} has {module_exports_count} many public exports.") + module_tuning_pairs.extend(module_exports_count_pairs) + + module_public_exports = enumerate_array_elements(module_exports) + module_exports_pairs = [ + ( + f"Tell me the public members of the {module}.", + f"{module} publicly exports the following members using '__all__':" + f" {module_public_exports}.", + ), + ( + f"What are the public members of the {module}?", + f"The {module} publicly exports the following members using '__all__':" + f" {module_public_exports}.", + ), + ( + f"Could you list the public members of the {module}?", + f"Sure, the {module} publicly exports these members using '__all__':" + f" {module_public_exports}.", + ), + ( + f"I need to know the public members of the {module}.", + f"The {module} publicly exports these members using '__all__':" + f" {module_public_exports}.", + ), + ( + f"Can you show me 
@pydantic.validate_call(validate_return=True)
def generate_enum_member_dataset(
    enum_member: str, enum_docstring: str, member_type_details: EnumDetails
) -> tuple[Dataset, list[str]]:
    """Generate retrieval chunks and tuning question-answer pairs for an enum.

    Four facts are covered, in this order: the number of members, the members
    themselves, the member names and the member values. Each fact contributes
    one retrieval chunk and a handful of paraphrased question-answer pairs.

    Parameters
    ----------
    enum_member : str
        Text used to refer to the enum inside every generated sentence.
    enum_docstring : str
        Docstring text, embedded verbatim in the final retrieval chunk.
    member_type_details : EnumDetails
        Source of ``enum_members``; the listings are rendered by
        ``enumerate_array_elements`` (defined elsewhere in this module) using
        the ``enum_member``, ``enum_member_name`` and ``enum_member_value``
        attributes of each entry.

    Returns
    -------
    tuple[Dataset, list[str]]
        The dataset built from the chunks and pairs, together with the very
        same list of retrieval chunks.
    """
    members = member_type_details.enum_members

    # fact 1: how many members the enum has
    member_count = len(members)
    count_pairs = [
        (
            f"How many members are there in {enum_member}?",
            f"{enum_member} has {member_count} members.",
        ),
        (
            f"What is the count of members in {enum_member}?",
            f"The count of members in {enum_member} is {member_count}.",
        ),
        (
            f"Can you tell me the number of members in {enum_member}?",
            f"Sure, the number of members in {enum_member} is {member_count}.",
        ),
        (
            f"Could you provide the total number of members in {enum_member}?",
            f"The total number of members in {enum_member} is {member_count}.",
        ),
        (
            f"I need to know the quantity of members in {enum_member}.",
            f"The quantity of members in {enum_member} is {member_count}.",
        ),
        (
            f"Please inform me about the number of members in {enum_member}.",
            f"The number of members in {enum_member} is {member_count}.",
        ),
    ]

    # fact 2: the members themselves
    listed_members = enumerate_array_elements(members, attribute="enum_member")
    members_pairs = [
        (
            f"What are the different members of {enum_member}?",
            f"Different members of {enum_member} are as follows: {listed_members}.",
        ),
        (
            f"Can you list the different members of {enum_member}?",
            f"Sure, the different members of {enum_member} are: {listed_members}.",
        ),
        (
            f"Could you tell me the different members of {enum_member}?",
            f"Of course, the different members of {enum_member} include: {listed_members}.",
        ),
        (
            f"I need to know the different members of {enum_member}.",
            f"The different members of {enum_member} are: {listed_members}.",
        ),
        (
            f"What does {enum_member} consist of?",
            f"{enum_member} consists of the following members: {listed_members}.",
        ),
    ]

    # fact 3: only the names of the members
    listed_names = enumerate_array_elements(members, attribute="enum_member_name")
    names_pairs = [
        (
            f"List just the names of different members of {enum_member}.",
            f"Different members of {enum_member} have the following names: {listed_names}.",
        ),
        (
            f"Can you provide the names of different members of {enum_member}?",
            f"Sure, different members of {enum_member} are named as follows: {listed_names}.",
        ),
        (
            f"What are the names of different members of {enum_member}?",
            f"The names of different members of {enum_member} are: {listed_names}.",
        ),
        (
            f"I need the names of different members of {enum_member}.",
            f"The different members of {enum_member} have these names: {listed_names}.",
        ),
        (
            f"Could you list the names of different members of {enum_member}?",
            f"Of course, different members of {enum_member} have these names: {listed_names}.",
        ),
        (
            f"Show me the names of different members of {enum_member}.",
            f"The names of different members of {enum_member} are: {listed_names}.",
        ),
    ]

    # fact 4: only the values of the members
    listed_values = enumerate_array_elements(members, attribute="enum_member_value")
    values_pairs = [
        (
            f"Only show the different values supported by {enum_member}.",
            f"{enum_member} supports the following values: {listed_values}.",
        ),
        (
            f"What are the different values that {enum_member} supports?",
            f"The different values that {enum_member} supports are: {listed_values}.",
        ),
        (
            f"Can you list the values supported by {enum_member}?",
            f"Sure, {enum_member} supports these values: {listed_values}.",
        ),
        (
            f"I need to know the values supported by {enum_member}.",
            f"{enum_member} supports these values: {listed_values}.",
        ),
        (
            f"Could you tell me the values that {enum_member} supports?",
            f"Of course, the values that {enum_member} supports are: {listed_values}.",
        ),
        (
            f"Please provide the values supported by {enum_member}.",
            f"The values supported by {enum_member} are: {listed_values}.",
        ),
    ]

    # The docstring chunk is deliberately kept as the last entry, with the
    # generated facts placed between the introduction and the docstring.
    retrieval_chunks: list[str] = [
        f"{enum_member} is a Python enum.",
        f"{enum_member} has {member_count} many members.",
        f"Members of {enum_member} are as follows: {listed_members}.",
        f"Names of different members of {enum_member} are as follows: {listed_names}.",
        f"Values of different members of {enum_member} are as follows: {listed_values}.",
        f"{enum_member} has following docstring: {enum_docstring}.",
    ]
    tuning_pairs: list[tuple[str, str]] = [
        *count_pairs,
        *members_pairs,
        *names_pairs,
        *values_pairs,
    ]

    dataset = Dataset(retrieval_chunks=retrieval_chunks, tuning_pairs=tuning_pairs)

    return dataset, retrieval_chunks
+ ) + class_member_tuning_pairs.extend(class_parameters_pairs) + else: + class_parameter_names = enumerate_array_elements( + class_parameters, attribute="parameter_details" + ) + class_parameters_pairs = [ + ( + f"What are the different parameters of {class_member}?", + f"{class_member} supports these arguments to initiate" + f" a new instance: {class_parameter_names}.", + ), + ( + f"Can you list the parameters for {class_member}?", + f"Sure, {class_member} can be initiated with these arguments:" + f" {class_parameter_names}.", + ), + ( + f"I need to know the parameters of {class_member}.", + f"The parameters to initiate a new instance of {class_member} are:" + f" {class_parameter_names}.", + ), + ( + f"Tell me the parameters that {class_member} supports.", + f"{class_member} can be initiated with these arguments: {class_parameter_names}.", + ), + ( + f"What arguments does {class_member} take for initialisation?", + f"To initialise {class_member}, you can use these arguments:" + f" {class_parameter_names}.", + ), + ] + class_member_retrieval_chunks.append( + f"{class_member} requires the following arguments for initialisation:" + f" {class_parameter_names}" + ) + class_member_tuning_pairs.extend(class_parameters_pairs) + + for class_parameter in class_parameters: + parameter_name = class_parameter.parameter_name + parameter = f"'{parameter_name}' argument in {class_member}" + + if (parameter_default := class_parameter.parameter_default) is inspect._empty: + class_parameter_defaults_pairs = [ + ( + f"Tell default value of {parameter}.", + f"{parameter} does not have a default value.", + ), + ( + f"What is the default value of {parameter}?", + f"The {parameter} does not have a default value.", + ), + ( + f"Could you inform me about default value of {parameter}?", + f"Sure, the {parameter} does not have a default value.", + ), + ( + f"I need to know the default value of {parameter}. 
Can you help?", + f"Of course, the {parameter} does not have a default value.", + ), + ( + f"Can you tell me if {parameter} has default value?", + f"No, the {parameter} does not have a default value.", + ), + ( + f"I'm curious about default value of {parameter}.", + f"Well, the {parameter} does not have a default value.", + ), + ] + class_member_retrieval_chunks.append(f"{parameter} does not have a default value.") + class_member_tuning_pairs.extend(class_parameter_defaults_pairs) + else: + class_parameter_defaults_pairs = [ + ( + f"Tell default value of {parameter}.", + f"{parameter} takes {parameter_default} by default.", + ), + ( + f"What is the default value of {parameter}?", + f"The default value of {parameter} is {parameter_default}.", + ), + ( + f"Could you inform me about default value of {parameter}?", + f"Sure, the default value of {parameter} is {parameter_default}.", + ), + ( + f"I need to know the default value of {parameter}.", + f"The default value of {parameter} is {parameter_default}.", + ), + ( + f"Can you provide default value of {parameter}?", + f"Yes, default value of {parameter} is {parameter_default}.", + ), + ( + f"Please, disclose default value of {parameter}.", + f"Certainly, the default value of {parameter} is {parameter_default}.", + ), + ] + class_member_retrieval_chunks.append( + f"{parameter_default} is the default value of {parameter}." + ) + class_member_tuning_pairs.extend(class_parameter_defaults_pairs) + + if (parameter_annotation := class_parameter.parameter_annotation) is inspect._empty: + class_parameter_types_pairs = [ + ( + f"Name type hint for {parameter}.", + f"{parameter} does not have a type annotation.", + ), + ( + f"What is the type hint for {parameter}?", + f"There is no type annotation for the {parameter}.", + ), + ( + f"Can you tell me the type hint for {parameter}?", + f"The {parameter} is not annotated with a type.", + ), + ( + f"I'm looking for the type hint for {parameter}. 
Can you help?", + f"Sure, the {parameter} does not have a type annotation.", + ), + ( + f"Could you provide the type hint for {parameter}?", + f"Unfortunately, {parameter} does not have type annotation.", + ), + ( + f"I need to know the type hint for {parameter}.", + f"The {parameter} does not come with a type annotation.", + ), + ] + class_member_retrieval_chunks.append(f"Type hint for {parameter} is unavailable.") + class_member_tuning_pairs.extend(class_parameter_types_pairs) + else: + class_parameter_types_pairs = [ + ( + f"Name type hint for {parameter}.", + f"{parameter} has '{parameter_annotation}' as type hint.", + ), + ( + f"What is the type hint for {parameter}?", + f"The type hint for {parameter} is '{parameter_annotation}'.", + ), + ( + f"Could you tell me the type hint for {parameter}?", + f"Sure, the type hint for {parameter} is '{parameter_annotation}'.", + ), + ( + f"I need to know the type hint for {parameter}.", + f"The type hint for {parameter} is '{parameter_annotation}'.", + ), + ( + f"Identify the type hint for {parameter}.", + f"The type hint for {parameter} is '{parameter_annotation}'.", + ), + ( + f"Can you specify the type hint for {parameter}?", + f"Yes, the type hint for {parameter} is '{parameter_annotation}'.", + ), + ] + class_member_retrieval_chunks.append( + f"{parameter} is annotated as '{parameter_annotation}' type." + ) + class_member_tuning_pairs.extend(class_parameter_types_pairs) + + if not (parameter_summary := class_parameter.parameter_summary): + class_parameter_summary_pairs = [ + ( + f"What does {parameter} do?", + f"Docstring of {class_member} does not describe '{parameter_name}'.", + ), + ( + f"Can you explain the role of {parameter}?", + f"The docstring of {class_member} does not provide any information about" + f" '{parameter_name}'.", + ), + ( + f"I'm trying to understand what {parameter} does. 
Can you help?", + f"Unfortunately, the docstring of {class_member} does not mention anything" + f" about '{parameter_name}'.", + ), + ( + f"What is the function of {parameter}?", + f"There is no description of '{parameter_name}' in the docstring of" + f" {class_member}.", + ), + ( + f"Could you tell me what '{parameter_name}' does in {class_member}?", + f"The docstring of {class_member} does not contain any details about" + f" '{parameter_name}'.", + ), + ( + f"I'm curious about the purpose of {parameter}. Can you enlighten me?", + f"I'm sorry, but the docstring of {class_member} does not discuss" + f" '{parameter_name}'.", + ), + ] + class_member_retrieval_chunks.append( + f"{parameter} lacks any documentation in the docstring." + ) + class_member_tuning_pairs.extend(class_parameter_summary_pairs) + else: + class_parameter_summary_pairs = [ + ( + f"What does {parameter} do?", + f"{class_member} documents role of '{parameter_name}' as follows:" + f" '{parameter_summary}'.", + ), + ( + f"Can you explain the role of {parameter}?", + f"Sure, {class_member} describes '{parameter_name}' as follows:" + f" '{parameter_summary}'.", + ), + ( + f"I'm curious about {parameter}. What does it do?", + f"In {class_member}, '{parameter_name}' is documented as follows:" + f" '{parameter_summary}'.", + ), + ( + f"Could you tell me what {parameter} does?", + f"Of course, {parameter} is described as follows: '{parameter_summary}'.", + ), + ( + f"What's the function of {parameter}?", + f"{class_member} describes the function of '{parameter_name}' as follows:" + f" '{parameter_summary}'.", + ), + ( + f"I'd like to know the purpose of {parameter}.", + f"In {class_member}, the purpose of '{parameter_name}' is defined as follows:" + f" '{parameter_summary}'.", + ), + ] + class_member_retrieval_chunks.append( + f"As per docstring, role of {parameter} is: '{parameter_summary}'." 
+ ) + class_member_tuning_pairs.extend(class_parameter_summary_pairs) + + if not (class_methods := member_type_details.class_methods): + class_method_names_pairs = [ + ( + f"List names of the public methods of {class_member}.", + f"{class_member} does not have any public methods (not starting with '_').", + ), + ( + f"Can you provide the names of the public methods for {class_member}?", + f"Unfortunately, {class_member} does not have any public methods.", + ), + ( + f"What are the public methods of {class_member}?", + f"There are no public methods (not starting with '_') in {class_member}.", + ), + ( + f"I need to know the public methods of {class_member}. Can you list them?", + f"I'm sorry, but {class_member} does not have any public methods.", + ), + ( + f"Could you list the public methods of {class_member}?", + f"{class_member} does not contain any public methods (not starting with '_').", + ), + ( + f"Show me the public methods of {class_member}.", + f"It appears that {class_member} does not have any public methods.", + ), + ] + class_member_retrieval_chunks.append( + f"{class_member} has no public (without _ as the prefix) methods." 
+ ) + class_member_tuning_pairs.extend(class_method_names_pairs) + else: + class_methods_count = len(class_methods) + class_methods_count_pairs = [ + ( + f"How many public methods does {class_member} have?", + f"{class_member} has {class_methods_count} many public methods.", + ), + ( + f"What is the count of public methods in {class_member}?", + f"The count of public methods in {class_member} is {class_methods_count}.", + ), + ( + f"Could you tell me the number of public methods in {class_member}?", + f"{class_member} has {class_methods_count} public methods.", + ), + ( + f"Please provide the count of public methods for {class_member}.", + f"The number of public methods in {class_member} is {class_methods_count}.", + ), + ( + f"Tell me the quantity of public methods present in {class_member}.", + f"{class_member} has {class_methods_count} public methods.", + ), + ( + f"Would you mind letting me know how many public methods {class_member} contains?", + f"{class_member} contains {class_methods_count} public methods.", + ), + ] + class_member_retrieval_chunks.append( + f"{class_member} has {class_methods_count} many public methods." 
+ ) + class_member_tuning_pairs.extend(class_methods_count_pairs) + + class_public_methods = enumerate_array_elements(class_methods, attribute="method_name") + class_method_names_pairs = [ + ( + f"List names of the public methods of {class_member}.", + f"Here are the public methods of {class_member}: {class_public_methods}.", + ), + ( + f"Can you provide the names of the public methods for {class_member}?", + f"Sure, the public methods of {class_member} that do not start with '_' are:" + f" {class_public_methods}.", + ), + ( + f"What are the public methods of {class_member}?", + f"The public methods of {class_member} (excluding those starting with '_') are:" + f" {class_public_methods}.", + ), + ( + f"I need to know the public methods of {class_member}.", + f"The public methods of {class_member} (those not starting with '_') are:" + f" {class_public_methods}.", + ), + ( + f"Could you list the public methods of {class_member}?", + f"Of course, the public methods of {class_member} (not beginning with '_') are:" + f" {class_public_methods}.", + ), + ( + f"Please show me the public methods of {class_member}.", + f"Here you go, the public methods of {class_member}" + f" (excluding those with a prefix '_') are: {class_public_methods}.", + ), + ] + class_member_retrieval_chunks.append( + f"{class_member} has the following public methods: {class_public_methods}" + ) + class_member_tuning_pairs.extend(class_method_names_pairs) + + for class_method in class_methods: + method_name = class_method.method_name + method = f"'{method_name}' method of {class_member}" + + if not (method_parameters := class_method.method_parameters): + class_method_parameters_pairs = [ + (f"What arguments do {method} accept?", f"{method} does not take any parameters."), + ( + f"Can you tell me the parameters that {method} requires?", + f"The {method} does not require any parameters.", + ), + ( + f"What are the inputs for the {method} in {class_member}?", + f"There are no inputs for the {method} in 
{class_member}.", + ), + ( + f"Does the {method} need any arguments?", + f"No, {method} does not need any arguments.", + ), + ( + f"What parameters should I pass to {method}?", + f"You don't need to pass any parameters to the {method}.", + ), + ( + f"What are required arguments for {method}?", + f"{method} does not require any arguments.", + ), + ] + class_member_retrieval_chunks.append(f"{method} takes no arguments.") + class_member_tuning_pairs.extend(class_method_parameters_pairs) + else: + class_method_parameters = enumerate_array_elements(method_parameters) + class_method_parameters_pairs = [ + ( + f"What arguments do {method} accept?", + f"{method} takes the following parameters: {class_method_parameters}.", + ), + ( + f"Can you tell me the parameters that {method} requires?", + f"Sure, {method} requires these parameters: {class_method_parameters}.", + ), + ( + f"I need to know arguments for {method}.", + f"The {method} has these arguments: {class_method_parameters}.", + ), + ( + f"What are the parameters for '{method}'?", + f"The parameters for {method} are: {class_method_parameters}.", + ), + ( + f"Could you list the arguments that the {method} takes?", + f"Certainly, the {method} takes these arguments: {class_method_parameters}.", + ), + ] + class_member_retrieval_chunks.append( + f"{method} accepts following parameters: {class_method_parameters}" + ) + class_member_tuning_pairs.extend(class_method_parameters_pairs) + + if not (method_summary := class_method.method_summary): + class_method_summary_pairs = [ + (f"What does {method} do?", f"Docstring of {method} is missing."), + ( + f"Can you explain functionality of {method}?", + f"The docstring for {method} is not available.", + ), + ( + f"I'm trying to understand what {method} does. 
Can you help?", + f"Unfortunately, the docstring for {method} is not provided.", + ), + ( + f"Could you describe the role of {method}?", + f"There is no docstring available for {method}.", + ), + ( + f"I'm not sure what {method} does. Can you clarify?", + f"The {method} lacks a docstring.", + ), + (f"What's the purpose of {method}?", f"The {method} doesn't have a docstring."), + ] + class_member_retrieval_chunks.append(f"Unfortunately, {method} is not documented.") + class_member_tuning_pairs.extend(class_method_summary_pairs) + else: + class_method_summary_pairs = [ + ( + f"What does {method} do?", + f"Based on method docstring, its role is to '{method_summary}'.", + ), + ( + f"Can you explain the function of {method}?", + f"Sure, according to method docstring, it is designed to '{method_summary}'.", + ), + ( + f"I'm curious about the {method}. What's its purpose?", + f"Well, if we look at the docstring of {method}, we can see that it's meant to" + f" '{method_summary}'.", + ), + ( + f"Could you tell me what the {method} does?", + f"Of course, the docstring of {method} indicates that its function is to" + f" '{method_summary}'.", + ), + ( + f"I'd like to understand role of {method}.", + f"Certainly, method docstring reveals that its job is to '{method_summary}'.", + ), + ( + f"What's the functionality of the {method}?", + f"As per the method docstring, it's designed to '{method_summary}'.", + ), + ] + class_member_retrieval_chunks.append( + f"Based on docstring, {method} has the purpose of '{method_summary}'." 
+ ) + class_member_tuning_pairs.extend(class_method_summary_pairs) + + if not (class_attributes := member_type_details.class_attributes): + class_attribute_names_pairs = [ + ( + f"Are there any public attributes of {class_member}?", + f"{class_member} has no public attributes (not starting with '_').", + ), + ( + f"Does {class_member} have any public attributes?", + f"No, {class_member} does not have any public attributes.", + ), + ( + f"Can you tell me if {class_member} has any public attributes?", + f"{class_member} does not have any public attributes (not starting with '_').", + ), + ( + f"I'm looking for public attributes of {class_member}. Are there any?", + f"There are no public attributes (not starting with '_') for {class_member}.", + ), + ( + f"Is it possible to find any public attributes in {class_member}?", + f"It's not possible to find any public attributes in {class_member}.", + ), + ] + class_member_retrieval_chunks.append(f"{class_member} has no public attributes.") + class_member_tuning_pairs.extend(class_attribute_names_pairs) + else: + class_attributes_count = len(class_attributes) + class_attributes_count_pairs = [ + ( + f"How many public attributes does {class_member} have?", + f"{class_member} has {class_attributes_count} many public attributes.", + ), + ( + f"What is the count of public attributes in {class_member}?", + f"The count of public attributes in {class_member} is {class_attributes_count}.", + ), + ( + f"Could you tell me the number of public attributes in {class_member}?", + f"{class_member} has {class_attributes_count} public attributes.", + ), + ( + f"Please provide the count of public attributes for {class_member}.", + f"Number of public attributes in {class_member} is {class_attributes_count}.", + ), + ( + f"Tell me the quantity of public attributes present in {class_member}.", + f"{class_member} has {class_attributes_count} public attributes.", + ), + ( + f"Would you mind letting me know how many public attributes 
{class_member}" + " contains?", + f"{class_member} contains {class_attributes_count} public attributes.", + ), + ] + class_member_retrieval_chunks.append( + f"{class_member} has {class_attributes_count} many public attributes." + ) + class_member_tuning_pairs.extend(class_attributes_count_pairs) + + class_public_attributes = enumerate_array_elements( + class_attributes, attribute="attribute_name" + ) + class_attribute_names_pairs = [ + ( + f"Are there any public attributes of {class_member}?", + f"These are the public attributes of {class_member}: {class_public_attributes}.", + ), + ( + f"Can you list the public attributes of {class_member}?", + f"{class_member} has the following public attributes (not starting with '_'):" + f" {class_public_attributes}.", + ), + ( + f"What are the public attributes of {class_member}?", + f"The public attributes of {class_member} (those not starting with '_') are:" + f" {class_public_attributes}.", + ), + ( + f"I need to know the public attributes of {class_member}.", + f"Sure, the public attributes of {class_member} are: {class_public_attributes}.", + ), + ( + f"Could you tell me the public attributes of {class_member}?", + f"Of course, public attributes of {class_member} (not starting with '_') are:" + f" {class_public_attributes}.", + ), + ] + class_member_retrieval_chunks.append( + f"{class_member} has following public attributes: {class_public_attributes}" + ) + class_member_tuning_pairs.extend(class_attribute_names_pairs) + + if not (class_summary := member_type_details.class_summary): + class_summary_pairs = [ + ( + f"What does {class_member} do in short?", + f"Docstring of {class_member} lacks a summary of its objective.", + ), + ( + f"Can you briefly explain the function of {class_member}?", + f"Docstring of {class_member} doesn't provide a concise summary of its purpose.", + ), + ( + f"Could you tell me what {class_member} is used for?", + f"Unfortunately, the docstring of {class_member} doesn't contain" + " a brief 
description of its function.", + ), + ( + f"I'm not sure what {class_member} does. Can you clarify?", + f"The docstring of {class_member} doesn't succinctly explain its role.", + ), + ( + f"What's the purpose of {class_member}?", + f"Docstring of {class_member} doesn't have any explanation of its objective.", + ), + ] + class_member_retrieval_chunks.append( + f"Unfortunately, {class_member} does not document its objective." + ) + class_member_tuning_pairs.extend(class_summary_pairs) + else: + class_summary_pairs = [ + ( + f"What does {class_member} do in short?", + f"Based on documentation, objective of {class_member} is to: '{class_summary}'.", + ), + ( + f"Can you briefly explain the function of {class_member}?", + f"Sure, according to the documentation, {class_member} is designed to:" + f" '{class_summary}'.", + ), + ( + f"I'm curious about {class_member}, what's its purpose?", + f"Well, as per the documentation, {class_member} aims to: '{class_summary}'.", + ), + ( + f"Could you give me a quick rundown on what {class_member} does?", + f"Absolutely, the documentation states that the role of {class_member} is to:" + f" '{class_summary}'.", + ), + ( + f"What's the role of {class_member} in a nutshell?", + f"The documentation indicates that the purpose of {class_member} is to:" + f" '{class_summary}'.", + ), + ( + f"Can you summarise the function of {class_member}?", + f"Of course, the documentation outlines that {class_member} is intended to:" + f" '{class_summary}'.", + ), + ] + class_member_retrieval_chunks.append( + f"{class_member} documents its purpose as follows: '{class_summary}'." 
+ ) + class_member_tuning_pairs.extend(class_summary_pairs) + + if not (class_notes := member_type_details.class_notes): + class_notes_pairs = [ + ( + f"Mention any specific details for {class_member} to be aware of.", + f"Docstring of {class_member} does not note on specific details.", + ), + ( + f"What are the specific details to be aware of for {class_member}?", + f"There are no specific details noted in the docstring of {class_member}.", + ), + ( + f"Could you tell me any specifics for {class_member} that I should be aware of?", + f"The docstring of {class_member} doesn't highlight any details.", + ), + ( + f"Are there any specific details for {class_member} that I need to know?", + f"No specific details are mentioned in the docstring of {class_member}.", + ), + ( + f"I need to know the specific details for {class_member}. Can you provide them?", + f"Unfortunately, the docstring of {class_member} does not contain any details.", + ), + ( + f"Can you specify any details for {class_member} that I should be aware of?", + f"The docstring of {class_member} does not specify any details to be aware of.", + ), + ] + class_member_retrieval_chunks.append( + f"Docstring of {class_member} has contains no specific implementation details." 
+ ) + class_member_tuning_pairs.extend(class_notes_pairs) + else: + class_notes_pairs = [ + ( + f"Mention any specific details for {class_member} to be aware of.", + f"The {class_member} docstring highlights the following: '{class_notes}'.", + ), + ( + f"What are specifics that I should be aware of before using {class_member}?", + f"The details you should know to use {class_member} are highlighted in docstring:" + f" '{class_notes}'.", + ), + ( + f"Could you specify the details for {class_member} to take note of?", + f"Sure, the docstring for {class_member} specifies the following details:" + f" '{class_notes}'.", + ), + ( + f"Can you list the details for {class_member} to keep in mind?", + f"Certainly, the docstring for {class_member} lists the following details:" + f" '{class_notes}'.", + ), + ( + f"What should users of {class_member} be mindful of?", + f"The docstring for {class_member} mentions the following points to be mindful of:" + f" '{class_notes}'.", + ), + ( + f"What details does the user of {class_member} need to know?", + f"User of {class_member} needs to know the following details: '{class_notes}'.", + ), + ] + class_member_retrieval_chunks.append( + f"In docstring, {class_member} specifies the following: '{class_notes}'." 
+ ) + class_member_tuning_pairs.extend(class_notes_pairs) + + class_member_dataset = Dataset( + retrieval_chunks=class_member_retrieval_chunks[:2], tuning_pairs=class_member_tuning_pairs + ) + + return class_member_dataset, class_member_retrieval_chunks + + +@pydantic.validate_call(validate_return=True) +def generate_function_member_dataset( # noqa: C901, PLR0912, PLR0915 + function_member: str, function_docstring: str, member_type_details: FunctionDetails +) -> tuple[Dataset, list[str]]: + function_member_retrieval_chunks: list[str] = [ + f"{function_member} is a Python function.", + f"{function_member} has following docstring: {function_docstring}.", + ] + function_member_tuning_pairs: list[tuple[str, str]] = [] + + if not (function_parameters := member_type_details.function_parameters): + function_parameters_pairs = [ + ( + f"List various parameters of {function_member}.", + f"{function_member} does not take any parameters.", + ), + ( + f"What are the parameters of {function_member}?", + f"{function_member} has no parameters.", + ), + ( + f"Could you tell me the parameters that {function_member} takes?", + f"{function_member} doesn't require any parameters.", + ), + ( + f"I need to know the parameters for {function_member}.", + f"There are no parameters for {function_member}.", + ), + ( + f"Can you list the parameters for {function_member}?", + f"Actually, {function_member} doesn't have any parameters.", + ), + ( + f"Please provide the parameters of {function_member}.", + f"Sorry, but {function_member} does not have any parameters.", + ), + ] + function_member_retrieval_chunks.append(f"{function_member} takes no parameters.") + function_member_tuning_pairs.extend(function_parameters_pairs) + else: + function_parameter_names = enumerate_array_elements( + function_parameters, attribute="parameter_details" + ) + function_parameters_pairs = [ + ( + f"List various parameters of {function_member}.", + f"Different parameters of {function_member} are as follows:" + f" 
{function_parameter_names}.", + ), + ( + f"What are the different parameters of {function_member}?", + f"{function_member} has the following parameters: {function_parameter_names}.", + ), + ( + f"Could you tell me the parameters of {function_member}?", + f"Sure, the parameters of {function_member} are: {function_parameter_names}.", + ), + ( + f"I need to know the parameters of {function_member}.", + f"The parameters of {function_member} are: {function_parameter_names}.", + ), + ( + f"Can you list the parameters for {function_member}?", + f"Yes, the parameters for {function_member} are: {function_parameter_names}.", + ), + ( + f"Please provide the parameters of {function_member}.", + f"Parameters of {function_member} are as follows: {function_parameter_names}.", + ), + ] + function_member_retrieval_chunks.append( + f"{function_member} takes the following parameters: {function_parameter_names}" + ) + function_member_tuning_pairs.extend(function_parameters_pairs) + + for function_parameter in function_parameters: + parameter_name = function_parameter.parameter_name + parameter = f"'{parameter_name}' argument in {function_member}" + + if (parameter_default := function_parameter.parameter_default) is inspect._empty: + function_parameter_defaults_pairs = [ + (f"Default value of {parameter}?", f"{parameter} does not have a default value."), + ( + f"What is the default value for {parameter}?", + f"The {parameter} does not come with a default value.", + ), + ( + f"Could you tell me default value of {parameter}?", + f"Sure, the {parameter} does not possess a default value.", + ), + ( + f"I'm curious about default value of {parameter}.", + f"In response to your curiosity, {parameter} is not assigned a default value.", + ), + ( + f"I'd like to know the default value of {parameter}.", + f"To answer your query, {parameter} does not hold a default value.", + ), + ( + f"Can you inform me about the default value of {parameter}?", + f"Certainly, {parameter} does not contain a 
default value.", + ), + ] + function_member_retrieval_chunks.append(f"{parameter} has no default value.") + function_member_tuning_pairs.extend(function_parameter_defaults_pairs) + else: + function_parameter_defaults_pairs = [ + ( + f"Default value of {parameter}?", + f"{parameter} has default value of {parameter_default}.", + ), + ( + f"What is the default value for {parameter}?", + f"The default value for {parameter} is {parameter_default}.", + ), + ( + f"Could you tell me default value of {parameter}?", + f"Sure, the default value of {parameter} is {parameter_default}.", + ), + ( + f"I would like to know the default value of {parameter}.", + f"The {parameter} has a default value of {parameter_default}.", + ), + ( + f"Can you inform me about the default value of {parameter}?", + f"Of course, the {parameter} defaults to {parameter_default}.", + ), + ( + f"I'm interested in default value of {parameter}.", + f"The default value of the {parameter} is {parameter_default}.", + ), + ] + function_member_retrieval_chunks.append( + f"{parameter} has the default value of {parameter_default}." + ) + function_member_tuning_pairs.extend(function_parameter_defaults_pairs) + + if (parameter_annotation := function_parameter.parameter_annotation) is inspect._empty: + function_parameter_types_pairs = [ + ( + f"What is type annotation of {parameter}?", + f"{parameter} does not have a type annotation.", + ), + ( + f"Can you tell me type annotation of {parameter}?", + f"The {parameter} does not have a type annotation.", + ), + ( + f"I'm curious about the type annotation of {parameter}." 
+ " Can you provide some information?", + f"Sure, the {parameter} does not have a type annotation.", + ), + ( + f"Do you have any information on the type annotation of {parameter}?", + f"Yes, the {parameter} does not have a type annotation.", + ), + ( + f"Could you inform me about the type annotation of {parameter}?", + f"Certainly, {parameter} does not have a type annotation.", + ), + ( + f"I'd like to know the type annotation of {parameter}.", + f"The {parameter} you're asking about does not have a type annotation.", + ), + ] + function_member_retrieval_chunks.append( + f"Unfortunately, type hint for {parameter} is missing." + ) + function_member_tuning_pairs.extend(function_parameter_types_pairs) + else: + function_parameter_types_pairs = [ + ( + f"What is type annotation of {parameter}?", + f"Type annotation of {parameter} is '{parameter_annotation}'.", + ), + ( + f"Can you tell me type annotation of {parameter}?", + f"Sure, the type annotation of {parameter} is '{parameter_annotation}'.", + ), + ( + f"I'm curious about the type annotation of {parameter}. What is it?", + f"The type annotation of {parameter} is '{parameter_annotation}'.", + ), + ( + f"Do you know type annotation of {parameter}?", + f"Yes, the type annotation of {parameter} is '{parameter_annotation}'.", + ), + ( + f"Could you inform me about the type annotation of {parameter}?", + f"Of course, the type annotation of {parameter} is '{parameter_annotation}'.", + ), + ( + f"What's the type annotation for {parameter}?", + f"The type annotation for {parameter} is '{parameter_annotation}'.", + ), + ] + function_member_retrieval_chunks.append( + f"{parameter} has '{parameter_annotation}' as type annotation." 
+ ) + function_member_tuning_pairs.extend(function_parameter_types_pairs) + + if not (parameter_summary := function_parameter.parameter_summary): + function_parameter_summary_pairs = [ + ( + f"What is {parameter} for?", + f"Docstring of {function_member} lacks a description for '{parameter_name}'.", + ), + ( + f"Can you explain the purpose of {parameter}?", + f"The docstring of {function_member} doesn't provide a description.", + ), + ( + f"I'm not sure what {parameter} does. Can you help?", + f"Unfortunately, the docstring of {function_member} doesn't include" + " a description.", + ), + ( + f"Could you clarify the role of {parameter}?", + f"The description is missing in the docstring of {function_member}.", + ), + ( + f"I'm confused about the {parameter}. What does it do?", + f"The docstring of {function_member} doesn't contain a description.", + ), + ( + f"What does {parameter} do?", + f"There's no description in the docstring of {function_member}.", + ), + ] + function_member_retrieval_chunks.append( + f"{parameter} is not documented in the docstring." + ) + function_member_tuning_pairs.extend(function_parameter_summary_pairs) + else: + function_parameter_summary_pairs = [ + ( + f"What is {parameter} for?", + f"Based on {function_member} docstring, its role is '{parameter_summary}'.", + ), + ( + f"Can you explain the role of {parameter}?", + f"Sure, according to the docstring of {function_member}," + f" '{parameter_name}' is used for '{parameter_summary}'.", + ), + ( + f"I'm curious about the {parameter}. 
What does it do?", + f"Well, if you look at the docstring of {function_member}, you'll see that" + f" '{parameter_name}' is responsible for '{parameter_summary}'.", + ), + ( + f"Could you tell me the purpose of {parameter}?", + f"Of course, the docstring of {function_member} indicates that" + f" '{parameter_name}' serves the purpose of '{parameter_summary}'.", + ), + ( + f"What's the function of {parameter}?", + f"As per the docstring of {function_member}, '{parameter_name}' functions as:" + f" '{parameter_summary}'.", + ), + ( + f"I'd like to know what '{parameter_name}' does in {function_member}.", + f"Sure thing, the docstring of {function_member} states that" + f" '{parameter_name}' does '{parameter_summary}'.", + ), + ] + function_member_retrieval_chunks.append( + f"In the docstring, {parameter} is described as '{parameter_summary}'." + ) + function_member_tuning_pairs.extend(function_parameter_summary_pairs) + + if ( + returns_annotation := member_type_details.function_returns.returns_annotation + ) is inspect._empty: + function_return_type_pairs = [ + ( + f"What is the return type annotation of {function_member}?", + f"{function_member} lacks a return type annotation. It may still return though.", + ), + ( + f"Can you tell me the return type annotation of {function_member}?", + f"The function {function_member} does not have a return type annotation." + " However, it may still return.", + ), + ( + f"I'm curious about return type annotation of {function_member}. What is it?", + f"Well, {function_member} doesn't have a return type annotation." + " But, it could still return.", + ), + ( + f"Do you know the return type annotation of {function_member}?", + f"Actually, {function_member} doesn't come with a return type annotation." + " It's possible that it still returns though.", + ), + ( + f"Could you inform me about the return type annotation of {function_member}?", + f"Sure, {function_member} is missing a return type annotation." 
+ " It might still return though.", + ), + ( + f"What's the return type annotation for {function_member}?", + f"It appears that {function_member} is without a return type annotation." + " It may still have a return.", + ), + ] + function_member_retrieval_chunks.append( + f"{function_member} has no return annotation, but its return can still be non-null." + ) + function_member_tuning_pairs.extend(function_return_type_pairs) + else: + function_return_type_pairs = [ + ( + f"What is the return type annotation of {function_member}?", + f"Return type annotation for {function_member} is '{returns_annotation}'.", + ), + ( + f"Can you tell me the return type annotation of {function_member}?", + f"Sure, return type annotation for {function_member} is '{returns_annotation}'.", + ), + ( + f"I need to know the return type annotation of {function_member}.", + f"The return type annotation for {function_member} is '{returns_annotation}'.", + ), + ( + f"Do you know the return type annotation of {function_member}?", + f"Yes, return type annotation for {function_member} is '{returns_annotation}'.", + ), + ( + f"Could you inform me about the return type annotation of {function_member}?", + f"Of course, the return type for {function_member} is '{returns_annotation}'.", + ), + ( + f"I'm curious about the return type annotation of {function_member}.", + f"The return type annotation for {function_member} is '{returns_annotation}'.", + ), + ] + function_member_retrieval_chunks.append( + f"Return of {function_member} is annotated as '{returns_annotation}'." 
+ ) + function_member_tuning_pairs.extend(function_return_type_pairs) + + if not (returns_summary := member_type_details.function_returns.returns_summary): + function_return_summary_pairs = [ + ( + f"What does {function_member} return?", + f"Docstring of {function_member} does not describe its return.", + ), + ( + f"Can you tell me what {function_member} returns?", + f"Docstring of {function_member} doesn't provide information about its return.", + ), + ( + f"Do you know the return of {function_member}?", + f"Unfortunately, docstring of {function_member} doesn't specify what it returns.", + ), + ( + f"I'm curious about what {function_member} returns. Can you help?", + f"I'm sorry, but the docstring of {function_member} doesn't clarify its return.", + ), + ( + f"What's the return of {function_member}?", + f"The return of {function_member} is not described in its docstring.", + ), + ( + f"Could you inform me about the return of {function_member}?", + f"Regrettably, the docstring of {function_member} doesn't detail its return.", + ), + ] + function_member_retrieval_chunks.append(f"{function_member} does not document its return.") + function_member_tuning_pairs.extend(function_return_summary_pairs) + else: + function_return_summary_pairs = [ + ( + f"What does {function_member} return?", + f"Based on {function_member} docstring, the return contains: '{returns_summary}'.", + ), + ( + f"Can you tell me what {function_member} returns?", + f"Sure, as per docstring of {function_member}, it returns: '{returns_summary}'.", + ), + ( + f"I'm curious about what {function_member} returns. Can you help?", + f"Absolutely! 
The docstring of {function_member} indicates that it returns:" + f" '{returns_summary}'.", + ), + ( + f"Do you know what {function_member} returns?", + f"Yes, the docstring of {function_member} states that it returns:" + f" '{returns_summary}'.", + ), + ( + f"I'd like to know what {function_member} returns.", + f"Of course, the docstring of {function_member} reveals that its return contains:" + f" '{returns_summary}'.", + ), + ( + f"Could you inform me about the return of {function_member}?", + f"Certainly, the docstring of {function_member} specifies that it returns:" + f" '{returns_summary}'.", + ), + ] + function_member_retrieval_chunks.append( + f"Based on docstring, return of {function_member} is as follows: '{returns_summary}'." + ) + function_member_tuning_pairs.extend(function_return_summary_pairs) + + if not (function_summary := member_type_details.function_summary): + function_summary_pairs = [ + ( + f"Summarise role of {function_member} in short.", + f"{function_member} docstring lacks a summary of its objective.", + ), + ( + f"Can you briefly explain the role of {function_member}?", + f"The docstring of {function_member} doesn't provide its purpose.", + ), + ( + f"What is the purpose of {function_member} as per its docstring?", + f"The docstring of {function_member} doesn't clearly state its purpose.", + ), + ( + f"Could you provide a summary of objective of {function_member}?", + f"The objective of {function_member} is not summarised in its docstring.", + ), + ( + f"What does {function_member} do according to its docstring?", + f"According to its docstring, role of {function_member} is not summarised.", + ), + ] + function_member_retrieval_chunks.append(f"Documentation for {function_member} is missing.") + function_member_tuning_pairs.extend(function_summary_pairs) + else: + function_summary_pairs = [ + ( + f"Summarise role of {function_member} in short.", + f"Based on docstring, objective of {function_member} is to: '{function_summary}'.", + ), + ( + 
f"Can you briefly explain the role of {function_member}?", + f"Sure, according to the docstring, the purpose of {function_member} is:" + f" '{function_summary}'.", + ), + ( + f"What does {function_member} do, in a nutshell?", + f"In a nutshell, {function_member} is designed to: '{function_summary}'.", + ), + ( + f"Could you provide a short summary of role of {function_member}?", + f"Certainly, from docstring, {function_member} aims to: '{function_summary}'.", + ), + ( + f"I need a brief explanation of what {function_member} does.", + f"Of course, {function_member} is intended to: '{function_summary}'.", + ), + ( + f"In brief, what is the role of {function_member}?", + f"Briefly, the role of {function_member} is to: '{function_summary}'," + " according to the docstring.", + ), + ] + function_member_retrieval_chunks.append( + f"{function_member} documents itself as follows: '{function_summary}'." + ) + function_member_tuning_pairs.extend(function_summary_pairs) + + if not (function_raises := member_type_details.function_raises): + function_raise_types_pairs = [ + ( + f"Does {function_member} raise any specific exception?", + f"Docstring of {function_member} does not mention any specific exceptions.", + ), + ( + f"Are there any specific exceptions that {function_member} raises?", + f"No specific exceptions are mentioned in the docstring of {function_member}.", + ), + ( + f"Can you tell me if {function_member} raises any specific exceptions?", + f"According to docstring, {function_member} does not raise exceptions.", + ), + ( + f"I want to know if {function_member} raises any specific exceptions." 
+ " Can you confirm?", + f"I can confirm that docstring of {function_member} does not mention exceptions.", + ), + ( + f"Could {function_member} possibly raise any specific exceptions?", + f"The docstring of {function_member} does not indicate that" + " it raises any specific exceptions.", + ), + ( + f"Is it possible for {function_member} to raise any specific exceptions?", + f"The docstring of {function_member} does not suggest that" + " it raises any specific exceptions.", + ), + ] + function_member_retrieval_chunks.append( + f"{function_member} does not document any specific exceptions in the docstring." + ) + function_member_tuning_pairs.extend(function_raise_types_pairs) + else: + function_raise_types = enumerate_array_elements( + function_raises, attribute="raises_details" + ) + function_raise_types_pairs = [ + ( + f"Does {function_member} raise any specific exception?", + f"Based on docstring of {function_member}, it can raise the following:" + f" {function_raise_types}.", + ), + ( + f"Can you tell me if {function_member} raises any specific exceptions?", + f"Yes, according to docstring of {function_member}, it can raise these exceptions:" + f" {function_raise_types}.", + ), + ( + f"What exceptions, if any, does {function_member} raise?", + f"{function_member} can raise these exceptions as per its docstring:" + f" {function_raise_types}.", + ), + ( + f"I need to know if {function_member} throws any specific exceptions." + " Can you help?", + f"Sure, {function_member} can throw following exceptions according to docstring:" + f" {function_raise_types}.", + ), + ( + f"Could you inform me about any specific exceptions that" + f" {function_member} might raise?", + f"Certainly, the docstring of {function_member} indicates that" + f" it can raise these exceptions: {function_raise_types}.", + ), + ( + f"I'm curious about the exceptions that {function_member} might throw." 
+ " Do you have any information?", + f"Yes, the docstring of {function_member} suggests that" + f" it can throw the following exceptions: {function_raise_types}.", + ), + ] + function_member_retrieval_chunks.append( + f"From docstring, {function_member} can raise the following: {function_raise_types}" + ) + function_member_tuning_pairs.extend(function_raise_types_pairs) + + if not (function_warns := member_type_details.function_warns): + function_warn_types_pairs = [ + ( + f"Does {function_member} throw any specific warnings?", + f"Docstring of {function_member} lacks any mention of specific warnings.", + ), + ( + f"Are there any specific warnings that {function_member} throws?", + f"There are no specific warnings mentioned in docstring of {function_member}.", + ), + ( + f"Can you tell me if {function_member} throws any specific warnings?", + f"According to the docstring of {function_member}," + " it doesn't throw any specific warnings.", + ), + ( + f"I want to know if {function_member} throws any specific warnings." + " Can you help?", + f"Sure, I checked the docstring of {function_member} and" + " found no mention of specific warnings.", + ), + ( + f"Could you check if {function_member} throws any specific warnings?", + f"I've checked the docstring of {function_member} and" + " it doesn't mention any specific warnings.", + ), + ( + f"Is it possible that {function_member} throws any specific warnings?", + f"Based on the docstring of {function_member}," + " it doesn't seem to throw any specific warnings.", + ), + ] + function_member_retrieval_chunks.append( + f"Mention of any warnings is missing in docstring of {function_member}." 
+ ) + function_member_tuning_pairs.extend(function_warn_types_pairs) + else: + function_warn_types = enumerate_array_elements(function_warns, attribute="warns_details") + function_warn_types_pairs = [ + ( + f"Does {function_member} throw any specific warnings?", + f"Based on the docstring, {function_member} can throw the following warnings:" + f" {function_warn_types}.", + ), + ( + f"Can you tell me if {function_member} throws any specific warnings?", + f"Yes, according to docstring, {function_member} may throw these warnings:" + f" {function_warn_types}.", + ), + ( + f"I'm curious, does {function_member} generate any particular warnings?", + f"Indeed, docstring indicates that {function_member} can generate these warnings:" + f" {function_warn_types}.", + ), + ( + f"What specific warnings, if any, does {function_member} throw?", + f"{function_member} throws the following warnings as per the docstring:" + f" {function_warn_types}.", + ), + ( + f"Could {function_member} possibly throw any specific warnings?", + f"Yes, it could. Docstring of {function_member} mentions these specific warnings:" + f" {function_warn_types}.", + ), + ( + f"Are there any specific warnings that {function_member} throws?", + f"Yes, there are. 
The docstring for {function_member} lists following warnings:" + f" {function_warn_types}.", + ), + ] + function_member_retrieval_chunks.append( + f"{function_member} documents the following warnings: {function_warn_types}" + ) + function_member_tuning_pairs.extend(function_warn_types_pairs) + + if not (function_notes := member_type_details.function_notes): + function_notes_pairs = [ + ( + f"Is there any specific details for {function_member} to be aware of?", + f"Docstring of {function_member} lacks any notes on specific details.", + ), + ( + f"Are there any particular details I should know about {function_member}?", + f"There are no specific details noted in the docstring of {function_member}.", + ), + ( + f"What should I be aware of when using {function_member}?", + f"The docstring of {function_member} does not contain any details to be aware of.", + ), + ( + f"Could you tell me if there are any specific details for {function_member}?", + f"No specific details are mentioned in the docstring of {function_member}.", + ), + ( + f"I'm curious if there are any specific details about {function_member}?", + f"The docstring of {function_member} does not provide any specific details.", + ), + ( + f"Do I need to be aware of any specific details for {function_member}?", + f"The docstring of {function_member} does not include any specific details.", + ), + ] + function_member_retrieval_chunks.append( + f"{function_member} has no specific notes in the docstring." 
+ ) + function_member_tuning_pairs.extend(function_notes_pairs) + else: + function_notes_pairs = [ + ( + f"Is there any specific details for {function_member} to be aware of?", + f"Docstring of {function_member} highlights the following: '{function_notes}'.", + ), + ( + f"What should I know about {function_member}?", + "You should be aware that docstring includes the following details:" + f" '{function_notes}'.", + ), + ( + f"Could you provide some details about {function_member}?", + f"Sure, the docstring of {function_member} provides the following information:" + f" '{function_notes}'.", + ), + ( + f"What are the important details of {function_member}?", + f"The important details of {function_member} are highlighted in its docstring:" + f" '{function_notes}'.", + ), + ( + f"Can you tell me more about {function_member}?", + f"Of course, the docstring of {function_member} contains the following details:" + f" '{function_notes}'.", + ), + ( + f"I need information about {function_member}.", + f"The docstring of {function_member} contains the following information:" + f" '{function_notes}'.", + ), + ] + function_member_retrieval_chunks.append( + f"Docstring for {function_member} has following notes: '{function_notes}'." 
+ ) + function_member_tuning_pairs.extend(function_notes_pairs) + + if not (function_references := member_type_details.function_references): + function_references_pairs = [ + ( + f"Is there any reference for {function_member}?", + f"Documentation for {function_member} contains no references.", + ), + ( + f"Can I find any references in the documentation for {function_member}?", + f"No, the documentation for {function_member} does not contain any references.", + ), + ( + f"Does the documentation for {function_member} include any references?", + f"No, there are no references in the documentation for {function_member}.", + ), + ( + f"Are there references available in the {function_member} documentation?", + f"No, the {function_member} documentation does not include any references.", + ), + ( + f"I'm looking for references in {function_member} documentation. Are there any?", + f"Unfortunately, the documentation for {function_member} contains no references.", + ), + ( + f"Could you tell me if there are any references for {function_member}?", + f"I'm sorry, but documentation for {function_member} lacks any references.", + ), + ] + function_member_retrieval_chunks.append( + f"{function_member} documents no references in its docstring." + ) + function_member_tuning_pairs.extend(function_references_pairs) + else: + function_references_pairs = [ + ( + f"Is there any reference for {function_member}?", + f"The docstring links the following: '{function_references}'.", + ), + ( + f"Can you provide a reference for {function_member}?", + f"Sure, the docstring provides the following reference: '{function_references}'.", + ), + ( + f"Where can I find a reference for {function_member}?", + f"You can find it in the docstring, which links to: '{function_references}'.", + ), + ( + f"Could you point me to the reference for {function_member}?", + f"Of course, the docstring points to these reference: '{function_references}'.", + ), + ( + f"I'm looking for a reference for {function_member}. 
Can you help?", + f"Absolutely, the docstring links to this reference: '{function_references}'.", + ), + ( + f"What's the reference for {function_member}?", + f"The reference for that is in the docstring: '{function_references}'.", + ), + ] + function_member_retrieval_chunks.append( + f"{function_member} list the following references: {function_references}" + ) + function_member_tuning_pairs.extend(function_references_pairs) + + if not (function_examples := member_type_details.function_examples): + function_examples_pairs = [ + ( + f"Is there any example for {function_member}?", + f"Docstring for {function_member} lacks any examples.", + ), + ( + f"Can I find an example for {function_member} in the docstring?", + f"Unfortunately, docstring for {function_member} does not contain any examples.", + ), + ( + f"Does the docstring for {function_member} include any examples?", + f"No, the docstring for {function_member} does not include any examples.", + ), + ( + f"I'm looking for an example of {function_member} in docstring, is there one?", + f"I'm sorry, but docstring for {function_member} does not provide any examples.", + ), + ( + f"Are there any examples provided in the docstring for {function_member}?", + f"No examples are provided in the docstring for {function_member}.", + ), + ( + f"Could you tell me if there's an example for {function_member} in docstring?", + f"I regret to inform you that {function_member} documents no examples.", + ), + ] + function_member_retrieval_chunks.append( + f"Documentation of {function_member} lacks any examples." 
+ ) + function_member_tuning_pairs.extend(function_examples_pairs) + else: + function_examples_pairs = [ + ( + f"Is there any example for {function_member}?", + f"Documentation of {function_member} contains these examples:" + f" '{function_examples}'.", + ), + ( + f"Can you provide an example of {function_member}?", + f"Sure, you can find examples of {function_member} in its documentation:" + f" '{function_examples}'.", + ), + ( + f"I'm looking for examples of {function_member}, can you help?", + f"Absolutely, examples for {function_member} are available in its documentation:" + f" '{function_examples}'.", + ), + ( + f"Where can I find examples for {function_member}?", + f"You can find examples for {function_member} in its documentation:" + f" '{function_examples}'.", + ), + ( + f"Could you show me some examples of {function_member}?", + f"Of course, the documentation of {function_member} includes these examples:" + f" '{function_examples}'.", + ), + ( + f"I need examples for {function_member}, where can I find them?", + f"You can find examples for {function_member} in its documentation:" + f" '{function_examples}'.", + ), + ] + function_member_retrieval_chunks.append( + f"Docstring of {function_member} contains following examples: '{function_examples}'." 
@pydantic.validate_call(validate_return=True)
def generate_member_dataset(member_details: MemberDetails) -> tuple[Dataset, ...]:
    """Build retrieval chunks and question/answer pairs for one module member.

    Generic facts (parent module, qualified name, hierarchy, documentation) are
    always produced. When ``member_type_details`` is present, type-related pairs
    are added and the matching type-specific generator is called as well.

    Parameters
    ----------
    member_details : MemberDetails
        Extracted details of the member to describe.

    Returns
    -------
    tuple[Dataset, ...]
        A 1-tuple with the generic dataset when ``member_type_details`` is ``None``;
        otherwise a 2-tuple of the generic dataset (whose retrieval chunks also
        include the type-specific chunks) and the type-specific dataset.

    Raises
    ------
    ValueError
        If the member type is none of ``enum``, ``class`` or ``function``.
    """
    member_name = member_details.member_name
    qualified_name = member_details.member_qualified_name
    parent_module = member_details.member_module
    member = f"'{member_name}' object"
    hierarchy = enumerate_array_elements(member_details.member_hierarchy)

    # One retrieval chunk per topic, in the same order as the question groups below.
    chunks: list[str] = [
        f"{member} is part of parent module {parent_module}.",
        f"Full name of {member} is '{qualified_name}'.",
        f"Hierarchy of {member} is as follows: {hierarchy}.",
    ]

    qa_pairs: list[tuple[str, str]] = [
        # --- parent module ---
        (
            f"What is the parent module of {member}?",
            f"'{parent_module}' is the name of its parent module.",
        ),
        (
            f"Can you tell me the parent module of {member}?",
            f"Sure, the parent module of {member} is '{parent_module}'.",
        ),
        (
            f"I'm trying to find the parent module of {member}, can you help?",
            f"Of course, parent module of {member} is '{parent_module}'.",
        ),
        (
            f"Do you know the parent module of {member}?",
            f"Yes, the parent module of {member} is '{parent_module}'.",
        ),
        (
            f"I need to know the parent module of {member}, can you provide that?",
            f"Absolutely, parent module of {member} is '{parent_module}'.",
        ),
        (
            f"Could you inform me about the parent module of {member}?",
            f"Certainly, '{parent_module}' is parent module of {member}.",
        ),
        # --- fully qualified name ---
        (
            f"What is the full name of {member}?",
            f"'{qualified_name}' is its fully qualified name.",
        ),
        (
            f"Can you tell me the full name of the {member}?",
            f"Sure, the fully qualified name of {member} is '{qualified_name}'.",
        ),
        (
            f"I need to know the full name of {member}. Can you help?",
            f"Of course, the full name of {member} is '{qualified_name}'.",
        ),
        (
            f"What's the fully qualified name for the {member}?",
            f"The fully qualified name for {member} is '{qualified_name}'.",
        ),
        (
            f"Could you provide the full name of the {member}?",
            f"Certainly, the full name of the {member} is '{qualified_name}'.",
        ),
        (
            f"I'm looking for the full name of {member}. What is it?",
            f"The full name of {member} is '{qualified_name}'.",
        ),
        # --- hierarchy ---
        (
            f"What is the hierarchy of {member}?",
            f"The hierarchy of {member} is as follows: {hierarchy}.",
        ),
        (
            f"Can you explain the hierarchy of the {member}?",
            f"Sure, the hierarchy of the {member} is: {hierarchy}.",
        ),
        (
            f"Could you tell me the hierarchy of {member}?",
            f"Of course, the hierarchy of {member} is: {hierarchy}.",
        ),
        (
            f"I would like to know the hierarchy of {member}. Can you provide that?",
            f"Absolutely, the hierarchy of {member} is: {hierarchy}.",
        ),
        (
            f"Please provide the hierarchy of {member}.",
            f"The hierarchy of {member} is: {hierarchy}.",
        ),
        (
            f"I'm interested in the hierarchy of {member}. Could you share it?",
            f"Sure, the hierarchy of {member} is: {hierarchy}.",
        ),
    ]

    # --- documentation (present vs. missing) ---
    if docstring := member_details.member_docstring:
        chunks.append(f"The following is the documentation of {member}: '{docstring}'.")
        qa_pairs += [
            (f"What does {member} do?", f"Its documentation is as follows: '{docstring}'."),
            (
                f"Can you explain the function of the {member}?",
                f"Sure, here is its documentation: '{docstring}'.",
            ),
            (
                f"I'm not sure what {member} does. Can you clarify?",
                f"Of course, here's its documentation for clarification: '{docstring}'.",
            ),
            (
                f"Could you tell me about the {member}?",
                f"Certainly, its documentation is: '{docstring}'.",
            ),
            (
                f"I need information on the {member}.",
                f"Here's the documentation you need: '{docstring}'.",
            ),
            (
                f"What's the purpose of the {member}?",
                f"The purpose is described in its documentation: '{docstring}'.",
            ),
        ]
    else:
        chunks.append(f"Unfortunately, {member} currently does not have any documentation.")
        qa_pairs += [
            (
                f"What is the documentation of {member}?",
                f"{member} does not have any documentation.",
            ),
            (
                f"Can you provide the documentation for the {member}?",
                f"Sorry, the {member} does not have any documentation.",
            ),
            (
                f"Is there any documentation available for the {member}?",
                f"No, there is no documentation available for the {member}.",
            ),
            (
                f"Could you show me the documentation of the {member}?",
                f"Unfortunately, the {member} does not have any documentation.",
            ),
            (
                f"I'm looking for the documentation of {member}. Can you help?",
                f"I'm sorry, but the {member} does not have any documentation.",
            ),
        ]

    type_details = member_details.member_type_details

    # Untyped member: only the generic dataset is returned.
    if type_details is None:
        chunks.insert(0, f"'{member_name}' is a Python object.")

        return (Dataset(retrieval_chunks=chunks, tuning_pairs=qa_pairs),)

    # NOTE: this local must stay named `member_type`; the log message below uses the
    # self-documenting `{member_type=}` form, which prints the expression text.
    member_type = type_details.member_type
    kind = member_type.value

    # Index -1 places the type chunk just before the last chunk (the documentation one).
    chunks.insert(-1, f"'{member_name}' is a Python {kind}.")
    qa_pairs += [
        (f"What is the type of {member}?", f"{member} is of '{kind}' type."),
        (
            f"Can you tell me the type of the {member}?",
            f"Sure, the {member} is of '{kind}' type.",
        ),
        (
            f"I would like to know the type of {member}. Can you help?",
            f"Absolutely, the {member} is of '{kind}' type.",
        ),
        (
            f"Do you know the type of {member}?",
            f"Yes, the {member} is of '{kind}' type.",
        ),
        (
            f"Could you inform me about the type of {member}?",
            f"Of course, the {member} is of '{kind}' type.",
        ),
        (
            f"I'm curious about type of {member}. Can you provide some information?",
            f"Certainly, the {member} is of '{kind}' type.",
        ),
    ]

    # Delegate to the generator matching the member type.
    if member_type == MemberType.ENUM:
        type_dataset, type_chunks = generate_enum_member_dataset(
            f"'{member_name}' enum", docstring, type_details
        )
    elif member_type == MemberType.CLASS:
        type_dataset, type_chunks = generate_class_member_dataset(
            f"'{member_name}' class", docstring, type_details
        )
    elif member_type == MemberType.FUNCTION:
        type_dataset, type_chunks = generate_function_member_dataset(
            f"'{member_name}' function", docstring, type_details
        )
    else:
        LOGGER.critical(f"Received unsupported {member_type=}")

        raise ValueError("Unexpected member type: supports 'enum', 'class', 'function'")

    return (
        Dataset(retrieval_chunks=chunks + type_chunks, tuning_pairs=qa_pairs),
        type_dataset,
    )
class Package(pydantic.BaseModel):
    """Names, hierarchy, children and optional summary/exports of a package."""

    package_name: str
    package_qualified_name: str
    package_hierarchy: list[str]
    parent_package_name: str | None
    children_sub_packages_names: list[str]
    children_modules_names: list[str]
    package_summary: str | None = None
    package_all_exports: list[str] | None = None


class ModuleMember(pydantic.BaseModel):
    """A member name paired with the (unvalidated) object it refers to."""

    member_name: str
    member_object: typing.Any


class Module(pydantic.BaseModel):
    """Names, hierarchy, owning package, members and optional summary/exports of a module."""

    module_name: str
    module_qualified_name: str
    module_hierarchy: list[str]
    package_name: str
    module_members: list[ModuleMember]
    module_summary: str | None = None
    module_all_exports: list[str] | None = None


class MemberType(str, enum.Enum):
    """Member kinds with dedicated detail models; also the discriminator values."""

    ENUM = "enum"
    CLASS = "class"
    FUNCTION = "function"


class EnumMember(pydantic.BaseModel):
    """Name and value of a single enum member."""

    enum_member_name: str
    enum_member_value: typing.Any

    @pydantic.computed_field
    @functools.cached_property
    def enum_member(self: "EnumMember") -> str:
        """Return the member rendered as ``NAME (corresponding to 'VALUE')``."""
        return f"{self.enum_member_name} (corresponding to '{self.enum_member_value}')"


class EnumDetails(pydantic.BaseModel):
    """Type-specific details for members tagged ``MemberType.ENUM``."""

    member_type: typing.Literal[MemberType.ENUM]
    enum_members: list[EnumMember]


class Parameter(pydantic.BaseModel):
    """Name, default, annotation, kind and optional summary of a parameter."""

    parameter_name: str
    parameter_default: typing.Any
    parameter_annotation: typing.Any
    parameter_kind: str
    parameter_summary: str | None = None

    @pydantic.computed_field
    @functools.cached_property
    def parameter_details(self: "Parameter") -> str:
        """Return the parameter rendered as ``'NAME', of type 'KIND'``."""
        # NOTE(review): the text says "of type" but interpolates `parameter_kind`,
        # not `parameter_annotation` -- confirm this wording is intended.
        return f"'{self.parameter_name}', of type '{self.parameter_kind}'"


class Method(pydantic.BaseModel):
    """Name, parameter names and optional summary of a method."""

    method_name: str
    method_parameters: list[str]
    method_summary: str | None = None


class Attribute(pydantic.BaseModel):
    """Name of an attribute."""

    attribute_name: str


class ClassDetails(pydantic.BaseModel):
    """Type-specific details for members tagged ``MemberType.CLASS``."""

    member_type: typing.Literal[MemberType.CLASS]
    class_parameters: list[Parameter]
    class_methods: list[Method]
    class_attributes: list[Attribute]
    class_summary: str | None = None
    class_notes: str | None = None


class Returns(pydantic.BaseModel):
    """Return annotation and optional summary of a function."""

    returns_annotation: typing.Any
    returns_summary: str | None = None


class Raises(pydantic.BaseModel):
    """Optional type and summary of a raised exception."""

    raises_type: str | None = None
    raises_summary: str | None = None

    @pydantic.computed_field
    @functools.cached_property
    def raises_details(self: "Raises") -> str:
        """Return ``'TYPE' ('SUMMARY')``; unset fields appear as the text ``None``."""
        return f"'{self.raises_type}' ('{self.raises_summary}')"


class Warns(pydantic.BaseModel):
    """Optional type and summary of an issued warning."""

    warns_type: str | None = None
    warns_summary: str | None = None

    @pydantic.computed_field
    @functools.cached_property
    def warns_details(self: "Warns") -> str:
        """Return ``'TYPE' ('SUMMARY')``; unset fields appear as the text ``None``."""
        return f"'{self.warns_type}' ('{self.warns_summary}')"


class FunctionDetails(pydantic.BaseModel):
    """Type-specific details for members tagged ``MemberType.FUNCTION``."""

    member_type: typing.Literal[MemberType.FUNCTION]
    function_parameters: list[Parameter]
    function_returns: Returns
    function_summary: str | None = None
    function_raises: list[Raises] | None = None
    function_warns: list[Warns] | None = None
    function_notes: str | None = None
    function_references: str | None = None
    function_examples: str | None = None


class MemberDetails(pydantic.BaseModel):
    """Generic details of a member plus optional type-specific details."""

    member_name: str
    member_qualified_name: str
    member_hierarchy: list[str]
    member_module: str
    member_docstring: str
    # Discriminated union: the `member_type` field of the payload selects the model.
    member_type_details: EnumDetails | ClassDetails | FunctionDetails | None = pydantic.Field(
        default=None, discriminator="member_type"
    )


class Document(pydantic.BaseModel):
    """One question/answer pair together with the context it is answered from."""

    context: str
    question: str
    answer: str

    @pydantic.computed_field
    @functools.cached_property
    def instruction_with_context(self: "Document") -> str:
        """Return the ``[INST]``-tagged prompt with instruction, context, question, answer."""
        system_instruction = (
            "Below is a question that can be answered using the following context. "
            "Write an answer for the question appropriately without using any additional data."
        )

        # The empty first and last items make the joined text start and end with a space.
        # NOTE(review): they look like placeholders for sequence start/end tokens that may
        # have been lost -- confirm against the tokenizer's expected prompt format.
        return " ".join(
            [
                "",
                f"[INST] {system_instruction} [/INST]",
                f"[INST] Context: {self.context} [/INST]",
                f"[INST] Question: {self.question} [/INST]",
                f"[INST] Answer: {self.answer} [/INST]",
                "",
            ]
        )

    @pydantic.computed_field
    @functools.cached_property
    def instruction_without_context(self: "Document") -> str:
        """Return the question in ``[INST]`` tags followed by the answer and a space."""
        return f"[INST] {self.question} [/INST] {self.answer} "


class Dataset(pydantic.BaseModel):
    """Retrieval chunks and tuning question/answer pairs generated for one subject."""

    retrieval_chunks: list[str]
    tuning_pairs: list[tuple[str, str]]

    @pydantic.computed_field
    @functools.cached_property
    def tuning_documents(self: "Dataset") -> list[Document]:
        """Return one ``Document`` per tuning pair, all sharing the joined chunks as context."""
        # The context is identical for every pair, so join the chunks once instead of
        # once per pair.
        shared_context = " ".join(self.retrieval_chunks)

        return [
            Document(context=shared_context, question=question, answer=answer)
            for question, answer in self.tuning_pairs
        ]


class JSONDocument(pydantic.BaseModel):
    """Plain-field counterpart of ``Document`` with the computed texts stored as fields."""

    context: str
    question: str
    answer: str
    instruction_with_context: str
    instruction_without_context: str


class JSONDataset(pydantic.BaseModel):
    """Retrieval documents and tuning documents in plain-field form."""

    retrieval_documents: list[str]
    tuning_documents: list[JSONDocument]


__all__ = [
    "Attribute",
    "ClassDetails",
    "Dataset",
    "Document",
    "EnumDetails",
    "EnumMember",
    "FunctionDetails",
    "JSONDataset",
    "JSONDocument",
    "MemberDetails",
    "MemberType",
    "Method",
    "Module",
    "ModuleMember",
    "Package",
    "Parameter",
    "Raises",
    "Returns",
    "Warns",
]
https://download.pytorch.org/whl/cu118\n", + "Collecting accelerate (from -r requirements.txt (line 1))\n", + " Downloading accelerate-0.25.0-py3-none-any.whl (265 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m265.7/265.7 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting bitsandbytes (from -r requirements.txt (line 2))\n", + " Downloading bitsandbytes-0.41.3-py3-none-any.whl (92.6 MB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m92.6/92.6 MB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting datasets (from -r requirements.txt (line 3))\n", + " Downloading datasets-2.15.0-py3-none-any.whl (521 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m521.2/521.2 kB\u001b[0m \u001b[31m52.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting peft (from -r requirements.txt (line 4))\n", + " Downloading peft-0.7.0-py3-none-any.whl (168 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m168.3/168.3 kB\u001b[0m 
\u001b[31m24.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 5)) (0.4.1)\n", + "Collecting torch (from -r requirements.txt (line 6))\n", + " Downloading https://download.pytorch.org/whl/cu118/torch-2.1.1%2Bcu118-cp310-cp310-linux_x86_64.whl (2325.9 MB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m2.3/2.3 GB\u001b[0m \u001b[31m539.2 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 7)) (4.35.2)\n", + "Collecting trl (from -r requirements.txt (line 8))\n", + " Downloading trl-0.7.4-py3-none-any.whl (133 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m133.9/133.9 kB\u001b[0m \u001b[31m20.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from accelerate->-r requirements.txt (line 1)) (1.23.5)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate->-r requirements.txt (line 1)) (23.2)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate->-r requirements.txt (line 1)) (5.9.5)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate->-r requirements.txt (line 1)) (6.0.1)\n", + 
"Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from accelerate->-r requirements.txt (line 1)) (0.19.4)\n", + "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (9.0.0)\n", + "Collecting pyarrow-hotfix (from datasets->-r requirements.txt (line 3))\n", + " Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n", + "Collecting dill<0.3.8,>=0.3.0 (from datasets->-r requirements.txt (line 3))\n", + " Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (1.5.3)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (4.66.1)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (3.4.1)\n", + "Collecting multiprocess (from datasets->-r requirements.txt (line 3))\n", + " Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m134.8/134.8 
kB\u001b[0m \u001b[31m20.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: fsspec[http]<=2023.10.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (2023.6.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (3.9.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 6)) (3.13.1)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 6)) (4.5.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 6)) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 6)) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 6)) (3.1.2)\n", + "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 6)) (2.1.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers->-r requirements.txt (line 7)) (2023.6.3)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers->-r requirements.txt (line 7)) (0.15.0)\n", + "Collecting tyro>=0.5.11 (from trl->-r requirements.txt (line 8))\n", + " Downloading tyro-0.6.0-py3-none-any.whl (100 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m 
\u001b[32m100.9/100.9 kB\u001b[0m \u001b[31m16.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.txt (line 3)) (23.1.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.txt (line 3)) (6.0.4)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.txt (line 3)) (1.9.3)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.txt (line 3)) (1.4.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.txt (line 3)) (1.3.1)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.txt (line 3)) (4.0.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->-r requirements.txt (line 3)) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->-r requirements.txt (line 3)) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->-r requirements.txt (line 3)) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->-r requirements.txt (line 3)) (2023.11.17)\n", + "Collecting docstring-parser>=0.14.1 (from tyro>=0.5.11->trl->-r requirements.txt (line 8))\n", + " Downloading docstring_parser-0.15-py3-none-any.whl (36 kB)\n", + "Requirement already satisfied: rich>=11.1.0 in 
/usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl->-r requirements.txt (line 8)) (13.7.0)\n", + "Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl->-r requirements.txt (line 8))\n", + " Downloading shtab-1.6.5-py3-none-any.whl (13 kB)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->-r requirements.txt (line 6)) (2.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->-r requirements.txt (line 3)) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->-r requirements.txt (line 3)) (2023.3.post1)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->-r requirements.txt (line 6)) (1.3.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets->-r requirements.txt (line 3)) (1.16.0)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro>=0.5.11->trl->-r requirements.txt (line 8)) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro>=0.5.11->trl->-r requirements.txt (line 8)) (2.16.1)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=11.1.0->tyro>=0.5.11->trl->-r requirements.txt (line 8)) (0.1.2)\n", + "Installing collected packages: bitsandbytes, shtab, pyarrow-hotfix, docstring-parser, dill, torch, multiprocess, tyro, accelerate, datasets, trl, peft\n", + " Attempting uninstall: torch\n", + " Found existing installation: torch 2.1.0+cu118\n", + " Uninstalling torch-2.1.0+cu118:\n", + " Successfully uninstalled torch-2.1.0+cu118\n", + "\u001b[31mERROR: pip's dependency 
resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "torchaudio 2.1.0+cu118 requires torch==2.1.0, but you have torch 2.1.1+cu118 which is incompatible.\n", + "torchdata 0.7.0 requires torch==2.1.0, but you have torch 2.1.1+cu118 which is incompatible.\n", + "torchtext 0.16.0 requires torch==2.1.0, but you have torch 2.1.1+cu118 which is incompatible.\n", + "torchvision 0.16.0+cu118 requires torch==2.1.0, but you have torch 2.1.1+cu118 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed accelerate-0.25.0 bitsandbytes-0.41.3 datasets-2.15.0 dill-0.3.7 docstring-parser-0.15 multiprocess-0.70.15 peft-0.7.0 pyarrow-hotfix-0.6 shtab-1.6.5 torch-2.1.1+cu118 trl-0.7.4 tyro-0.6.0\n" + ] + } + ], + "source": [ + "!python3 -m pip install \\\n", + " --upgrade \\\n", + " --requirement requirements.txt \\\n", + " --constraint constraints.txt \\\n", + " --extra-index-url https://download.pytorch.org/whl/cu118" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_x4RfffVg8Nw", + "outputId": "b0022811-1c7d-413c-9eb0-e6830f672c51", + "trusted": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/trl/trainer/ppo_config.py:141: UserWarning: The `optimize_cuda_cache` arguement will be deprecated soon, please use `optimize_device_cache` instead.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "import gc\n", + "import json\n", + "import pathlib\n", + "import shutil\n", + "\n", + "import datasets\n", + "import peft\n", + "import torch\n", + "import transformers\n", + "import trl" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "rNebA6JYg8Lo", + "trusted": true + }, + "outputs": [], + "source": [ + "raw_dataset_path = 
pathlib.Path(\"json_documents.json\")\n", + "base_model_identifier = \"HuggingFaceH4/zephyr-7b-beta\"\n", + "tuning_checkpoint_directory = pathlib.Path(\"zephyr_tuning_checkpoints_directory\")\n", + "tuned_adapter_directory = pathlib.Path(\"tuned_zephyr_adapter_directory\")\n", + "tuned_adapter_archive = \"tuned_zephyr_adaptr_archive\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "T2ksh4jcg8Jj", + "trusted": true + }, + "outputs": [], + "source": [ + "with raw_dataset_path.open(encoding=\"utf-8\") as file_object:\n", + " raw_dataset = json.load(file_object)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "CMkyFv9og8HX", + "trusted": true + }, + "outputs": [], + "source": [ + "hugging_face_dataset = datasets.Dataset.from_list(raw_dataset[\"tuning_documents\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "b3pjFXj6g8Fb", + "trusted": true + }, + "outputs": [], + "source": [ + "quantisation_configuration = transformers.BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_compute_dtype=\"float16\",\n", + " bnb_4bit_quant_type=\"nf4\",\n", + " bnb_4bit_use_double_quant=False,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 433, + "referenced_widgets": [ + "07df370aa2c243e285d2fc79a85c7fde", + "73699ca649c64464b238ff37d07915de", + "6dae7824e19042cdb52f9fff4655b2f4", + "9f44f076d9814ec0b5d00462f68db4b1", + "176b326bab79416296b128b64a74dffa", + "b5d6ffe8087e4241bfd9ce456dd7dc98", + "80f588df8be74093aa2220e2ec56b8ac", + "ac5b0d2c6b3e4f01971c0d88fda1ea00", + "6f382477db344113874777c653befe95", + "1c9d1a1a20e7428e8132cc0e12e1d503", + "c8d699e0eb154de88591c21570e1ab2c", + "9420b28ead574ec0818ad38d673a4a22", + "181e18b7222a4eaa901585883c60d29c", + "cb2bd1a572fc4bfb8b1836ae91e316c6", + "fb841e8cd6cf42658bd2a4278ff5fd77", + 
"07bedd16874a4f5687394c4f936261e2", + "2951f44f15c24bfdba4ef2a85dc778f9", + "b787e05c120947ce98f98641b8f6c45c", + "4145a60168564eaeb658809ad6b7aa4e", + "389adb8bfbbd40a4977ba41b02fc1739", + "ee78a285765d4530b208759a100ab35b", + "cff8de1e75e5447e908fc49aac16f017", + "0ab5311fbafb41fbb1a734b95765e00a", + "456d154a13d349caaacbca90447cc4a8", + "2c3e05971c9b4534bf597735e3b3355e", + "cde82cb73bc84263a4b9b786f5f777f6", + "66e8bd5cec7d45bc853eb4a3ae281de4", + "919ff6a0fe2c475eab19f87cc0970cfc", + "03749a64798b469bb0adab1cb618db67", + "4a2a2c053de048129bbdb20bb2977b9b", + "3bbcaf28992f4117a7438a6e2ed66e8d", + "a43fc313187640308b924d4248b45a38", + "f196b24df94440a0974fe2545ec5d404", + "a6e30cba8ec74f4a90c08a42477be348", + "823ff0863e6f4f51bc4e375111503d0f", + "25e20296adf540198ec4771b1152bb8d", + "c33b4b12128d4cddbf36301090e7ee1c", + "5c7b7311020a49c48e4d50972958c15b", + "646981baa186423394f94417ffb548c4", + "e3f627d0f035427abe93035ca0e067db", + "209cc282d5d74e5493cb4c0e59324759", + "d11e8b4a270b44f9bae001984b5adad7", + "67d743a7bc7c4b64ad6d3e42485d195a", + "a025d05c8114473a8f0ee38ec2d880d9", + "3917a2598c5c42bd9681a005338f34d7", + "127d9fab6a6943febce137004b078ca3", + "e828c38645c941edaa4f1538f16f8ad6", + "c028548c75fb4b24b8222a92e521d551", + "7c0d798503ed414a99888deef2bae663", + "f5655604a6124cb8affe4e578ef3a9d0", + "0fe4ad29e1ec407abda1439318fd1e49", + "2c6c89f1df7e43c088d4da49d54c3492", + "d347864fa320455f9d65183b900b1ebe", + "4b1c76e2b72c4d2c9fd6fd86ac73cbf0", + "2b6aa6918b1c46108dfae2306128d394", + "48b72b62f2284c3d81a76a223ad00b1a", + "f172e4ca01eb4214a5a86afaa3f630d0", + "b06327d175d44b6bab631b8f88619d49", + "68b54843defa4fe8b633449749b6cdde", + "ace42011a97546a7bfd44633636db3fa", + "74ff0965efee4d07bd7d8354820bf3e0", + "4a4d7d40743c4dc1ab34c6164348e6c5", + "d9496c940e9d482ba3c0381901d39040", + "800152f817dc4fb6ba532af84822af52", + "a62bebfe702d4b4cb2c036620672c5cc", + "4f75f65c676b4e218f85cad935752aba", + "fd30b1b0665e4029a67b79eff69bddad", + 
"1d74948585a949be9e43f4eab670a5e3", + "1349cc1f194c4059a01d766e92e1f496", + "a04ff0580a844e218a1c324f958007f0", + "c50e954dcae84a5aae06a69701808f8e", + "61722cf185fc4d2b8df5f3e721ec4e96", + "2068f3ca95e141d08f6ccf80344d0eb7", + "dc501cc569f24be4909d460c2982f601", + "07e2b43d832f41f8bd2f31b2c7ab1db9", + "e0e653ebaea14dbdb058ca323db1bc48", + "2463289661c8473bb279d40a067fb1d3", + "e52212e786544c91b390c358c8c0b1a9", + "1017bddf6aa2491a934f843762e4d769", + "ae8a13b81b9542828834f6f91dc7881b", + "b083574c95e247b597381725b4500d5c", + "d7fd1545ebf24ea7be7c4d3580468ca1", + "23af875260eb4f6e9cfa58924f9e0f84", + "070196bfff7a40e1955542b07e928e61", + "5d244b03d8fe423ebc86055fa075de54", + "d119089cb25c47ad8f2819a28bbb8bca", + "03418c74c9b343a7a15df8e6a5c06037", + "af54328406854e4e891dfd04beef9806", + "c644623691084e0b881144652c0f31ec", + "44672c499e0d48d6a4ce0e088770e625", + "ec885745589f486891fe25deeb8c9312", + "46416efdf6c54664af4247147a1d4eb6", + "499cff098f5744baa8fe1250a94854b0", + "c4be9b1eb3a74fa29db1c1ddaa3d6b93", + "c38cc65dd83146d1aec861c4404225e2", + "e4f1915bbe5a493aadd4b20a2eeccb65", + "391bf170d0864495b9b52040d83a0c8d", + "df58cafb63c842b193687188c752e92b", + "cb289455571640b0b48c7759427de84d", + "6ae56fbadf4d488c97bc8ce0b1439786", + "e09f5d2f37fe45a4bde7a745b36b3c9c", + "4cc42006fef547c58655e09b6b2ce478", + "ddf873b21b1b41d9978d7b6b050e65d0", + "6760e13ff3c94ef89f2cff167ca82e7b", + "c539bffd517e477aa28fbfed3fdf0eaa", + "108ee1f4f0ab4d948ef7296e5dd25c7c", + "aa2ce018e85648cb98aa512b8f783719", + "35c52a25cb074d3eadc25ae08556bb5d", + "4b0ac607c0284a898a7dd672ba377c29", + "15bede68f8644745a75a7ad4ef31ea93", + "1250961292b44114bd23911d87f65747", + "4ab90f7385d4424c8f82e4d264845251", + "a287d589b85c49eb9f4fb6711a919d5e", + "d812f7765f2a4f79ade8b695b622364a", + "fb465bc9def94e00aedc6c2df69fe30b", + "aed95fb1f3984cf897e5b303e909ecbe", + "4915287dbe5b4a4a856c65dbf51c1229", + "9804c0ca28bd4617a5ef9494794d2520", + "9346ec32d8e0421f9120c4368553f570", + 
"63cac8d6dc7f4715b46ae5ba74c2e397", + "a6831777a9a341d7ad73799cfb53d785", + "a0051fa0a8584767bc36b0afd13b4386", + "0482f2abec544cecb4e93071ac70c802", + "ee4492e67b994a4db203c8be36118bc1", + "7d441115bec446ae9adb907d21ef75f4", + "a8127adfdead454d8d840cf8853b870f", + "c2bc67eecbd94864903b4eb2b5cb576b", + "0e90e19e3337489995a25f15e05e5393", + "19f56057ee7642d1ac110a972c81c4cb", + "3dfe38aaefe848df9fcc117def07c9f1", + "f7249bbc1e9b4641aecb3796cce19dd5", + "8e1f5a6b8a21451e937ef590772aec68", + "927fb91a51214d5fa8df5191f5c9d396", + "353fead6d5494fe4a5d63a6d7ad3f14c", + "52d66e638fd743d5a8c8a0f84ff8e819", + "924d96d4d3524fd8a4ebffc4d1adde33", + "ce4b4ee3ae534ab59e58b62e582a74a5", + "7ee4fe49c59e439c95518943c4da73a4", + "042f95f828334688afa4386d61dfb752", + "013b29ba4baa45789963aded99aa72d7", + "f6de121d8ace4fdf89c1ced1bdf41047", + "8ba77dc2c6ae4babbb8a37e2b7ec0d87", + "3ff7083836c340928dda9d09a4fc2ac8" + ] + }, + "id": "LkkX2R07g8DF", + "outputId": "d388644a-4ae8-4ed3-fe51-61c94b034713", + "trusted": true + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "07df370aa2c243e285d2fc79a85c7fde", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "config.json: 0%| | 0.00/638 [00:00\n", + " \n", + " \n", + " [7135/7135 1:15:02, Epoch 5/5]\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StepTraining Loss
5000.985400
10000.780300
15000.822400
20000.814500
25000.760100
30000.752200
35000.582600
40000.522300
45000.429100
50000.359600
55000.314100
60000.251300
65000.242300
70000.222400

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "TrainOutput(global_step=7135, training_loss=0.5535710768509179, metrics={'train_runtime': 4504.095, 'train_samples_per_second': 3.168, 'train_steps_per_second': 1.584, 'total_flos': 4.15361972379648e+16, 'train_loss': 0.5535710768509179, 'epoch': 5.0})" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "supervised_trainer.train()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "nBGVzOt7p2V4", + "trusted": true + }, + "outputs": [], + "source": [ + "supervised_trainer.model.save_pretrained(tuned_adapter_directory, safe_serialization=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "K9GKLM36p2Tt", + "outputId": "8da9dc19-939f-4a9a-faf1-55a9b4154e11", + "trusted": true + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'/content/tuned_zephyr_adaptr_archive.zip'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "shutil.make_archive(tuned_adapter_archive, \"zip\", tuned_adapter_directory)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "3S1Osnrwwy7t", + "trusted": true + }, + "outputs": [], + "source": [ + "del model\n", + "del tokeniser\n", + "del peft_model\n", + "del supervised_trainer" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MO4uSxQsw0Pz", + "outputId": "40d1a26d-a920-4843-c0d8-4878bbb7b161", + "trusted": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "35" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "gc.collect()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "id": "nr11sY5ew2U0", + "trusted": true + }, + "outputs": [], + "source": [ + "torch.cuda.empty_cache()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kaggle": { + "accelerator": "nvidiaTeslaT4", + "dataSources": [ + { + "datasetId": 4141328, + "sourceId": 7168609, + "sourceType": "datasetVersion" + }, + { + "modelInstanceId": 3899, + "sourceId": 5111, + "sourceType": "modelInstanceVersion" + } + ], + "dockerImageVersionId": 30616, + "isGpuEnabled": true, + "isInternetEnabled": true, + "language": "python", + "sourceType": "notebook" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "00dc4e0838b7405a9717ae1eaba4f742": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + 
"height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "013b29ba4baa45789963aded99aa72d7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "015725f756ac43bc892bc3a7046ca372": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7613fa817cbb440fa29ca93f8ae47a90", + "placeholder": "\u200b", + "style": "IPY_MODEL_1b11c05f35cc455eafd1aabfa86d46b1", + "value": "tokenizer.model: 100%" + } + }, + "0200690821004c53b8a823d6a80be6d2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_218e487e641845fab0f156731f3f27de", + "IPY_MODEL_06282dd869984966b6b63596c0277f5f", + "IPY_MODEL_537fc9864935444080a1471c3306ffa9" + ], + "layout": "IPY_MODEL_08142a29c33a424999e895e211d4cffe" + } + }, + "02cb78b27b484ad6966182619e29fd54": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a7164b6aed6a4543b06a30c9d6ff2723", + "placeholder": "\u200b", + "style": "IPY_MODEL_9f25f3443c3245239d046250bf76317a", + "value": "tokenizer_config.json: 100%" + } + }, + "02cc17e2fc1546e8887b696b1155cc4e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "03418c74c9b343a7a15df8e6a5c06037": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "03749a64798b469bb0adab1cb618db67": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "042f95f828334688afa4386d61dfb752": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0482f2abec544cecb4e93071ac70c802": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c2bc67eecbd94864903b4eb2b5cb576b", + "placeholder": "\u200b", + "style": "IPY_MODEL_0e90e19e3337489995a25f15e05e5393", + "value": "Loading checkpoint shards: 100%" + } + }, + "053fa2533d3f4ba7ae6081470f18795b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2dd8db9b09304fb891131df193c639ac", + "placeholder": "\u200b", + "style": "IPY_MODEL_997364118d5c4b57af4509cbf2398d06", + "value": " 42.0/42.0 [00:00<00:00, 2.65kB/s]" + } + }, + "06282dd869984966b6b63596c0277f5f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6dec6a1d8ec44e978fc1ded152f68da9", + "max": 2854, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_51131685f20e4b5cab193e0661b82389", + "value": 2854 + } + }, + "070196bfff7a40e1955542b07e928e61": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "07bedd16874a4f5687394c4f936261e2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "07df370aa2c243e285d2fc79a85c7fde": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_73699ca649c64464b238ff37d07915de", + "IPY_MODEL_6dae7824e19042cdb52f9fff4655b2f4", + "IPY_MODEL_9f44f076d9814ec0b5d00462f68db4b1" + ], + "layout": "IPY_MODEL_176b326bab79416296b128b64a74dffa" + } + }, + "07e2b43d832f41f8bd2f31b2c7ab1db9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "08142a29c33a424999e895e211d4cffe": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0ab5311fbafb41fbb1a734b95765e00a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_456d154a13d349caaacbca90447cc4a8", + "IPY_MODEL_2c3e05971c9b4534bf597735e3b3355e", + "IPY_MODEL_cde82cb73bc84263a4b9b786f5f777f6" + ], + "layout": "IPY_MODEL_66e8bd5cec7d45bc853eb4a3ae281de4" + } + }, + "0e90e19e3337489995a25f15e05e5393": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0fe4ad29e1ec407abda1439318fd1e49": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1017bddf6aa2491a934f843762e4d769": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_23af875260eb4f6e9cfa58924f9e0f84", + "placeholder": "\u200b", + "style": "IPY_MODEL_070196bfff7a40e1955542b07e928e61", + "value": "model-00005-of-00008.safetensors: 100%" + } + }, + "108ee1f4f0ab4d948ef7296e5dd25c7c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1250961292b44114bd23911d87f65747": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_4ab90f7385d4424c8f82e4d264845251", + "IPY_MODEL_a287d589b85c49eb9f4fb6711a919d5e", + 
"IPY_MODEL_d812f7765f2a4f79ade8b695b622364a" + ], + "layout": "IPY_MODEL_fb465bc9def94e00aedc6c2df69fe30b" + } + }, + "127d9fab6a6943febce137004b078ca3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f5655604a6124cb8affe4e578ef3a9d0", + "placeholder": "\u200b", + "style": "IPY_MODEL_0fe4ad29e1ec407abda1439318fd1e49", + "value": "model-00002-of-00008.safetensors: 100%" + } + }, + "1349cc1f194c4059a01d766e92e1f496": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_dc501cc569f24be4909d460c2982f601", + "max": 1946243984, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_07e2b43d832f41f8bd2f31b2c7ab1db9", + "value": 1946243984 + } + }, + "15bede68f8644745a75a7ad4ef31ea93": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + 
"description_width": "" + } + }, + "176b326bab79416296b128b64a74dffa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "181e18b7222a4eaa901585883c60d29c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2951f44f15c24bfdba4ef2a85dc778f9", + "placeholder": "\u200b", + "style": "IPY_MODEL_b787e05c120947ce98f98641b8f6c45c", + "value": "model.safetensors.index.json: 100%" + } + }, + "19f56057ee7642d1ac110a972c81c4cb": 
{ + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1b11c05f35cc455eafd1aabfa86d46b1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1b4b4871db8e45b5ba018ec41466ba28": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7c6de2b186944f1ea92237bdb3d13e16", + "placeholder": "\u200b", + "style": "IPY_MODEL_971fda8bbd214dbcb23c047b2dda39e8", + "value": "tokenizer.json: 100%" + } + }, + "1c9d1a1a20e7428e8132cc0e12e1d503": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1d74948585a949be9e43f4eab670a5e3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + 
"_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_61722cf185fc4d2b8df5f3e721ec4e96", + "placeholder": "\u200b", + "style": "IPY_MODEL_2068f3ca95e141d08f6ccf80344d0eb7", + "value": "model-00004-of-00008.safetensors: 100%" + } + }, + "2068f3ca95e141d08f6ccf80344d0eb7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "209cc282d5d74e5493cb4c0e59324759": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"218e487e641845fab0f156731f3f27de": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_dfe26c04317046f98e348be1a8a5a3f7", + "placeholder": "\u200b", + "style": "IPY_MODEL_a0d0db553d28462e8b299f2871c28c12", + "value": "Map: 100%" + } + }, + "23af875260eb4f6e9cfa58924f9e0f84": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2463289661c8473bb279d40a067fb1d3": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "25b72b95cf2745be9bfe8a047085df99": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "25e20296adf540198ec4771b1152bb8d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_209cc282d5d74e5493cb4c0e59324759", + "max": 1889587040, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d11e8b4a270b44f9bae001984b5adad7", + "value": 1889587040 + } + }, + "263044aacfbc452c9a185f356b899f88": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": 
"1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b3a876a45a2e4ac893fc9f311294a886", + "IPY_MODEL_a7bc6a0a0a4345cca62b0db408198d36", + "IPY_MODEL_3b6758bbf1d04599b583e80c594a73ee" + ], + "layout": "IPY_MODEL_8a80afee56684a6082bded8c6fd0cabb" + } + }, + "2951f44f15c24bfdba4ef2a85dc778f9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2b6aa6918b1c46108dfae2306128d394": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + 
"2c3e05971c9b4534bf597735e3b3355e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4a2a2c053de048129bbdb20bb2977b9b", + "max": 8, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3bbcaf28992f4117a7438a6e2ed66e8d", + "value": 8 + } + }, + "2c6c89f1df7e43c088d4da49d54c3492": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"2dd8db9b09304fb891131df193c639ac": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "332be8ac435844a29dc48a66e37d7cfa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": 
null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "33729e9ab63b4e6fa4cb9f537f06dede": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "346a06cd9e1840ea8362143b697bc645": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "353fead6d5494fe4a5d63a6d7ad3f14c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7ee4fe49c59e439c95518943c4da73a4", + "placeholder": "\u200b", + "style": "IPY_MODEL_042f95f828334688afa4386d61dfb752", + "value": "generation_config.json: 100%" + } + }, + "35c52a25cb074d3eadc25ae08556bb5d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3633a1ca2f0d4cd3a2df9985c8bc3caa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "389adb8bfbbd40a4977ba41b02fc1739": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3917a2598c5c42bd9681a005338f34d7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_127d9fab6a6943febce137004b078ca3", + "IPY_MODEL_e828c38645c941edaa4f1538f16f8ad6", + "IPY_MODEL_c028548c75fb4b24b8222a92e521d551" + ], + "layout": "IPY_MODEL_7c0d798503ed414a99888deef2bae663" + } + }, + "391bf170d0864495b9b52040d83a0c8d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3b6758bbf1d04599b583e80c594a73ee": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5f7c82759e8649f48d008cce3ac7ff96", + "placeholder": "\u200b", + "style": "IPY_MODEL_4fb7adf0ecb94787ad35b05149854c02", + "value": " 168/168 [00:00<00:00, 9.00kB/s]" + } + }, + "3bbcaf28992f4117a7438a6e2ed66e8d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3dfe38aaefe848df9fcc117def07c9f1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3f3ddd1da98e44729947e1f17cccff75": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3ff7083836c340928dda9d09a4fc2ac8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4145a60168564eaeb658809ad6b7aa4e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "44672c499e0d48d6a4ce0e088770e625": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c4be9b1eb3a74fa29db1c1ddaa3d6b93", + "placeholder": "\u200b", + "style": "IPY_MODEL_c38cc65dd83146d1aec861c4404225e2", + "value": "model-00006-of-00008.safetensors: 100%" + } + }, + "456d154a13d349caaacbca90447cc4a8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + 
"_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_919ff6a0fe2c475eab19f87cc0970cfc", + "placeholder": "\u200b", + "style": "IPY_MODEL_03749a64798b469bb0adab1cb618db67", + "value": "Downloading shards: 100%" + } + }, + "46416efdf6c54664af4247147a1d4eb6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_df58cafb63c842b193687188c752e92b", + "placeholder": "\u200b", + "style": "IPY_MODEL_cb289455571640b0b48c7759427de84d", + "value": " 1.95G/1.95G [00:13<00:00, 168MB/s]" + } + }, + "46474d2e15a44c588791b6a21479e6a5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "48a9f43772fb44c690137928a5c8b0e3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + 
"grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "48b72b62f2284c3d81a76a223ad00b1a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f172e4ca01eb4214a5a86afaa3f630d0", + "IPY_MODEL_b06327d175d44b6bab631b8f88619d49", + "IPY_MODEL_68b54843defa4fe8b633449749b6cdde" + ], + "layout": "IPY_MODEL_ace42011a97546a7bfd44633636db3fa" + } + }, + "4915287dbe5b4a4a856c65dbf51c1229": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "499cff098f5744baa8fe1250a94854b0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4a2a2c053de048129bbdb20bb2977b9b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4a4d7d40743c4dc1ab34c6164348e6c5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4ab90f7385d4424c8f82e4d264845251": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_aed95fb1f3984cf897e5b303e909ecbe", + "placeholder": "\u200b", + "style": "IPY_MODEL_4915287dbe5b4a4a856c65dbf51c1229", + "value": "model-00008-of-00008.safetensors: 100%" + } + }, + "4b0ac607c0284a898a7dd672ba377c29": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + 
"display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4b1c76e2b72c4d2c9fd6fd86ac73cbf0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"4c5d58753f584144baf64cf52de93add": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4cc42006fef547c58655e09b6b2ce478": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_aa2ce018e85648cb98aa512b8f783719", + "max": 1979781448, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_35c52a25cb074d3eadc25ae08556bb5d", + "value": 1979781448 + } + }, + 
"4f75f65c676b4e218f85cad935752aba": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4fb7adf0ecb94787ad35b05149854c02": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "51131685f20e4b5cab193e0661b82389": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "52d66e638fd743d5a8c8a0f84ff8e819": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_013b29ba4baa45789963aded99aa72d7", + "max": 111, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_f6de121d8ace4fdf89c1ced1bdf41047", + "value": 111 + } + }, + "537fc9864935444080a1471c3306ffa9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e17571f7aa6e4d1ab9424822bd7e3c2e", + "placeholder": "\u200b", + "style": "IPY_MODEL_02cc17e2fc1546e8887b696b1155cc4e", + "value": " 2854/2854 [00:00<00:00, 3983.53 examples/s]" + } + }, + "5b7e083e4b8240859d3b0127fa42289d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5c7b7311020a49c48e4d50972958c15b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": 
null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5d244b03d8fe423ebc86055fa075de54": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5f7c82759e8649f48d008cce3ac7ff96": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", 
+ "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "61722cf185fc4d2b8df5f3e721ec4e96": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + 
"left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "63cac8d6dc7f4715b46ae5ba74c2e397": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "646981baa186423394f94417ffb548c4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6571f6b5b0784f06bae68a31124e2a30": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6698637e8a794ebf897fe033221018ac": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "66e8bd5cec7d45bc853eb4a3ae281de4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + 
"grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6760e13ff3c94ef89f2cff167ca82e7b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "67d743a7bc7c4b64ad6d3e42485d195a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": 
"@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "68b54843defa4fe8b633449749b6cdde": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a62bebfe702d4b4cb2c036620672c5cc", + "placeholder": "\u200b", + "style": "IPY_MODEL_4f75f65c676b4e218f85cad935752aba", + "value": " 1.98G/1.98G [00:13<00:00, 157MB/s]" + } + }, + "692ef916c3af458f8907fa28e6697796": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_73640b0a6d9d4cbcaba58bd91fdf5b80", + "max": 42, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ea1a245aab5a4d46adc7b55492899432", + "value": 42 + } + }, + "6ae56fbadf4d488c97bc8ce0b1439786": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_e09f5d2f37fe45a4bde7a745b36b3c9c", + "IPY_MODEL_4cc42006fef547c58655e09b6b2ce478", + "IPY_MODEL_ddf873b21b1b41d9978d7b6b050e65d0" + ], + "layout": "IPY_MODEL_6760e13ff3c94ef89f2cff167ca82e7b" + } + }, + "6dae7824e19042cdb52f9fff4655b2f4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ac5b0d2c6b3e4f01971c0d88fda1ea00", + "max": 638, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_6f382477db344113874777c653befe95", + "value": 638 + } + }, + "6dec6a1d8ec44e978fc1ded152f68da9": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6ee0263bb70f42ca95e283d731708b94": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_48a9f43772fb44c690137928a5c8b0e3", + "max": 493443, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_25b72b95cf2745be9bfe8a047085df99", + "value": 493443 + } + }, + "6f382477db344113874777c653befe95": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "73640b0a6d9d4cbcaba58bd91fdf5b80": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "73699ca649c64464b238ff37d07915de": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b5d6ffe8087e4241bfd9ce456dd7dc98", + "placeholder": "\u200b", + "style": "IPY_MODEL_80f588df8be74093aa2220e2ec56b8ac", + "value": "config.json: 100%" + } + }, + "74ff0965efee4d07bd7d8354820bf3e0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7613fa817cbb440fa29ca93f8ae47a90": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, 
+ "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "79592715da7f4d5b9f766d32499bbf3a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d89561c94ecf414d80a24f4aeba289cf", + "placeholder": "\u200b", + "style": "IPY_MODEL_346a06cd9e1840ea8362143b697bc645", + "value": " 493k/493k [00:00<00:00, 5.25MB/s]" + } + }, + "7c0d798503ed414a99888deef2bae663": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + 
"bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7c6de2b186944f1ea92237bdb3d13e16": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"7d441115bec446ae9adb907d21ef75f4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f7249bbc1e9b4641aecb3796cce19dd5", + "placeholder": "\u200b", + "style": "IPY_MODEL_8e1f5a6b8a21451e937ef590772aec68", + "value": " 8/8 [01:17<00:00, 7.94s/it]" + } + }, + "7ee4fe49c59e439c95518943c4da73a4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "800152f817dc4fb6ba532af84822af52": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "80f588df8be74093aa2220e2ec56b8ac": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "823ff0863e6f4f51bc4e375111503d0f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_646981baa186423394f94417ffb548c4", + "placeholder": "\u200b", + "style": "IPY_MODEL_e3f627d0f035427abe93035ca0e067db", + "value": "model-00001-of-00008.safetensors: 100%" + } + }, + "8a80afee56684a6082bded8c6fd0cabb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8ba77dc2c6ae4babbb8a37e2b7ec0d87": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8e1f5a6b8a21451e937ef590772aec68": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "919ff6a0fe2c475eab19f87cc0970cfc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "924d96d4d3524fd8a4ebffc4d1adde33": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8ba77dc2c6ae4babbb8a37e2b7ec0d87", + "placeholder": "\u200b", + "style": "IPY_MODEL_3ff7083836c340928dda9d09a4fc2ac8", + "value": " 111/111 [00:00<00:00, 7.21kB/s]" + } + }, + "927fb91a51214d5fa8df5191f5c9d396": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_353fead6d5494fe4a5d63a6d7ad3f14c", + "IPY_MODEL_52d66e638fd743d5a8c8a0f84ff8e819", + "IPY_MODEL_924d96d4d3524fd8a4ebffc4d1adde33" + ], + "layout": "IPY_MODEL_ce4b4ee3ae534ab59e58b62e582a74a5" + } + }, + "92d95391d06c4bcc89d3517e1111dc28": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4c5d58753f584144baf64cf52de93add", + "max": 1795303, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5b7e083e4b8240859d3b0127fa42289d", + "value": 1795303 + } + }, + "9346ec32d8e0421f9120c4368553f570": { + "model_module": "@jupyter-widgets/controls", 
+ "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9420b28ead574ec0818ad38d673a4a22": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_181e18b7222a4eaa901585883c60d29c", + "IPY_MODEL_cb2bd1a572fc4bfb8b1836ae91e316c6", + "IPY_MODEL_fb841e8cd6cf42658bd2a4278ff5fd77" + ], + "layout": "IPY_MODEL_07bedd16874a4f5687394c4f936261e2" + } + }, + "971fda8bbd214dbcb23c047b2dda39e8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "97ddbbcc782e4af09c2c84e52b6f95fb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", 
+ "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9a46cccba7004d5e991bffa680796f85", + "max": 1431, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b4c678188f7c479a884597aa76ebc2c2", + "value": 1431 + } + }, + "9804c0ca28bd4617a5ef9494794d2520": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "997364118d5c4b57af4509cbf2398d06": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + 
"9a46cccba7004d5e991bffa680796f85": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9f25f3443c3245239d046250bf76317a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9f44f076d9814ec0b5d00462f68db4b1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1c9d1a1a20e7428e8132cc0e12e1d503", + "placeholder": "\u200b", + "style": "IPY_MODEL_c8d699e0eb154de88591c21570e1ab2c", + "value": " 638/638 [00:00<00:00, 17.1kB/s]" + } + }, + "a0051fa0a8584767bc36b0afd13b4386": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_0482f2abec544cecb4e93071ac70c802", + "IPY_MODEL_ee4492e67b994a4db203c8be36118bc1", + "IPY_MODEL_7d441115bec446ae9adb907d21ef75f4" + ], + "layout": "IPY_MODEL_a8127adfdead454d8d840cf8853b870f" + } + }, + "a025d05c8114473a8f0ee38ec2d880d9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a04ff0580a844e218a1c324f958007f0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + 
"description_tooltip": null, + "layout": "IPY_MODEL_e0e653ebaea14dbdb058ca323db1bc48", + "placeholder": "\u200b", + "style": "IPY_MODEL_2463289661c8473bb279d40a067fb1d3", + "value": " 1.95G/1.95G [00:14<00:00, 184MB/s]" + } + }, + "a0d0db553d28462e8b299f2871c28c12": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a287d589b85c49eb9f4fb6711a919d5e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9804c0ca28bd4617a5ef9494794d2520", + "max": 815834680, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9346ec32d8e0421f9120c4368553f570", + "value": 815834680 + } + }, + "a43fc313187640308b924d4248b45a38": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": 
null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a62bebfe702d4b4cb2c036620672c5cc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a6831777a9a341d7ad73799cfb53d785": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a6e30cba8ec74f4a90c08a42477be348": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_823ff0863e6f4f51bc4e375111503d0f", + "IPY_MODEL_25e20296adf540198ec4771b1152bb8d", + "IPY_MODEL_c33b4b12128d4cddbf36301090e7ee1c" + ], + "layout": "IPY_MODEL_5c7b7311020a49c48e4d50972958c15b" + } + }, + "a7164b6aed6a4543b06a30c9d6ff2723": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": 
null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a7565a528e4e4ddaa321e67c28e531a1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a7bc6a0a0a4345cca62b0db408198d36": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3633a1ca2f0d4cd3a2df9985c8bc3caa", + "max": 168, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a7565a528e4e4ddaa321e67c28e531a1", + "value": 168 + } + }, + "a8127adfdead454d8d840cf8853b870f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": 
null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "aa2ce018e85648cb98aa512b8f783719": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + 
"visibility": null, + "width": null + } + }, + "ac5b0d2c6b3e4f01971c0d88fda1ea00": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ace42011a97546a7bfd44633636db3fa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": 
null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ae8a13b81b9542828834f6f91dc7881b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5d244b03d8fe423ebc86055fa075de54", + "max": 1979781448, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d119089cb25c47ad8f2819a28bbb8bca", + "value": 1979781448 + } + }, + "aed95fb1f3984cf897e5b303e909ecbe": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + 
"grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "af54328406854e4e891dfd04beef9806": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b06327d175d44b6bab631b8f88619d49": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d9496c940e9d482ba3c0381901d39040", + "max": 1979781432, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_800152f817dc4fb6ba532af84822af52", + "value": 1979781432 + } + }, + "b083574c95e247b597381725b4500d5c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_03418c74c9b343a7a15df8e6a5c06037", + "placeholder": "\u200b", + "style": "IPY_MODEL_af54328406854e4e891dfd04beef9806", + "value": " 1.98G/1.98G [00:14<00:00, 184MB/s]" + } + }, + "b3a876a45a2e4ac893fc9f311294a886": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_df663d20a7834c21823923d859cb26dc", + "placeholder": "\u200b", + "style": "IPY_MODEL_c3dc9543ec424d57ab067e5bc9c8182b", + "value": "special_tokens_map.json: 100%" + } + }, + "b4c678188f7c479a884597aa76ebc2c2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "b5d6ffe8087e4241bfd9ce456dd7dc98": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b787e05c120947ce98f98641b8f6c45c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b7ba3c43942c442fb397422c67a43b4c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "bcef83603618409a96baf42d7db14b3d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1b4b4871db8e45b5ba018ec41466ba28", + "IPY_MODEL_92d95391d06c4bcc89d3517e1111dc28", + "IPY_MODEL_d991ee4630e042bf8e03d8a1db3ea4b8" + ], + "layout": "IPY_MODEL_6571f6b5b0784f06bae68a31124e2a30" + } + }, + "c028548c75fb4b24b8222a92e521d551": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4b1c76e2b72c4d2c9fd6fd86ac73cbf0", + "placeholder": "\u200b", + "style": "IPY_MODEL_2b6aa6918b1c46108dfae2306128d394", + "value": " 1.95G/1.95G [00:18<00:00, 81.8MB/s]" + } + }, + "c2bc67eecbd94864903b4eb2b5cb576b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": 
null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c33b4b12128d4cddbf36301090e7ee1c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_67d743a7bc7c4b64ad6d3e42485d195a", + "placeholder": "\u200b", + "style": "IPY_MODEL_a025d05c8114473a8f0ee38ec2d880d9", + "value": " 1.89G/1.89G [00:10<00:00, 180MB/s]" + } + }, + "c38cc65dd83146d1aec861c4404225e2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c3dc9543ec424d57ab067e5bc9c8182b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"StyleView", + "description_width": "" + } + }, + "c4be9b1eb3a74fa29db1c1ddaa3d6b93": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c50e954dcae84a5aae06a69701808f8e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": 
null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c539bffd517e477aa28fbfed3fdf0eaa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c644623691084e0b881144652c0f31ec": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_44672c499e0d48d6a4ce0e088770e625", + "IPY_MODEL_ec885745589f486891fe25deeb8c9312", + "IPY_MODEL_46416efdf6c54664af4247147a1d4eb6" + ], + "layout": "IPY_MODEL_499cff098f5744baa8fe1250a94854b0" + } + }, + "c8d699e0eb154de88591c21570e1ab2c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cb289455571640b0b48c7759427de84d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cb2bd1a572fc4bfb8b1836ae91e316c6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_4145a60168564eaeb658809ad6b7aa4e", + "max": 23950, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_389adb8bfbbd40a4977ba41b02fc1739", + "value": 23950 + } + }, + "cde82cb73bc84263a4b9b786f5f777f6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a43fc313187640308b924d4248b45a38", + "placeholder": "\u200b", + "style": "IPY_MODEL_f196b24df94440a0974fe2545ec5d404", + "value": " 8/8 [01:41<00:00, 10.53s/it]" + } + }, + "ce4b4ee3ae534ab59e58b62e582a74a5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + 
"overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cff8de1e75e5447e908fc49aac16f017": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d119089cb25c47ad8f2819a28bbb8bca": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d11e8b4a270b44f9bae001984b5adad7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d347864fa320455f9d65183b900b1ebe": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + 
"description_width": "" + } + }, + "d6574b9913e449ae9cd7a072994367c0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_02cb78b27b484ad6966182619e29fd54", + "IPY_MODEL_97ddbbcc782e4af09c2c84e52b6f95fb", + "IPY_MODEL_fbbc1565972c413cbc29543bdd8900d0" + ], + "layout": "IPY_MODEL_00dc4e0838b7405a9717ae1eaba4f742" + } + }, + "d7fd1545ebf24ea7be7c4d3580468ca1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d812f7765f2a4f79ade8b695b622364a": { 
+ "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_63cac8d6dc7f4715b46ae5ba74c2e397", + "placeholder": "\u200b", + "style": "IPY_MODEL_a6831777a9a341d7ad73799cfb53d785", + "value": " 816M/816M [00:06<00:00, 153MB/s]" + } + }, + "d89561c94ecf414d80a24f4aeba289cf": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d9496c940e9d482ba3c0381901d39040": { + "model_module": "@jupyter-widgets/base", + "model_module_version": 
"1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d991ee4630e042bf8e03d8a1db3ea4b8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6698637e8a794ebf897fe033221018ac", + "placeholder": "\u200b", + "style": "IPY_MODEL_46474d2e15a44c588791b6a21479e6a5", + "value": " 1.80M/1.80M [00:00<00:00, 18.3MB/s]" + } + }, + "dc501cc569f24be4909d460c2982f601": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ddf873b21b1b41d9978d7b6b050e65d0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4b0ac607c0284a898a7dd672ba377c29", + "placeholder": "\u200b", + "style": "IPY_MODEL_15bede68f8644745a75a7ad4ef31ea93", + "value": " 1.98G/1.98G [00:08<00:00, 241MB/s]" + } + }, + "df58cafb63c842b193687188c752e92b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": 
"1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "df663d20a7834c21823923d859cb26dc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": 
null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dfe26c04317046f98e348be1a8a5a3f7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e09f5d2f37fe45a4bde7a745b36b3c9c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_c539bffd517e477aa28fbfed3fdf0eaa", + "placeholder": "\u200b", + "style": "IPY_MODEL_108ee1f4f0ab4d948ef7296e5dd25c7c", + "value": "model-00007-of-00008.safetensors: 100%" + } + }, + "e0e653ebaea14dbdb058ca323db1bc48": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e17571f7aa6e4d1ab9424822bd7e3c2e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": 
null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e3f627d0f035427abe93035ca0e067db": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e4f1915bbe5a493aadd4b20a2eeccb65": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e52212e786544c91b390c358c8c0b1a9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1017bddf6aa2491a934f843762e4d769", + "IPY_MODEL_ae8a13b81b9542828834f6f91dc7881b", + "IPY_MODEL_b083574c95e247b597381725b4500d5c" + ], + "layout": "IPY_MODEL_d7fd1545ebf24ea7be7c4d3580468ca1" + } + }, + "e7156242d7b24330a01da48362d4d187": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e828c38645c941edaa4f1538f16f8ad6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2c6c89f1df7e43c088d4da49d54c3492", + "max": 1946243936, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d347864fa320455f9d65183b900b1ebe", + "value": 1946243936 + } + }, + "ea1a245aab5a4d46adc7b55492899432": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ec885745589f486891fe25deeb8c9312": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": 
"ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e4f1915bbe5a493aadd4b20a2eeccb65", + "max": 1946243984, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_391bf170d0864495b9b52040d83a0c8d", + "value": 1946243984 + } + }, + "ed3dbc72f4804fb2950f6649e5ea4b75": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3f3ddd1da98e44729947e1f17cccff75", + "placeholder": "\u200b", + "style": "IPY_MODEL_f229c20327574a91bfe9fec859c128c0", + "value": "added_tokens.json: 100%" + } + }, + "ee4492e67b994a4db203c8be36118bc1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_19f56057ee7642d1ac110a972c81c4cb", + "max": 8, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3dfe38aaefe848df9fcc117def07c9f1", + "value": 8 + } + }, + "ee78a285765d4530b208759a100ab35b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, 
+ "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f172e4ca01eb4214a5a86afaa3f630d0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_74ff0965efee4d07bd7d8354820bf3e0", + "placeholder": "\u200b", + "style": "IPY_MODEL_4a4d7d40743c4dc1ab34c6164348e6c5", + "value": "model-00003-of-00008.safetensors: 100%" + } + }, + "f196b24df94440a0974fe2545ec5d404": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f229c20327574a91bfe9fec859c128c0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f4061b6242754cf38bf27971e41c3f4c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_015725f756ac43bc892bc3a7046ca372", + "IPY_MODEL_6ee0263bb70f42ca95e283d731708b94", + "IPY_MODEL_79592715da7f4d5b9f766d32499bbf3a" + ], + "layout": "IPY_MODEL_e7156242d7b24330a01da48362d4d187" + } + }, + "f5655604a6124cb8affe4e578ef3a9d0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": 
null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f6de121d8ace4fdf89c1ced1bdf41047": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f7249bbc1e9b4641aecb3796cce19dd5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + 
"min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fb465bc9def94e00aedc6c2df69fe30b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fb841e8cd6cf42658bd2a4278ff5fd77": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_ee78a285765d4530b208759a100ab35b", + "placeholder": "\u200b", + "style": "IPY_MODEL_cff8de1e75e5447e908fc49aac16f017", + "value": " 23.9k/23.9k [00:00<00:00, 786kB/s]" + } + }, + "fbbc1565972c413cbc29543bdd8900d0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_332be8ac435844a29dc48a66e37d7cfa", + "placeholder": "\u200b", + "style": "IPY_MODEL_b7ba3c43942c442fb397422c67a43b4c", + "value": " 1.43k/1.43k [00:00<00:00, 80.7kB/s]" + } + }, + "fcb0306afa004784803ed769fda4df8c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ed3dbc72f4804fb2950f6649e5ea4b75", + "IPY_MODEL_692ef916c3af458f8907fa28e6697796", + "IPY_MODEL_053fa2533d3f4ba7ae6081470f18795b" + ], + "layout": "IPY_MODEL_33729e9ab63b4e6fa4cb9f537f06dede" + } + }, + "fd30b1b0665e4029a67b79eff69bddad": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + 
"box_style": "", + "children": [ + "IPY_MODEL_1d74948585a949be9e43f4eab670a5e3", + "IPY_MODEL_1349cc1f194c4059a01d766e92e1f496", + "IPY_MODEL_a04ff0580a844e218a1c324f958007f0" + ], + "layout": "IPY_MODEL_c50e954dcae84a5aae06a69701808f8e" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/src/generative_ai/fine_tuning/step_2_tuning.ipynb b/src/generative_ai/fine_tuning/step_2_tuning.ipynb new file mode 100644 index 0000000..6e018a2 --- /dev/null +++ b/src/generative_ai/fine_tuning/step_2_tuning.ipynb @@ -0,0 +1,756 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HHEHKsv3g2l8", + "outputId": "48518ae9-c7de-4dcc-af5c-20dc47d0ea8b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu118\n", + "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 1)) (0.25.0)\n", + "Requirement already satisfied: bitsandbytes in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 2)) (0.41.3)\n", + "Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 3)) (2.15.0)\n", + "Requirement already satisfied: peft in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 4)) (0.7.0)\n", + "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 5)) (0.4.1)\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 6)) (2.1.1+cu118)\n", + "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 7)) (4.35.2)\n", + "Requirement already satisfied: trl in /usr/local/lib/python3.10/dist-packages (from -r 
requirements.txt (line 8)) (0.7.4)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from accelerate->-r requirements.txt (line 1)) (1.23.5)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate->-r requirements.txt (line 1)) (23.2)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate->-r requirements.txt (line 1)) (5.9.5)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate->-r requirements.txt (line 1)) (6.0.1)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from accelerate->-r requirements.txt (line 1)) (0.19.4)\n", + "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (9.0.0)\n", + "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (0.6)\n", + "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (0.3.7)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (1.5.3)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (4.66.1)\n", + "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (3.4.1)\n", + "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (0.70.15)\n", + "Requirement already 
satisfied: fsspec[http]<=2023.10.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (2023.6.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.txt (line 3)) (3.9.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 6)) (3.13.1)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 6)) (4.5.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 6)) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 6)) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 6)) (3.1.2)\n", + "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.txt (line 6)) (2.1.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers->-r requirements.txt (line 7)) (2023.6.3)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers->-r requirements.txt (line 7)) (0.15.0)\n", + "Requirement already satisfied: tyro>=0.5.11 in /usr/local/lib/python3.10/dist-packages (from trl->-r requirements.txt (line 8)) (0.6.0)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.txt (line 3)) (23.1.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.txt (line 3)) (6.0.4)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in 
/usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.txt (line 3)) (1.9.3)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.txt (line 3)) (1.4.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.txt (line 3)) (1.3.1)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.txt (line 3)) (4.0.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->-r requirements.txt (line 3)) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->-r requirements.txt (line 3)) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->-r requirements.txt (line 3)) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->-r requirements.txt (line 3)) (2023.11.17)\n", + "Requirement already satisfied: docstring-parser>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl->-r requirements.txt (line 8)) (0.15)\n", + "Requirement already satisfied: rich>=11.1.0 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl->-r requirements.txt (line 8)) (13.7.0)\n", + "Requirement already satisfied: shtab>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl->-r requirements.txt (line 8)) (1.6.5)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->-r requirements.txt (line 6)) (2.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.1 in 
/usr/local/lib/python3.10/dist-packages (from pandas->datasets->-r requirements.txt (line 3)) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->-r requirements.txt (line 3)) (2023.3.post1)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->-r requirements.txt (line 6)) (1.3.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets->-r requirements.txt (line 3)) (1.16.0)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro>=0.5.11->trl->-r requirements.txt (line 8)) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro>=0.5.11->trl->-r requirements.txt (line 8)) (2.16.1)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=11.1.0->tyro>=0.5.11->trl->-r requirements.txt (line 8)) (0.1.2)\n" + ] + } + ], + "source": [ + "!python3 -m pip install \\\n", + " --upgrade \\\n", + " --requirement requirements.txt \\\n", + " --constraint constraints.txt \\\n", + " --extra-index-url https://download.pytorch.org/whl/cu118" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "_x4RfffVg8Nw" + }, + "outputs": [], + "source": [ + "import gc\n", + "import pathlib\n", + "import shutil\n", + "\n", + "import peft\n", + "import torch\n", + "import transformers" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "rNebA6JYg8Lo" + }, + "outputs": [], + "source": [ + "base_model_identifier = \"HuggingFaceH4/zephyr-7b-beta\"\n", + "tuned_adapter_archive = pathlib.Path(\"tuned_zephyr_adaptr_archive.zip\")\n", + "tuned_adapter_directory = 
pathlib.Path(\"tuned_zephyr_adapter_directory\")\n", + "\n", + "prompt = \"\"\"You are a chat assistant to help new users for a Python package.\n", + "\n", + "1. You will be provided with a specific question and a context relevant to answer that question.\n", + "2. Your response should be based solely on the given context.\n", + "3. Keep your answer concise, not exceeding five sentences.\n", + "4. If the answer is not found within the context, respond with \"I do not know.\".\n", + "5. Do not fabricate any information.\n", + "\n", + "Context: 'subtract_numbers' function documents itself as follows: 'Perform subtraction of two real numbers.'.\n", + "\n", + "Based on docstring, return of 'subtract_numbers' function is as follows: 'difference of ``first_number`` from ``second_number``'.\n", + "\n", + "Based on docstring, return of 'add_numbers' function is as follows: 'sum of ``first_number`` and ``second_number``'.\n", + "Question: Tell me how to add the number 2 and 3, and subtract the result by 5.\n", + "\n", + "Answer:\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "zQhbiErbrad3" + }, + "outputs": [], + "source": [ + "shutil.unpack_archive(tuned_adapter_archive, tuned_adapter_directory)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "1X72BtfjxHQo" + }, + "outputs": [], + "source": [ + "quantisation_configuration = transformers.BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_compute_dtype=\"float16\",\n", + " bnb_4bit_quant_type=\"nf4\",\n", + " bnb_4bit_use_double_quant=False,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "6e3f0ea9ed8b4d019f8da74f82dc2fb3", + "861cb7a6a84840ac85ec4f55f1428bc5", + "1a6c9624861c499ca7d7f47f78a2f730", + "5e30bb776e1e462385fc8727b0a7291c", + "8146ae8bf6a946fba84e6ee409dbe87c", + 
"8671b8a2f8174a89aca1b7148b95ea17", + "dc8c3007bee0471cad92490aad6987d4", + "1bb8dfadf9ae4d94b74c1526c2231391", + "5b6febdebf084ee4a8c0bd933017f4ec", + "8b9bea5823a3474a918af514c75ad5d9", + "54e415d7d3c343378e23b6645121bf48" + ] + }, + "id": "TT26-TkexIFb", + "outputId": "a38cbab8-abb8-4d63-f03c-33ddec27bb16" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6e3f0ea9ed8b4d019f8da74f82dc2fb3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/8 [00:00>> from package_name_to_import_with ``first_number``\\n\\n >>> Parentheses(1, \"+2)\\n -2.0\\n >>> get_negative(1)\\n -2.0\\')\\n -1.0\\n >>> Difference(1, 2)\\n '}]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tuned_pipeline(prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "uB13FNir26ka" + }, + "outputs": [], + "source": [ + "del tuned_pipeline\n", + "del tuned_model\n", + "del untuned_pipeline\n", + "del tokeniser\n", + "del untuned_model" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6c4pSk564Iu8", + "outputId": "c028ce8f-513a-425d-9e98-a976c40785c2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "126" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gc.collect()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "1m6DFd2B4KWr" + }, + "outputs": [], + "source": [ + "torch.cuda.empty_cache()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + 
"1a6c9624861c499ca7d7f47f78a2f730": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1bb8dfadf9ae4d94b74c1526c2231391", + "max": 8, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5b6febdebf084ee4a8c0bd933017f4ec", + "value": 8 + } + }, + "1bb8dfadf9ae4d94b74c1526c2231391": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"54e415d7d3c343378e23b6645121bf48": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5b6febdebf084ee4a8c0bd933017f4ec": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5e30bb776e1e462385fc8727b0a7291c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8b9bea5823a3474a918af514c75ad5d9", + "placeholder": "\u200b", + "style": "IPY_MODEL_54e415d7d3c343378e23b6645121bf48", + "value": " 8/8 [01:20<00:00, 8.30s/it]" + } + }, + "6e3f0ea9ed8b4d019f8da74f82dc2fb3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_861cb7a6a84840ac85ec4f55f1428bc5", + "IPY_MODEL_1a6c9624861c499ca7d7f47f78a2f730", + "IPY_MODEL_5e30bb776e1e462385fc8727b0a7291c" + ], + "layout": "IPY_MODEL_8146ae8bf6a946fba84e6ee409dbe87c" + } + }, + "8146ae8bf6a946fba84e6ee409dbe87c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "861cb7a6a84840ac85ec4f55f1428bc5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + 
"description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8671b8a2f8174a89aca1b7148b95ea17", + "placeholder": "\u200b", + "style": "IPY_MODEL_dc8c3007bee0471cad92490aad6987d4", + "value": "Loading checkpoint shards: 100%" + } + }, + "8671b8a2f8174a89aca1b7148b95ea17": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8b9bea5823a3474a918af514c75ad5d9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": 
null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dc8c3007bee0471cad92490aad6987d4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/src/generative_ai/information_retrieval/__init__.py b/src/generative_ai/information_retrieval/__init__.py new file mode 100644 index 0000000..5020e80 --- /dev/null +++ b/src/generative_ai/information_retrieval/__init__.py @@ -0,0 +1,53 @@ +from .orchestrate_retrieval import ( + configure_language_model, + create_embedding_database, + load_embedding_database, + load_source_documents, + prepare_question_answer_chain, + run_question_answer_chain, + store_embedding_database, +) +from .step_1_retrieval import ( + create_document_embedder, + create_vector_store, + load_json_documents, + partition_documents, +) +from .step_2_retrieval import create_database_retriever, create_llm, generate_retrieval_chain +from 
def load_source_documents(file_path: pathlib.Path) -> list[Document]:
    """Load the JSON dataset at ``file_path`` and return it split into chunks.

    The file is read with ``load_json_documents`` and the resulting documents
    are passed through ``partition_documents`` before being returned.
    """
    return partition_documents(load_json_documents(file_path))
@pydantic.validate_call(validate_return=True)
def configure_language_model(  # noqa: PLR0913
    language_model_type: TransformerType,
    standard_pipeline_type: PipelineType,
    standard_model_name: str,
    quantised_model_name: str,
    quantised_model_file: str,
    quantised_model_type: str,
) -> LanguageModel:
    """Build the validated language model description for the chosen backend.

    Only the settings relevant to ``language_model_type`` are forwarded to the
    ``LanguageModelAdapter``; the settings of the other backend are ignored.

    Raises
    ------
    ValueError
        If ``language_model_type`` is not one of the handled transformer types.
    """
    if language_model_type == TransformerType.STANDARD_TRANSFORMERS:
        backend_settings = {
            "standard_pipeline_type": standard_pipeline_type,
            "standard_model_name": standard_model_name,
        }
    elif language_model_type == TransformerType.QUANTISED_CTRANSFORMERS:
        backend_settings = {
            "quantised_model_name": quantised_model_name,
            "quantised_model_file": quantised_model_file,
            "quantised_model_type": quantised_model_type,
        }
    else:
        raise ValueError("Unexpected language model type")

    # The discriminator key goes first, followed by the backend-specific fields.
    return LanguageModelAdapter.validate_python(
        {"language_model_type": language_model_type, **backend_settings}
    )
def run_question_answer_chain(
    question_answer_chain: RunnableSerializable, question: str
) -> tuple[dict, CaptureDetailsCallback]:
    """Invoke the chain on ``question`` with a fresh details callback attached.

    Returns the chain output together with the callback instance that was
    registered for this invocation.
    """
    capture = CaptureDetailsCallback()
    run_config = {"callbacks": [capture]}
    chain_output = question_answer_chain.invoke(question, config=run_config)

    return chain_output, capture
def create_llm(language_model: LanguageModel) -> CTransformers | HuggingFacePipeline:
    """Instantiate the LangChain LLM wrapper described by ``language_model``.

    A ``HuggingFacePipeline`` is built for ``STANDARD_TRANSFORMERS`` and a
    ``CTransformers`` model for ``QUANTISED_CTRANSFORMERS``.

    Raises
    ------
    ValueError
        If ``language_model.language_model_type`` is not one of the two
        handled transformer types.
    """
    # Generation settings shared by both backends; each branch extends them.
    common_parameters = {"max_new_tokens": 256}

    match language_model.language_model_type:
        case TransformerType.STANDARD_TRANSFORMERS:
            # NOTE(review): sampling restricted to top_k=1 is presumably meant to
            # make generation effectively deterministic - confirm.
            common_parameters.update({"do_sample": True, "top_k": 1})

            tokeniser = transformers.AutoTokenizer.from_pretrained(
                language_model.standard_model_name,
                use_fast=True,
                padding="max_length",
                truncation=True,
            )
            # Reuse the end-of-sequence token as the padding token.
            tokeniser.pad_token = tokeniser.eos_token

            pipeline = transformers.pipeline(
                task=language_model.standard_pipeline_type,
                model=language_model.standard_model_name,
                tokenizer=tokeniser,
                model_kwargs={"low_cpu_mem_usage": True},
                **common_parameters,
            )

            llm = HuggingFacePipeline(pipeline=pipeline)
        case TransformerType.QUANTISED_CTRANSFORMERS:
            common_parameters.update({"temperature": 0})

            # The shared settings are passed as the ctransformers ``config``.
            llm = CTransformers(
                model=language_model.quantised_model_name,
                model_type=language_model.quantised_model_type,
                model_file=language_model.quantised_model_file,
                config=common_parameters,
            )
        case _:
            raise ValueError("Unexpected language model type")

    return llm
class CaptureDetailsCallback(BaseCallbackHandler):
    """Callback handler recording the prompt sent to the LLM and the call duration.

    ``effective_prompt`` and ``effective_duration`` stay ``None`` until the
    matching start/end events have been observed, so a reader never sees a raw
    timer value in place of a duration.
    """

    def __init__(self: "CaptureDetailsCallback") -> None:
        super().__init__()

        # First prompt of the most recent LLM call.
        self.effective_prompt: str | None = None
        # Seconds elapsed between ``on_llm_start`` and ``on_llm_end``.
        self.effective_duration: float | None = None
        # ``time.perf_counter()`` reading taken when the current call started.
        self._started_at: float | None = None

    def on_llm_start(  # noqa: PLR0913
        self: "CaptureDetailsCallback",
        serialized: dict,
        prompts: list[str],
        *,
        run_id: uuid.UUID,
        parent_run_id: uuid.UUID | None = None,
        tags: list[str] | None = None,
        metadata: dict | None = None,
        **kwargs: typing.Any,
    ) -> None:
        """Store the first prompt and start timing the LLM call."""
        del serialized
        del run_id
        del parent_run_id
        del tags
        del metadata
        del kwargs

        # An empty prompt list must not crash the chain run.
        self.effective_prompt = prompts[0] if prompts else None
        # Reset any duration left over from a previous call on this instance.
        self.effective_duration = None
        self._started_at = time.perf_counter()

    def on_llm_end(
        self: "CaptureDetailsCallback",
        response: LLMResult,
        *,
        run_id: uuid.UUID,
        parent_run_id: uuid.UUID | None = None,
        **kwargs: typing.Any,
    ) -> None:
        """Compute the elapsed time since the matching ``on_llm_start``."""
        del response
        del run_id
        del parent_run_id
        del kwargs

        if self._started_at is None:
            # No start event was seen, so there is nothing to measure against.
            return

        self.effective_duration = time.perf_counter() - self._started_at
        self._started_at = None
b/src/generative_ai/information_retrieval/utils_retrieval.py new file mode 100644 index 0000000..4ccdd94 --- /dev/null +++ b/src/generative_ai/information_retrieval/utils_retrieval.py @@ -0,0 +1,57 @@ +import enum +import typing + +import pydantic +import typing_extensions +from langchain.vectorstores.chroma import Chroma + + +class RetrievalType(str, enum.Enum): + MMR = "mmr" + SIMILARITY = "similarity" + + +class TransformerType(str, enum.Enum): + STANDARD_TRANSFORMERS = "standard_transformers" + QUANTISED_CTRANSFORMERS = "quantised_ctransformers" + + +class PipelineType(str, enum.Enum): + TEXT_GENERATION = "text-generation" + TEXT2TEXT_GENERATION = "text2text-generation" + + +class StandardModel(pydantic.BaseModel): + language_model_type: typing.Literal[TransformerType.STANDARD_TRANSFORMERS] + standard_pipeline_type: PipelineType + standard_model_name: str + + +class QuantisedModel(pydantic.BaseModel): + language_model_type: typing.Literal[TransformerType.QUANTISED_CTRANSFORMERS] + quantised_model_name: str + quantised_model_file: str + quantised_model_type: str + + +LanguageModel = typing_extensions.TypeAliasType( + "LanguageModel", + typing.Annotated[ + QuantisedModel | StandardModel, pydantic.Field(discriminator="language_model_type") + ], +) +LanguageModelAdapter = pydantic.TypeAdapter(LanguageModel) + +ValidatedChroma = pydantic.InstanceOf[Chroma] + + +__all__ = [ + "LanguageModel", + "LanguageModelAdapter", + "TransformerType", + "PipelineType", + "QuantisedModel", + "RetrievalType", + "StandardModel", + "ValidatedChroma", +] diff --git a/src/generative_ai/metadata.json b/src/generative_ai/metadata.json new file mode 100644 index 0000000..3d98f0e --- /dev/null +++ b/src/generative_ai/metadata.json @@ -0,0 +1,21 @@ +{ + "Authors": [ + "Anirban Ray" + ], + "Description": "A package to explore documentations", + "Keywords": [ + "documentation", + "generative-ai" + ], + "License": "MIT", + "Links": { + "Bug Tracker": 
@pydantic.validate_call(validate_return=True)
def create_database(
    dataset_file: pathlib.Path, embedding_model: str, database_directory: pathlib.Path, force: bool
) -> pathlib.Path:
    """Create and persist the embedding database for the documents in ``dataset_file``.

    Parameters
    ----------
    dataset_file
        JSON dataset to load the source documents from.
    embedding_model
        Name of the embedding model used to embed the documents.
    database_directory
        Directory where the vector database is stored.
    force
        Whether an existing ``database_directory`` may be deleted and rebuilt.

    Returns
    -------
    pathlib.Path
        Resolved path of ``database_directory``.

    Raises
    ------
    FileExistsError
        If ``database_directory`` exists and ``force`` is false.
    FileNotFoundError
        If ``dataset_file`` does not exist.
    """
    if database_directory.exists() and not force:
        LOGGER.error(f"{database_directory=} refers to an existing file but {force=}")

        raise FileExistsError("Database exists already, skipping.")

    # Validate the input before deleting anything: a missing dataset must not
    # cost the user their existing database.
    if not dataset_file.exists():
        LOGGER.error(f"{dataset_file=} refers to a non-existing file")

        raise FileNotFoundError("Dataset file is missing, skipping. Use 'generate-dataset' first.")

    if database_directory.exists():
        shutil.rmtree(database_directory)
        LOGGER.warning("Deleted existed database.")

    source_documents = load_source_documents(dataset_file)
    embedding_database = create_embedding_database(
        embedding_model, database_directory, source_documents
    )

    store_embedding_database(embedding_database)

    return database_directory.resolve()
+ ) + + embedding_database = load_embedding_database(embedding_model, database_directory) + language_model = configure_language_model( + language_model_type, + standard_pipeline_type, + standard_model_name, + quantised_model_name, + quantised_model_file, + quantised_model_type, + ) + question_answer_chain = prepare_question_answer_chain( + embedding_database, + search_type, + number_of_documents, + initial_number_of_documents, + diversity_level, + language_model, + ) + + answer, callback = run_question_answer_chain(question_answer_chain, question) + + return Response.model_validate( + { + "query": answer["query"], + "answer": answer["result"], + "source_documents": [ + source_document.page_content for source_document in answer["source_documents"] + ], + "used_prompt": callback.effective_prompt, + "llm_duration": callback.effective_duration, + } + ) + + +__all__ = ["create_database", "create_dataset", "get_response"] diff --git a/src/generative_ai/utils_top_level.py b/src/generative_ai/utils_top_level.py new file mode 100644 index 0000000..789a237 --- /dev/null +++ b/src/generative_ai/utils_top_level.py @@ -0,0 +1,12 @@ +import pydantic + + +class Response(pydantic.BaseModel): + query: str + answer: str + source_documents: list[str] + used_prompt: str + llm_duration: float + + +__all__ = ["Response"] diff --git a/src/gui.py b/src/gui.py new file mode 100644 index 0000000..7c3a6e2 --- /dev/null +++ b/src/gui.py @@ -0,0 +1,303 @@ +import gc +import pathlib + +import gradio + +from generative_ai.information_retrieval import PipelineType, RetrievalType, TransformerType +from generative_ai.top_level import create_database, create_dataset, get_response + + +def generate_dataset( + package_name: str, dataset_file: pathlib.Path, force: bool = False +) -> pathlib.Path: + try: + dataset_path = create_dataset(package_name, dataset_file, force) + except FileExistsError as error: + raise gradio.Error(message=str(error)) from error + else: + gradio.Info("Dataset generation 
def answer_query(  # noqa: PLR0913
    query: str,
    embedding_model: str,
    database_directory: pathlib.Path,
    search_type: RetrievalType,
    number_of_documents: int,
    initial_number_of_documents: int,
    diversity_level: float,
    language_model_type: TransformerType,
    standard_pipeline_type: PipelineType,
    standard_model_name: str,
    quantised_model_name: str,
    quantised_model_file: str,
    quantised_model_type: str,
) -> tuple[str, list[str], str, float]:
    """Answer ``query`` through ``get_response`` and unpack the result for the GUI.

    All arguments are forwarded unchanged to ``get_response``, which validates
    them and selects the backend-specific settings itself; no language model
    description is assembled here.

    Returns
    -------
    tuple[str, list[str], str, float]
        Answer, source documents, prompt used and LLM duration of the response.

    Raises
    ------
    gradio.Error
        If ``get_response`` raises ``FileNotFoundError``.
    """
    try:
        response = get_response(
            query,
            embedding_model,
            database_directory,
            search_type,
            number_of_documents,
            initial_number_of_documents,
            diversity_level,
            language_model_type,
            standard_pipeline_type,
            standard_model_name,
            quantised_model_name,
            quantised_model_file,
            quantised_model_type,
        )
    except FileNotFoundError as error:
        raise gradio.Error(message=str(error)) from error
    else:
        return (
            response.answer,
            response.source_documents,
            response.used_prompt,
            response.llm_duration,
        )
    finally:
        # Run a garbage collection pass after every request, successful or not.
        _ = gc.collect()
label="embedding model to use" + ) + database_directory_step3_input = gradio.Textbox( + value="embeddings_database", label="path to directory storing database" + ) + + with gradio.Accordion(label="Retrieval", open=False): + search_type_step3_input = gradio.Radio( + choices=[(element.name, element.value) for element in RetrievalType], + value=RetrievalType.MMR.value, + label="kind of retrieval", + ) + number_of_documents_step3_input = gradio.Slider( + minimum=1, + maximum=10, + value=3, + step=1, + label="number of documents to retrieve", + randomize=False, + ) + initial_number_of_documents_step3_input = gradio.Slider( + minimum=2, + maximum=30, + value=5, + step=3, + label="initial number of documents to consider", + randomize=False, + ) + diversity_level_step3_input = gradio.Slider( + minimum=0, + maximum=1, + value=0.5, + step=0.1, + label="similarity between retrieved documents", + randomize=False, + ) + + with gradio.Accordion(label="Language Model", open=False): + language_model_type_step3_input = gradio.Radio( + choices=[(element.name, element.value) for element in TransformerType], + value=TransformerType.STANDARD_TRANSFORMERS.value, + label="kind of language model", + ) + with gradio.Row(): + with gradio.Group(): + standard_pipeline_type_step3_input = gradio.Radio( + choices=[(element.name, element.value) for element in PipelineType], + value=PipelineType.TEXT2TEXT_GENERATION.value, + label="kind of Hugging Face pipeline", + ) + standard_model_name_step3_input = gradio.Textbox( + value="google/flan-t5-large", label="name of Hugging Face model" + ) + with gradio.Group(): + quantised_model_name_step3_input = gradio.Textbox( + value="TheBloke/zephyr-7B-beta-GGUF", label="name of Hugging Face model" + ) + quantised_model_file_step3_input = gradio.Textbox( + value="zephyr-7b-beta.Q4_K_M.gguf", label="name of Hugging Face model file" + ) + quantised_model_type_step3_input = gradio.Textbox( + value="mistral", label="type of Hugging Face model" + ) + + step3_button 
def main() -> None:
    """Build the three-tab Gradio application and launch it.

    The page shows a Markdown summary followed by one tab per step, each
    populated by its ``step*_tab_flow`` function.
    """
    tab_title = "GUI for Generative AI application"
    summary = """# Retrieval Augmented Generation from package docstrings.

## Dataset Generation

1. list all modules in the package (recursively from all sub-packages)
2. generate a set of documents based on package/module/object docstrings
3. documents are stored in a JSON dataset for retrieval (and tuning, optionally)

## Database Generation

1. read the retrieval dataset
2. generate embeddings for each document
3. store document embeddings in a vector database

## Response Generation

1. read the retrieval database
2. generate embeddings for user question
3. retrieve most similar documents from database
4. pass relevant documents to language model as context
5. generate answer using language model"""

    with gradio.Blocks(analytics_enabled=False, title=tab_title) as gui_application:
        _ = gradio.Markdown(value=summary, label="Description")

        with gradio.Tab(label="Step 1"):
            step1_tab_flow()

        with gradio.Tab(label="Step 2"):
            step2_tab_flow()

        with gradio.Tab(label="Step 3"):
            step3_tab_flow()

    gui_application.launch(share=False, show_error=True, show_api=False)


if __name__ == "__main__":
    main()