diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..3036895 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,43 @@ +name: deploy + +on: + # Trigger the workflow on push to main branch + push: + branches: + - main + +# This job installs dependencies, builds the book, and pushes it to `gh-pages` +jobs: + deploy-book: + runs-on: ubuntu-latest + permissions: + pages: write + id-token: write + steps: + - uses: actions/checkout@v3 + + # Install dependencies + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: 3.11 + + - name: Install dependencies + run: | + pip install -r requirements.txt + + # Build the book + - name: Build the book + run: | + jupyter-book build alchemistry + + # Upload the book's HTML as an artifact + - name: Upload artifact + uses: actions/upload-pages-artifact@v2 + with: + path: "alchemistry/_build/html" + + # Deploy the book's HTML to GitHub Pages + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v2 \ No newline at end of file diff --git a/.github/workflows/jekyll.yml b/.github/workflows/jekyll.yml deleted file mode 100644 index 68520b5..0000000 --- a/.github/workflows/jekyll.yml +++ /dev/null @@ -1,64 +0,0 @@ -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party and are governed by -# separate terms of service, privacy policy, and support -# documentation. - -# Sample workflow for building and deploying a Jekyll site to GitHub Pages -name: Deploy Jekyll site to Pages - -on: - # Runs on pushes targeting the default branch - push: - branches: ["main"] - - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages -permissions: - contents: read - pages: write - id-token: write - -# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. -# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. -concurrency: - group: "pages" - cancel-in-progress: false - -jobs: - # Build job - build: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Setup Ruby - uses: ruby/setup-ruby@8575951200e472d5f2d95c625da0c7bec8217c42 # v1.161.0 - with: - ruby-version: '3.1' # Not needed with a .ruby-version file - bundler-cache: true # runs 'bundle install' and caches installed gems automatically - cache-version: 0 # Increment this number if you need to re-download cached gems - - name: Setup Pages - id: pages - uses: actions/configure-pages@v5 - - name: Build with Jekyll - # Outputs to the './_site' directory by default - run: bundle exec jekyll build --baseurl "${{ steps.pages.outputs.base_path }}" - env: - JEKYLL_ENV: production - - name: Upload artifact - # Automatically uploads an artifact from the './_site' directory by default - uses: actions/upload-pages-artifact@v3 - - # Deployment job - deploy: - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} - runs-on: ubuntu-latest - needs: build - steps: - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index cee9e12..f324224 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,146 @@ package-lock.json # Misc assets/js/dist + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +*/_build/ + +*/.ipynb_checkpoints/ diff --git a/.nojekyll b/.nojekyll deleted file mode 100644 index 8b13789..0000000 --- a/.nojekyll +++ /dev/null @@ -1 +0,0 @@ - diff --git a/404.html b/404.html deleted file mode 100644 index 086a5c9..0000000 --- a/404.html +++ /dev/null @@ -1,25 +0,0 @@ ---- -permalink: /404.html -layout: default ---- - - - -
-

404

- -

Page not found :(

-

The requested page could not be found.

-
diff --git a/CONDUCT.md b/CONDUCT.md new file mode 100644 index 0000000..3f5562b --- /dev/null +++ b/CONDUCT.md @@ -0,0 +1,44 @@ + +# Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4). diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..4fb267c --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,56 @@ +# Contributing + +Contributions are welcome, and they are greatly appreciated! Every little bit +helps, and credit will always be given. You can contribute in the ways listed below. + +## Report Bugs + +Report bugs using GitHub issues. + +If you are reporting a bug, please include: + +* Your operating system name and version. +* Any details about your local setup that might be helpful in troubleshooting. +* Detailed steps to reproduce the bug. + +## Fix Bugs + +Look through the GitHub issues for bugs. Anything tagged with "bug" and "help +wanted" is open to whoever wants to implement it. + +## Implement Features + +Look through the GitHub issues for features. Anything tagged with "enhancement" +and "help wanted" is open to whoever wants to implement it. + +## Write Documentation + +Scientific Visualization using Python could always use more documentation, whether as part of the +official Scientific Visualization using Python docs, in docstrings, or even on the web in blog posts, +articles, and such. + +## Submit Feedback + +The best way to send feedback is to file an issue on GitHub. + +If you are proposing a feature: + +* Explain in detail how it would work. +* Keep the scope as narrow as possible, to make it easier to implement. +* Remember that this is a volunteer-driven project, and that contributions + are welcome :) + +## Get Started + +Ready to contribute? Here's how to set up `Scientific Visualization using Python` for local development. + +1. Fork the repo on GitHub. +2. Clone your fork locally. +3. Install your local copy into a virtualenv, e.g., using `conda`. +4. Create a branch for local development and make changes locally. +5. Commit your changes and push your branch to GitHub. +6. Submit a pull request through the GitHub website. + +## Code of Conduct + +Please note that the Scientific Visualization using Python project is released with a [Contributor Code of Conduct](CONDUCT.md). By contributing to this project you agree to abide by its terms. diff --git a/Gemfile b/Gemfile deleted file mode 100644 index d9d2e6b..0000000 --- a/Gemfile +++ /dev/null @@ -1,23 +0,0 @@ -# frozen_string_literal: true - -source "https://rubygems.org" - -gem "jekyll-theme-chirpy", "~> 6.5", ">= 6.5.3" - -group :test do - gem "html-proofer", "~> 4.4" -end - -# Windows and JRuby does not include zoneinfo files, so bundle the tzinfo-data gem -# and associated library. -platforms :mingw, :x64_mingw, :mswin, :jruby do - gem "tzinfo", ">= 1", "< 3" - gem "tzinfo-data" -end - -# Performance-booster for watching directories on Windows -gem "wdm", "~> 0.1.1", :platforms => [:mingw, :x64_mingw, :mswin] - -# Lock `http_parser.rb` gem to `v0.6.x` on JRuby builds since newer versions of the gem -# do not have a Java counterpart. -gem "http_parser.rb", "~> 0.6.0", :platforms => [:jruby] diff --git a/LICENSE b/LICENSE index 87562d3..550b084 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,30 @@ -MIT License +BSD License -Copyright (c) 2024 Shirts Research Group +Copyright (c) 2021, Jessica A. Nash +All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/README.md b/README.md index 5d698dc..c188931 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,35 @@ -# alchemistry.org -Source for alchemistry.org web page +# Alchemistry.org + +Homepage for alchemistry.org + +## Usage + +### Building the book + +If you'd like to develop on and build the Scientific Visualization using Python book, you should: + +- Clone this repository and run +- Run `pip install -r requirements.txt` (it is recommended you do this within a virtual environment) +- (Recommended) Remove the existing `alchemistry/_build/` directory +- Run `jupyter-book build alchemistry/` + +A fully-rendered HTML version of the book will be built in `alchemistry/_build/html/`. + +### Hosting the book + +The html version of the book is hosted on the `gh-pages` branch of this repo. A GitHub actions workflow has been created that automatically builds and pushes the book to this branch on a push or pull request to main. + +If you wish to disable this automation, you may remove the GitHub actions workflow and build the book manually by: + +- Navigating to your local build; and running, +- `ghp-import -n -p -f alchemistry/_build/html` + +This will automatically push your build to the `gh-pages` branch. More information on this hosting process can be found [here](https://jupyterbook.org/publish/gh-pages.html#manually-host-your-book-with-github-pages). + +## Contributors + +We welcome and recognize all contributions. You can see a list of current contributors in the [contributors tab](https://github.com/janash/molssi_python_visualization/graphs/contributors). + +## Credits + +This project is created using the excellent open source [Jupyter Book project](https://jupyterbook.org/) and the [executablebooks/cookiecutter-jupyter-book template](https://github.com/executablebooks/cookiecutter-jupyter-book). diff --git a/_config.yml b/_config.yml deleted file mode 100644 index 9038507..0000000 --- a/_config.yml +++ /dev/null @@ -1,212 +0,0 @@ -# The Site Configuration - -# Import the theme -theme: jekyll-theme-chirpy - -# The language of the webpage › http://www.lingoes.net/en/translator/langcode.htm -# If it has the same name as one of the files in folder `_data/locales`, the layout language will also be changed, -# otherwise, the layout language will use the default value of 'en'. -lang: en - -# Change to your timezone › https://kevinnovak.github.io/Time-Zone-Picker -timezone: - -# jekyll-seo-tag settings › https://github.com/jekyll/jekyll-seo-tag/blob/master/docs/usage.md -# ↓ -------------------------- - -title: Alchemistry.org # the main title - -tagline: A place to learn, share, and discuss Computational Free Energy Calculations # it will display as the sub-title - -description: >- # used by seo meta and the atom feed - Modern alchemy, done computationally, can turn protein structural information into the - "gold" of binding free energies and other information. In these pages, learn how it - works, and how to do it yourself. - -# Fill in the protocol & hostname for your site. -# e.g. 'https://username.github.io', note that it does not end with a '/'. -url: "https://alchemistry.org" - -github: - username: github_username # change to your github username - -twitter: - username: twitter_username # change to your twitter username - -social: - # Change to your full name. - # It will be displayed as the default author of the posts and the copyright owner in the Footer - name: The Alchemistry.org Team - email: example@domain.com # change to your email address - links: - # The first element serves as the copyright owner's link - - https://twitter.com/username # change to your twitter homepage - - https://github.com/username # change to your github homepage - # Uncomment below to add more social links - # - https://www.facebook.com/username - # - https://www.linkedin.com/in/username - -google_site_verification: # fill in to your verification string - -# ↑ -------------------------- -# The end of `jekyll-seo-tag` settings - -google_analytics: - id: # fill in your Google Analytics ID - -goatcounter: - id: # fill in your Goatcounter ID - -# Prefer color scheme setting. -# -# Note: Keep empty will follow the system prefer color by default, -# and there will be a toggle to switch the theme between dark and light -# on the bottom left of the sidebar. -# -# Available options: -# -# light - Use the light color scheme -# dark - Use the dark color scheme -# -theme_mode: # [light | dark] - -# The CDN endpoint for images. -# Notice that once it is assigned, the CDN url -# will be added to all image (site avatar & posts' images) paths starting with '/' -# -# e.g. 'https://cdn.com' -img_cdn: - -# the avatar on sidebar, support local or CORS resources -avatar: /assets/images/staurosporine-hydrated-1.png - -# The URL of the site-wide social preview image used in SEO `og:image` meta tag. -# It can be overridden by a customized `page.image` in front matter. -social_preview_image: # string, local or CORS resources - -# boolean type, the global switch for TOC in posts. -toc: true - -comments: - active: # The global switch for posts comments, e.g., 'disqus'. Keep it empty means disable - # The active options are as follows: - disqus: - shortname: # fill with the Disqus shortname. › https://help.disqus.com/en/articles/1717111-what-s-a-shortname - # utterances settings › https://utteranc.es/ - utterances: - repo: # / - issue_term: # < url | pathname | title | ...> - # Giscus options › https://giscus.app - giscus: - repo: # / - repo_id: - category: - category_id: - mapping: # optional, default to 'pathname' - input_position: # optional, default to 'bottom' - lang: # optional, default to the value of `site.lang` - reactions_enabled: # optional, default to the value of `1` - -# Self-hosted static assets, optional › https://github.com/cotes2020/chirpy-static-assets -assets: - self_host: - enabled: # boolean, keep empty means false - # specify the Jekyll environment, empty means both - # only works if `assets.self_host.enabled` is 'true' - env: # [development | production] - -pwa: - enabled: true # the option for PWA feature (installable) - cache: - enabled: true # the option for PWA offline cache - # Paths defined here will be excluded from the PWA cache. - # Usually its value is the `baseurl` of another website that - # shares the same domain name as the current website. - deny_paths: - # - "/example" # URLs match `/example/*` will not be cached by the PWA - -paginate: 10 - -# The base URL of your site -baseurl: "" - -# ------------ The following options are not recommended to be modified ------------------ - -kramdown: - syntax_highlighter: rouge - syntax_highlighter_opts: # Rouge Options › https://github.com/jneen/rouge#full-options - css_class: highlight - # default_lang: console - span: - line_numbers: false - block: - line_numbers: true - start_line: 1 - -collections: - tabs: - output: true - sort_by: order - lessons: - output: true - sort_by: order - examples: - output: true - sort_by: order - -defaults: - - scope: - path: "" # An empty string here means all files in the project - type: posts - values: - layout: post - comments: true # Enable comments in posts. - toc: true # Display TOC column in posts. - # DO NOT modify the following parameter unless you are confident enough - # to update the code of all other post links in this project. - permalink: /posts/:title/ - - scope: - path: _drafts - values: - comments: false - - scope: - path: "" - type: tabs # see `site.collections` - values: - layout: page - permalink: /:title/ - - scope: - path: assets/js/dist - values: - swcache: true - -sass: - style: compressed - -compress_html: - clippings: all - comments: all - endings: all - profile: false - blanklines: false - ignore: - envs: [development] - -exclude: - - "*.gem" - - "*.gemspec" - - docs - - tools - - README.md - - LICENSE - - rollup.config.js - - package*.json - -jekyll-archives: - enabled: [categories, tags] - layouts: - category: category - tag: tag - permalinks: - tag: /tags/:name/ - category: /categories/:name/ diff --git a/_examples/ExamplesA.md b/_examples/ExamplesA.md deleted file mode 100644 index 2e5689a..0000000 --- a/_examples/ExamplesA.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -layout: example -order: 1 -title: Example Free Energy Calculations -summary: A look into practical examples without software-specific details. We outline the steps needed for realistic free energy problem, detailing problems you can expect to encounter, and how to analyze the data to get a robust free energy difference. ---- - -I am the first Example - -# Behold my example form! diff --git a/_includes/post-like-gen.html b/_includes/post-like-gen.html deleted file mode 100644 index ae63576..0000000 --- a/_includes/post-like-gen.html +++ /dev/null @@ -1,54 +0,0 @@ -
- {% for iterator in site.[include.content_type] %} -
- - {% assign card_body_col = '12' %} - - {% if iterator.image %} - {% assign src = iterator.image.path | default: iterator.image %} - {% unless src contains '//' %} - {% assign src = iterator.img_path | append: '/' | append: src | replace: '//', '/' %} - {% endunless %} - - {% assign alt = iterator.image.alt | xml_escape | default: 'Preview Image' %} - - {% assign lqip = null %} - - {% if iterator.image.lqip %} - {% capture lqip %}lqip="{{ iterator.image.lqip }}"{% endcapture %} - {% endif %} - -
- {{ alt }} -
- - {% assign card_body_col = '7' %} - {% endif %} - -
-
-

{{ iterator.title }}

- -
-

- - {% assign summary = iterator.summary | strip_newlines %} - {% if summary != "" %} - {% include no-linenos.html content=summary %} - {{ summary | markdownify | strip_html | escape }} - {% else % } - {% include no-linenos.html content=iterator.content %} - {{ iterator.content | markdownify | strip_html | truncate: 200 | escape }} - {% endif %} -

-
- -
- -
-
-
- {% endfor %} -
- - diff --git a/_layouts/example.html b/_layouts/example.html deleted file mode 100644 index 6840e3f..0000000 --- a/_layouts/example.html +++ /dev/null @@ -1,141 +0,0 @@ ---- -layout: default -refactor: true -panel_includes: - - toc -tail_includes: - - related-posts - - post-nav - - comments ---- - - - -{% include lang.html %} - -
-
-

{{ page.title }}

- - - -
- -
- {{ content }} -
- -
- - {% if page.categories.size > 0 %} - - {% endif %} - - - {% if page.tags.size > 0 %} - - {% endif %} - -
-
- {% if site.data.locales[lang].copyright.license.template %} - {% capture _replacement %} - - {{ site.data.locales[lang].copyright.license.name }} - - {% endcapture %} - - {{ site.data.locales[lang].copyright.license.template | replace: ':LICENSE_NAME', _replacement }} - {% endif %} -
- - {% include post-sharing.html lang=lang %} -
- -
- -
diff --git a/_layouts/examples.html b/_layouts/examples.html deleted file mode 100644 index 6043a88..0000000 --- a/_layouts/examples.html +++ /dev/null @@ -1,6 +0,0 @@ ---- -layout: page -# All the Examples. ---- - -{% include post-like-gen.html content_type="examples" %} diff --git a/_layouts/home.html b/_layouts/home.html deleted file mode 100644 index 949d999..0000000 --- a/_layouts/home.html +++ /dev/null @@ -1,143 +0,0 @@ ---- -layout: default -refactor: false ---- - -{% include lang.html %} - -
- - - -
- {% capture header %}{% include_relative homepage/header.md %}{% endcapture %} - - {{ header | markdownify }} -
-
-
- - -
-
- {% capture lesson_and_tut %}{% include_relative homepage/lessons_and_tutorials.md %}{% endcapture %} - - {{ lesson_and_tut | markdownify }} - {% include_relative homepage/lessons_and_tutorials.html %} -
-
- -{% assign pinned = site.posts | where: 'pin', 'true' %} -{% assign default = site.posts | where_exp: 'item', 'item.pin != true and item.hidden != true' %} - -{% assign posts = '' | split: '' %} - - - -{% assign offset = paginator.page | minus: 1 | times: paginator.per_page %} -{% assign pinned_num = pinned.size | minus: offset %} - -{% if pinned_num > 0 %} - {% for i in (offset..pinned.size) limit: pinned_num %} - {% assign posts = posts | push: pinned[i] %} - {% endfor %} -{% else %} - {% assign pinned_num = 0 %} -{% endif %} - - - -{% assign default_beg = offset | minus: pinned.size %} - -{% if default_beg < 0 %} - {% assign default_beg = 0 %} -{% endif %} - -{% assign default_num = paginator.posts | size | minus: pinned_num %} -{% assign default_end = default_beg | plus: default_num | minus: 1 %} - -{% if default_num > 0 %} - {% for i in (default_beg..default_end) %} - {% assign posts = posts | push: default[i] %} - {% endfor %} -{% endif %} - -{% assign about = site.pages | where: 'name','index.md' %} -{{about}} - -
- {% for post in posts %} -
- - {% assign card_body_col = '12' %} - - {% if post.image %} - {% assign src = post.image.path | default: post.image %} - {% unless src contains '//' %} - {% assign src = post.img_path | append: '/' | append: src | replace: '//', '/' %} - {% endunless %} - - {% assign alt = post.image.alt | xml_escape | default: 'Preview Image' %} - - {% assign lqip = null %} - - {% if post.image.lqip %} - {% capture lqip %}lqip="{{ post.image.lqip }}"{% endcapture %} - {% endif %} - -
- {{ alt }} -
- - {% assign card_body_col = '7' %} - {% endif %} - -
-
-

{{ post.title }}

- -
-

- {% include no-linenos.html content=post.content %} - {{ content | markdownify | strip_html | truncate: 200 | escape }} -

-
- - - -
- -
-
-
- {% endfor %} -
- - -{% if paginator.total_pages > 1 %} - {% include post-paginator.html %} -{% endif %} diff --git a/_layouts/lesson.html b/_layouts/lesson.html deleted file mode 100644 index 6840e3f..0000000 --- a/_layouts/lesson.html +++ /dev/null @@ -1,141 +0,0 @@ ---- -layout: default -refactor: true -panel_includes: - - toc -tail_includes: - - related-posts - - post-nav - - comments ---- - - - -{% include lang.html %} - -
-
-

{{ page.title }}

- - - -
- -
- {{ content }} -
- -
- - {% if page.categories.size > 0 %} - - {% endif %} - - - {% if page.tags.size > 0 %} - - {% endif %} - -
-
- {% if site.data.locales[lang].copyright.license.template %} - {% capture _replacement %} - - {{ site.data.locales[lang].copyright.license.name }} - - {% endcapture %} - - {{ site.data.locales[lang].copyright.license.template | replace: ':LICENSE_NAME', _replacement }} - {% endif %} -
- - {% include post-sharing.html lang=lang %} -
- -
- -
diff --git a/_layouts/lessons.html b/_layouts/lessons.html deleted file mode 100644 index 0fd7482..0000000 --- a/_layouts/lessons.html +++ /dev/null @@ -1,6 +0,0 @@ ---- -layout: page -# All the Lessons. ---- - -{% include post-like-gen.html content_type="lessons" %} diff --git a/_lessons/FEFundamentals.md b/_lessons/FEFundamentals.md deleted file mode 100644 index d18d8c8..0000000 --- a/_lessons/FEFundamentals.md +++ /dev/null @@ -1,18 +0,0 @@ ---- -layout: lesson -order: 1 -title: Free Energy Fundamentals -image: /assets/images/Transformation_small.png -summary: Learn the methods and techniques behind free energy methods with in-depth coverage of free energy definitions and topics. ---- - -I am the first lesson - -adding some math $x_i$. - -$$ -\begin{equation} -E = mc^3 -\end{equation} -$$ -# Shoop da woop diff --git a/_lessons/Introduction.md b/_lessons/Introduction.md deleted file mode 100644 index 93f458e..0000000 --- a/_lessons/Introduction.md +++ /dev/null @@ -1,75 +0,0 @@ ---- -layout: lesson -order: 2 -title: Introduction -image: /assets/images/Transformation_small.png -summary: Introduction to alchemical free energy calculations ---- - -To understand the advantages and disadvantages of different free energy methods, it is important to begin -with a review of the underlying principles. This page is dedicated to the most fundamental concepts of free -energy calculations and is designed to give an in-depth view of the approaches, starting from the basics. T -his page also contains some of the common nomenclature and symbols that are seen throughout the rest of -the (lessons pages)*What category/organization section should this refer to?* and in the literature as a whole. - -This page is not meant to be an end-all repository of the background mathematics and principals required for -free energy calculations, but it will serve as a good start point and hopefully a quick reference. - -**Why the name "Alchemical"?** - -One of the first questions, and often most confusing points, about a number of free energy calculations is why we refer -to them as "alchemical" changes in a large number of computational methods. When people first hear the word "alchemy," -they usually think of the medieval alchemists who were trying to turn lead into gold, or other such transformations. -How computational free energy adopted the name takes a bit of understanding of simulation limitations and the properties -of what we are calculating. - -Considering that the natural evolution of some of the processes we try to model are well beyond reasonable simulation time -scales, we must come up with very efficient ways to compute the free energy differences. Because free energy is a state variable -(path independent), we can design simulations that provide a convenient pathway to computing free energy. Furthermore, since we -are doing simulations, we are not limited by experimentally observable conditions so long as we are meticulous with our calculations. - -With these observations in mind, it was found that we can simulate modifications to atoms which can change their properties -to reflect non-physical or entirely different species. This is roughly the same definition of alchemy from old in that we are -changing the properties of the atoms to be something else, although we do it in a mathematically sound and rigorous manner; -hence, the term "alchemical" was coined as many free energy calculations are "like alchemy" in their pathways and methods. - -There are of course many factors and checks that must be done to ensure accuracy and robustness of the calculations, many of -which will be shown in the rest of the (Some link to pages) - -### Assumptions for the Fundamentals -The assumptions listed here are carried out through the rest of the fundamentals sections. Free energy calculations -can usually be set up without these assumptions, but we'll make these assumptions to simplify the explanations. - -- **A standard molecular mechanics model will be assumed**; this includes: - - Harmonic bond angle terms - - Periodic dihedral terms - - Non-bonded terms made up of point charges and Lennard-Jones repulsion/dispersion terms - - **constant temperature is maintained** during the alchemical change (see below for more information)[fun-facts-from-the-free-energy-difference-definition] - - **Masses do not alchemically change.** If one wishes to do this, substitute all potential energies, $U$, with the more general Hamiltonian, $\mathcal{H}$. - - **QN and QM/MM** will **not be considered** there are number of other more complicated factors to consider that are beyond the scope of this discussion. - -### Free Energy Difference Equation -Most free energy calculations and methods started from a single core equation derived from statistical mechanics. The free energy difference between two states is directly related to the ratio of probabilities of those states through the partition functions. This free energy difference for an NVT ensemble is - -$$ -\begin{equation} -\Delta A_{ij} = -k_B T \ln \frac{Q_j}{Q_i} = -k_B T \ln \frac{\int_{\Gamma_j} e^{-\beta U_j(\vec{q})} d\vec{q}}{\int_{\Gamma_i} e^{-\beta U_i(\vec{q})} d\vec{q}} -\end{equation} -$$ - -where $\Delta A_{ij}$ is the Helmholtz free energy difference between state $j$ and state $i$, $\beta = 1/k_B$ with $k_B$ the Boltzmann constant, -$Q$ the canonical partition function, $T$ is the temperature, $U_i$ and $U_j$ are the potential energies as a function of the coordinates and momenta $\vec{q}$ for two states, and $\Gamma_i$ and $\Gamma_j$ are the ''phase space volumes'' of $\vec{q}$ -over which we sample, or the total set of all allowed positions and momenta of the system. - -From this equation, all free energy calculations are derived. - -### Fun Facts from the Free Energy Difference Definition -There are a number of key notes we can learn from the definition of free energy differences. Each of these can be important in interpreting simulation results. - -- **Only free energy __differences__ are ever calculated** There is never a calculation where absolute free energies - are needed (and rarely can they be calculated at all) as all of the biological or thermodynamical quantities of interest are based on a free energy difference. As such, there must always be a minimum of two defined thermodynamic states. Even ''absolute'' free energies of binding are still free energy differences between two states: the ligand restricted to the binding site, and the ligand free to explore all other configurations. -- **Free energy differences between states at different temperatures are usually not what you want to be calculating** for problems of interest. If it did, you would get $\Delta A_{ij} = -k_B T_j \ln Q_j + k_B T_i \ln Q_i$, which is no longer a ratio calculation and not needed for biological systems of interest. Temperature dependence on free energy is more likely to be "what is $\Delta A_{ij}$ at two different temperatures?" -- **There are two potentially different phase space volumes.** $\Gamma_i$ and $\Gamma_j$ are often the same, but they are not required to be. The methods presented here almost always assumes that the two phase spaces overlap. However, when the spaces do not overlap, these methods break down and it is difficult to identify this problem without in depth knowledge of your system. Consider the example of a hard sphere solute with radius $\sigma$ at state $i$ and a Lennard Jones repulsion/dispersion potential, with the same $\sigma$ at state $j$. Since $\Gamma_i$ will not have molecules at a distance less than $\sigma$, but $\Gamma_j$ will, the two phase spaces are not the same and these methods will either break down or return very error-prone results. - The degree to which the phase spaces are shared is called the "phase space overlap". Efficient free energy calculations require significant phase space overlap. There are (a number of strategies)[Intermediate-States] to address the lack of overlap between target spaces, but determining the best way for any given situation is still a research question. -- It should also be noted that "near zero probability" and "always zero probability" are two distinct things when considering phase space. So long as there is a chance for an observation to be made, no matter how small, it is considered part of the phase space (though long simulations might be needed to sample this overlap). - diff --git a/_tabs/about.md b/_tabs/about.md deleted file mode 100644 index ecfecb3..0000000 --- a/_tabs/about.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -# the default layout is 'page' -icon: fas fa-info-circle -order: 4 ---- - -> Add Markdown syntax content to file `_tabs/about.md`{: .filepath } and it will show up on this page. -{: .prompt-tip } - -Edit this file to change the about tabs diff --git a/_tabs/examples.md b/_tabs/examples.md deleted file mode 100644 index d071954..0000000 --- a/_tabs/examples.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -layout: examples -icon: fas fa-puzzle-piece -order: 2 ---- diff --git a/_tabs/lessons.md b/_tabs/lessons.md deleted file mode 100644 index 8bc000f..0000000 --- a/_tabs/lessons.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -layout: lessons -icon: fas fa-school -order: 1 ---- diff --git a/alchemistry/.DS_Store b/alchemistry/.DS_Store new file mode 100644 index 0000000..60f283b Binary files /dev/null and b/alchemistry/.DS_Store differ diff --git a/alchemistry/_config.yml b/alchemistry/_config.yml new file mode 100755 index 0000000..ecb7bb8 --- /dev/null +++ b/alchemistry/_config.yml @@ -0,0 +1,45 @@ +# Book settings +# Learn more at https://jupyterbook.org/customize/config.html + +title: Alchemistry.org +author: Levi Naden, Michael Shirts, and the Alchemistry.or Team +logo: images/staurosporine-hydrated-1.png + +# Force re-execution of notebooks on each build. +# See https://jupyterbook.org/content/execute.html +execute: + execute_notebooks: off + +# Define the name of the latex output file for PDF builds +latex: + latex_documents: + targetname: alchemistry.tex + +# Add a bibtex file so that we can create citations +bibtex_bibfiles: + - references.bib + +# Information about where the book exists on the web +repository: + url: https://github.com/shirtsgroup/alchemistry.org # Online location of your book + path_to_book: alchemistry/ # Optional path to your book, relative to the repository root + branch: main # Which branch of the repository should be used when creating links (optional) + +# Add GitHub buttons to your book +# See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository +html: + use_issues_button: true + use_repository_button: true + #google_analytics_id: G-FHKVGE8HKZ + +sphinx: + # Uses the default Sphinx Book Theme, which in turn borrows from its parent PyData Theme + config: # This block is parsed as YAML -> Python dict as a drop in replacement for the Sphinx conf.py file + html_js_files: + - https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js + html_favicon: images/alchemistry-favicon.ico + # This is the PyData theme control + html_theme_options: + logo: + text: "Alchemistry.org" # Sets the subtext under the logo using PyData Theme settings + html_title: Alchemistry.org # This doesn't seem to actually do anything, despite Book Theme saying it should diff --git a/alchemistry/_static/custom.css b/alchemistry/_static/custom.css new file mode 100644 index 0000000..a1f4905 --- /dev/null +++ b/alchemistry/_static/custom.css @@ -0,0 +1,91 @@ +.overview .overview-title, .question .question-title { + position: relative; + margin: 0 -.6rem!important; + padding: .4rem .6rem .4rem 2rem; + font-weight: 700; + background-color: #ffc10733; +} + +.overview, .question { + margin: 1.5625em auto; + padding: 0 .6rem .8rem!important; + overflow: hidden; + page-break-inside: avoid; + border-left: .2rem solid #ffc107; + border-radius: .1rem; + box-shadow: 0 0.2rem 0.5rem rgb(0 0 0 / 5%), 0 0 0.05rem rgb(0 0 0 / 10%); + transition: color .25s,background-color .25s,border-color .25s; +} + +.overview .overview-title:before, .question .question-title:before { + position: absolute; + left: .6rem; + width: 1rem; + height: 1rem; + color: #ffc107; + font-family: Font Awesome\ 5 Free; + font-weight: 900; + content: "\f128"; +} + +.exercise .exercise-title { + position: relative; + margin: 0 -.6rem!important; + padding: .4rem .6rem .4rem 2rem; + font-weight: 700; + background-color: #fd7e1433; +} + +.exercise { + margin: 1.5625em auto; + padding: 0 .6rem .8rem!important; + overflow: hidden; + page-break-inside: avoid; + border-left: .2rem solid #fd7e14; + border-radius: .1rem; + box-shadow: 0 0.2rem 0.5rem rgb(0 0 0 / 5%), 0 0 0.05rem rgb(0 0 0 / 10%); + transition: color .25s,background-color .25s,border-color .25s; +} + +.exercise .exercise-title:before { + position: absolute; + left: .6rem; + width: 1rem; + height: 1rem; + color: #fd7e14; + font-family: Font Awesome\ 5 Free; + font-weight: 900; + content: "\f303"; +} + +.admonition { + margin: 1.5625em auto; + padding: 0 .6rem .8rem!important; + overflow: hidden; + page-break-inside: avoid; + border-left: .2rem solid #fd7e14; + border-radius: .1rem; + box-shadow: 0 0.2rem 0.5rem rgb(0 0 0 / 5%), 0 0 0.05rem rgb(0 0 0 / 10%); + transition: color .25s,background-color .25s,border-color .25s; +} + +.admonition .admonition-title { + position: relative; + margin: 0 -.6rem!important; + padding: .4rem .6rem .4rem 2rem; + font-weight: 700; + background-color: #fd7e1433; +} + +.admonition .admonition-title:before { + position: absolute; + left: .6rem; + width: 1rem; + height: 1rem; + color: #fd7e14; + font-family: Font Awesome\ 5 Free; + font-weight: 900; + content: "\f06e"; +} + + diff --git a/alchemistry/_toc.yml b/alchemistry/_toc.yml new file mode 100644 index 0000000..9998331 --- /dev/null +++ b/alchemistry/_toc.yml @@ -0,0 +1,36 @@ +format: jb-book +root: welcome +parts: + + - caption: Fundamentals of Free Energy + chapters: # Uses the header of first file for label, otherwise can use "caption" as main directive + - file: fundamentals/FreeEnergyFundamentals + sections: # Creates subsections under the above file in the sidebar TOC + - file: fundamentals/ExponentialAveraging + - file: fundamentals/ThermodynamicIntegration + - file: fundamentals/BennetAcceptanceRatio + - file: fundamentals/WHAM + - file: fundamentals/MBAR + - file: fundamentals/ThermodynamicCycle + sections: + - file: fundamentals/IntermediateStates + - file: fundamentals/RunningSimulations + - file: fundamentals/ExtractingSimData + - file: fundamentals/Analysis + - file: fundamentals/ImprovingSampling + + - caption: Tutorials and External Resources + chapters: + - file: examples/ExamplesIntro + - file: external/ExternalTutorials + + - caption: Events + chapters: + - file: events/UpcomingEvents + sections: + - file: events/2024WorkFEDD + - file: events/PastEvents + + - caption: About + chapters: + - file: about \ No newline at end of file diff --git a/alchemistry/about.ipynb b/alchemistry/about.ipynb new file mode 100644 index 0000000..de54de2 --- /dev/null +++ b/alchemistry/about.ipynb @@ -0,0 +1,46 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9c189433-2fdd-4afa-901f-3dff54b40519", + "metadata": {}, + "source": [ + "(about)=\n", + "# About the Alchemistry.org Editors" + ] + }, + { + "cell_type": "markdown", + "id": "53cb0349-2f69-4944-a68b-138a527c786c", + "metadata": {}, + "source": [ + "Alchemistry's Editors:\n", + "* Michael R Shirts - an associate professor at [The University of Colorado Boulder](https://www.colorado.edu/) in the department of chemical engineering, formally of [The University of Virginia](http://www.virginia.edu). He has done a number of studies on free energy methods and calculation: see [Michael Shirts' group page.](https://www.colorado.edu/chbe/michael-r-shirts)\n", + "* Levi Naden - a Software Scientist of the [The Molecular Sciences Software Institute (MolSSI)](https://molssi.org/), formerly Postdoctoral Fellow [in the Chodera Lab](http://www.choderalab.org/) at Memorial Sloan Kettering Cancer Center, and formally a graduate student [in the Shirts Group](http://faculty.virginia.edu/shirtsgroup/people.php#grads). Background in computational free energy software development and currently CMS software development and training.\n", + "* [David L Mobley](http://www.mobleylab.org) - an assistant professor at [University of California-Irvine](http://www.uci.edu) in the Department of Pharmaceutical Sciences and Chemistry, who works mostly on free energy calculations for ligand binding and small molecule solvation. See [David Mobley's group page.](http://www.mobleylab.org)\n", + "* John D Chodera - an Assistant Member at the [Memorial Sloan-Kettering Cancer Center](http://www.mskcc.org) in the Computational Biology Program He has diverse interests including free energy calculations and experimental measurements of binding affinities.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/events/2024WorkFEDD.ipynb b/alchemistry/events/2024WorkFEDD.ipynb new file mode 100644 index 0000000..9e4edc8 --- /dev/null +++ b/alchemistry/events/2024WorkFEDD.ipynb @@ -0,0 +1,215 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3fb82f10-7d82-474f-9b08-d7b2d72aae21", + "metadata": {}, + "source": [ + "(2024WorkFEDD)=\n", + "# 2024 Workshop on Free Energy Methods in Drug Design" + ] + }, + { + "cell_type": "markdown", + "id": "51ff7f91-236b-4b06-b639-a04ab9f09836", + "metadata": {}, + "source": [ + "Our goal in this workshop is to bring together experts from pharma and supporting industries, as well as academia, in an intense and focused workshop to identify challenges and help chart the path forward. We are particularly interested in hearing about use cases, pitfalls and their solutions. We also firmly believe we can learn a great deal from failure, so we hope participants will go beyond just highlighting success stories to provide more detailed insight into successes and failures.\n", + "\n", + "Previous iterations of this workshop: https://alchemistry.org/wiki/Events\n", + "\n", + "## Dates and Location\n", + "The conference is scheduled for 13-15 May 2024, at the [Scheltema in Leiden, The Netherlands - Marktsteeg 1, 2312 CS](https://www.scheltemaleiden.nl/en/)\n", + "\n", + "Directly following Alchemistry 2024, the [Open Molecular Software Foundation](https://www.linkedin.com/company/omsf) will host it's annual gathering - OMSF Symposium 2024. This event will bring together OMSF's community of software developers, academics, and industry partners, focusing on its [open source molecular software projects](https://omsf.io/projects/project-list/) and their impact. For more information and registration please see [this page.](https://omsf.notion.site/OMSF-Symposium-2024-077c73e9d80a4a0ba01fcbafd20885fa)\n", + "\n", + "```{image} ../images/events/2024WorkFEDD/Scheltema.jpeg\n", + ":width: 500px\n", + ":alt: https://www.scheltemaleiden.nl/en/\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "53f3b06f-bd4c-4561-b1b6-650da9f7a84a", + "metadata": {}, + "source": [ + "## Registration \n", + "\n", + "Registration for this event has opened. This is managed by our partner company Congress by Design and can be completed by following [this link.](https://cbd.eventsair.com/alchemical-free-energy-workshop-2024/registration/Site/Register)\n", + "\n", + "For this event no further abstracts will be accepted until further notice. " + ] + }, + { + "cell_type": "markdown", + "id": "a9ca1037-9bf7-4722-8588-09f017e86e61", + "metadata": {}, + "source": [ + "## Confirmed speakers & workshop themes \n", + "\n", + "We are honored to host Prof. Max Welling for a keynote on May 14th. Prof. Welling is a renowned and highly influential figure in machine-learning and will give his insights on its role in science.\n", + "\n", + "```{image} ../images/events/2024WorkFEDD/Day1.png\n", + ":width: 400px\n", + ":alt: FEworkshop_program_1\n", + "```\n", + "```{image} ../images/events/2024WorkFEDD/Day2.png\n", + ":width: 400px\n", + ":alt: FEworkshop_program_2\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "8a1db422-109a-41d6-8e8e-76170f59b94e", + "metadata": {}, + "source": [ + "## Accommodation\n", + "\n", + "You will need to book your own stay in Leiden; there are many hotels in Leiden that will be able to house you. There are many small hotels close to the event venue - here we are listing several of the major hotels that have confirmed vacancies during the week of the workshop and are 10 minutes away on foot:\n", + "\n", + "- [Golden Tulip](https://goldengreenhotels.nl/en/)\n", + "\n", + "- [Ibis](https://all.accor.com/hotel/8087/index.en.shtml?utm_campaign=seo+maps&utm_medium=seo+maps&utm_source=google+Maps)\n", + "\n", + "- [VIC](https://www.hotelvic.nl/en)" + ] + }, + { + "cell_type": "markdown", + "id": "b6d5cb2d-0a80-418a-b5b0-529fbf7e97d9", + "metadata": {}, + "source": [ + "## Travel & public transport\n", + "\n", + "Public transport is very well connected in The Netherlands. Although rechargeable public transport cards can be purchased, it is recommended to use your contactless bank card for checking in and out of trains, buses, trams and subways. In principle all Mastercard and Visa cards will work, for more information see [the national railway website FAQ](https://www.ns.nl/en/customer-service/payment/ovpay.html)\n", + "\n", + "To reach Leiden from Schiphol airport, you can take a train (every 10-15 minutes from Schiphol), or a taxi (~25 min drive, recommended to pre-book to save costs)." + ] + }, + { + "cell_type": "markdown", + "id": "404051a0-5a99-4416-9f38-5da93407bed4", + "metadata": {}, + "source": [ + "## Social Media \n", + "\n", + "* X hashtags: [#alchemy2024](https://twitter.com/hashtag/alchemy2024)\n", + "* Slack: [https://alchemistry.slack.com https://alchemistry.slack.com]\n", + "\n", + "\n", + "Join the slack workspace using [this link](https://join.slack.com/t/alchemistry/shared_invite/zt-1ujdwluzr-BQDUasqHrpnoL_A5gUcG1w)" + ] + }, + { + "cell_type": "markdown", + "id": "eb029bd2-e405-4aa2-b9ba-80738ad9fe97", + "metadata": {}, + "source": [ + "## Sponsors \n", + "\n", + "We are currently looking for sponsorship for the event. If your organisation would be interested, please contact Vytautas Gapsys." + ] + }, + { + "cell_type": "markdown", + "id": "bc2da4f7-f5cc-4a5d-b196-78120d22a07e", + "metadata": {}, + "source": [ + "## Posters \n", + "\n", + "During this workshop we will be able to host 30-40 poster presentation slots. There will be two poster prizes hosted by **Acellera** and **MODSIM Pharma**, respectively. If you wish to print your poster in Leiden, we recommend using a local copying store called [CopyCopy (an 8-min walk from the venue)](https://www.copycopyleiden.nl/service/posters-2/): you can email them a PDF of your poster on Friday 11th May at the latest. They are not opened on Sundays, but are available for pickup on Monday 13th May between 1PM and 6PM.\n", + "\n", + "```{image} ../images/brands/Acellera_TX_logo_300.png\n", + ":width: 400px\n", + ":alt: Sponsor Acellera\n", + "```\n", + "```{image} ../images/brands/Helix-blue.jpeg\n", + ":width: 400px\n", + ":alt: Sponsor Modsim\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "0dd9fd22-a6c2-4831-ab5c-dda8daedce19", + "metadata": {}, + "source": [ + "## Organizing Committee \n", + "\n", + "* Irfan Alibay - irfan.alibay@omsf.io\n", + "* James Eastwood\n", + "* Vytautas Gapsys - vytautas.gapsys@mpinat.mpg.de\n", + "* Bert de Groot\n", + "* Hugo Gutierrez De Teran\n", + "* David Hahn\n", + "* Willem Jespers\n", + "* Dmitry Lupyan - dmitry.lupyan@schrodinger.com\n", + "* Jenke Scheen - jenke.scheen@choderalab.org\n", + "* Barbara Zarzycka" + ] + }, + { + "cell_type": "markdown", + "id": "0c5e91ee-66f4-4e28-907a-8d1268a8bb1e", + "metadata": {}, + "source": [ + "## Conference Code of Conduct \n", + "\n", + "All attendees, speakers, sponsors and volunteers at our conference are required to agree with the following code of conduct. Organisers will enforce this code throughout the event. We expect cooperation from all participants to help ensure a safe environment for everybody.\n", + "\n", + "Need Help? Please contact the organising committee either in person or via email. Members of the organising committee are listed above.\n", + "\n", + "### The Quick Version \n", + "Our conference is dedicated to providing a harassment-free conference experience for everyone, regardless of gender, gender identity and expression, age, sexual orientation, disability, physical appearance, body size, race, ethnicity, religion (or lack thereof), or technology choices. We do not tolerate harassment of conference participants in any form. Sexual language and imagery is not appropriate for any conference venue, including talks, workshops, parties, Twitter and other online media. Conference participants violating these rules may be sanctioned or expelled from the conference without a refund at the discretion of the conference organisers.\n", + "\n", + "### The Less Quick Version \n", + "Harassment includes offensive verbal comments related to gender, gender identity and expression, age, sexual orientation, disability, physical appearance, body size, race, ethnicity, religion, technology choices, sexual images in public spaces, deliberate intimidation, stalking, following, harassing photography or recording, sustained disruption of talks or other events, inappropriate physical contact, and unwelcome sexual attention.\n", + "\n", + "Participants asked to stop any harassing behavior are expected to comply immediately.\n", + "\n", + "Sponsors are also subject to the anti-harassment policy. In particular, sponsors should not use sexualised images, activities, or other material. Booth staff (including volunteers) should not use sexualised clothing/uniforms/costumes, or otherwise create a sexualised environment.\n", + "\n", + "If a participant engages in harassing behavior, the conference organisers may take any action they deem appropriate, including warning the offender or expulsion from the conference with no refund.\n", + "\n", + "If you are being harassed, notice that someone else is being harassed, or have any other concerns, please contact a member of the organising committee immediately. \n", + "\n", + "A member of the organising committee will be happy to help participants contact local law enforcement, provide escorts, or otherwise assist those experiencing harassment to feel safe for the duration of the conference. We value your attendance.\n", + "\n", + "We expect participants to follow these rules at conference and workshop venues and conference-related social events.\n", + "\n", + "The code of conduct text was taken from https://github.com/confcodeofconduct/confcodeofconduct.com in a slightly adapted form." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b26e232a-9736-4e9f-8de5-85c49fb7a97b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/events/PastEvents.ipynb b/alchemistry/events/PastEvents.ipynb new file mode 100644 index 0000000..fc772c9 --- /dev/null +++ b/alchemistry/events/PastEvents.ipynb @@ -0,0 +1,51 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "94021f0d-9183-45d1-b54a-c20b9ee86846", + "metadata": {}, + "source": [ + "(Past)=\n", + "# Past Events" + ] + }, + { + "cell_type": "markdown", + "id": "46382f60-5eea-4f04-812e-d462f7810bde", + "metadata": {}, + "source": [ + "* **[[2023 Workshop on Free Energy Methods in Drug Design]]**\n", + "* **[[2022 Virtual Workshop on Free Energy Methods in Drug Design]]**\n", + "* **[[2020 Workshop on Free Energy Methods in Drug Design]]**\n", + "* **[http://pmx.mpibpc.mpg.de/workshop_alchemistry2019/index.html 2019 Alchemical Free Energy Workshop]**\n", + "* **[[2018 Workshop on Free Energy Methods, Kinetics and Markov State Models in Drug Design]]**\n", + "* **[[2016 Workshop on Free Energy Methods in Drug Design: Targeting Cancer]]**\n", + "* **[[2016 Workshop on Kinetics and Markov State Models in Drug Design]]**\n", + "* **[[2014 Workshop on Free Energy Methods in Drug Design]]**\n", + "* **[[2012 Workshop on Free Energy Methods in Drug Design]]**\n", + "* **[[2010 Workshop on Free Energy Methods in Drug Design]]**\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/events/UpcomingEvents.ipynb b/alchemistry/events/UpcomingEvents.ipynb new file mode 100644 index 0000000..0ff92d6 --- /dev/null +++ b/alchemistry/events/UpcomingEvents.ipynb @@ -0,0 +1,42 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6bfedfab-256b-4ff1-b0e7-5c6e2690c9fa", + "metadata": {}, + "source": [ + "(Upcoming)=\n", + "# Upcoming Events" + ] + }, + { + "cell_type": "markdown", + "id": "bad855e7-d09f-43fa-a5ed-86c4ddde1db0", + "metadata": {}, + "source": [ + "* [](2024WorkFEDD)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/examples/ExamplesIntro.ipynb b/alchemistry/examples/ExamplesIntro.ipynb new file mode 100644 index 0000000..43f640a --- /dev/null +++ b/alchemistry/examples/ExamplesIntro.ipynb @@ -0,0 +1,50 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "03064882-ea08-4df1-9b90-4e8d3049a9b0", + "metadata": {}, + "source": [ + "(Examples)=\n", + "# Examples" + ] + }, + { + "cell_type": "markdown", + "id": "400379e7-b36f-4330-b209-7a55898c8edc", + "metadata": {}, + "source": [ + "Placeholder" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b0fb00f-4f5f-4870-a21c-0a9aa18a67a9", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/external/ExternalTutorials.ipynb b/alchemistry/external/ExternalTutorials.ipynb new file mode 100644 index 0000000..aa4465b --- /dev/null +++ b/alchemistry/external/ExternalTutorials.ipynb @@ -0,0 +1,42 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "97b809f5-4599-4963-a12a-ec7989bf8c0e", + "metadata": {}, + "source": [ + "(Ext_Tut)=\n", + "# External Tutorials" + ] + }, + { + "cell_type": "markdown", + "id": "5ca8cb2d-4422-4973-83ff-16befeb490b7", + "metadata": {}, + "source": [ + "Placeholder" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/fundamentals/Analysis.ipynb b/alchemistry/fundamentals/Analysis.ipynb new file mode 100644 index 0000000..9e64b75 --- /dev/null +++ b/alchemistry/fundamentals/Analysis.ipynb @@ -0,0 +1,117 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e88706f1-7aa3-4e9f-8449-c79d43190930", + "metadata": {}, + "source": [ + "(Analysis)=\n", + "# Analyzing Simulation Results" + ] + }, + { + "cell_type": "markdown", + "id": "97901f2c-4ec3-47fe-bdbb-e9e9db338b4b", + "metadata": {}, + "source": [ + "Once you have [[Thermodynamic_Cycle| setup your transformation pathway]], defined necessary [[Intermediate_States| intermediate states]], [[Simulating States of Interest|run your equilibrium simulation]], and acquired the [[Simulation Information Gathering|correct data from the runs]], you may now analyze the data to get free energies. Each of the free energy techniques discussed in the theory section of these fundamentals talks about how you get the free energies, but will also be recapped here. This page also talks about how you can get uncertainty estimates with the [[#Bootstrap Sampling| bootstrap method]]." + ] + }, + { + "cell_type": "markdown", + "id": "3d66e22d-e8ba-46aa-b9d1-5f0efee80bfe", + "metadata": {}, + "source": [ + "## Calculating Free Energies\n", + "Shown below is a brief summary of the information needed and the method to calculating free energy with the various techniques. If you have read the pages for the free energy techniques, this will be a recap.\n", + "\n", + "* [[Exponential Averaging|EXP]] is a very straightforward calculation. You will need the $\\Delta U$ from a given pair of states. From there, you can calculate the free energy from the effective sample size found by [[Simulation_Information_Gathering#Correlation|correcting for correlation/subsampling]]. Variances will be additive.\n", + "* [[Thermodynamic Integration| TI]] needs $\\frac{dU}{d\\lambda}$ and the average at each of $K$ states needs calculated. Since its more common to have discrete $\\lambda$ states, you will need to choose an appropriate [[Thermodynamic_Integration#Estimating_Free_Energies_with_TI|weighting method]] to correctly calculate the free energy from the [[Simulation_Information_Gathering#Correlation|uncorrelated/subsampled]] data. By-state Variance will not be additive, but this is [[Thermodynamic_Integration#Variance_of_TI|simple to account for]].\n", + "* [[Weighted Histogram Analysis Method|WHAM]] requires binning all your results then calculating $\\Delta U$ from all states. It is highly recommended that you take advantage of the tools already out there for calculating WHAM instead of writing your own. There is no direct way to calculate variance, and so methods such as [[#Bootstrap Sampling|bootstrap sampling]] are needed.\n", + "* [[Bennett Acceptance Ratio|BAR]] requires the $\\Delta U$ from two states. An iterative, often numeric, solution is needed to find the free energy, but this is relatively straightforward and can be done for each pair of states. The variance does not have as simple a relationship as with TI, and methods such as [[#Bootstrap Sampling|bootstrap sampling]] are recommended. There is a Python implementation of BAR available with [https://simtk.org/home/pymbar PyMBAR].\n", + "* [[Multistate Bennett Acceptance Ratio|MBAR]] also requires $\\Delta U$, but it needs it for all combination of states. Again, an iterative set of solutions is needed and it is not recommended that users attempt to code MBAR themselves. Instead, please take advantage of a Python implementation available called [https://simtk.org/home/pymbar PyMBAR]. Error estimates are directly available with MBAR, however, it can still benefit from methods such as [[#Bootstrap Sampling|bootstrap sampling]]." + ] + }, + { + "cell_type": "markdown", + "id": "5cf70428-60d5-46a5-be4d-e4ba4d19bbaf", + "metadata": {}, + "source": [ + "## Bootstrap Sampling\n", + "Bootstrap sampling is a statistical tool in when we can get decent estimates for the uncertainty with very limited data;{{cite|Efron1993}} this method works with all techniques presented in the [[:Category:Free Energy Fundamentals|fundamentals section]]. We carry out bootstrap sampling as the time taken to do bootstrap is often substantially less than the time it would take to generate new samples.\n", + "\n", + "If we assume we have some function, $F(x)$ that is computed from $N$ data samples, $x_1,x_2\\ldots,x_N$, and we have that $\\lim_{N\\to\\infty} F(x) = \\overline{F}$ where $\\overline{F}$ is a constant. As an example, say that $F(x)$ is the simple average of all $x$ \n", + "\n", + "$\\displaystyle F(x)=\\frac{1}{N}\\sum\\limits_{i=1}^N x_i$,\n", + "\n", + "or the average of some function $ X(x)$ such that\n", + "\n", + "$\\displaystyle F(x)=\\frac{1}{N}\\sum\\limits_{i=1}^N X(x_i)$;\n", + "\n", + "we are not limited to such simple choices as the functions could be as complicated the MBAR or WHAM free energy estimators. To calculate the bootstrap variance, carry out the following procedure.\n", + "\n", + "1. **Pick with replacement** $n$ samples from the complete list of samples $\\vec{x}\\{x_1,\\ldots,x_N\\}$ to create a new set of samples $\\vec{x}_i$. Since you are picking randomly with replacement, there are bound to be repeated samples. This method is called sampling from the *empirical distribution,* that is, sampling from the distribution we measured, rather than true distribution. For example, if $\\vec{x} = \\{1,2,6,4,3\\}$, a possible set of $\\vec{x}_i$ might be $\\vec{x}_i = \\{6,1,1,4,4\\}$; note that $\\vec{x}_i$ is *the same size* as the original $\\vec{x}$ set.\n", + "1. **Compute $\\hat{F}_i = F(x_i)$.** That is, find the estimate for your function based on the bootstrap sample taken from the empirical distribution. If we were finding simple averages, we would calculate the average of the bootstrap sample. If we were calculating free energies with MBAR, we would generate a bootstrap sample at *each* $K$ state and estimate the full free energy at each state based on the bootstrap samples.\n", + "1. **Repeat steps 1 and 2** for $M$ number of times. You will need at least 50-200 times to obtain accurate variances {{cite|Efron1993}}. If the calculation in step 2 is cheap, then $M$ can be very large to get even better estimates of the variance; relative uncertainty scales as $M^{-1/2}$ and can take more than 1000 steps to get variance within 1% convergence.\n", + "1. **Compute the variance** $\\mathrm{var}\\left(\\hat{F}\\right)$, which is the variance over the $M$ bootstrap values. The standard deviation is then just the square root of the bootstrap variance.\n", + "\n", + "The values of $\\hat{F_i}$ are the bootstrap distribution of the function $F(x)$. In the large sample limit, the bootstrap distribution will converge to the true distribution of $\\overline{F}$ under most conditions, but will still be a good estimate for moderate values of $N$. If N is too low though, the bootstrap distribution will not have good agreement with the true distribution.\n", + "\n", + "Bootstrap sampling can be used with *any* statistical estimator, regardless of how complicated. There is the extra overhead of calculating $F(x)$ $M$ times, but this is often negligible compared to how long it would take to collect $M$ times the number of data. For MBAR, the time required for bootstrap is only 5-10min, where as TI is only seconds. There is also a variant of bootstrap called \"moving block bootstrapping\" which accounts for [[Simulation Information Gathering#Correlation time correlations]] *without* subsampling by taking random block of length $\\tau$.{{cite|Efron1993}}" + ] + }, + { + "cell_type": "markdown", + "id": "646d0ba4-90b5-449f-ab18-8d13ec60713f", + "metadata": {}, + "source": [ + "### Example of Bootstrap Method\n", + "```{figure} ../images/Bootstrap_Comparison.png\n", + "---\n", + "width: 400px\n", + "name: bootstrapExample\n", + "---\n", + "A comparison of the estimated distribution of exponential average (EXP) results using bootstrap sampling to the true distribution obtained using multiple draws. Each draw consists of N samples from a normal distribution with mean 0 and variance 1. For N = 10 samples, bootstrap sampling does not give the true distribution; for N = 1000 samples, bootstrap sampling yields a very close match to the distribution, with standard error 0:0437 vs 0:0414. Either 100,000 bootstrap samples or 100,000 independent draws were performed in each case.\n", + "```\n", + "\n", + "Let's take a simple example where we will sample values of $x$ from a 1D Gaussian with mean 0 and variance 1 ($x$~$N(0,1)$). We will now compute [[Exponential Averaging|EXP]] with $k_BT=1$ for this sampling by\n", + "\n", + "$F(x_i) = -\\ln \\left[N^{-1}\\sum_{i=1}^N \\exp(-x_i)\\right]$.\n", + "\n", + "We then compute the distribution of free energies obtained by $N$ points for 100,000 bootstrap samples, and 100,000 independent samples (see figure on right). For small $N$, the distributions are not close, but nearly converge for larger $N$. Even at N=200, the variances are near identical. To emphasize the point, the \"Repeated Samples\" distribution uses data from $1000\\times 100,000$ points collected, where the \"Bootstrap Samples\" needs only *one* set of 1000 samples run through the bootstrap process 100,000 times. The same amount of analysis is carried out in both cases, but the bootstrap samples only require 0.001% of the data collection. One catch to the bootstrap method that should be noted is that rare events requiring more than 1000 samples would not have been observed, so it still takes careful analysis to properly use bootstrap sampling." + ] + }, + { + "cell_type": "markdown", + "id": "51795686-499f-40a7-b08b-15cf18fa577d", + "metadata": {}, + "source": [ + "## References\n", + "\n", + "{{cite|Efron1993|Efron, B., and Tibshirani, R. J. *An Introduction to the Bootstrap*; Chapman and Hall/CRC:Boca Raton, FL, 1993. | http://www.citeulike.org/group/14929/article/9052177}}\n", + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/fundamentals/BennetAcceptanceRatio.ipynb b/alchemistry/fundamentals/BennetAcceptanceRatio.ipynb new file mode 100644 index 0000000..6216fa8 --- /dev/null +++ b/alchemistry/fundamentals/BennetAcceptanceRatio.ipynb @@ -0,0 +1,124 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2a888987-7528-4c31-97fe-a42106e2d34e", + "metadata": {}, + "source": [ + "(BAR)=\n", + "# Bennett Acceptance Ratio" + ] + }, + { + "cell_type": "markdown", + "id": "168017bc-7f94-4e48-8e7d-2414d129976f", + "metadata": {}, + "source": [ + "The Bennett Acceptance Ratio (BAR) is one of the earliest free energy methods which draws on data from multiple states to estimate the free energy difference; This method has significantly improved results over [[Exponential Averaging|EXP]]. Both EXP and [[Thermodynamic Integration|TI]] require the ensemble average from a *single* state to estimate free energies. Although TI needs the derivatives at state $k$, it does not require the configurations from any neighboring state; BAR however, *requires configuration information from two states* to estimate free energy differences.\n", + "\n", + "BAR works under the principal that at the same configuration, $\\vec{q}$, at two separate states, $i$ and $j$, there is a pathway connecting the two potentials and a difference of $\\Delta U_{ij}(\\vec{q})$. Because the states are in the same configuration, there is a exact relationship between the distributions of potential energy differences $\\Delta U_{ij}(\\vec{q})$ of states sampled from $i$ and $\\Delta U_{ji}(\\vec{q})$ the distribution of potential energy differences sampled from the state $j$.{{Cite| Crooks2000}}. Since it's an exact function of distributions, statistics can be applied to find the optimal way to use the information between two states, improving the free energy estimate. This is where Bennett started his derivation and, since he was the first to derive this, the method was named after him.{{Cite | Bennett1976}}" + ] + }, + { + "cell_type": "markdown", + "id": "7d23a23a-25ac-4bcd-ba0a-b8fc3f8d8256", + "metadata": {}, + "source": [ + "## Derivation\n", + "This derivation starts from a slightly modified version of the [[Theoretic Principals#Core Free Energy Equation | core free energy equation]]. Taking the properties of expectation values, we can write the free energy difference as\n", + "\n", + "$\\Delta A_{ij} = -k_{B} T \\ln \\frac{Q_j}{Q_i} = k_{B}T \\ln \\frac{\\left\\langle\\alpha(\\vec{q}) \\exp[-\\beta U_{i}(\\vec{q})]\\right\\rangle_j}{\\left\\langle\\alpha(\\vec{q}) \\exp[-\\beta U_{j}(\\vec{q})]\\right\\rangle_i}$\n", + "\n", + "which is true for any $\\alpha(\\vec{q})>0$ for all $\\vec{q}$. This was Bennett's start point and he then used variational calculus to select the value pf $\\alpha(\\vec{q})$ minimizing the variance of the free energy. The end result is an implicit function of the free energy, and the number of samples at each state, $n_i$ and $n_j$, and is\n", + "\n", + "$\\sum_{i=1}^{n_i} \\frac{1}{1 + \\exp(\\ln(n_i/n_j) + \\beta \\Delta U_{ij} - \\beta \\Delta A))} - \\sum_{j=1}^{n_j} \\frac{1}{1 + \\exp(\\ln(n_j/n_i) - \\beta \\Delta U_{ji} + \\beta \\Delta A))} = 0$\n", + "\n", + "which must be solved numerically. This is the full BAR equation and its full derivation can be found in Bennett's paper.{{cite|Bennett1976}} To get the equation above from Bennett's paper, you will have to take the exponential of both sides of his self-consistent Equation 12, then a bit of algebra to get the end result. This equation can also be derived from a maximum likelihood approach.{{Cite| Shirts2003}} There are a number of other equivalent ways to express this implicit equation." + ] + }, + { + "cell_type": "markdown", + "id": "ed908939-535f-492e-aa8d-72d7d0fa9832", + "metadata": {}, + "source": [ + "## Comparison with Other Methods\n", + "BAR has been shown to be superior to [[Exponential Averaging|EXP]] in every way (save maybe simplicity). BAR is not only better from a practical and theoretical aspect,{{cite| Shirts2005}}{{cite| Lu2003}} and it is shown to converge to EXP in assuming all samples come from one state.{{cite|Bennett1976}}{{Cite| Shirts2003}} Even so, less phase overlap is needed to run BAR effectively.\n", + "\n", + "Comparing [[Thermodynamic Integration|TI]] and BAR is not such a simple thing since each approach requires very different information. Based on experience, BAR will do better than TI on average, although more details are needed to make a better comparison. One such detail is that BAR can give in fewer intermediate states the same statistical precision as TI. However, if the integrand is very smooth for TI, it will perform at the same level as BAR (examples of this are changes, not [[Intermediate States | removing]], non- and bonded parameters).{{cite|Shirts2003}}{{cite|Ytreberg2006}} One advantage that could be attributed to BAR over TI is that you do not need to calculate {{#tag:math|{{dudl}}}} and so you do not have to modify your code to do so.\n", + "\n", + "A variant of BAR which takes in data from more than two states has been developed, it is coincidentally called the [[Multistate Bennett Acceptance Ratio]] or MBAR for short." + ] + }, + { + "cell_type": "markdown", + "id": "ff1f1bef-6546-4231-9f20-a2517ebc4bd2", + "metadata": {}, + "source": [ + "## Estimating Accurate Free Energies with BAR\n", + "BAR has a very straightforward method to calculate the free energy difference between two states. It still suffers from the same [[Intermediate States#Rules of Thumb for Intermediate States|general problems]] as the other methods do with regards to variance along the pathway, what parameters are changing, sequence of change etc. and must still be treated with care. BAR can estimate free energies between many states, however, it must do this a pair at a time. As such, BAR requires information collected from $k-1$, $k$, and $k+1$ state for each configuration stored.\n", + "\n", + "BAR require an iterative solution but all the samples will be correlated since results are taken from adjacent states. Unlike in [[Thermodynamic Integration|TI]], however, the results are not so clean to get an error estimate. More indirect methods, such as [[Analyzing Simulation Results#Bootstrap Method | Bootstrap Sampling]] are required to get variances. Furthermore, subsampling is needed since each state can have different correlation times." + ] + }, + { + "cell_type": "markdown", + "id": "db35fa11-d6a9-4dc5-b4b6-3bd661e724dc", + "metadata": {}, + "source": [ + "## Download BAR\n", + "Although you could write your own BAR code for analysis, there is a free, python-based code available for download at [https://simtk.org/home/pymbar https://simtk.org/home/pymbar]. Although this site is mostly for MBAR, there is a BAR implementation bundled with it along with examples." + ] + }, + { + "cell_type": "markdown", + "id": "3867464f-f829-43b7-b52e-ea6c79039a98", + "metadata": {}, + "source": [ + "## References\n", + "\n", + "{{Cite| Crooks2000 | Crooks, G. E. (2000) Path-ensemble averages in systems driven far from equilibrium. *Phys. Rev. E* 61, 2361–2366. | http://www.citeulike.org/group/14929/article/9052148}}\n", + "\n", + "{{Cite | Bennett1976 | Bennett, C. H. (1976) Efficient Estimation of Free Energy differences from Monte Carlo Data. *J. Comput. Phys.* 22, 245–268. | http://www.citeulike.org/group/14929/article/9052076}}\n", + "\n", + "{{cite| Shirts2005 |Shirts, M. R., and Pande, V. S. (2005) Comparison of efficiency and bias of free energies computed by exponential averaging, the Bennett acceptance ratio, and thermodynamic integration. *J. Chem. Phys.* 122, 144107.|http://www.citeulike.org/user/ashaytan/article/2284440}}\n", + "\n", + "{{cite| Lu2003 |Lu, N. D., Singh, J. K., and Kofke, D. A. (2003) Appropriate methods to combine forward and reverse free-energy perturbation averages. *J. Chem. Phys.* 118, 2977–2984.|http://www.citeulike.org/group/14929/article/9052389}}\n", + "\n", + "{{Cite| Shirts2003 |Shirts, M. R., Bair, E., Hooker, G., and Pande, V. S. (2003) Equilibrium free energies from nonequilibrium measurements using maximum-likelihood methods. *Phys. Rev. Let*t 91, 140601.|http://www.citeulike.org/group/14929/article/9052565}}\n", + "\n", + "{{cite|Ytreberg2006|Ytreberg, F. M., Swendsen, R. H., and Zuckerman, D. M. (2006) Comparison of free energy methods for molecular systems. *J. Chem. Phys.* 125, 184114.|http://www.citeulike.org/user/ashaytan/article/1699471}}\n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28ee3fcf-5ff9-48f2-b61c-c6ebd384e2b5", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/fundamentals/ExponentialAveraging.ipynb b/alchemistry/fundamentals/ExponentialAveraging.ipynb new file mode 100644 index 0000000..cfb9830 --- /dev/null +++ b/alchemistry/fundamentals/ExponentialAveraging.ipynb @@ -0,0 +1,95 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a119f00b-d8c3-48f8-a92c-73287764e3ea", + "metadata": {}, + "source": [ + "(exp_avg)=\n", + "# Exponential Averaging" + ] + }, + { + "cell_type": "markdown", + "id": "303403d9-8584-4d41-bfe7-4d8a4daf4522", + "metadata": {}, + "source": [ + "Exponential Averaging (EXP) is one of the earliest free energy methods available to researchers. Mostly used to evaluate between only two states of interest, it has an exact solution where as many other methods require approximations. Other names for this technique are the \"Zwanzig Relationship\" (named after the person who first derived it){{Cite|Zwanzig1954}} and \"free energy perturbation (FEP).\" To avoid confusion, the latter term should be avoided as it can easily be confused with other definitions in the scientific field.\n", + "\n", + "## Derivation\n", + "Starting from the [[Theoretic Principals#Core Free Energy Equation | statistical relation for free energy]] of\n", + "\n", + "$ \\displaystyle A = -\\beta^{-1} \\mathrm{ln}\\, Q$\n", + "\n", + "the free energy difference between two states with potentials $U_0(\\vec{q})$ and $U_1(\\vec{q})$ is simply\n", + "\n", + "$ \\displaystyle A_1 - A_0 = -\\beta^{-1} \\left[\\mathrm{ln}(Q_1) - \\mathrm{ln}(Q_0)\\right] = -\\beta^{-1} \\mathrm{ln}\\left[\\frac{Q_1}{Q_0}\\right] $.\n", + "\n", + "By then adding and subtracting $e^{-\\beta U_0(\\vec{q})}$ from the integral in the partition function in the numerator, we get\n", + "\n", + ":$ \\Delta A_{10} = -\\beta^{-1} \\mathrm{ln} \\left[\\frac{\\int e^{-\\beta \\left(U_1(\\vec{q})-U_0(\\vec{q})+U_0(\\vec{q})\\right)} d\\vec{q}}{Q_0}\\right] = -\\beta^{-1} \\mathrm{ln} \\left[\\frac{\\int e^{-\\beta \\left(U_1(\\vec{q})-U_0(\\vec{q})\\right)} e^{-\\beta U_0(\\vec{q})} d\\vec{q}}{Q_0}\\right]$\n", + "\n", + "which gives the final relationship of:\n", + "\n", + "$ \\displaystyle \\Delta A_{10} = -\\beta^{-1} \\mathrm{ln} \\left\\langle e^{-\\beta \\left(U_1(\\vec{q})-U_0(\\vec{q})\\right)} \\right\\rangle_0 = -\\beta^{-1} \\mathrm{ln} \\left\\langle e^{-\\beta \\Delta U(\\vec{q})} \\right\\rangle_0 $" + ] + }, + { + "cell_type": "markdown", + "id": "d0a1d895-a10e-44a4-8aef-fda2a3377157", + "metadata": {}, + "source": [ + "## Estimating Free Energies and variance of EXP\n", + "Because EXP is a two state method, estimating free energies is straightforward and each state's individual variance is additive because there are only two.\n", + "\n", + "For any given EXP calculation, you only need the $ k $ and one of the $ k \\pm 1 $ states." + ] + }, + { + "cell_type": "markdown", + "id": "41386621-8255-4d22-b1b5-9ac3cb27fd37", + "metadata": {}, + "source": [ + "## Limitations of EXP\n", + "Although this is an exact solution and one of the simplest methods to understand, it is also one of the poorest methods in terms of efficiency. The EXP method does not converge quickly with number of samples, and even converged results show a very poor [[Intermediate States | phase space overlap]]{{Cite|Shirts2005}}{{Cite|Lu2003}} For these reasons, unless the potential energy distributions for all $\\vec{q}$ are known to be small (on the order of 1-2 $k_B T$), then **EXP should not be used.**\n", + "\n", + "There are a few explicit cases where EXP is beneficial:\n", + "* Calculating the free energy difference between a cheap potential and a more expensive potential that are very close to each other. One can simulate at the cheap parameters and estimate results at the expensive parameters.\n", + "* If the unsampled \"target\" state's phase space is a subset of the simulated state, then EXP is much more accurate;{{Cite|Lu2003}}{{Cite|Lu2005}}{{Cite|Wu2005a}}{{Cite|Wu2005b}}{{Cite|Jarzynski2006}} a rigid molecule insertion into a dense fluid is one such example.\n", + "\n", + "Despite these cases, it is challenging to know the phase spaces *a priori* and even then, its not known which state is the best to sample from.\n", + "\n", + "Lastly, EXP only uses two states worth of data for all of its calculations, limiting the ability to efficiently estimate more states." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6aad463-4f95-4da6-b209-33bf28fbfb58", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/fundamentals/ExtractingSimData.ipynb b/alchemistry/fundamentals/ExtractingSimData.ipynb new file mode 100644 index 0000000..9eaab11 --- /dev/null +++ b/alchemistry/fundamentals/ExtractingSimData.ipynb @@ -0,0 +1,117 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "16e9c987-78cc-4479-aa6a-cb1651296d28", + "metadata": {}, + "source": [ + "(Pulling_Sim_Data)=\n", + "# Extracting Simulation Data" + ] + }, + { + "cell_type": "markdown", + "id": "f797f174-867b-4bd4-8137-ca7c5f969a94", + "metadata": {}, + "source": [ + "Once you have actually run your simulations, you now need to extract the correct information and in a meaningful way. It is unfortunately not as easy as simply running all the data through a master analysis program; one must first choose how they will analyze the data (e.g. using any of the analysis methods shown in the theory section), then ensure that your data is both independent and uncorrelated [[Simulating States of Interest| as discussed previously]]." + ] + }, + { + "cell_type": "markdown", + "id": "1edaee77-6c96-4567-a568-73aaf2857b37", + "metadata": {}, + "source": [ + "## Extracting the Correct Information\n", + "For this step, we must ensure that the data we are extracting from our simulation is meaningful to the analysis technique we will run. Recall that each method needs different information\n", + "\n", + "* [[Thermodynamic Integration| TI]] requires $\\frac{dU}{d\\lambda}$ at each $\\vec{q}$ point.\n", + "* [[Exponential Averaging|EXP]] needs *either* $\\Delta U_{k,k+1}(\\vec{q})$ or $\\Delta U_{k,k-1}(\\vec{q})$ depending on which direction EXP is being operated in. \n", + "* [[Bennett Acceptance Ratio|BAR]] must have *both* $\\Delta U_{k,k+1}(\\vec{q})$ and $\\Delta U_{k,k-1}(\\vec{q})$ between all pairs of states.\n", + "* [[Weighted Histogram Analysis Method|WHAM]] and [[Multistate Bennett Acceptance Ratio|MBAR]] have to have the complete set of $\\Delta U_{k,j}(\\vec{q})$ with $j=1\\ldots K$ along the entire transformation path; WHAM must have this information binned.\n", + "\n", + "The potential derivative required for TI must generally be calculated during the simulation; it cannot be postprocessed by a code that does not evaluate the derivatives. The potential energy differences required for EXP, BAR, MBAR, and WHAM be calculated either during the simulation or in post-processing. We recommend calculating the potential differences in code when possible to avoid extra overhead and possible errors produced by running the simulation twice, and to reduce the amount of stored information. Although TI must be usually be calculated in code, as it requires the derivative, there is one condition under which it actually has the fastest computation time. If the alchemical path you have chosen is a [[Intermediate_States#Linear_Alchemical_Potential|linear alchemical path]], then you get\n", + "\n", + "$\\frac{dU}{d\\lambda}=U_0(\\vec{q}) - U_1(\\vec{q})$\n", + "\n", + "which can be calculated with only the endpoint energies. However, because of the [[Intermediate_States#Problems_with_Linear_Transformations | problems with linear paths]], this simplification is rarely that useful." + ] + }, + { + "cell_type": "markdown", + "id": "7b92c15d-7ad3-4895-ba0b-38b5dfb6dd17", + "metadata": {}, + "source": [ + "## Correlation\n", + "Once we have extracted the information, we need to make sure that the data we process to extract free energies is independent. As [[Simulating_States_of_Interest#Independent_and_Uncorrelated_Samples | mentioned in running simulations]], samples close together in time are correlated to each other in all but the most simple systems and we **must** have uncorrelated samples for our data to be meaningful. \n", + "\n", + "The autocorrelation time is a measure which tells us the time between effectively uncorrelated samples and a number of approaches exist for calculating it. The most common start point is to compute the normalized autocorrelation function of an observable *X* over the duration of the whole simulation, $\\mathcal{T}$. We first make a notation of \n", + "\n", + "$\\displaystyle \\delta X(t) = X(t) - \\mathcal{T}^{-1}\\int_{t=0}^\\mathcal{T} X(t) dt$\n", + "\n", + "where we have defined the instantaneous value of *X* less the average value of *X*. We now compute the quantity:\n", + "\n", + "$\\displaystyle C_X(\\Delta t) = \\frac{\\int_{\\tau=0}^{\\mathcal{T}} \\delta X(\\tau) \\delta X(\\tau+\\Delta t) d\\tau}{\\int_{\\tau=0}^{\\mathcal{T}} \\delta X(\\tau)^2 d\\tau}$\n", + "\n", + "where $\\tau$ is the autocorrelation time. If we get $C_X(\\Delta t)=0$ both at and after $\\Delta t$, then the two samples separated by $\\Delta t$ are uncorrelated and can be treated as independent. \n", + "\n", + "Given that we usually have *N* samples taken at $\\delta t$ time apart, then $C_X(\\delta t)$ is now discrete at particular $i$, requiring us to redefine our two equations:\n", + "\n", + "$\\delta X(i) = X(i) - \\frac{1}{N}\\sum_{i=0}^N X(i)$\n", + "\n", + "and\n", + "\n", + "$C_X(i) = \\frac{\\sum_{j=0}^{N} \\delta X(j) \\delta X(j+i)}{\\sum_{j=0}^N \\delta X(j)^2}$\n", + "\n", + "where the autocorrelation time, $\\tau$ is now the integral under $C_X$. One must be careful when integrating this function though as the noise, especially at more than half simulation time, becomes rather substantial. Often, the autocorrelation function can be fit to an exponential, which makes $\\tau$ the relaxation time of the exponential function. A good rule of thumb is that simulations should run a minimum 50$\\tau$ as an estimate since longer correlation times may not be detected in short simulations. Once you have $\\tau$, under standard statistical assumptions, samples can be considered independent if they are spaced by 2$\\tau$. If you do not calculate the correlation times, your statistical uncertainty will be lower than true uncertainty. Fortunately, many simulation packages come with methods, some of which are more sophisticated than that presented here, to calculate the autocorrelation time." + ] + }, + { + "cell_type": "markdown", + "id": "5c54b90f-4495-443f-b4ab-a8bfebad68be", + "metadata": {}, + "source": [ + "### Applying Correlation Corrections\n", + "Once the time is calculated, you still must apply it to your data. If your free energy method computes single state averages, like [[Thermodynamic Integration|TI]], then the average over all samples can be used for your mean; this *included correlated samples.* Your effective variance is then the regular variance multiplied by $\\sqrt{2\\tau/\\Delta t}$ where $\\Delta t$ is the time difference between samples. As an alternate method, or when your averages are not computed from single states, you can *subsample* the data by analyzing only the set of samples separated by 2$\\tau$. Consider the following simple example with [[Bennett Acceptance Ratio|BAR]] wwhere we want the free energy difference between states 1 and 2.\n", + "\n", + "* 5 ns of simulation time\n", + "* 10 ps between each snapshot (500 snapshots total)\n", + "* Assume autocorrelation for 1 → 2 of 20ps\n", + "* Assume autocorrelation for 2 → 1 of 40ps\n", + "\n", + "Under these conditions, when we go to analyze $\\Delta U_{1,2}$, we need every fourth sample as the correlation time is 20 ps and we want samples every 2$\\tau$). Similarly, we should take every eight sample from $\\Delta U_{2,1}$ since the correlation is time is 40 ps and $2\\tau = 80\\,\\mathrm{ps}$. If this is done correctly, we will not have discarded any unique data as the information we ignored is already contained in the samples we kept.\n", + "\n", + "True independent samples between configurations is achieved only if *all* coordinates are also uncorrelated between samples, not just energies. Although independent energies usually implies independent samples, there are situations where the energy is approximately independently sampled within the noise, but the configuration space is not as well sampled. This may occur, for example, when a ligand contains a configuration with comparable binding affinity as another, but rarely visits that conformer. If one were looking only at energies, it may be hard to detect this lack of sampling." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77b5e570-f3d6-4108-9818-e7ffd3e1eede", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/fundamentals/FreeEnergyFundamentals.ipynb b/alchemistry/fundamentals/FreeEnergyFundamentals.ipynb new file mode 100644 index 0000000..34f291f --- /dev/null +++ b/alchemistry/fundamentals/FreeEnergyFundamentals.ipynb @@ -0,0 +1,123 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b9cc9fd4-56b8-4418-8a9e-8a80d37d06e5", + "metadata": {}, + "source": [ + "(fundamentals)=\n", + "# Theory and Free Energy Equations" + ] + }, + { + "cell_type": "markdown", + "id": "82e28a64-060d-4d43-947e-a177c87c4af9", + "metadata": {}, + "source": [ + "To understand the advantages and disadvantages of different free energy methods, it is important to begin with a review of the underlying principles. This page is dedicated to the most fundamental concepts of free energy calculations and is designed to give an in-depth view of the approaches, starting from the basics. This page also contains some of the common nomenclature and symbols that are seen throughout the rest of the free energy fundamentals pages and in the literature as a whole.\n", + "\n", + "This page is not meant to be an end-all repository of the background mathematics and principals required for free energy calculations, but it will serve as a good start point and hopefully a quick reference." + ] + }, + { + "cell_type": "markdown", + "id": "7299ec53-cdfa-4cbb-b001-4b074170c1bc", + "metadata": {}, + "source": [ + "## Why the name \"Alchemical\"?\n", + "One of the first questions, and often most confusing points, about a number of free energy calculations is why we refer to them as \"alchemical\" changes in a large number of computational methods. When people first hear the word \"alchemy,\" they usually think of the medieval alchemists who were trying to turn lead into gold, or other such transformations. How computational free energy adopted the name takes a bit of understanding of simulation limitations and the properties of what we are calculating.\n", + "\n", + "Considering that the natural evolution of some of the processes we try to model are well beyond reasonable simulation time scales, we must come up with very efficient ways to compute the free energy differences. Because free energy is a state variable (path independent), we can design simulations that provide a convenient pathway to computing free energy. Furthermore, since we are doing simulations, we are not limited by experimentally observable conditions so long as we are meticulous with our calculations.\n", + "\n", + "With these observations in mind, it was found that we can simulate modifications to atoms which can change their properties to reflect non-physical or entirely different species. This is roughly the same definition of alchemy from old in that we are changing the properties of the atoms to be something else, although we do it in a mathematically sound and rigorous manner; hence, the term \"alchemical\" was coined as many free energy calculations are \"like alchemy\" in their pathways and methods.\n", + "\n", + "There are of course many factors and checks that must be done to ensure accuracy and robustness of the calculations, many of which will be shown in the rest of the coming free energy fundamentals pages." + ] + }, + { + "cell_type": "markdown", + "id": "f111f73a-ca5e-4dfa-869b-7995f4607b00", + "metadata": {}, + "source": [ + "## Assumptions for the Fundamentals\n", + "The assumptions listed here are carried out through the rest of the fundamentals sections. Free energy calculations can usually be set up without these assumptions, but we'll make these assumptions to simplify the explanations.\n", + "\n", + "* **A standard molecular mechanics model will be assumed**; this includes:\n", + " * Harmonic bond angle terms\n", + " * Periodic dihedral terms\n", + " * Non-bonded terms made up of point charges and Lennard-Jones repulsion/dispersion terms\n", + "* Constant temperature is maintained: (discussed [below](facts_split))\n", + "**Masses do not alchemically change**. If one wishes to do this, substitute all potential energies, $U$, with the more general Hamiltonian, $\\mathcal{H}$.\n", + "**QM/MM will not be considered** for fundamental; there are number of other more complicated factors to consider that are beyond the scope of this discussion.[1]" + ] + }, + { + "cell_type": "markdown", + "id": "f4dd4945-7511-4dc0-8bdf-ca8dcb75e8b3", + "metadata": {}, + "source": [ + "## Free Energy Difference Equation\n", + "Most free energy calculations and methods started from a single core equation derived from statistical mechanics. The free energy difference between two states is directly related to the ratio of probabilities of those states through the partition functions. This free energy difference for an NVT ensemble is\n", + "\n", + "$\\displaystyle \\Delta A_{ij} = -k_B T \\ln \\frac{Q_j}{Q_i} = -k_B T \\ln \\frac{\\int_{\\Gamma_j} e^{-\\frac{U_j(\\vec{q})}{k_B T}} d\\vec{q}}{\\int_{\\Gamma_i} e^{-\\frac{U_i(\\vec{q})}{k_B T}}d\\vec{q}}$\n", + "\n", + "where $\\Delta A_{ij}$ is the Helmholtz free energy difference between state $j$ and state $i$, $k_B$ the Boltzmann constant, $Q$ the canonical partition function, $T$ is the temperature, $U_i$ and $U_j$ are the potential energies as a function of the coordinates and momenta $\\vec{q}$ for two states, and $\\Gamma_i$ and $\\Gamma_j$ are the *phase space volumes* of $\\vec{q}$ over which we sample, or the total set of all allowed positions and momenta of the system.\n", + "\n", + "From this equation, all free energy calculations are derived." + ] + }, + { + "cell_type": "markdown", + "id": "2d040859-e99f-431d-8073-19e467f315e0", + "metadata": {}, + "source": [ + "(facts_split)=\n", + "## Facts from the Free Energy Difference Definition" + ] + }, + { + "cell_type": "markdown", + "id": "e0cfda4d-b4a9-4c15-a9e1-5aa36cce901e", + "metadata": {}, + "source": [ + "There are a number of key notes we can learn from the definition of free energy differences. Each of these can be important in interpreting simulation results.\n", + "\n", + "* **Only free energy *differences* are ever calculated.** There is never a calculation where absolute free energies are needed (and rarely can they be calculated at all) as all of the biological or thermodynamical quantities of interest are based on a free energy difference. As such, there must always be a minimum of two defined thermodynamic states.\n", + " * Even *absolute* free energies of binding are still free energy differences between two states: the ligand restricted to the binding site, and the ligand free to explore all other configurations.\n", + "* **Free energy differences between states at different temperatures are usually not what you want to be calculating** for problems of interest. If it did, you would get $\\Delta A_{ij} = -k_B T_j \\ln Q_j + k_B T_i \\ln Q_i$, which is no longer a ratio calculation and not needed for biological systems of interest. Temperature dependence on free energy is more likely to be \"what is $ \\Delta A_{ij} $ change at two different temperatures?\"\n", + "* **There are two different phase space volumes.** $\\Gamma_i$ and $\\Gamma_j$ are often the same, but they are not required to be. The methods presented here almost always assumes that the two phase spaces overlap. However, when the spaces do not overlap, these methods break down and it is difficult to identify this problem without in depth knowledge of your system. Consider the example of a hard sphere solute with radius $\\sigma$ at state $i$ and a Lennard Jones repulsion/dispersion potential, with the same $\\sigma$ at state $j$. Since $\\Gamma _i$ will not have molecules at a distance less than $\\sigma$, but $\\Gamma_j$ will, the two phase spaces are not the same and these methods will either break down or return very error-prone results.\n", + " * The degree to which the phase spaces are shared is called the \"phase space overlap\". Efficient free energy calculations require significant phase space overlap. There are [[Intermediate_States | a number of strategies to address]] lack of overlap between target spaces, but determining the best way for any given situation is still a research question.\n", + " * It should also be noted that \"near zero probability\" and \"always zero probability\" are two distinct things when considering phase space. So long as there is a chance for an observation to be made, no matter how small, it is considered part of the phase space." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e854ed91-b221-4757-8d32-a3784a0c73ee", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/fundamentals/ImprovingSampling.ipynb b/alchemistry/fundamentals/ImprovingSampling.ipynb new file mode 100644 index 0000000..5d4eeb1 --- /dev/null +++ b/alchemistry/fundamentals/ImprovingSampling.ipynb @@ -0,0 +1,180 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e409c07f-d2ab-4976-93d7-826559eb927e", + "metadata": {}, + "source": [ + "(Sim_Accel)=\n", + "# Improving Sampling and Accelerating Simulations" + ] + }, + { + "cell_type": "markdown", + "id": "513beff4-ee3b-453c-9ab8-b9af96e5e69a", + "metadata": {}, + "source": [ + "In many cases of interest, carrying out robust free energy calculations may require significant investment of computational resources, beyond that which can be obtained by most researchers. This page examines additional tools for accelerating the sampling. As these begin to branch out from the [[:Category:Free Energy Fundamentals|fundamentals of free energy]], we will not go deeply into all these methods. They are not needed to carry out free energy calculations, but they may be extremely useful to converge calculations in complex systems with slow dynamics and we encourage readers to examine these topics to see what will work for them." + ] + }, + { + "cell_type": "markdown", + "id": "4dbdd285-0cde-4fae-861b-61afd568fb58", + "metadata": {}, + "source": [ + "## Umbrella Sampling\n", + "One standard method for improving sampling in atomistic simulations is umbrella sampling,{{cite|Torrie1977}} where bias terms are added to constrain the simulation in some way, then the restraints are removed. This method can lower barriers on the potential energy surface, or restrain simulations to slow-interconverting configurations that are relevant to the binding affinities (e.g. different torsional states). Doing this allows for the free energy components to be properly computed and then combined.{{cite|Wang2006}}{{cite|Mobley2007a}}{{cite|Mobley2007b}}\n", + "If a system has slow degrees of freedom, like some hydration free energies,{{cite|Klimovich2010}} this method allows one to sample more frequently in the slow state.\n", + "\n", + "One can also envision using umbrella sampling to compute the free energy of constraining the free ligand into the bound conformation directly before computing the free energy of binding, and then computing the free energy of releasing these restraints in solution. Doing so usually decreases correlation times for sampling intermediate states, thereby increasing the simulation efficiency. {{cite|Wang2006}}{{cite|Woo2005}}" + ] + }, + { + "cell_type": "markdown", + "id": "0e876ce6-28b0-44b1-9f89-b03aae760e23", + "metadata": {}, + "source": [ + "## Hamiltonian Replica Exchange\n", + "Although it is perfectly valid to run each intermediate state as its own simulation, there are tools that let you run parallel, coupled simulations and swap information between them to improve sampling. One such method is called Hamiltonian Exchange (HEX) or Hamiltonian Replica Exchange (HREX). In HEX, the parallel simulations at each alchemical variable are allowed to swap atoms/molecules (under certain conditions) from state A which has different energy barriers than state B. This enables sampling configurations that may take significant amounts of time to access normally when simulating only one state.\n", + "{{cite|Okamoto2004}}{{cite|Roux2007}}{{cite|Woods2003}}{{cite|Banba2000}}{{Cite|Bitetti-Putzer2003|}}{{cite|Hritz2008}}\n", + "\n", + "We **highly recommend doing HEX methods** as most codes do this on top of well-tested temperature replica exchange methods, and so there is very little overhead. the outputs of Hamiltonian exchange simulations can be analyzed in the same way as the outputs of $K$ uncoupled simulations. These simulations are guaranteed to decorrelate as fast or faster than standard simulations, though the exact amount of improvement depends on the system; at least they will not be any worse!\n", + "\n", + "[[Simulation Information Gathering#Correlation|Correlation]] times with HEX is a bit more complicated since atoms/molecules are swapping along very different trajectories. In this case, you will need to compute along trajectories that sample different states as opposed to a single state." + ] + }, + { + "cell_type": "markdown", + "id": "77beb4ca-cc3f-44e0-b21a-dde019c34962", + "metadata": {}, + "source": [ + "### Some Suggestions for Replica Exchange Simulations\n", + "\n", + "* The level of acceleration one gets is a function both of phase space mixing between the replicas and the acceleration provided by of replicas kinetics that have faster sampling than the original distribution in the degree of freedom of interest. \n", + "* Choice of Hamiltonian/temperature states depends quite a bit on what one actually wants to do: is it just accelerating kinetics, or does one actually care about properties as a function of temperature or calculating free energies? That can change the choice. Identifying a choice of replica that accelerates kinetics in desired way is nontrivial problem. \n", + "* Space replicas to be even in phase space overlap. The easiest way to do this is to guess a spacing, then look at the results pf MBAR /TI after a short run. You can then respace the replicas so the error in the free energies between consecutive states is close to being equal. It is most important to get rid of very bad spacing; there is not much efficiency gain in going from decent spacing to optimal spacing {{cite|Naden2014}}. \n", + "* Acceptance rates should be around 38% to maximize mixing in state space, but anywhere in the 25-50% range is nearly as good, so it is not that useful to tune it too much {{cite|Predescu2005}}.\n", + " * More precise quantitative measurements of state space mixing are determined by taking the transition matrix between states, and maximizing $1-\\lambda_2$, where $\\lambda_2$ is the second largest eigenvalue of the transition matrix. {{cite|Chodera2011}} This quantity is known as the spectral gap, and is related to the rate of mixing in state space. Remember that mixing in state space is insufficient to accelerate sampling; there must be some state where configurations are sampled more quickly. \n", + "* To improve mixing between states, perform multiple replica switches each time one pauses to exchange. {{cite|Chodera2011}} Anecdotal evidence is that effectiveness of multiple switches decreases past 10-100 exchanges, but this is system dependent.\n", + "* Exchange should be as frequent as is permissible given the networking and code {{cite|Chodera2011}}, as long as the velocities are not reassigned on switches. Reassigning velocities too frequently slows kinetics and negates the advantages gained by performing rapid replica switches {{cite|Basconi2013}}." + ] + }, + { + "cell_type": "markdown", + "id": "0b813700-cb4d-4ebc-8f30-af59a56e8afb", + "metadata": {}, + "source": [ + "## Other Methods\n", + "The other methods that will be mentioned briefly are *Expanded Ensemble* and *$\\lambda$-dynamics.* Expanded ensemble works by running a single simulation and sampling both the intermediate states and separate coordinates simultaneously. $\\lambda$-dynamics works by treating the alchemical variable as dynamical, introducing a fictitious mass corresponding to the $\\lambda$ degree of freedom; although this seems rather novel, it is essentially identical to Monte Carlo techniques.{{cite|Banba2000}}{{cite|Guo1998}}{{cite|Kong1996}}{{cite|Li2007}}\n", + "$\\lambda$-dynamics is showing promise, but is still in preliminary stages of development{{cite|Zheng2008a}}{{cite|Zheng2008b}}{{cite|Min2008}}{{cite|Li2007}}{{cite|Min2007}}\n", + "\n", + "In general, we **do NOT recommend Expanded Ensemble or $\\lambda$-dynamics to beginners.** The methods and implementations are not up to the same robustness as HEX yet and there are tweaks and extra parameters that have to be coded to obtain proper convergence. Given more time and development, these methods may become more accessible to beginners and we will be able to recommend them in the future." + ] + }, + { + "cell_type": "markdown", + "id": "296e4966-60e1-45c0-a294-d045e38b084d", + "metadata": {}, + "source": [ + "### Slow Growth \n", + "\n", + "In slow-growth methods, the coupling parameter, $\\lambda$, is slowly varied over the course of one or several simulations to take a system gradually from $\\lambda=0$ to $\\lambda=1$; from this, the free energy difference between endpoints is estimated. In equilibrium methods, on the other hand, separate simulations are run at multiple different lambda values and information from the individual simulations is used to estimate free energy differences between adjoining lambda values.\n", + "\n", + "Slow-growth methods have a number of well-documented problems, such as Hamiltonian lag and hysteresis (*add references*). Additionally, if a slow-growth calculation does not lead to a sufficiently precise free energy estimate, the only way to improve the free energy estimate is to repeat the calculation with a slower rate of change in lambda – the simulation cannot be extended to gain additional precision, meaning that significant data can be wasted. Fast-growth methods, while conceptually appealing, do not appear to offer substantial advantages over equilibrium methods.{{cite|Jarzynski2006}}{{cite|Oostenbrink2006}}" + ] + }, + { + "cell_type": "markdown", + "id": "dc78e840-b064-4e96-91f2-df0be88386d7", + "metadata": {}, + "source": [ + "### Nonequilibrium (Fast Growth) methods \n", + "\n", + "Fast-growth methods are based on a theorem by Jarzynski in 1997{{cite|Jarzynski1997}} that the free energy difference associated with a particular equilibrium process can be computed by taking an the exponential average of the irreversible work done in performing many (potentially rapid) non-equilibrium trials of the same process. \n", + "\n", + "$ \\Delta G = -k_B T \\ln \\left \\langle e^{-W/k_b T} \\right \\rangle $\n", + "\n", + "Where $ W$ is the work to execute the transition between the initial and final thermodynamic state. When applied to alchemical free energy calculations, this simply amounts to estimating free energy differences by performing many different rapid versions of a slow-growth trial – that is, rapidly changing lambda between two values (i.e. 0 and 1) in many separate trials, and monitoring the work done each time. Equilibrium free energy calculations can be thought of as \"instantaneous growth\" as they rely on estimating the free energy difference between neighboring $\\lambda$ values based only on instantaneous evaluations of potential energy differences between $\\lambda$ values (or by evaluation of and extrapolation of $dV/d\\lambda$ at a particular $\\lambda$ value).\n", + "\n", + "## References\n", + "\n", + "{{cite|Torrie1977|Torrie, G. M., and Valleau, J. P. (1977) Non-physical Sampling Distributions in Monte-Carlo Free-Energy Estimation : Umbrella Sampling. *J. Comput. Phys.* 23, 187–199.|http://www.citeulike.org/group/14929/article/9052620}}\n", + "\n", + "{{cite|Wang2006|Wang, J., Deng, Y., and Roux, B. (2006) Absolute Binding Free Energy Calculations Using Molecular Dynamics Simulations with Restraining Potentials. *Biophys. J.* 91, 2798–2814.|http://www.citeulike.org/user/kupopo/article/867499}}\n", + "\n", + "{{cite|Mobley2007a|Mobley, D. L., Chodera, J. D., and Dill, K. A. (2007) Confine-and-release method: Obtaining correct binding free energies in the presence of protein conformational change. *J. Chem. Theory Comput.* 3, 1231–1235.|http://www.citeulike.org/group/14929/article/9052442}}\n", + "\n", + "{{cite|Mobley2007b|Mobley, D. L., Graves, A. P., Chodera, J. D., McReynolds, A. C., Shoichet, B. K., and Dill, K. A. (2007) Predicting absolute ligand binding free energies to a simple model site. *J. Mol. Biol.* 371, 1118–1134.\\http://www.citeulike.org/group/14929/article/9052443}}\n", + "\n", + "{{cite|Klimovich2010|Klimovich, P. V., and Mobley, D. L. (2010) Predicting hydration free energies using all-atom molecular dynamics simulations and multiple starting conformations. *J. Comp. Aided Mol. Design* 24, 307–316.|http://www.citeulike.org/group/14929/article/9520307}}\n", + "\n", + "{{cite|Woo2005|Woo, H.-J., and Roux, B. (2005) Calculation of absolute protein-ligand binding free energy from computer simulation. *Proc. Natl. Acad. Sci.* 102, 6825–6830.|http://www.citeulike.org/group/14929/article/9052663}}\n", + "\n", + "{{cite|Okamoto2004|Okamoto, Y. (2004) Generalized-ensemble algorithms: Enhanced sampling techniques for Monte Carlo and molecular dynamics simulations. *J. Mol. Graph. Model.* 22, 425–439.|http://www.citeulike.org/group/14929/article/9052463}}\n", + "\n", + "{{cite|Roux2007|Roux, B., and Faraldo-Gómez, J. D. (2007) Characterization of conformational equilibria through Hamiltonian and temperature replica- exchange simulations: Assessing entropic and environmental effects. *J. Comput. Chem.* 28, 1634–1647.|http://www.citeulike.org/group/14929/article/9052542}}\n", + "\n", + "{{cite|Woods2003|Woods, C. J., Essex, J. W., and King, M. A. (2003) Enhanced Configurational Sampling in Binding Free Energy Calculations. *J. Phys. Chem. B* 107, 13711–13718.|http://www.citeulike.org/group/14929/article/9052666}}\n", + "\n", + "{{cite|Banba2000|Banba, S., Guo, Z., and Brooks III, C. L. (2000) Efficient sampling of ligand orientations and conformations in free energy calculations using the lambda-dynamics method. *J. Phys. Chem. B* 104, 6903–6910.|http://www.citeulike.org/group/14929/article/9052063}}\n", + "\n", + "{{Cite|Bitetti-Putzer2003|Bitetti-Putzer, R., Yang, W., and Karplus, M. (2003) Generalized ensembles serve to improve the convergence of free energy simulations. *Chem. Phys. Lett.* 377, 633–641.|http://www.citeulike.org/group/14929/article/9052089}}\n", + "\n", + "{{cite|Hritz2008|Hritz, J., and Oostenbrink, C. (2008) Hamiltonian replica exchange molecular dynamics using soft-core interactions. *J. Chem. Phys.* 128, 144121.|http://www.citeulike.org/group/14929/article/9052272}}\n", + "\n", + "{{cite|Chodera2011| Chodera, J.D. and Shirts, M. R. (2011) Replica exchange and expanded ensemble simulations as Gibbs sampling: Simple improvements for enhanced mixing. *J. Chem. Phys.* 135, 194110 | http://www.citeulike.org/group/14929/article/10064105}}\n", + "\n", + "{{cite|Basconi2013|Basconi, J. E. and Shirts, M. R. (2013) Effects of Temperature Control Algorithms on Transport Properties and Kinetics in Molecular Dynamics Simulations. *J. Chem. Theory Comput.* 9, 2887-2899 | http://www.citeulike.org/group/14929/article/12397351}}\n", + "\n", + "{{cite|Naden2014|Naden, L. N., Pham T. T., and Shirts, M. R. (2014) Linear Basis Function Approach to Efficient Alchemical Free Energy Calculations. 1. Removal of Uncharged Atomic Sites. *J. Chem. Theory Comput.\" 10, 1128-1149 | http://www.citeulike.org/group/14929/article/13073244}}\n", + "\n", + "{{cite|Predescu2005|Predescu, C., Predescu, M., Ciobanu, C. V. (2005) On the Efficiency of Exchange in Parallel Tempering Monte Carlo Simulations\n", + "*J. Phys. Chem. B, 109*, 4189-4196 | http://www.citeulike.org/group/14929/article/875546}}\n", + "\n", + "{{cite|Guo1998|Guo, Z., Brooks III, C. L., and Kong, X. (1998) Efficient and flexible algorithm for free energy calculations using the $\\lambda$-dynamics approach. *J. Phys. Chem. B* 102, 2032–2036.|http://www.citeulike.org/group/14929/article/9052235}}\n", + "\n", + "{{cite|Kong1996|Kong, X., and Brooks III, C. L. (1996) $\\lambda$-dynamics: A new approach to free energy calculations. *J. Chem. Phys.* 105, 2414–2423.|http://www.citeulike.org/group/14929/article/9052345}}\n", + "\n", + "{{cite|Li2007|Li, H., Fajer, M., and Yang, W. (2007) Simulated scaling method for localized enhanced sampling and simultaneous \"alchemical\" free energy simulations: A general method for molecular mechanical, quantum mechanical, and quantum mechanical/molecular mechanical simulations. *J. Chem. Phys.* 126, 024106.|http://www.citeulike.org/group/14929/article/9052376}}\n", + "\n", + "{{cite|Zheng2008a|Zheng, L., Carbone, I. O., Lugovskoy, A., Berg, B. A., and Yang, W. (2008) A hybrid recursion method to robustly ensure convergence efficiencies in the simulated scaling based free energy simulations. *J. Chem. Phys.* 129, 034105.|http://www.citeulike.org/user/nathanandrewbaker/article/6361266}}\n", + "\n", + "{{cite|Zheng2008b|Zheng, L., and Yang, W. (2008) Essential energy space random walks to accelerate molecular dynamics simulations: Convergence improvements via an adaptive-length selfhealing strategy. *J. Chem. Phys.* 129, 014105.}}\n", + "\n", + "{{cite|Min2008|Min, D., and Yang, W. (2008) Energy difference space random walk to achieve fast free energy calculations. *J. Chem. Phys.* 128, 191102.}}\n", + "\n", + "{{cite|Li2007|Li, H., and Yang, W. (2007) Forging the missing link in free energy estimations: lambda-WHAM in thermodynamic integration, overlap histogramming, and free energy perturbation. *Chem. Phys. Lett.* 440, 155–159.|http://www.citeulike.org/group/14929/article/9052374}}\n", + "\n", + "{{cite|Min2007|Min, D., Li, H., Li, G., Bitetti-Putzer, R., and Yang, W. (2007) Synergistic approach to improve “alchemical” free energy calculation in rugged energy surface. *J. Chem. Phys.* 126, 144109.|http://www.citeulike.org/group/14929/article/9052435}}\n", + "\n", + "{{cite|Jarzynski1997|Jarzynski, C. Physical Review Letters 1997, 78.|http://www.citeulike.org/group/14929/article/9052290}}\n", + "\n", + "{{cite|Jarzynski2006|Jarzynski, C. Phys. Rev. E 2006, 73, 046105.|http://www.citeulike.org/group/14929/article/9052297}}\n", + "\n", + "{{cite|Oostenbrink2006|Oostenbrink, C.; van Gunsteren, W. F. Chemical Physics 2006, 323.|http://www.citeulike.org/group/14929/article/9052473}}\n", + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/fundamentals/IntermediateStates.ipynb b/alchemistry/fundamentals/IntermediateStates.ipynb new file mode 100644 index 0000000..8df47c2 --- /dev/null +++ b/alchemistry/fundamentals/IntermediateStates.ipynb @@ -0,0 +1,283 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0b79db6a-4691-4540-96dd-5a2e6e0e7f21", + "metadata": {}, + "source": [ + "(Intermediate_States)=\n", + "# Constructing a Pathway of Intermediate States" + ] + }, + { + "cell_type": "markdown", + "id": "c9a98f6d-b738-414d-8c2a-36f4fbbc796a", + "metadata": {}, + "source": [ + "Because the phase space overlap between two target states of interest can be near zero, doing free energy calculations for the two states alone will often have very large errors. When one considers the large number of potential molecules that could be considered interesting, this problem becomes even more pronounced. Because free energy is a state function, we can construct a [[Thermodynamic_Cycle|thermodynamic path]] that takes us through a set of states that improves phase space overlap between states. To put this mathematically, we can improve our results by constructing high phase space overlap intermediates and calculating our free energy difference by\n", + "\n", + "$\\Delta A_{1,K} = \\sum_{i=1}^{K-1} \\Delta A_{i,i+1}$.\n", + "\n", + "One advantage available to computational free energy calculations is the ability to simulate unphysical states. By this, we mean that our **intermediate states do not have to be observable experimentally.** This is a fact that most if not all computational methods out there take advantage of. Again, because free energy is a state function, we simply choose the path of greatest convenience and carry out the calculation across this path. However, choosing the \"correct\" or even a \"good\" path is a very challenging action and is one of the most difficult tasks in the entire field of free energy calculations.\n", + "\n", + "Because our free energy calculations frequently involve transforming one kind of atom at a given site to another kind, the transformations are often referred to as \"alchemical.\"" + ] + }, + { + "cell_type": "markdown", + "id": "f207baf5-2e04-4d99-b980-2c95a9366633", + "metadata": {}, + "source": [ + "## Linear Alchemical Potential\n", + "To be more clear about what it means to define an \"alchemical path,\" one should think of it as defining the thermodynamic path where we modify, remove, or add various forces on an atom. Take for instance estimating the free energy difference between a Lennard-Jones fluid and a Stockmayer fluid, our thermodynamic path would calculate the work and free energy required to switch on the dipole interactions at a rate that maximizes phase space overlap and efficiency.{{cite|Frenkel2002}} Because the atoms changed their interactions with the surroundings without being removed or added from the system, we have directly modified the atoms to create our alchemical path.\n", + "\n", + "Most alchemical transformations can be defined by alchemically scaling the potential in some manner. The simplest of these is the linear transformation, which says that the net potential energy, $U(\\lambda,\\vec{q})$, is the sum of the alchemically modified two end states' potentials, $U_0$ and $U_1$, plus the parts of the potential that are unaffected by the alchemical transformation, $U_{\\mathrm{unaffected}}$; or\n", + "\n", + "$U(\\lambda,\\vec{q}) = (1-\\lambda) U_0(\\vec{q}) + \\lambda U_1(\\vec{q}) + U_{\\mathrm{unaffected}}(\\vec{q})$\n", + "\n", + "where the alchemical variable linearly modifies the confrontational information from each state of interest." + ] + }, + { + "cell_type": "markdown", + "id": "eebd5761-778f-4a21-bdd3-edffa7024b0f", + "metadata": {}, + "source": [ + "### Problems with Linear Transformations\n", + "```{figure} ../images/Linear_LJ_transformation.png\n", + "---\n", + "width: 400px\n", + "name: linearXform\n", + "---\n", + "Linearly scaled Lennard-Jones potential. Note that even with very small $\\lambda$, the singularity at $r=0$ is still very much there. Image recreated based on Beutler *et. al.*{{cite|Beutler1994}}\n", + "```\n", + "The largest problem with linear scaling is that it **gives very unequal phase space overlap** between states. This has been a long acknowledged problem with several easily identifiable causes:\n", + "\n", + "1. Consider the OPLS-AA force field, at $\\lambda=0.1$, the excluded volume of a methane sphere is still 60-70%.\n", + "1. Most potential energy functions have an infinite potential (singularity) at $r=0$. This is usually brought on by the $r^{-12}$ term that appears in Lennard-Jones based potentials and is present at all nonzero values of $\\lambda$. Since most the free energy techniques require some numeric methods, evaluating this singularity diverges the calculations.\n", + "\n", + "This choice also leads to large forces, numerical instabilities, and other problems in simulations near $\\lambda=1$. Formally, it has been shown that this leads to a integrable singularity in $dV/(d\\lambda)$, which means that computing correct free energies with this scheme using thermodynamic integration is impossible using numerical techniques{{cite|Mruzik1976}}{{cite|Mezei1986}}{{cite|Resat1993}} including with more modern methods{{cite|Beutler1994}}{{cite|Pitera2002}}{{cite|Steinbrecher2007}} and similar problems plague free energy perturbation schemes.\n", + "\n", + "This singularity causes the most problems with [[Thermodynamic Integration | TI ]] since that method requires evaluation {{#tag:math| {{dudl}} }} numerically, which diverges at the singularity, but the same issues result in large biases for BAR and MBAR as well, because the phase space volumes do not overlap.\n", + "\n", + "Several workarounds for this have been suggested. One of the simplest is to raise the $\\lambda$ terms to a power greater than or equal to 4 like so:\n", + "\n", + "$\\displaystyle U(\\lambda) = (1-\\lambda)^4 U_0 + \\lambda^4 U_1$\n", + "\n", + "this leads to an integrable singularity in $dV/d\\lambda$, so thermodynamic integration can in principle be done{{cite|Mezei1986}}{{cite|Beutler1994}}. But integrable singularities still pose very substantial problems for molecular simulation, and this approach can still lead to large forces, numerical instabilities and energy conservation problems{{cite|Beutler1994}}{{cite|Steinbrecher2007}} and make free energy differences extremely difficult to converge (D. Mobley, unpub. data).\n", + "\n", + "These methods do converge {{#tag:math| \\left\\langle{{dudl}}\\right\\rangle }} however very slowly with number of samples and still can cause numeric instabilities{{cite|Pitera2002}}{{cite|Steinbrecher2007}}\n", + "\n", + "Other methods have been suggested such as shrinking the core of the atom, but this also causes instabilities{{cite|Pitera2002}}{{cite|Pearlman1995}}{{cite|Wang1994}} and is not practical for systems with many bonds." + ] + }, + { + "cell_type": "markdown", + "id": "ec92585d-8f1a-48e9-a882-8816a67cca11", + "metadata": {}, + "source": [ + "## Soft Core Potentials\n", + "A standard method has been developed to alchemically change molecules to get around the numeric instabilities called a \"soft core potential.\"{{cite|Beutler1994}}{{cite|Zacharias1994}} In these potentials, the configuration variable, $r$, is now coupled to the alchemical variable, $\\lambda$, smoothing out the singularity and looks like\n", + "\n", + "$U(\\lambda,r) = 4\\epsilon\\lambda^n \\left[\\left(\\alpha(1-\\lambda)^m + \\left(\\frac{r}{\\sigma}\\right)^6\\right)^{-2} - \\left(\\alpha(1-\\lambda)^m + \\left(\\frac{r}{\\sigma}\\right)^6\\right)^{-1}\\right]$\n", + "\n", + "where $\\alpha$ is a constant (usually 0.5) and, $m$ and $n$ are positive integers with the original choice being $n=4$ and $m=2$. Recent improvements have shown that keeping $\\alpha=0.5$ and setting $m=n=1$ provides significant improvement to the variance {{cite|Pitera2002}} {{cite|Steinbrecher2007}}{{cite|Shirts2005}} and research into further improving this is still going on.{{cite|Pham2001}}\n", + "\n", + "This potential is now considered the standard for avoiding endpoint errors, and some version of this is common in many software packages that support free energy calculations As a [[Theoretic Principals#|naming convention]], when \"soft core\" is referenced, it refers to the this potential where as \"linear\" refers to the [[#Linear Alchemical Potential | linear alchemical potential]] where the exponent is 1 (i.e. $\\lambda$ *not* $\\lambda^4$).\n", + "\n", + "In summary: Linearly scaling Lennard-Jones interactions back as a function of $\\lambda$ for insertion/deletion of particles is formally incorrect for numerical integration and leads to wrong estimates of free energy differences. While more complicated schemes involving $\\lambda^k$ scaling can be formally correct, there are serious concerns regarding their accuracy. Soft-core potentials provide a rigorously correct, efficient alternative to these and should be used whenever particles are inserted or deleted, preferably with a specific functional form and parameters{{cite|Shirts2005}}, unless future work finds a still more efficient set of parameters." + ] + }, + { + "cell_type": "markdown", + "id": "6eb43bf2-10bd-4fe6-907d-71f77b407e97", + "metadata": {}, + "source": [ + "## Avoid turning off charges and Lennard-Jones sites at the same time\n", + "\n", + "Guideline 2 states that a partial atomic charge should never be allowed to remain on an atom while its Lennard-Jones interactions are being removed. To understand the reason for this, consider two atoms of opposite charge, A and B. Lennard-Jones interactions of atom A are being scaled back. Regardless of the scaling scheme used, at some lambda value, atoms A and B will begin to overlap occasionally, since the final state allows A and B to overlap totally. If A has a remaining partial atomic charge when these overlaps become possible, the two point charges assigned to A and B can actually overlap as well. Since the potential energy of Coulomb interactions between point charges scales as $q_{A} q_{B}/r_{AB}$, where $r_{AB}$ is the distance between A and B, this presents significant problems when $q_A$ and $q_B$ have opposite signs. In particular, there is an infinite energy minimum at $r_{AB}=0$, so the two particles would in principle get trapped on top of one another. \n", + "\n", + "In practice, what usually happens in molecular dynamics simulations in these circumstances is that the forces get extremely large as A and B begin to overlap, and the simulation will crash. Constraint algorithms are often the first to fail, so this may lead to a warning about constraints (i.e. LINCS or SHAKE) and then a crash. This issue is discussed briefly by Pitera and van Gunsteren{{cite|Pitera2002}} and in more detail by Anwar and Heyes{{cite|Anwar2005}}.\n", + "\n", + "In view of this problem, we recommend always turning off partial charges for any atoms for which Lennard-Jones interactions are being removed before doing the Lennard-Jones transformation. Additionally, when Lennard-Jones parameters for an atom are being substantially modified during a free energy calculation (i.e. for relative free energy calculations involving mutation of an atom) and soft-core potentials are employed, similar problems may arise, so it may be useful to remove partial charges on atoms which are being mutated, as well. \n", + "\n", + "Several groups have developed modified electrostatics scaling methods in an attempt to bypass this problem and allow electrostatics interactions and Lennard-Jones interactions to be turned off in only one set of calculations (for example, Anwar and Heyes{{cite|Anwar2005}}), but since electrostatics transformations are usually so smooth a function of $\\lambda$ and need only few $\\lambda$ values for good overlap (Shirts et al.{{cite|Shirts2005}} 2005], Mobley et al.{{cite|Mobley2007}}, and others) it is unclear that this results in any significant efficiency gain over performing the transformations separately. \n", + "\n", + "As noted in Guideline 2, above, electrostatics transformations are typically smooth functions of lambda with good phase-space overlap between even coarsely-spaced lambda values(Shirts et al.{{cite|Shirts2005}} 2005], Mobley et al.{{cite|Mobley2007}}, and others). As a consequence, these are quite efficient compared to Lennard-Jones calculations. As established above, when particles are being inserted or deleted, the electrostatic interactions of these particles should be set to zero before turning off their Lennard-Jones interactions. But what about electrostatic interactions on atoms which are merely being mutated (i.e. a change of partial charge and Lennard-Jones radius), as in relative free energy calculations?\n", + "We are not aware of any study which has looked at this in detail, but given the efficiency of free energy calculations modifying electrostatics interactions relative to those significantly modifying Lennard-Jones interactions, we believe it makes sense to perform the two sets of calculations separately. Given that the two transformations have different lambda-dependences, it might actually be less efficient to perform them together than separately. Performing them separately has an additional advantage, as well: Uncertainties in the two components can be assessed separately, and computational effort focused on reducing the largest uncertainty (i.e. by extending some simulations to get additional sampling).\n", + "Further testing should be focused in this area, to determine whether alternative scaling approaches which can modify Lennard-Jones and electrostatic interactions simultaneously{{cite|Anwar2005}} are actually more efficient than the approach of separate modification that we propose.\n", + "\n", + "In view of this, our recommendation is that either (a) partial charges on any particles being inserted or deleted be turned off prior to the use of soft core potentials for those particles, or (b) a soft core scheme for electrostatics be implemented to allow simultaneous changes." + ] + }, + { + "cell_type": "markdown", + "id": "d0612bbf-4b55-4a35-9c11-949e6d443718", + "metadata": {}, + "source": [ + "## Rules of Thumb for Intermediate States\n", + "This section is just the short version of the discussions found elsewhere(mostly [[#Constructing Intermediate States|below]]) on the site. These rules are not the end-all set and you should be familiar with why each one is suggested before just accepting them.\n", + "\n", + "1. **Bonded terms can be modified/turned off linearly.** This includes angle or bond force constants as well as *unconstrained* bond distances.\n", + "1. ***Constrained* bonds should not change length.** There are free energy changes that cannot be ignored affiliated with this action.{{cite|Boresch1996}}\n", + "1. **Maximize similarity between states by removing/decoupling as few atoms as possible.**\n", + "1. ** Do not open and close rings.** This supersedes the previous rule.\n", + "1. **Statistical uncertainty between any neighboring states should be equal.** Rather challenging to do, but it has been proven to have the lowest variance path if you can pull it off.{{cite|Shenfeld2009}}\n", + "1. **Deleting or adding atoms should always be done with a soft core potential.**\n", + "1. ***Changes* in parameters can be done linearly.**\n", + "1. **All charge on atoms must be turned off prior to atomic repulsion.** Otherwise you can get an infinite attractive potential and crash your simulation. \n", + " * Similarly for only changes in terms, it's generally **more efficient to change electrostatic terms separate from Lennard-Jones terms.**\n", + "1. **More states is better than fewer.** Variance shrinks rapidly with number of states. You want the difference between intermediaries to be between 2-3 $k_BT$. Obviously you will be limited on CPU power. Fewer states also leads to more samples begin required from each state, so take this into account when deciding number of states as well. However, for MBAR and TI, it can be shown that spreading samples across multiple states does not significantly affect the uncertainty, since for TI, each state contributes less to the total uncertainty, and in MBAR, data contributes to the statistical precision of states with similar values of lambda.\n", + "1. **Shape of the variance does not significantly change with number of atoms, only magnitude.**{{cite|Shirts2003}} More intermediates will still be required for a large number of atoms to reduce statistical noise. \n", + "1. **Charge should be maintained across all $\\lambda$.** Simply *having* charged molecules is fine, but the net of the system should remain constant. If you must change the net charge, there are complicated ways to do so.{{cite|Kastenholz2006a}}{{cite|Kastenholz2006b}}\n", + "1. **Short prototype simulations are recommended.** Even as short as 100 ps, the prototypes can provide rough magnitude of variance estimates, although will likely under-predict the free energy as many configurations remain unsampled." + ] + }, + { + "cell_type": "markdown", + "id": "65208292-4aba-4796-8c3a-565b590ce399", + "metadata": {}, + "source": [ + "## Constructing Intermediate States\n", + "This section outlines some of the options one has when constructing their thermodynamic path through the intermediary states. Each method has its own strengths and weakness and should be carefully considered before implementing. Although there is no absolute best way to do this, there are a number of [[#Rules of Thumb for Constructing Intermediate States | best practices]] we strongly consider following, especially for those starting out in free energy calculations. In order to fully understand some of the choices in this section relative to the grand scheme of things, please read the [[Thermodynamic Cycle | thermodynamic cycle]] page.\n", + "\n", + "### Intermolecular Forces\n", + "When designing intermediate states, one must decide early on which forces to change, and how to change them. For intermolecular forces, both the electrostatic and Lennard-Jones interactions must be turned off, however, one **should not change all the forces at the same time.** If one were to turn off the repulsive Lennard-Jones components first, unlike charges on atoms would be infinitely attracted to each other and cause the simulation to crash. As such, one should perform one of the following sequences when *decoupling* atoms and/or molecules:\n", + "\n", + "**Decoupling Scheme A**\n", + "1. Disable all electrostatics linearly\n", + "1. Disable Lennard-Jones terms by soft core\n", + "\n", + "**Decoupling Scheme B**\n", + "1. Disable electrostatics AND dispersion (attractive) terms together linearly\n", + "1. Disable the repulsive components by soft core\n", + "\n", + "Either method is rather efficient{{cite|Wang2006}}{{cite|Steinbrecher2007}} and can be followed in reverse for appearing molecules. Although all the terms could be turned off by soft core at the same time to avoid negative infinities,{{cite|Wang1994}}{{cite|Blondel2004}} parametrization is a pain and hard to generalize.{{cite|Steinbrecher2011}}\n", + "\n", + "For those just starting, we highly recommend Decoupling Scheme A.\n", + "\n", + "### Topologies\n", + "```{figure} ../images/Both_topologies.png\n", + "---\n", + "width: 300px\n", + "name: bothTopologies\n", + "---\n", + "Single topology (A, upper) and dual topology (B, lower) approaches to constructing an alchemical path between ethane and ethanol. D represents non-interacting dummies, while M represents nonphysical intermediate atoms. In a dual topology approach, no atoms change type, only have their interactions turned off from the rest of the system; however, more atoms need to be altered to go from the initial to the final state. Image Source from a chapter by [http://www.faculty.virginia.edu/shirtsgroup/ Shirts] in Computational Drug Discovery and Design{{cite|Baron2012}}\n", + "```\n", + "\n", + "After the intermolecular forces are decided, you will then need to decide the overall method in which you will modify your molecules. There are two main approaches known as the *single topology* approach and the *dual topology* approach; examples of this can be seen in the figure to the side which will be referenced accordingly. Single topology (A, upper) has only one site for any location shared between the end states, and then \"dummy\" atoms to make up for any unique sites. During the transformation, the dummy atoms are transformed into fully interacting atoms, and the shared site atom is transformed directly to a new atom. Dual topology (B, lower) is different in that shared sites between states do **not** share atoms. No atom changes its type, just the interactions with the surrounding system.\n", + "\n", + "The dual topology does require more dummies, which means more CPU power and needed intermediates, however, it has a very strong advantage in that the dummies can simultaneously explore more congregational space while decoupled; this perk is amplified when simulations are run with Hamiltonian exchange or expanded ensemble. One may notice that, even though the dummies have nonbonded interactions, they are still \"bound\" to other atoms which make the end states have slightly nonphysical character. These interactions though can be accounted for by simulating in both vacuum and in molecular surrounds. With fixed bond lengths in the rigid rotor approximation, the effect of these dummies on the free energy is negated.{{cite|Boresch1999}} If the bonds are not constrained, there is a difference, but not enough to be of concern (>0.01 kcal/mol).\n", + "\n", + "In either topology, it may also be necessary to modify the bonded terms (especially for angle and dihedral terms). Because of the time scale of these motions, these terms converge quickly leaving one with small variances but large energy changes. For simple bonded parameters like harmonic spring constants and equilibrium bond lengths, linear changes are perfectly acceptable. *Constrained* bonds are challanging to compute as correction terms are needed from the complete lack of phase space overlap{{cite|Boresch2003}}\n", + "\n", + "Either topological approach will provide the same results and which one is used will often depend on the simulation package/code.\n", + "\n", + "### Alchemically Transforming Rings\n", + "Rings in free energy calculations are rather challenging as they prove the exception to the rule of \"disappear as few atoms as possible.\" The act of opening or closing a ring alchemically involves drastic changes to the phase space overlap which should be avoided if at all possible. As such, rings should be disappeared entirely and the new molecule appeared if you find the need to actually break the ring, and in reverse for appearing the ring. This has been shown to be straightforward with the soft core potentials and is still accurate despite the large number of transformations.{{cite|Shirts2005}}{{cite|Shirts2003}}{{cite|Mobley2007}}{{cite|Nicholls2008}}{{cite|Mobley2009}}\n", + "\n", + "### Pulling Methods\n", + "A rather different approach to the intermediate states is to set up a system where the ligand is physically pulled away from the protein. The two end states are then where the ligand is in its binding pocket (or nearby at least), and then another where the ligand is far enough away to be considered not interacting with the protein; the free energy in this system can be seen as free energy of binding. Carrying these out can be done with either [[Methods and Simulation Workflow|nonequilibrium methods]],{{cite|Ytreberg2009}} or by setting up an umbrella simulation with different overlapping harmonic oscillators at increasing distances from the binding pocket, then calculating the PMF.{{cite|Lee2006}}{{cite|Woo2005}}{{cite|Gan2008}}\n", + "\n", + "Although this may seem simple enough, there are a number of problems with pulling methods and so this method is not recommended for those just starting in free energy methods. That said, this method is still valid and has even been suggested to be very efficient for highly charged ligands.{{cite|Woo2005}}\n", + "\n", + "**Challenges for the pulling methods**\n", + "* Direct exit path for the ligand from the binding pocket may be difficult to identify. This results in large statistical error.\n", + "* Large box sizes are needed for this method to approximate the ligand and protein not interacting.\n", + "* If smaller boxes are required (limited computational resources), mean-field or analytical approximations must be made to estimate the \"infinite distance away\" ligand free energy." + ] + }, + { + "cell_type": "markdown", + "id": "c1550bbc-c82e-4f51-acf8-963a7497c469", + "metadata": {}, + "source": [ + "## References\n", + "\n", + "{{cite|Frenkel2002|Frenkel, D., & Smit, B. (2002). Understanding Molecular Simulation (2nd ed., p. 638). San Diego: Academic Press.}}\n", + "\n", + "{{cite|Beutler1994|Beutler, T., Mark, A., & Schaik, R. van. (1994). Avoiding singularities and numerical instabilities in free energy calculations based on molecular simulations. *Chemical Physics Letters*, 222(June), 529–539.}}\n", + "\n", + "{{cite|Mezei1986|Mezei, M. and Beveridge, D. L. (1986), Free Energy Simulations. Annals of the New York Academy of Sciences, 482: 1–23. doi: 10.1111/j.1749-6632.1986.tb20933.x}}\n", + "\n", + "{{cite|Mruzik1976| Mruzik, M., Abraham, F., Schreiber D., and Pound, G. M., A Monte Carlo study of ion--water clusters, J. Chem. Phys. 64, 481 (1976), DOI:10.1063/1.432264}}\n", + "\n", + "{{cite|Resat1993| Resat, H. and Mezei, M., Studies on free energy calculations. I. Thermodynamic integration using a polynomial path, J. Chem. Phys. 99, 6052 (1993), DOI:10.1063/1.465902}}\n", + "\n", + "{{cite|Zacharias1994|Zacharias, M., Straatsma, T. P., and McCammon, J. A. (1994) Separation-shifted scaling, a new scaling method for Lennard-Jones interactions in thermodynamic integration. *J. Phys. Chem.* 100, 9025–9031.}}\n", + "\n", + "{{cite|Pitera2002|Pitera, J. W., and van Gunsteren, W. F. (2002) A comparison of non-bonded scaling approaches for free energy calculations. *Mol. Simulat.* 28, 45–65.}}\n", + "\n", + "{{cite|Steinbrecher2007|Steinbrecher, T., Mobley, D. L., and Case, D. A. (2007) Nonlinear scaling schemes for Lennard-Jones interactions in free energy calculations. *J. Chem. Phys.* 127, 214108.}}\n", + "\n", + "{{cite|Pearlman1995|Pearlman, D. A., and Connelly, P. R. (1995) Determination of the differential effects of hydrogen bonding and water release on the binding of FK506 to native and TYR82 → PHE82 FKBP-12 proteins using free energy simulations. *J. Mol. Biol.* 248, 696–717.}}\n", + "\n", + "{{cite|Wang1994|Wang, L., and Hermans, J. (1994) Change of bond length in free-energy simulations: Algorithmic improvements, but when is it necessary? *J. Chem. Phys.* 100, 9129–9139.}}\n", + "\n", + "{{cite|Shirts2005|Shirts, M. R., and Pande, V. S. (2005) Solvation free energies of amino acid side chains for common molecular mechanics water models. *J. Chem. Phys.* 122, 134508.}}\n", + "\n", + "{{cite|Wang2006|Wang, J., Deng, Y., and Roux, B. (2006) Absolute Binding Free Energy Calculations Using Molecular Dynamics Simulations with Restraining Potentials. *Biophys. J.* 91, 2798–2814.}}\n", + "\n", + "{{cite|Pham2001|Pham, T. T., and Shirts, M. R. (2011) Identifying Low Variance Pathways for Free Energy Calculations of Molecular Transformations in Solution Phase. *J. Chem. Phys.* 135, 034114.}}\n", + "\n", + "{{cite|Boresch1996|Boresch, S., and Karplus, M. (1996) The Jacobian factor in free energy simulations. *J. Chem. Phys.* 105, 5145–5154.}}\n", + "\n", + "{{cite|Shenfeld2009|Shenfeld, D. K., Xu, H., Eastwood, M. P., Dror, R. O., and Shaw, D. E. (2009) Minimizing thermodynamic length to select intermediate states for free-energy calculations and replica-exchange simulations. *Phys. Rev. E* 80, 046705.}}\n", + "\n", + "{{cite|Shirts2003|Shirts, M. R., Pitera, J. W., Swope, W. C., and Pande, V. S. (2003) Extremely precise free energy calculations of amino acid side chain analogs: Comparison of common molecular mechanics force fields for proteins. *J. Chem. Phys.* 119, 5740–5761.}}\n", + "\n", + "{{cite|Kastenholz2006a|Kastenholz, M. A., and Hünenberger, P. H. (2006) Computation of methodology-independent ionic solvation free energies from molecular simulations. I. The electrostatic potential in molecular liquids. *J. Chem. Phys.* 124, 124106.}}\n", + "\n", + "{{cite|Kastenholz2006b|Kastenholz, M. A., and Hünenberger, P. H. (2006) Computation of methodology-independent ionic solvation free energies from molecular simulations. II. The hydration free energy of the sodium cation. *J. Chem. Phys.* 124, 224501.}}\n", + "\n", + "{{cite|Blondel2004|Blondel, A. (2004) Ensemble variance in free energy calculations by thermodynamic integration: theory, optimal Alchemical path, and practical solutions. *J. Comput. Chem.* 25, 985–993.}}\n", + "\n", + "{{cite|Steinbrecher2011|Steinbrecher, T., Joung, I., & Case, D. a. (2011). Soft-core potentials in thermodynamic integration: comparing one- and two-step transformations. *J. of Comp. Chem.*, 32(15), 3253–63. doi:10.1002/jcc.21909}}\n", + "\n", + "{{cite|Baron2012|Baron, R. (Ed.). (2012). Computational Drug Discovery and Design (p. 628). New York: Humana Press. doi:10.1007/978-1-61779-465-0}}\n", + "\n", + "{{cite|Boresch1999|Boresch, S., and Karplus, M. (1999) The Role of Bonded Terms in Free Energy Simulations. 2. Calculation of Their Influence on Free Energy differences of Solvation. *J. Phys. Chem.* A 103, 119–136.}}\n", + "\n", + "{{cite|Boresch2003|Boresch, S., Tettinger, F., Leitgeb, M., and Karplus, M. (2003) Absolute binding free energies: A quantitative approach for their calculation. *J. Phys. Chem.* A 107, 9535–9551.}}\n", + "\n", + "{{cite|Mobley2007|Mobley, D. L., Dumont, È., Chodera, J. D., and Dill, K. A. (2007) Comparison of charge models for fixed-charge force fields: Small-molecule hydration free energies in explicit solvent. *J. Phys. Chem.* B 111, 2242–2254.}}\n", + "\n", + "{{cite|Nicholls2008|Nicholls, A., Mobley, D. L., Guthrie, P. J., Chodera, J. D., and Pande, V. S. (2008) Predicting Small-Molecule Solvation Free Energies: An Informal Blind Test for Computational Chemistry. *J. Med. Chem.* 51, 769–779.}}\n", + "\n", + "{{cite|Mobley2009|Mobley, D. L., Bayly, C. I., Cooper, M. D., and Dill, K. A. (2009) Predictions of Hydration Free Energies from All-Atom Molecular Dynamics Simulations?a. *J. Phys. Chem.* B 113, 4533–4537.}}\n", + "\n", + "{{cite|Ytreberg2009|Ytreberg, F. (2009) Absolute FKBP binding affinities obtained via nonequilibrium unbinding simulations. *J. Chem. Phys.* 130, 164906.}}\n", + "\n", + "{{cite|Lee2006|Lee, M. S., and Olson, M. A. (2006) Calculation of Absolute Protein-Ligand Binding Affinity Using Path and Endpoint Approaches. *Biophys. J.* 90, 864–877.}}\n", + "\n", + "{{cite|Woo2005|Woo, H.-J., and Roux, B. (2005) Calculation of absolute protein-ligand binding free energy from computer simulation. *Proc. Natl. Acad. Sci.* 102, 6825–6830.}}\n", + "\n", + "{{cite|Gan2008|Gan, W., and Roux, B. (2008) Binding specificity of SH2 domains: Insight from free energy simulations. *Proteins* 74, 996–1007.}}\n", + "\n", + "{{cite|Anwar2005|Anwar, J. and Heyes, D. M., Robust and accurate method for free-energy calculation of charged molecular systems, *J. Chem. Phys.* 122, 224117 (2005), DOI:10.1063/1.1924449}}\n", + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/fundamentals/MBAR.ipynb b/alchemistry/fundamentals/MBAR.ipynb new file mode 100644 index 0000000..498b1da --- /dev/null +++ b/alchemistry/fundamentals/MBAR.ipynb @@ -0,0 +1,138 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0325809e-4725-46c9-b55e-d72fa467d43b", + "metadata": {}, + "source": [ + "(MBAR)=\n", + "# Multistate Bennett Acceptance Ratio" + ] + }, + { + "cell_type": "markdown", + "id": "5cb0c113-cee0-4a31-838d-12ef240546f4", + "metadata": {}, + "source": [ + "The Multistate Bennett Acceptance Ratio (MBAR){{cite|Shirts2008}} is a direct extension to [[Bennett Acceptance Ratio|BAR]] as it allows for assessing data from all states, and predicting the free energy at an unsampled state. MBAR reduces to BAR in the limit that only two states are sampled. This equation of free energy calculations can also be seen as a zero-width bin [[Weighted Histogram Analysis Method|WHAM]]. \n", + "\n", + "Much like WHAM, the free energies provided by this method are only a statistical estimator, however, MBAR has been shown to have the lowest variance estimator to date." + ] + }, + { + "cell_type": "markdown", + "id": "b8bd0716-2d6a-4e59-b961-f7ac99e15b37", + "metadata": {}, + "source": [ + "## Derivation\n", + "MBAR is derived from a set of $K \\times K$ weighting functions, $\\alpha_{i,j}(\\vec{q})$, that minimized the variance during the reweighting across the board. Starting from our [[Theoretic Principals#Core Free Energy Equation| core free energy equation]], we have\n", + "\n", + "$\\displaystyle\\Delta A_{ij} = -\\beta^{-1} \\ln\\frac{Q_j}{Q_i}$\n", + "\n", + "We can also manipulate the same identity Bennett used when starting his derivation{{Cite | Bennett1976}} and then a one extra bit to come up with the relation\n", + "\n", + "$Q_i\\left\\langle\\alpha_{ij}\\exp\\left(-\\beta U_j\\right)\\right\\rangle_i = Q_j\\left\\langle\\alpha_{ij}\\exp\\left(-\\beta U_i\\right)\\right\\rangle_j$\n", + "\n", + "Although this may not seem like much, it does allow us to write out the following:\n", + "\n", + "$\\displaystyle \\sum\\limits_{j=1}^K \\frac{\\hat{Q_i}}{N_i} \\sum\\limits_{n=1}^{N_i} \\alpha_{ij}\\exp\\left(-\\beta U_j(\\vec{q}_{i,n})\\right) = \\sum\\limits_{j=1}^K \\frac{\\hat{Q_j}}{N_j} \\sum\\limits_{n=1}^{N_j} \\alpha_{ij}\\exp\\left(-\\beta U_i(\\vec{q}_{j,n})\\right)$\n", + "\n", + "assuming we use the empirical estimator for the expectation values of $\\left\\langle g \\right\\rangle_i = N_i^{-1}\\sum_{n=1}^{N_i}g(\\vec{q}_{i,n})$\n", + "\n", + "Choosing the optimal $\\alpha_{ij}$ can be done by looking through the literature at extended bridge sampling.{{cite|Tan2004}} We then get an $\\alpha_{ij}$ of:\n", + "\n", + "$\\displaystyle \\alpha_{ij} = \\frac{N_j \\hat{c_j}^{-1}}{\\sum\\limits_{k=1}^K N_k \\hat{c_k}^{-1} \\exp\\left(-\\beta U_k \\right)}$.\n", + "\n", + "After making all the necessary substitutions, we can get an expression for an estimated free energy of:\n", + "\n", + "$\\displaystyle \\hat{A_i} = -\\beta^{-1} \\ln \\sum\\limits_{j=1}^K \\sum\\limits_{n=1}^{N_j} \\frac{\\exp\\left[-\\beta U_i\\right]}{\\sum_{k=1}^K N_k \\exp\\left[\\beta\\hat{A_k} - \\beta U_k\\right]}$.\n", + "\n", + "One of the first things you should notice is that we have a *single* free energy, not a difference. This is not a typo, but the free energies for a given set of states is only uniquely determined up to an additive constant. Because of this, one free energy must be taken in reference and thus we are once again calculating free energy differences.\n", + "\n", + "### Reduced potential \n", + "\n", + "It is important to note that the $U$ that appears in the MBAR derivation and equation is not only valid for potential energies, but any generalized/reduced potential as a function of pressure, volume, chemical potential, and number of particles. \n", + "For example, in a general form, we can take some subset of the additive terms in the following to define the reduced potential $u_i(x)$ for thermodynamic state $i$:\n", + "$\\displaystyle u_i(x) \\equiv \\beta_i [ U_i(x) + p_i V(x) + {\\bf \\mu}_i^\\mathrm{T} {\\bf N}(x) ]$\n", + "The reduced potential function is uniquely defined by some combination of thermodynamic parameters $\\beta_i$ denoting the inverse temperature, $U_i(x)$ denoting the potential energy function, $p_i$ denoting the pressure, and ${\\bf \\mu}_i$ denoting the chemical potential of one or more components of the system.\n", + "These latter two thermodynamic variables are conjugate to the box volume $V(x)$ and particle numbers ${\\bf N}(x)$.\n", + "Use of the reduced potential simplifies the MBAR equations and generalizes them to the computation of arbitrary reduced free energy differences $\\Delta f_{ij} \\equiv \\beta_j A_j - \\beta_i A_i$ among states.\n", + "$\\displaystyle \\hat{f_i} = -\\ln \\sum\\limits_{j=1}^K \\sum\\limits_{n=1}^{N_j} \\frac{\\exp\\left[-u_i(x_{nj})\\right]}{\\sum_{k=1}^K N_k \\exp\\left[\\hat{f_k} - u_k(x_{nj})\\right]}$.\n", + "\n", + "In the sum above $x_{nj}$ indicates the $n$th sample from the $j$th state. Interestingly, in this formula, it actually doesn't matter which sample comes from which state, so it can be rewritten as a sum over all $N = \\sum_{j=1}^K N_j$, so that we have:\n", + "$\\displaystyle \\hat{f_i} = -\\ln \\sum\\limits_{n=1}^{N} \\frac{\\exp\\left[-u_i(x_{n})\\right]}{\\sum_{k=1}^K N_k \\exp\\left[\\hat{f_k} - u_k(x_{n})\\right]}$\n", + "\n", + "Finally, one can rewrite this as:\n", + "$\\displaystyle \\hat{f_i} = -\\ln \\left\\langle \\frac{\\exp\\left[-u_i(x_{n})\\right]}{\\sum_{k=1}^K \\frac{N_k}{N} \\exp\\left[\\hat{f_k} - u_k(x_{n})\\right]}\\right\\rangle $\n", + "\n", + "Where the ensemble averaged over is the **mixture ensemble** that consist of samples taken from each state $i$ $\\frac{N_i}{N}$th of the time.\n", + "\n", + "Remember always **the free energy will change depending on which reduced potential you use,** so please take this into account when working with MBAR." + ] + }, + { + "cell_type": "markdown", + "id": "7d8bd0bf-4260-484f-9284-0071e2f55aca", + "metadata": {}, + "source": [ + "## Estimating Free Energies with MBAR\n", + "MBAR provides the direct equation to find free energies as show above assuming you have the energies. This can be rather difficult to implement for beginners since this does require iterative solutions like BAR (note: see the [[#Download MBAR|free, Python implementation]] below). Despite this, MBAR still has the lowest variance of all the other methods listed under the [[Theoretic Principals|theory]] section of the fundamentals. Further, MBAR has a direct way to calculate errors (see paper{{cite|Shirts2008}} for derivation).\n", + "\n", + "You will need a complete set of $\\Delta U_{k,j}$ for all $K$ states for MBAR to work, just like you do in [[Weighted Histogram Analysis Method|WHAM]]. Fortunately, you can do most of this in post processing if need be. Once again, you do not *need* to calculate $\\Delta U_{k,k}$, but it is still a good self check to ensure the reweighting method is working correctly; all the remaining checks for [[Weighted Histogram Analysis Method#Usage of WHAM|WHAM]] hold as well and should be followed when analyzing data." + ] + }, + { + "cell_type": "markdown", + "id": "3db87126-3d5b-4e63-8bc7-3685cac3187e", + "metadata": {}, + "source": [ + "## Download MBAR\n", + "MBAR may seem like a daunting set of equations to program yourself, so the authors have provided a Python implementation of MBAR for anyone to use, free of charge at [http://github.com/choderalab/pymbar http://github.com/choderalab/pymbar], with a number of examples for a variety of situations at [http://github.com/choderalab/pymbar-examples http://github.com/choderalab/pymbar-examples]The software comes with examples and uses cases. Also bundled with it are the tools to compute expectation values and an implementation of BAR." + ] + }, + { + "cell_type": "markdown", + "id": "05c51e5c-4adb-47f4-a7ac-e6ad9a525c9a", + "metadata": {}, + "source": [ + "## References\n", + "\n", + "{{cite|Shirts2008|Shirts, M. R., and Chodera, J. D. (2008) Statistically optimal analysis of samples from multiple equilibrium states. *J. Chem. Phys.* 129, 129105.|http://www.citeulike.org/user/jamesr/article/3360542}}\n", + "\n", + "{{Cite | Bennett1976 | Bennett, C. H. (1976) Efficient Estimation of Free Energy differences from Monte Carlo Data. *J. Comput. Phys.* 22, 245–268. | http://www.citeulike.org/group/14929/article/9052076}}\n", + "\n", + "{{cite|Tan2004|Tan, Z. (2004). On a Likelihood Approach for Monte Carlo Integration. *J. Am. Stat. Assoc.*, 99(468), 1027–1036.}}\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f7bf493-a014-474d-8cf7-d5959700b3ca", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/fundamentals/RunningSimulations.ipynb b/alchemistry/fundamentals/RunningSimulations.ipynb new file mode 100644 index 0000000..73d7362 --- /dev/null +++ b/alchemistry/fundamentals/RunningSimulations.ipynb @@ -0,0 +1,116 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f0dc2702-7e05-485b-a431-a7c5384116ed", + "metadata": {}, + "source": [ + "(Runnning_Sims)=\n", + "# Running Simulations at States of Interest" + ] + }, + { + "cell_type": "markdown", + "id": "52ffaf67-55ae-42b8-a718-c43fc7b10cc9", + "metadata": {}, + "source": [ + "This section should serve as a guide for the preparation of the simulation as a whole and data collection, not as an answer to the question of \"which simulation software should I use?\" So long as the software can handle free energy calculations (or you can make it), and collects the information the analysis method that you choose needs, then any software is acceptable. Some software may be more suited to certain methods than others, but analysis of this is beyond the scope of this page.\n", + "\n", + "There are three core things you need when running simulations (although more will be discussed below); those items are:\n", + "1. **Simulations must be run at equilibrium.** Special rules apply when running [[#Running Nonequilibrium Simulations| nonequilibrium simulations]].\n", + "1. **All samples must be collected from states of interest.**\n", + "1. **Samples used in the analysis must be uncorrelated and independent.**" + ] + }, + { + "cell_type": "markdown", + "id": "c7f774b4-dea1-490b-85d9-4b6efebc8f51", + "metadata": {}, + "source": [ + "## Running Simulations at Equilibrium\n", + "All simulations used for analysis with any of these methods should be done at equilibrium. This is true not only for the end points, but also for every [[Intermediate States|intermediate state]] you have. Because rare events are often given such a large importance in most simulation packages, running nonequilibrium simulations will give you the wrong results. There are some methods which require nonequilibrium conditions, but even those must start at equilibrium before perturbing the system. There are no hard and fast rules for determining exactly how long one needs to run before equilibration time, so you must use your expertise to ensure that the system is correctly equilibrated.\n", + "\n", + "It is actually rather difficult for your simulation to *start* in equilibrium, especially for explicit solvent systems where the solvent is automatically generated. Many simulation software packages actually run or recommend you run an energy minimization step prior to beginning to correct for possible overlapping molecules. These alone are not enough to sufficiently bring the system into equilibrium. The best way to do this is to actually run the simulation for a period of time and then simply exclude those samples from analysis. This equilibration **must be done at every intermediate** too.\n", + "\n", + "One efficient scheme to get close to equilibration at each lambda is to run short (10-100 ps) simulations at each state, then restart/begin collecting data with the configuration at the end of this time window. This will not guarantee a full relaxation of the system, but it does significantly help reduce potential instabilities in your simulation; full relaxation can take 100s to 1000s of ps or longer in some cases.{{cite|Fujitani2005}} As $\\lambda$ changes, the volume should be allowed to change as well so the solvent density of the system does not change as a result of state. Liquids are incompressible, which means that any small change in volume can have stark changes in the thermodynamic properties. Even when running at [[Theoretic_Principals#Nomenclature and Variables|NVT]] conditions, the average volume from an [[Theoretic_Principals#Nomenclature and Variables|NPT]] equilibration should be used as box fluctuations can cause free energy differences of 0.1-0.3 kcal/mol for typical small molecule solvation.\n", + "\n", + "Solvating small molecules may take 100-500ps, but this is a best case scenario. Large protein-ligand systems started out of equilibrium have very lengthy [[Simulation Information Gathering#Correlation| correlation times]] and may take hundreds of *nano*seconds to equilibrate. Things to watch for when checking equilibration are the system energy ($U$), {{#tag:math|\\left\\langle \\mathrm{d} U / \\mathrm{d} \\lambda \\right\\rangle}}, and the number of hydrogen bonds (for small molecules) for convergence; the hydrogen bonds may be slow to converge.{{cite|Smith2002}}" + ] + }, + { + "cell_type": "markdown", + "id": "12641f28-bd00-43e3-a9e3-3062da5b39bf", + "metadata": {}, + "source": [ + "### Choosing Simulation Parameters\n", + "Free energy calculations typically involve calculating free energy differences that are relatively small compared to the total potential energy of a system. As a consequence, certain simulation parameters which may be unimportant for “typical” molecular dynamics simulations (because they change the total potential energy by such a small percentage, for example) can be tremendously important in free energy calculations. Thus, it is important to think carefully about simulation settings used for free energy calculations. Here are some examples of issues we have found to be important:\n", + "\n", + "* PME parameters: When using lattice-sums (Ewald, PME, PPME, and variants) to handle long range electrostatics, setting details for these can substantially affect computed free energies. In particular, one needs to ensure that the settings chosen give electrostatic interaction energies accurate to very small fractions of a kcal/mol. If this condition is not met, computed free energies can be wrong – sometimes even by several kcal/mol. To test this, one can compute accurate electrostatics energies for a set of simulation snapshots by evaluating their energies using very long electrostatics cutoffs, then re-evaluate energies using shorter cutoffs and lattice-sum electrostatics. Settings for lattice-sum electrostatics and electrostatic cutoffs should be chosen so that the total potential energy from lattice sum is accurate (compared to that evaluated with a very long cutoff) to a very small fraction of a kcal/mol (less than the desired uncertainty in the computed free energies). Some work has found that default settings for some simulation packages may lead to errors in free energies of up to several kcal/mol [Mobley].\n", + "* Thermostat choice: The choice of thermostat (for constant temperature simulations) can be quite important, especially in absolute free energy calculations. Many thermostats perform well when a system has a sufficiently large number of degrees of freedom, but this is not an adequate criteria for absolute free energy calculations, as at the end state in these calculations, a portion of the system typically does not interact with its environment. This portion – i.e. a small molecule – may have comparatively few degrees of freedom. Hence, simulations done with thermostats that do not sample from the correct distribution of kinetic energies even in the limit of few degrees of freedom may exhibit problems such as non-ergodicity near these end states (Shirts, Mobley unpub, Villa and Mark 2001). Thus it is important to use thermostats (like Andersen’s thermostat or dynamics (like Langevin dynamics) which are known to sample from the correct ensemble under such circumstances.\n", + "\n", + "Of course, this is by no means an exhaustive list, and these issues may be simulation-package dependent. We encourage further exploration in this area, and suggest that future work with free energy calculations should at minimum perform the above checks. Preferably, users should get used to testing different settings in their simulation packages to ensure that these are set to obtain sufficient accuracy for free energy calculations. \n", + "\n", + "In addition to these issues which can be particularly important for free energy calculations, there are a lot of choices to be made in setting up your system for simulation that also need to be made in a sensible way. Other issues to consider include such as protonation states, parameters for proteins and small molecules, missing residues in protein structures, simulation box sizes, and so on.\n", + "\n", + "### Running Nonequilibrium Simulations\n", + "Although all the methods discussed here require systems to be at equilibrium, there are free energy methods where some thermodynamic variables change over time, and some amount of work, *W*, is required to make this change. In accordance with basic thermodynamic principals, if the change is done infinitely slowly, the process is considered reversible and the work will be equal to the free energy difference. Since we can't run for an infinite amount of time, *W* will not equal the free energy change and the process will not be reversible. Despite this, it is still possible to write out a free energy change in a formalism developed by Jarzynski{{cite|Jarzynski1997}} where the average over the nonequilibrium trajectories which *started from an equilibrium ensemble* can give you the free energy; The equation is,\n", + "\n", + "$\\Delta G = -\\beta^{-1}\\ln\\left\\langle\\exp\\left(-\\beta W\\right)\\right\\rangle_0$.\n", + "\n", + "If the switch is instantaneous, then this equation is reduced to [[Exponential Averaging|EXP]] as *W* reduces to $\\Delta U_{ij}$. Although this equation is rooted in EXP, it is possible to construct a variant of [[Bennett Acceptance Ratio|BAR]] to do nonequilibrium work, but not MBAR.{{cite|Crooks2000}}{{cite|Shirts2003}} There are a number of studies available for nonequalibrium methods, and it looks as though starting from an equilibrium ensemble is just as if not slightly more efficient than starting from nonequilibrium ensembles.{{cite|Oostenbrink2006}}{{cite|Oberhofer2005}}\n", + "\n", + "In general, it is not recommended for beginners to work with nonequilibrium simulations as there are a number of intricacies involved with setting them up. Even so, there is substantial work in developing this field as it may help improve the efficiency of free energy calculations in general.{{cite|Pohorille2010}}\n", + "\n", + "## Collecting from States of Interest\n", + "This may seem like an unnecessary point, but it is worth mentioning. The main purpose of stating this is to draw attention to the sensitivity of parameters you choose to define your \"end state\" at. If a given set of parameters is $\\lambda$ dependent, then the total change in free energy could well be very large. As such, one needs to ensure that the changes they are inducing to a molecule are as few as possible, or at the very least as efficient as possible.\n", + "\n", + "Take for instance treating your simulation's long range electrostatics with particle mesh Ewald (PME). Under non-alchemical transformations, there are several \"standard\" choices for the parameters which are perfectly acceptable for most simulations. However, if you use this \"standard\" set when modifying partial charges, you can get significant energy differences up to 4 kcal/mol on some molecules. This just shows one example of how parameters that are considered less important in regular simulations are now important in free energy simulations. The general rule to keep in mind for this is: **if the potential energy is changed by the change of a parameter, then dependence of free energy on this parameter should be checked.**\n", + "\n", + "## Independent and Uncorrelated Samples\n", + "The samples must be *independent*, meaning they are uncorrelated in time; this is a basic assumption of all the theories presented here in the fundamentals section. However, for all but the simplest of systems, completely independent samples can be very difficult to generate. For protein-ligand binding affinities, the time scale for some motions may be 100s of ns, meaning truly uncorrelated samples may be impossible to generate in a reasonable amount of time with today's simulation technology. In this case, free energy calculations *might* provide some useful information, but will only be approximations to the correct free energy for that model and cannot be considered reliable.\n", + "\n", + "There are methods for subsampling simulated states and finding correlation times to work around the independent sampling problem, but those have been relegated to [[Simulation Information Gathering#Correlation|their own section]].\n", + "\n", + "## References\n", + "\n", + "{{cite|Fujitani2005|Fujitani, H., Tanida, Y., Ito, M., Shirts, M. R., Jayachandran, G., Snow, C. D., Sorin, E. J., and Pande, V. S. (2005) Direct calculation of the binding free energies of FKBP ligands. *J. Chem. Phys.* 123, 084108.|http://www.citeulike.org/group/14929/article/9052204}}\n", + "\n", + "{{cite|Smith2002|Smith, L. J., Daura, X., and van Gunsteren, W. F. (2002) Assessing equilibration and convergence in biomolecular simulations. *Proteins: Struct., Funct., Bioinf.* 48, 487–496.| http://www.citeulike.org/group/14929/article/12473632}}\n", + "\n", + "{{cite|Jarzynski1997|Jarzynski, C. (1997) Nonequilibrium equality for free energy differences. *Phys. Rev. Lett* 78, 2690–2693.|http://www.citeulike.org/group/14929/article/9052290}}\n", + "\n", + "{{cite|Crooks2000|Crooks, G. E. (2000) Path-ensemble averages in systems driven far from equilibrium. *Phys. Rev. E* 61, 2361–2366.|http://www.citeulike.org/group/14929/article/9052148}}\n", + "\n", + "{{cite|Shirts2003|Shirts, M. R., Bair, E., Hooker, G., and Pande, V. S. (2003) Equilibrium free energies from nonequilibrium measurements using maximum-likelihood methods. *Phys. Rev. Lett 91*, 140601.|http://www.citeulike.org/group/14929/article/9052565}}\n", + "\n", + "{{cite|Oostenbrink2006|Oostenbrink, C., and van Gunsteren,W. F. (2006) Calculating zeros: Non-equilibrium free energy calculations. *Chem. Phys.* 323, 102–108.|http://www.citeulike.org/group/14929/article/9052473}}\n", + "\n", + "{{cite|Oberhofer2005|Oberhofer, H., Dellago, C., and Geissler, P. L. (2005) Biased Sampling of Nonequilibrium Trajectories: Can Fast Switching Simulations Outperform Conventional Free Energy Calculation Methods? *J. Phys. Chem. B* 109, 6902–6915.|http://www.citeulike.org/group/14929/article/9052461}}\n", + "\n", + "{{cite|Pohorille2010|Pohorille, A., Jarzynski, C., and Chipot, C. (2010) Good Practices in Free-Energy Calculations. *J. Phys. Chem. B* 114, 10235–10253.|http://www.citeulike.org/group/14929/article/7540599}}\n", + "" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/fundamentals/ThermodynamicCycle.ipynb b/alchemistry/fundamentals/ThermodynamicCycle.ipynb new file mode 100644 index 0000000..3f92527 --- /dev/null +++ b/alchemistry/fundamentals/ThermodynamicCycle.ipynb @@ -0,0 +1,97 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3dec7429-394d-491f-b9d6-52db96ad4645", + "metadata": {}, + "source": [ + "(The_Cycle)=\n", + "# Free Energy Simulations, Thermodynamic Cycles and Paths" + ] + }, + { + "cell_type": "markdown", + "id": "4ed7fc1d-1fd3-4f0c-be0a-5bf6319f89de", + "metadata": {}, + "source": [ + "In any free energy calculation, one must decide on what thermodynamic cycle they will define as well as the end states of their simulations. This is a very important step as it provides understanding of not only what the simulation is doing, but what you are calculating as well as clue you into what intrinsic errors you may encounter in your run." + ] + }, + { + "cell_type": "markdown", + "id": "e7c2600a-6fa3-4e83-9350-cd7e51789cfe", + "metadata": {}, + "source": [ + "## Choosing End States\n", + "```{figure} ../images/Transformation_small.png\n", + "---\n", + "width: 300px\n", + "name: transform\n", + "---\n", + "Three simple molecules that could serve as your end states for free energy differences\n", + "```\n", + "This is a straightforward enough concept. If you have two species or systems you wish to know the free energy difference of, you simply select the end states of the cycle as those two points. You are not limited to only two end states, but it becomes significantly more challenging to define a thermodynamic path connecting all the states as you increase in count. As a simple example of end states, consider the example shown on the left. Your end states could easily be any of the three molecules; benzene, phenol, or 1,4-dichlorobenzene; and your thermodynamic path would be one that linked two or all three together.\n", + "\n", + "Although this may seem like a trivially easy task, it is an important one as the thermodynamic path you define will strongly depend on these. It is recommended for beginners to start with only pairs of free energy transformations instead of trying to estimate many end states at once; e.g. the sequence of benzene → 1,4-dichlorobenzene, benzene → phenol, then phenol → 1,4-dichlorobenzene (although this could be calculated from the first two).\n", + "\n", + "It is very rare for researches to only need the end states to simulate in free energy calculations, so numerous [[Intermediate States| intermediate states]] and an efficient thermodynamic path are needed. Because free energy is a state function, and because these are simulations, it's often better to select the most efficient or convenient path instead of the most physical path." + ] + }, + { + "cell_type": "markdown", + "id": "e855b843-40bb-4232-8b1f-2069c8177eb1", + "metadata": {}, + "source": [ + "## Constructing a Thermodynamic Path\n", + "```{figure} ../images/Bind_example.png\n", + "---\n", + "width: 400px\n", + "name: path\n", + "---\n", + "Example of using a thermodynamic path to find which molecule has a stronger binding affinity. We could either remove both molecules from the pocket which is time prohibitive, or we could simply transform one molecule to another both in and out of pocket to find the difference that way.\n", + "```\n", + "\n", + "The best way to show how efficient thermodynamic paths can make simulations quicker is with an illustrative example shown on the right. The situation is that we wish to find which ligand, *A* or *B* has a stronger binding affinity to our target protein. One rather physical way would be to simulate each ligand inside the pocket (left side), and have each ligand leave the pocket and move far enough away from the protein to be considered not interacting (right side). Constructing the intermediate states for this could include applying a bias to the ligand or making a large enough box for this to evolve on its own. There are several flaws with this though as it may take an incredibly long time for the system to evolve on its own, not to mention the large resources needed for an appropriate size box to simulate this.\n", + "\n", + "A more efficient way is to simulate the transformation of *A* to *B* both in and out of the pocket in separate simulations and find the change in binding free energies that way by the equation\n", + "\n", + "$\\Delta \\Delta A_{\\mathrm{bind}} = \\Delta A_{\\mathrm{bind}}^B - \\Delta A_{\\mathrm{bind}}^A = \\Delta A_{A\\rightarrow B}^{\\mathrm{bound}} - \\Delta A_{A\\rightarrow B}^{\\mathrm{unbound}}$\n", + "\n", + "which does not require an excessively large box and can be done relatively simply by choosing the unphysical path of transforming the ligands. How you go about transforming molecules is covered in the [[Intermediate_States#Constructing_Intermediate_States|constructing intermediate states]] article.\n", + "\n", + "Because we are now taking a nonphysical path, we must be even more judicious with our verifications. For instance, if you are running a constant volume and temperature simulation, your end states should have the same box volume and temperatures, even if you alchemically changed molecules. Despite the fact that we cannot replicate the thermodynamic path experimentally, there are still restrictions and rules we must abide by to ensure our results are valid.\n", + "\n", + "One of the easiest errors to make when beginning free energy simulations is to turn off all molecular interactions if you are removing an atom or molecule and assume this result is the free energy difference. Remember that our free energy difference is the system with the molecule present, and the system with the molecule existing on its own, so *intramolecular* interactions should have remained on. If you turn off all interactions, then either a second simulation or a correction will be needed to give you the correct results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "222dcb71-4aed-417c-8b71-7f75bc55882f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/fundamentals/ThermodynamicIntegration.ipynb b/alchemistry/fundamentals/ThermodynamicIntegration.ipynb new file mode 100644 index 0000000..13202e9 --- /dev/null +++ b/alchemistry/fundamentals/ThermodynamicIntegration.ipynb @@ -0,0 +1,162 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c990c90d-c854-47a9-b8d2-66b508d732f7", + "metadata": {}, + "source": [ + "(TI)=\n", + "# Thermodyanmic Integration" + ] + }, + { + "cell_type": "markdown", + "id": "b259b838-e677-4bb2-aae7-1c174d4af7e5", + "metadata": {}, + "source": [ + "Thermodynamic Integration (TI) is one of the most common methods for calculating free energy differences, and the easiest to understand. The basic relationship can be calculated by taking the derivative of the free energy difference with respect to $\\lambda$. It is one of the few methods that require calculation of $\\frac{\\partial U(\\lambda,\\vec{q})}{\\partial\\lambda}$. The need to calculate the derivative is also one of its limitations as many simulation packages will not calculate this natively, and can cause problems when numerically evaluating it at $r = 0$. Despite this, it is still one of the most accurate methods if used correctly and it is generally recommended for those new to the field because of its simplicity and ease of use if available in your code of interest." + ] + }, + { + "cell_type": "markdown", + "id": "8465ad70-6c35-434e-98f1-0ffce2aafc39", + "metadata": {}, + "source": [ + "## Derivation\n", + "Starting with the [[Theoretic Principals#Core Free Energy Equation | identity of free energy]]\n", + "\n", + "$\\displaystyle A = -\\beta^{-1} \\ln Q$\n", + "\n", + "taking the derivative with respect to $\\lambda$ yields\n", + "\n", + "$\\displaystyle dA/d\\lambda = -\\beta^{-1}\\frac{d}{d\\lambda} \\ln \\int e^{-\\beta U(\\lambda,\\vec{q})}d\\vec{q} = -\\beta^{-1} \\frac{\\frac{d}{d\\lambda}\\int e^{-\\beta U(\\lambda,\\vec{q})}d\\vec{q}}{Q}$\n", + "\n", + "which can then be written as\n", + "\n", + "$\\displaystyle dA/d\\lambda = -\\beta^{-1}\\frac{-\\beta\\int \\frac{dU(\\lambda,\\vec{q})}{d\\lambda} e^{-\\beta U(\\lambda,\\vec{q})}d\\vec{q}}{Q} = \\left\\langle \\frac{dU(\\lambda,\\vec{q})}{d\\lambda}\\right\\rangle_\\lambda $.\n", + "\n", + "Finally, one can do integration over the whole range of $\\lambda$ to get the final TI equation\n", + "\n", + "$\\displaystyle \\Delta A = \\int_0^1 \\left\\langle \\frac{dU(\\lambda,\\vec{q})}{d\\lambda}\\right\\rangle_\\lambda d\\lambda$." + ] + }, + { + "cell_type": "markdown", + "id": "09e1e73d-01b9-48e4-aef6-463546fe4f0d", + "metadata": {}, + "source": [ + "## Estimating Free Energies with TI\n", + "The above derivation makes it rather simple to estimate free energies from TI as there is no iterative solution needed, and information from only a single state is needed to calculate the derivative. Since we can only perform simulations at a number of $\\lambda$ states, numeric integration schemes are required. \n", + "\n", + "All numeric integration schemes have the form\n", + "\n", + "$\\displaystyle \\Delta A \\approx \\sum_{k=1}^K w_k \\left\\langle \\frac{dU(\\lambda,\\vec{q})}{d\\lambda}\\right\\rangle_k $\n", + "\n", + "where the weights, $w_k$ will depend on which numeric integration style is chosen. We recommend starting users in free energy calculations use the trapezoid rule as it is very straightforward and maximizes flexibility in choice of $\\lambda$ spacing. Under the trapezoid rule, even lambda spacing weights are $\\displaystyle w_1 = w_k = 1/[2(K-1)]$ and $w_{k \\ne 1,K} = 1/(K-1)$. Other weighting schemes have also been tried but are not recommended for beginners as they each have their own benefits and disadvantages and are often system specific. {{Cite|Jorge2010}}{{Cite|Shyu2009}} The worst case scenario is that you have to run a few extra intermediate states to get accurate results." + ] + }, + { + "cell_type": "markdown", + "id": "6093f5a4-c9b4-4ec8-9fcb-267da78edd0a", + "metadata": {}, + "source": [ + "### Calculating the Statistical Error in TI ##\n", + "The statistical error TI is fairly straightforward to calculate, but has one catch beginners should be aware of. Although the information required to calculate $\\left\\langle \\frac{dU}{d\\lambda} \\right\\rangle$ requires information from only the one state, to calculate the free energy over single interval takes values from two or more states at a time, which means that the statistical error over the entire interval does **not add independently.**\n", + "\n", + "Let's look at this with all the mathematical details. The total statistical error is the square root of the variance. Since each of the averages is generated from different simulations, the total variance for TI over the entire interval is a weighted sum of the variances:\n", + "\n", + "$\\mathrm{var}\\left(\\Delta A\\right) = \\sum_{k=1}^{K}w_k^2 \\mathrm{var}\\left(\\frac{dU}{d\\lambda}\\right)_k $.\n", + "\n", + "where the weights are the weights determined by the numerical integration method. To illustrate how this is different from independent addition of the errors or free energies of each interval together, consider the trapezoid rule example. Taking into account the *correct* equation the variance would be\n", + "$\\mathrm{var}\\left(\\Delta A_{1,K}\\right) = \\frac{1}{4}\\mathrm{var}\\left(\\frac{dU}{d\\lambda}\\right)_1 + \\mathrm{var}\\left(\\frac{dU}{d\\lambda}\\right)_2 + \\cdots + \\mathrm{var}\\left(\\frac{dU}{d\\lambda}\\right)_{K-1} +\\frac{1}{4}\\mathrm{var}\\left(\\frac{dU}{d\\lambda}\\right)_K $.\n", + "\n", + "Compare this to the *incorrect* method shown below.\n", + "$\\mathrm{var}\\left(\\Delta A_{i,i+1}\\right) = \\frac{1}{4}\\mathrm{var}\\left(\\frac{dU}{d\\lambda}\\right)_i + \\frac{1}{4}\\mathrm{var}\\left(\\frac{dU}{d\\lambda}\\right)_{i+1}$\n", + "\n", + "$\n", + "\\begin{alignat}{2}\n", + "\\mathrm{badvar}\\left(\\Delta A_{1,K}\\right) &=\\sum_{i=1}^{K-1}\\mathrm{var}\\left(\\Delta A_{i,i+1}\\right) \\\\\n", + " &= \\frac{1}{4}\\mathrm{var}\\left(\\frac{dU}{d\\lambda}\\right)_1 + \\frac{1}{2}\\mathrm{var}\\left(\\frac{dU}{d\\lambda}\\right)_2 + \\cdots + \\frac{1}{2}\\mathrm{var}\\left(\\frac{dU}{d\\lambda}\\right)_{K-1} +\\frac{1}{4}\\mathrm{var}\\left(\\frac{dU}{d\\lambda}\\right)_K \\\\\n", + "\\end{alignat}\n", + "$\n", + "\n", + "The second set of equations is clearly quite different from the correct first set. \n", + "\n", + "To get the correct statistical error, each individual average should be multiplied by $\\sqrt{2\\tau}$ to correct for the [[Simulation Information Gathering#Correlation Time | correlation time]] at each state; error is then $\\sqrt{\\mathrm{var}\\left(\\Delta A_{i,K}\\right)}$." + ] + }, + { + "cell_type": "markdown", + "id": "c659e2dc-0956-49ab-af94-4a7894e2efc5", + "metadata": {}, + "source": [ + "## Problems with TI\n", + "Although TI is one of the simplest free energy methods to analyze, it also suffers from some drawbacks that need to be carefully avoided. For instance, if the curvature of $\\frac{dU}{d\\lambda}$ is large, the bias introduced by discrete $\\lambda$ states becomes significant. So when using TI it is very important that researchers verify that they have gathered data from sufficient numbers of states, such that the free energy becomes independent of the number of states to within statistical precision." + ] + }, + { + "cell_type": "markdown", + "id": "ab445da0-9246-4c2d-bc91-7e5cf8c39dab", + "metadata": {}, + "source": [ + "### Infinite $dU/d\\lambda$\n", + "One of the largest problems with TI is evaluation of {{#tag:math| {{dudl}} }} at $r = 0$ when standard Lennard-Jones potentials are used. The simplest [[Intermediate States#Linear Alchemical Potential| thermodynamic pathway]] one can chose is the linear pathway of\n", + "\n", + "$U(\\lambda,\\vec{q}) = (1-\\lambda)U_0(\\vec{q}) + \\lambda U_1(\\vec{q}) + U_{unaffected}(\\vec{q})$\n", + "\n", + "which is acceptable for changes in parameters, but not for annihilating or decoupling a site because of the $r^{-12}$ term in the Lennard-Jones potentials in $U_i$. The linear transformation will always have an infinite potential at $r=0$ leading to numeric instabilities for evaluating {{#tag:math | {{dudl}} }} in TI. Although there are ways to get around this, they do not converge very well with any function of $\\lambda$ taking the form shown above. However, if one were to use a [[Intermediate States#Soft Core Potentials | soft core potential]], this problem can be mostly avoided." + ] + }, + { + "cell_type": "markdown", + "id": "b93ab9c0-f5f2-4493-8a17-545fb01b099e", + "metadata": {}, + "source": [ + "### Modifying Simulations\n", + "Because most other free energy methods do not need to evaluate {{#tag:math | {{dudl}} }}, many simulation codes do not natively support evaluating this quantity. If the thermodynamic path is constructed with a [[Intermediate States|linear transformation]], then the derivative can be evaluated in post-processing knowing the energy of the system. However, if the transformation is done with [[Intermediate States#Soft Core Potentials | soft core potentials]], then the derivative will need to be evaluated in code, and it will often be necessary for researchers to modify the code in-house as many simulation packages do not evaluate this quantity. It would probably better to use another method if the derivative is not explicitly calculated." + ] + }, + { + "cell_type": "markdown", + "id": "34d67def-07d6-48a0-ad8b-6f8d34571c26", + "metadata": {}, + "source": [ + "## References\n", + "\n", + "{{Cite|Jorge2010|Jorge, M., Garrido, N., Queimada, A., Economou, I., and Macedo, E. (2010) Effect of the Integration Method on the Accuracy and Computational Efficiency of Free Energy Calculations Using Thermodynamic Integration. *J. Chem. Theo. Comp.* 6, 1018–1027.}}\n", + "\n", + "{{Cite|Shyu2009|Shyu, C., and Ytreberg, F. M. (2009) Reducing the bias and uncertainty of free energy estimates by using regression to fit thermodynamic integration data. *J. Comp. Chem.* 30, 2297–2304.}}\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0af67dc4-63ae-47ff-8167-84366b2d1abc", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/fundamentals/WHAM.ipynb b/alchemistry/fundamentals/WHAM.ipynb new file mode 100644 index 0000000..a345d2a --- /dev/null +++ b/alchemistry/fundamentals/WHAM.ipynb @@ -0,0 +1,150 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "53d2152d-6f2b-48e8-923f-dddea073f032", + "metadata": {}, + "source": [ + "(WHAM)=\n", + "# Weighted Histogram Analysis Method" + ] + }, + { + "cell_type": "markdown", + "id": "56556316-614a-4af1-8928-52e3441b0dfd", + "metadata": {}, + "source": [ + "The Weighted Histogram Analysis Method (WHAM) is one of the earliest methods that take into account information from all [[Intermediate States]]. By analyzing all the information at once, we can reduce the number of cycles and loops me must run through, improving efficiency. The precursor to WHAM and first version of multiple histogram relighting techniques came from Ferrenberg and Swendsen;{{cite|Ferrenberg1989}} WHAM was developed later for alchemical simulations{{cite|Kumar1992}}.\n", + "\n", + "WHAM works on the principal that if you have a discrete number of states, you can create a histogram with discrete bins that provide you a relative probability of observing the states of interest, assuming you create the bins along whatever reaction path you have selected. In our cases, the reaction path is the alchemical variable. From these probabilities, you can calculate free energies and other observables." + ] + }, + { + "cell_type": "markdown", + "id": "4c0c37be-e767-442b-a6d0-338f708a125d", + "metadata": {}, + "source": [ + "## Derivation\n", + "WHAM's derivation{{cite|Kumar1992}} is best done by starting from visualizing the problem. If you are given $K$ number of states, a number of run simulations, $S$, $N_i$ samples from each simulation, and then $K$ free energies to calculate: $A_1,\\,A_2,\\,\\cdots A_K$; we now wish to assign the reweighed potentials into bins, $B$. Terms common to [[Definitions | this site's definitions page]] are preserved and while new terms are explained in context. Consider the [[Theoretic Principals#Core Free Energy Equation|core free energy difference]] equation between two states $i$ and $j$ of\n", + "\n", + "$\\displaystyle \\Delta A_{ij} = -k_B T \\ln \\frac{Q_j}{Q_i}$\n", + "\n", + "Instead of solving this directly, we will look at the probability density, which is the ratio between the two partition functions (i.e. ignore the $-k_B T \\ln$ part). Recall that the partition function of any state is often written as\n", + "\n", + "$\\displaystyle Q_i = \\int_{V_i} \\Omega_i(U) \\exp\\left(-\\beta U \\right) dU$,\n", + "$\\displaystyle Q_j = \\int_{V_j} \\Omega_j(U) \\exp\\left(-\\beta U \\right) dU$\n", + "\n", + "Where we have defined an unknown density of states for each of the thermodynamic states we are simulating, $\\Omega_i$.\n", + "\n", + "However, we could generalize to ''arbitrary'' integration coordinates, not just energy, which gives us:\n", + "\n", + "$\\displaystyle Q_i = \\int_{V_i} \\Omega_i(\\vec{q}) \\exp\\left(-\\beta U(\\vec{q}) \\right) d\\vec{q}$,\n", + "$\\displaystyle Q_j = \\int_{V_j} \\Omega_j(\\vec{q}) \\exp\\left(-\\beta U(\\vec{q}) \\right) d\\vec{q}$\n", + "\n", + "$\\vec{q}$ could be coordinates, in which case $\\Omega_i(\\vec{q}) = 1 $, but more usually it has only one or two dimensions.\n", + "\n", + "If we assume discrete states, with counts in each bin $B$, we can write the density of states out for a given simulation, $i$ as:\n", + "\n", + "$\\displaystyle \\Omega_i(\\vec{q},\\lambda_i) = B_i(\\vec{q},\\lambda_i)\\exp\\left[\\left(\\sum_{j=0}^K \\beta_i \\lambda_{j,i} U(\\vec{q}_j) \\right)-\\beta_i A_i\\right]$\n", + "\n", + "where the set of states available during simulation $i$ is $\\left\\{\\lambda\\right\\}_i = \\left\\{\\lambda_1, \\lambda_2, \\dots \\lambda_K \\right\\}_i = \\left\\{\\lambda_{1,i}, \\lambda_{2,i}, \\dots \\lambda_{K,i} \\right\\} $ and $B_i$ is the value of the histogram bin $i$ evaluated at $\\vec{q}$ and $\\lambda_i$. The best estimate for the density of states is then\n", + "\n", + "$\\displaystyle \\sum_{i=1}^S \\omega_i(\\vec{q})\\Omega_i(\\vec{q},\\lambda_i)$\n", + "$\\displaystyle \\sum_{i=1}^S \\omega_i(\\vec{q})=1$\n", + "\n", + "The best estimators for $\\omega_i$ are the ones that minimize the statistical noise. It turns out then, that the best estimator is the one that minimize the error of $B_i$ for all $i$. The error of any individual $B_i$ is then\n", + "\n", + "$\\delta^2 B_i = g_i\\left\\langle B_i \\right\\rangle$\n", + "\n", + "where $g_i = 1+2\\tau_i$ and $\\tau_i$ is the [[Simulation Information Gathering#Autocorrelation time|correlation time]] of a given simulation. The best estimator for the expectation of the bin value is then\n", + "\n", + "$\\displaystyle \\hat{\\left\\langle B_i \\right\\rangle} = N_i\\Omega\\exp\\left(\\beta_i A_i - \\beta\\sum_{j=0}^K \\lambda_{j,i} U(\\vec{q}_j) \\right) $.\n", + "\n", + "Please note that $\\Omega$ does ''not have a subscript'' in the previous equation, as it is now the density of states determined from all of the simulations.\n", + "\n", + "From here you can just substitute back in to estimate $/Q_i$ to get the final WHAM equation of\n", + "\n", + "$\\displaystyle \\hat{Q_i} = \\sum_{j=1}^K \\frac{\\sum\\limits_{x=1}^S g_x^{-1} B_x(\\vec{q},\\lambda_j) \\exp\\left[-\\beta\\lambda_0\\vec{q}_0 - \\beta\\sum\\limits_{l=1}^K \\lambda_l U_l(\\vec{q}) \\right]}{\\sum\\limits_{y=1}^S g_y^{-1} N_y \\exp\\left[\\beta A_y - \\beta\\lambda_0\\vec{q}_0 - \\beta\\sum\\limits_{m=1}^K \\lambda_{m,y} U_m(\\vec{q}) \\right]}$\n", + "\n", + "where the nought subscript indicates the conditions at the unmodified state. It is then trivial to get a free energy of this state. This is only a relative free energy though as the system must be self-consistently solved. It is common to set one of the states to a free energy of zero so you can calculate [[Free Energy Fundamentals#Facts from the Free Energy Difference Definition | differences in free energy]] between states.\n", + "\n", + "### Zero Width Bins\n", + "It is possible to take the bin width to zero in the WHAM formula. Although this is a limiting case, it does clean up the equation a bit and looks like\n", + "\n", + "$\\displaystyle \\hat{A_i} = - \\beta^{-1}\\ln \\sum_{k=1}^K \\sum_{n=1}^{N_k} \\frac{ \\exp[-\\beta U_i(\\vec{q}_{kn})]}{\\sum\\limits_{k'=1}^K N_{k'} \\, \\exp[\\beta A_{k'} - \\beta U_{k'}(\\vec{q}_{kn})]}$\n", + "\n", + "which turns out to be the exact equation for [[Multistate Bennett Acceptance Ratio|MBAR]].{{cite|Shrits2008|Shirts, M. R., and Chodera, J. D. (2008) Statistically optimal analysis of samples from multiple equilibrium states. ''J. Chem. Phys.'' 129, 129105.|http://www.citeulike.org/user/jamesr/article/3360542}}" + ] + }, + { + "cell_type": "markdown", + "id": "82fc6382-1a57-4d7c-bd1d-ee688fea23df", + "metadata": {}, + "source": [ + "## Usage of WHAM\n", + "Given a particular implementation of WHAM ([[#Downloading WHAM|see below]]), one may be tempted just to let the analysis give you a black-box result. You should always understand what is happening and so the results should not be taken on blind faith alone.\n", + "\n", + "Since WHAM collects data from all states, you will need to calculate $\\Delta U_{k,j}(\\vec{q})$ for all $ j=1,2,\\cdots,K$ states. Even though $\\Delta U_{k,k}(\\vec{q})$ does not ''need'' to be calculated since it ''should'' be zero, it is highly recommended that you do. This lets you check to see if the re-evaluation function is working as intended, and help you identify possible errors. Although this does not tell you what the error is, it will at least tell you there is one. One possible source of error is the output of your coordinate files should be higher accuracy than a standard PDB files, $10^5 $Å may be sufficient but binary format coordinates are preferred.\n", + "\n", + "Because you are applying a discrete set and finite number of bins, there is a bias introduced since all variables must fall into the bins. This is the predominate problem with WHAM and everything that comes with it, so exercise caution. Error estimates are not directly available for WHAM, and so methods such as [[Analyzing Simulation Results#Bootstrap Method|bootstrap sampling]] are required." + ] + }, + { + "cell_type": "markdown", + "id": "32979bd6-25b0-4587-9bf1-e9238f3b97a9", + "metadata": {}, + "source": [ + "## Downloading WHAM\n", + "Because WHAM requires solving sets of non-linear equations, it is not recommended for beginners to write their own. Fortunately, many simulation packages, such as GROMACS and CHARMM already include WHAM driven free energy calculations.{{cite|Hub2010}}{{cite|Souaille2001}}{{cite|Wang2006}} There are also [http://membrane.urmc.rochester.edu/content/wham | also other standalone packages available] and so there should not be a need to write your own." + ] + }, + { + "cell_type": "markdown", + "id": "da9690d3-61d0-40a1-8914-3892adf9230d", + "metadata": {}, + "source": [ + "## References\n", + "\n", + "{{cite|Ferrenberg1989|Ferrenberg, A. M., and Swendsen, R. H. (1989) Optimized Monte Carlo Data Analysis. ''Phys. Rev. Lett'' 63, 1195–1198.|http://www.citeulike.org/user/dgront/article/774372}}\n", + "\n", + "{{cite|Kumar1992|Kumar, S., Bouzida, D., Swendsen, R. H., Kollman, P. A., and Rosenberg, J. M. (1992) The weighted histogram analysis method for free-energy calculations on biomolecules. I. The method. ''J. Comput. Chem.'' 13, 1011–1021.|http://www.citeulike.org/user/dgront/article/774373}}\n", + "\n", + "{{cite|Hub2010|Hub, J. S., de Groot, B. L., and van der Spoel, D. (2010) g_wham–A Free Weighted Histogram Analysis Implementation Including Robust Error and Autocorrelation Estimates. ''J. Chem. Theo. Comput.'' 6, 3713–3720.|http://www.citeulike.org/user/agrossfield/article/8443854}}\n", + "\n", + "{{cite|Souaille2001|Souaille, M., and Roux, B. (2001) Extension to the weighted histogram analysis method: combining umbrella sampling with free energy calculations. ''Comput. Phys. Commun.'' 135, 40–57.|http://www.citeulike.org/user/girayenkavi/article/2910539}}\n", + "\n", + "{{cite|Wang2006|Wang, J., Deng, Y., and Roux, B. (2006) Absolute Binding Free Energy Calculations Using Molecular Dynamics Simulations with Restraining Potentials. ''Biophys. J.'' 91, 2798–2814.|http://www.citeulike.org/user/kupopo/article/867499}}\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43f7a6f4-3e07-48b7-b922-f1dcb19113f2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/alchemistry/images/.DS_Store b/alchemistry/images/.DS_Store new file mode 100644 index 0000000..17004e6 Binary files /dev/null and b/alchemistry/images/.DS_Store differ diff --git a/assets/images/Alchemistry.jpg b/alchemistry/images/Alchemistry.jpg similarity index 100% rename from assets/images/Alchemistry.jpg rename to alchemistry/images/Alchemistry.jpg diff --git a/alchemistry/images/Bind_example.png b/alchemistry/images/Bind_example.png new file mode 100644 index 0000000..ea309ff Binary files /dev/null and b/alchemistry/images/Bind_example.png differ diff --git a/alchemistry/images/Bootstrap_Comparison.png b/alchemistry/images/Bootstrap_Comparison.png new file mode 100644 index 0000000..bf02eaa Binary files /dev/null and b/alchemistry/images/Bootstrap_Comparison.png differ diff --git a/alchemistry/images/Both_topologies.png b/alchemistry/images/Both_topologies.png new file mode 100644 index 0000000..e687302 Binary files /dev/null and b/alchemistry/images/Both_topologies.png differ diff --git a/alchemistry/images/Linear_LJ_transformation.png b/alchemistry/images/Linear_LJ_transformation.png new file mode 100644 index 0000000..438b1eb Binary files /dev/null and b/alchemistry/images/Linear_LJ_transformation.png differ diff --git a/assets/images/Transformation_small.png b/alchemistry/images/Transformation_small.png similarity index 100% rename from assets/images/Transformation_small.png rename to alchemistry/images/Transformation_small.png diff --git a/assets/img/favicons/favicon.ico b/alchemistry/images/alchemistry-favicon.ico similarity index 100% rename from assets/img/favicons/favicon.ico rename to alchemistry/images/alchemistry-favicon.ico diff --git a/alchemistry/images/brands/Acellera_TX_logo_300.png b/alchemistry/images/brands/Acellera_TX_logo_300.png new file mode 100644 index 0000000..40d22c8 Binary files /dev/null and b/alchemistry/images/brands/Acellera_TX_logo_300.png differ diff --git a/alchemistry/images/brands/Helix-blue.jpeg b/alchemistry/images/brands/Helix-blue.jpeg new file mode 100644 index 0000000..76c76ad Binary files /dev/null and b/alchemistry/images/brands/Helix-blue.jpeg differ diff --git a/alchemistry/images/events/.DS_Store b/alchemistry/images/events/.DS_Store new file mode 100644 index 0000000..fbf2099 Binary files /dev/null and b/alchemistry/images/events/.DS_Store differ diff --git a/alchemistry/images/events/2024WorkFEDD/Day1.png b/alchemistry/images/events/2024WorkFEDD/Day1.png new file mode 100644 index 0000000..83d4c25 Binary files /dev/null and b/alchemistry/images/events/2024WorkFEDD/Day1.png differ diff --git a/alchemistry/images/events/2024WorkFEDD/Day2.png b/alchemistry/images/events/2024WorkFEDD/Day2.png new file mode 100644 index 0000000..810c324 Binary files /dev/null and b/alchemistry/images/events/2024WorkFEDD/Day2.png differ diff --git a/alchemistry/images/events/2024WorkFEDD/Scheltema.jpeg b/alchemistry/images/events/2024WorkFEDD/Scheltema.jpeg new file mode 100644 index 0000000..68ab5da Binary files /dev/null and b/alchemistry/images/events/2024WorkFEDD/Scheltema.jpeg differ diff --git a/assets/images/staurosporine-hydrated-1.png b/alchemistry/images/staurosporine-hydrated-1.png similarity index 100% rename from assets/images/staurosporine-hydrated-1.png rename to alchemistry/images/staurosporine-hydrated-1.png diff --git a/alchemistry/references.bib b/alchemistry/references.bib new file mode 100644 index 0000000..25f5c78 --- /dev/null +++ b/alchemistry/references.bib @@ -0,0 +1,52 @@ +@inproceedings{holdgraf_evidence_2014, + address = {Brisbane, Australia, Australia}, + title = {Evidence for {Predictive} {Coding} in {Human} {Auditory} {Cortex}}, + booktitle = {International {Conference} on {Cognitive} {Neuroscience}}, + publisher = {Frontiers in Neuroscience}, + author = {Holdgraf, Christopher Ramsay and de Heer, Wendy and Pasley, Brian N. and Knight, Robert T.}, + year = {2014} +} + +@article{holdgraf_rapid_2016, + title = {Rapid tuning shifts in human auditory cortex enhance speech intelligibility}, + volume = {7}, + issn = {2041-1723}, + url = {http://www.nature.com/doifinder/10.1038/ncomms13654}, + doi = {10.1038/ncomms13654}, + number = {May}, + journal = {Nature Communications}, + author = {Holdgraf, Christopher Ramsay and de Heer, Wendy and Pasley, Brian N. and Rieger, Jochem W. and Crone, Nathan and Lin, Jack J. and Knight, Robert T. and Theunissen, Frédéric E.}, + year = {2016}, + pages = {13654}, + file = {Holdgraf et al. - 2016 - Rapid tuning shifts in human auditory cortex enhance speech intelligibility.pdf:C\:\\Users\\chold\\Zotero\\storage\\MDQP3JWE\\Holdgraf et al. - 2016 - Rapid tuning shifts in human auditory cortex enhance speech intelligibility.pdf:application/pdf} +} + +@inproceedings{holdgraf_portable_2017, + title = {Portable learning environments for hands-on computational instruction using container-and cloud-based technology to teach data science}, + volume = {Part F1287}, + isbn = {978-1-4503-5272-7}, + doi = {10.1145/3093338.3093370}, + abstract = {© 2017 ACM. There is an increasing interest in learning outside of the traditional classroom setting. This is especially true for topics covering computational tools and data science, as both are challenging to incorporate in the standard curriculum. These atypical learning environments offer new opportunities for teaching, particularly when it comes to combining conceptual knowledge with hands-on experience/expertise with methods and skills. Advances in cloud computing and containerized environments provide an attractive opportunity to improve the effciency and ease with which students can learn. This manuscript details recent advances towards using commonly-Available cloud computing services and advanced cyberinfrastructure support for improving the learning experience in bootcamp-style events. We cover the benets (and challenges) of using a server hosted remotely instead of relying on student laptops, discuss the technology that was used in order to make this possible, and give suggestions for how others could implement and improve upon this model for pedagogy and reproducibility.}, + author = {Holdgraf, Christopher Ramsay and Culich, A. and Rokem, A. and Deniz, F. and Alegro, M. and Ushizima, D.}, + year = {2017}, + keywords = {Teaching, Bootcamps, Cloud computing, Data science, Docker, Pedagogy} +} + +@article{holdgraf_encoding_2017, + title = {Encoding and decoding models in cognitive electrophysiology}, + volume = {11}, + issn = {16625137}, + doi = {10.3389/fnsys.2017.00061}, + abstract = {© 2017 Holdgraf, Rieger, Micheli, Martin, Knight and Theunissen. Cognitive neuroscience has seen rapid growth in the size and complexity of data recorded from the human brain as well as in the computational tools available to analyze this data. This data explosion has resulted in an increased use of multivariate, model-based methods for asking neuroscience questions, allowing scientists to investigate multiple hypotheses with a single dataset, to use complex, time-varying stimuli, and to study the human brain under more naturalistic conditions. These tools come in the form of “Encoding” models, in which stimulus features are used to model brain activity, and “Decoding” models, in which neural features are used to generated a stimulus output. Here we review the current state of encoding and decoding models in cognitive electrophysiology and provide a practical guide toward conducting experiments and analyses in this emerging field. Our examples focus on using linear models in the study of human language and audition. We show how to calculate auditory receptive fields from natural sounds as well as how to decode neural recordings to predict speech. The paper aims to be a useful tutorial to these approaches, and a practical introduction to using machine learning and applied statistics to build models of neural activity. The data analytic approaches we discuss may also be applied to other sensory modalities, motor systems, and cognitive systems, and we cover some examples in these areas. In addition, a collection of Jupyter notebooks is publicly available as a complement to the material covered in this paper, providing code examples and tutorials for predictive modeling in python. The aimis to provide a practical understanding of predictivemodeling of human brain data and to propose best-practices in conducting these analyses.}, + journal = {Frontiers in Systems Neuroscience}, + author = {Holdgraf, Christopher Ramsay and Rieger, J.W. and Micheli, C. and Martin, S. and Knight, R.T. and Theunissen, F.E.}, + year = {2017}, + keywords = {Decoding models, Encoding models, Electrocorticography (ECoG), Electrophysiology/evoked potentials, Machine learning applied to neuroscience, Natural stimuli, Predictive modeling, Tutorials} +} + +@book{ruby, + title = {The Ruby Programming Language}, + author = {Flanagan, David and Matsumoto, Yukihiro}, + year = {2008}, + publisher = {O'Reilly Media} +} diff --git a/alchemistry/welcome.ipynb b/alchemistry/welcome.ipynb new file mode 100644 index 0000000..822fc2a --- /dev/null +++ b/alchemistry/welcome.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0b1c6ed0-ac32-46e3-8096-3b17d1f906bb", + "metadata": {}, + "source": [ + "```{image} images/Alchemistry.jpg\n", + ":alt: Welcome to Alchemistry.org\n", + ":align: center\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "42820110", + "metadata": {}, + "source": [ + "(landing)=\n", + "# What is Alchemistry?" + ] + }, + { + "cell_type": "markdown", + "id": "sufficient-suite", + "metadata": {}, + "source": [ + "Alchemical free energy calculations employ unphysical (“alchemical”) intermediates to estimate the free energies of various physical processes. Such process include ligand binding to a protein receptor, the transfer of a small molecule from gas to water, or the free energy of a mutation of a side chain. This approach provides not only a quantitative and rigorous method for computing free energies, but often provides insight into the dominant interactions driving binding." + ] + }, + { + "cell_type": "markdown", + "id": "52592f1b-c79a-47c0-ab0f-caf7523af3e2", + "metadata": {}, + "source": [ + "## Getting Started with Free Energy Methods\n", + "This site is designed to both give novices best practices information about how to perform free energy calculations, and to provide an ongoing reference for the current state of research into methods for calculating free energies." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f9dcfa9-0fc3-437a-aeaa-268c3d2827a4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + }, + "vscode": { + "interpreter": { + "hash": "1e681b6f81db57470e7e3f73bde524406b83463aa6cc205193b50b915b78b906" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/assets/css/jekyll-theme-chirpy.scss b/assets/css/jekyll-theme-chirpy.scss deleted file mode 100644 index 534a196..0000000 --- a/assets/css/jekyll-theme-chirpy.scss +++ /dev/null @@ -1,8 +0,0 @@ ---- ---- - -@import 'main'; - -/* append your custom style below */ - -@import 'refactor-edits'; diff --git a/assets/css/refactor-edits.scss b/assets/css/refactor-edits.scss deleted file mode 100644 index 575360a..0000000 --- a/assets/css/refactor-edits.scss +++ /dev/null @@ -1,31 +0,0 @@ -// A series of changes to better lock in the header image - -.preview-img { - aspect-ratio: 40 / 21; - width: 100%; - height: 100%; - overflow: hidden; - - @extend %rounded; - - &:not(.no-bg) { - background: var(--img-bg); - } - - img { - height: 100%; - -o-object-fit: contain; - object-fit: contain; - - @extend %rounded; - - @at-root #post-list & { - width: 100%; - } - } -} - -// Allows a box with only an image to not have empty fill border -.contain-images .preview-img { - display: contents; -} diff --git a/assets/img/favicons/android-chrome-192x192.png b/assets/img/favicons/android-chrome-192x192.png deleted file mode 100644 index e0e5c1a..0000000 Binary files a/assets/img/favicons/android-chrome-192x192.png and /dev/null differ diff --git a/assets/img/favicons/android-chrome-256x256.png b/assets/img/favicons/android-chrome-256x256.png deleted file mode 100644 index 198d48e..0000000 Binary files a/assets/img/favicons/android-chrome-256x256.png and /dev/null differ diff --git a/assets/img/favicons/apple-touch-icon.png b/assets/img/favicons/apple-touch-icon.png deleted file mode 100644 index 8278c2c..0000000 Binary files a/assets/img/favicons/apple-touch-icon.png and /dev/null differ diff --git a/assets/img/favicons/favicon-16x16.png b/assets/img/favicons/favicon-16x16.png deleted file mode 100644 index 93ea536..0000000 Binary files a/assets/img/favicons/favicon-16x16.png and /dev/null differ diff --git a/assets/img/favicons/favicon-32x32.png b/assets/img/favicons/favicon-32x32.png deleted file mode 100644 index d0d82cc..0000000 Binary files a/assets/img/favicons/favicon-32x32.png and /dev/null differ diff --git a/assets/img/favicons/mstile-150x150.png b/assets/img/favicons/mstile-150x150.png deleted file mode 100644 index 33a6226..0000000 Binary files a/assets/img/favicons/mstile-150x150.png and /dev/null differ diff --git a/homepage/header.md b/homepage/header.md deleted file mode 100644 index 396b160..0000000 --- a/homepage/header.md +++ /dev/null @@ -1,7 +0,0 @@ -![Base alchemisty image](/assets/images/Alchemistry.jpg) - -# What is Alchemistry? -Alchemical free energy calculations employ unphysical ("alchemical") intermediates to estimate the free energies of -various physical processes. Such process include ligand binding to a protein receptor, the transfer of a small molecule -from gas to water, or the free energy of a mutation of a side chain. This approach provides not only a quantitative and -rigorous method for computing free energies, but often provides insight into the dominant interactions driving binding. diff --git a/homepage/lessons_and_tutorials.html b/homepage/lessons_and_tutorials.html deleted file mode 100644 index 4b9416d..0000000 --- a/homepage/lessons_and_tutorials.html +++ /dev/null @@ -1,5 +0,0 @@ -

Free Energy Fundamentals and Lessons

-{% include post-like-gen.html content_type="lessons" %} - -

Tutorials and External Resources

-{% include post-like-gen.html content_type="examples" %} diff --git a/homepage/lessons_and_tutorials.md b/homepage/lessons_and_tutorials.md deleted file mode 100644 index 5028f78..0000000 --- a/homepage/lessons_and_tutorials.md +++ /dev/null @@ -1,4 +0,0 @@ -# Getting Started with Free Energy Methods - -This site is designed to both give novices best practices information about how to perform free energy calculations, -and to provide an ongoing reference for the current state of research into methods for calculating free energies. diff --git a/index.html b/index.html deleted file mode 100644 index 1357b08..0000000 --- a/index.html +++ /dev/null @@ -1,4 +0,0 @@ ---- -layout: home -# Index page ---- diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..5235316 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +jupyter-book +matplotlib +numpy +ghp-import