diff --git a/zarrtraj/benchmarking.ipynb b/notebooks/initial_benchmark.ipynb
similarity index 100%
rename from zarrtraj/benchmarking.ipynb
rename to notebooks/initial_benchmark.ipynb
diff --git a/notebooks/yiip_benchmark.ipynb b/notebooks/yiip_benchmark.ipynb
new file mode 100644
index 0000000..94c0e76
--- /dev/null
+++ b/notebooks/yiip_benchmark.ipynb
@@ -0,0 +1,120 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import MDAnalysis as mda\n",
+    "import MDAnalysisData\n",
+    "\n",
+    "# The same trajectory file is passed five times (presumably to enlarge the\n",
+    "# benchmark data set) and the frames are held in memory.\n",
+    "yiip = MDAnalysisData.yiip_equilibrium.fetch_yiip_equilibrium_short()\n",
+    "u = mda.Universe(yiip.topology, 5 * [yiip.trajectory], in_memory=True)\n",
+    "positions = u.trajectory.get_array()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import zarr\n",
+    "import h5py\n",
+    "\n",
+    "# Inclusive (min, max) compression levels to sweep for each compressor.\n",
+    "zarr_compressors = {'blosc': (0, 9), 'zlib': (0, 9), 'bz2': (1, 9), 'lzma': (0, 9)}\n",
+    "hdf5_compressors = {'gzip': (0, 9)}\n",
+    "\n",
+    "# zarr hands a bare compression_opts value to the codec as its first positional\n",
+    "# argument, which is not the level for blosc (cname) or lzma (format), so for\n",
+    "# these the level is wrapped in a dict and passed by keyword instead.\n",
+    "zarr_level_kwarg = {'blosc': 'clevel', 'lzma': 'preset'}\n",
+    "\n",
+    "\n",
+    "# Load yiip data into minimal zarr file and hdf5 files of different compressors\n",
+    "def create_zarr_traj(compression, compression_opts):\n",
+    "    # The file name uses the plain level, so open the store before wrapping it.\n",
+    "    root = zarr.open(f'yiip_{compression}_{compression_opts}_zarr.zarr', mode='a')\n",
+    "    if compression in zarr_level_kwarg:\n",
+    "        compression_opts = {zarr_level_kwarg[compression]: compression_opts}\n",
+    "    # Chunks span one index of the first axis (presumably one frame each).\n",
+    "    root.create_dataset('positions', data=positions,\n",
+    "                        chunks=(1, np.shape(positions)[1], 3),\n",
+    "                        compression=compression, compression_opts=compression_opts)\n",
+    "\n",
+    "\n",
+    "def create_hdf5_traj(compression, compression_opts):\n",
+    "    with h5py.File(f'yiip_{compression}_{compression_opts}_hdf5.h5', 'w') as root:\n",
+    "        root.create_dataset('positions', data=positions,\n",
+    "                            chunks=(1, np.shape(positions)[1], 3),\n",
+    "                            compression=compression, compression_opts=compression_opts)\n",
+    "\n",
+    "\n",
+    "# Write one file per (compressor, level) pair with the given writer function.\n",
+    "def sweep_levels(compressors, create_traj):\n",
+    "    # .items() is needed here: iterating the dict directly yields only its keys,\n",
+    "    # and unpacking a key such as 'blosc' into two names raises ValueError.\n",
+    "    for name, (lowest, highest) in compressors.items():\n",
+    "        for level in range(lowest, highest + 1):\n",
+    "            create_traj(name, level)\n",
+    "\n",
+    "\n",
+    "sweep_levels(zarr_compressors, create_zarr_traj)\n",
+    "sweep_levels(hdf5_compressors, create_hdf5_traj)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "111815"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.shape(positions)[1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "zarrtraj",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}