From f6979682933b06b4418a1e913ef7dab3bf64ce1f Mon Sep 17 00:00:00 2001
From: Norman Fomferra

Contributions
corresponding issue.
Setup development environment:
-pip install -r requirements.txt
-pip install -r requirements-dev.txt
-pip install -r requirements-docs.txt
-
+pip install -r requirements.txt
+pip install -r requirements-dev.txt
+pip install -r requirements-docs.txt
+
zappend
uses pytest for unit-level testing
and code coverage analysis.
pytest --cov=zappend tests
-
+pytest --cov=zappend tests
+
zappend
source code is formatted using the black tool.
black zappend
-
+black zappend
+
zappend
documentation is built using the mkdocs tool.
pip install -r requirements-doc.txt
+pip install -r requirements-doc.txt
-mkdocs build
-mkdocs serve
-mkdocs gh-deploy
-
+mkdocs build
+mkdocs serve
+mkdocs gh-deploy
+
License
zappend
is open source made available under the terms and conditions of the
MIT License.
diff --git a/api/index.html b/api/index.html
index 9fb3900..82f095f 100755
--- a/api/index.html
+++ b/api/index.html
@@ -193,11 +193,11 @@
-
+
- Introduction
+ Overview
diff --git a/cli/index.html b/cli/index.html
index fceb95f..2425bd5 100755
--- a/cli/index.html
+++ b/cli/index.html
@@ -193,11 +193,11 @@
-
+
- Introduction
+ Overview
@@ -370,7 +370,7 @@
Command Line Interface Reference
-Usage: zappend [OPTIONS] [SLICES]...
+Usage: zappend [OPTIONS] [SLICES]...
Create or update a Zarr dataset TARGET from slice datasets SLICES.
@@ -379,12 +379,12 @@ Command Line Interface Reference
passed, subsequent configurations are incremental to
the previous ones.
-t, --target TARGET Target Zarr dataset path or URI. Overrides the
- 'target_dir' configuration field.
+ 'target_dir' configuration field.
--dry-run Run the tool without creating, changing, or deleting
any files.
--help-config json|md Show configuration help and exit.
--help Show this message and exit.
-
+
diff --git a/config/index.html b/config/index.html
index 82cc92b..bbd56c1 100755
--- a/config/index.html
+++ b/config/index.html
@@ -193,11 +193,11 @@
-
+
- Introduction
+ Overview
diff --git a/guide/index.html b/guide/index.html
index 2d8f41f..a38d77c 100755
--- a/guide/index.html
+++ b/guide/index.html
@@ -193,11 +193,11 @@
-
+
- Introduction
+ Overview
@@ -298,9 +298,9 @@
-
-
+
- Variable Outline
+ Variable Dimensions
@@ -335,27 +335,27 @@
-
-
+
- Missing Values
+ Missing Data
-
-
+
- Compression
+ Data Packing
-
-
+
- Data Packing
+ Compression
@@ -556,9 +556,9 @@
-
-
+
- Variable Outline
+ Variable Dimensions
@@ -593,27 +593,27 @@
-
-
+
- Missing Values
+ Missing Data
-
-
+
- Compression
+ Data Packing
-
-
+
- Data Packing
+ Compression
@@ -701,64 +701,60 @@
User Guide
slice dataset paths that contribute to the datacube to be generated. The target dataset
path must point to a directory that will contain a Zarr group to be created and
updated. The slice dataset paths may be provided as Zarr as well or in other data
-formats supported by the
-xarray.open_dataset()
-function. The target and slice dataset are allowed to live in different filesystems.
+formats supported by the xarray.open_dataset() function.
+The target and slice datasets are allowed to live in different filesystems.
Additional filesystem storage options may be specified via the tool's configuration.
The tool takes care of generating the target dataset from slice datasets, but doesn't
care how the slice datasets are created. Hence, when using the Python zappend()
function, the slice datasets can be provided in various forms. More on this below.
-
-[!NOTE]
-We use the term Dataset in the same way xarray
does: A dataset
-comprises any number of multidimensional Data Variables, and
-usually 1-dimensional Coordinate Variables that provide the labels for
-the dimensions used by the data variables. A variable comprises the actual
-data array as well as metadata describing the data dimensions,
-units, and encoding, such as chunking and compression.
-
+
+Note
+We use the term Dataset in the same way xarray
does: A dataset comprises any
+number of multidimensional Data Variables, and usually 1-dimensional
+Coordinate Variables that provide the labels for the dimensions used by the data
+variables. A variable comprises the actual data array as well as metadata describing
+the data dimensions, units, and encoding, such as chunking and compression.
+
Dataset Outline
-If no further configuration is supplied, then the target dataset's outline
-and data encoding is fully prescribed by the first slice dataset provided.
-By default, the dimension along subsequent slice datasets are concatenated
-is time
. If you use a different append dimension, the append_dim
-setting can be used to specify its name:
-{
- "append_dim": "depth"
-}
-
+If no further configuration is supplied, then the target dataset's outline and data
+encoding is fully prescribed by the first slice dataset provided. By default, the
+dimension along which subsequent slice datasets are concatenated is time
. If you use a
+different append dimension, the append_dim
setting can be used to specify its name:
+{
+ "append_dim": "depth"
+}
+
All other non-variadic dimensions can and should be specified using the
fixed_dims
setting which is a mapping from dimension name to the
fixed dimension sizes, e.g.:
-{
- "fixed_dims": {
- "x": 16384,
- "y": 8192
- }
-}
-
+{
+ "fixed_dims": {
+ "x": 16384,
+ "y": 8192
+ }
+}
+
By default, without further configuration, all data variables seen in the first
dataset slice will be included in the target dataset. If only a subset of
variables shall be used from the slice dataset, they can be specified using the
included_variables
setting, which is a list of names of variables that will
be included:
-{
- "included_variables": [
- "time", "y", "x",
- "chl",
- "tsm"
- ]
-}
-
+{
+ "included_variables": [
+ "time", "y", "x",
+ "chl",
+ "tsm"
+ ]
+}
+
Often, it is easier to tell which variables should be excluded:
-{
- "excluded_variables": ["GridCellId"]
-}
-
+{
+ "excluded_variables": ["GridCellId"]
+}
+
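The effect of the included_variables and excluded_variables settings can be sketched as a simple name filter. This is an illustration only, not zappend internals, and the variable names are the hypothetical ones from the examples above:

```python
# Illustrative sketch of the selection implied by the two settings
# (not zappend internals; variable names are hypothetical).
slice_variables = ["time", "y", "x", "chl", "tsm", "GridCellId"]

def select(variables, included=None, excluded=None):
    # Keep only names listed in included_variables, if given ...
    if included is not None:
        variables = [name for name in variables if name in included]
    # ... then drop names listed in excluded_variables, if given.
    if excluded is not None:
        variables = [name for name in variables if name not in excluded]
    return variables

assert select(slice_variables, excluded=["GridCellId"]) == \
    ["time", "y", "x", "chl", "tsm"]
```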
Variable Metadata
-Without any additional configuration, zappend
uses the outline, attributes,
-and encoding information of data variables for the target dataset from the
-data variables of the first slice dataset.
+
Without any additional configuration, zappend
uses the dimensions, attributes,
+and encoding information from the data variables of the first slice dataset.
Encoding information is used only to the extent applicable to the Zarr format.
Non-applicable encoding information will be reported by a warning log record
but is otherwise ignored.
@@ -770,96 +766,162 @@ Variable Metadata
first dataset slice.
A special "variable name" is the wildcard *
that can be used to define default
values for all variables:
-{
- "variables": {
- "*": { }
- }
-}
-
+{
+ "variables": {
+ "*": {
+ }
+ }
+}
+
If *
is specified, the effective variable metadata is obtained by merging the given
variable-specific metadata into the common metadata given by *
, which is in turn
merged into the metadata of the variable in the first dataset slice.
-
-[!NOTE]
-The metadata of variables from subsequent slice datasets is ignored!
-
-Variable Outline
-To ensure a slice variable has the expected dimensionality, the dims
-setting is used. The following example defines the dimensions of the data variable
+
+Note
+Only metadata from the first slice dataset is used; metadata of variables from
+subsequent slice datasets is ignored entirely.
+
+Variable Dimensions
+To ensure a slice variable has the expected dimensionality and shape, the dims
+setting is used. The following example defines the dimensions of a data variable
named chl
(Chlorophyll):
-{
- "variables": {
- "chl": {
- "dims": ["time", "y", "x"]
- }
- }
-}
-
+{
+ "variables": {
+ "chl": {
+ "dims": ["time", "y", "x"]
+ }
+ }
+}
+
An error will be raised if a variable from a subsequent slice has different dimensions.
Variable Attributes
Extra variable attributes can be provided using the attrs
setting:
-{
- "variables": {
- "chl": {
- "attrs": {
- "units": "mg/m^3",
- "long_name": "chlorophyll_concentration"
- }
- }
- }
-}
-
+{
+ "variables": {
+ "chl": {
+ "attrs": {
+ "units": "mg/m^3",
+ "long_name": "chlorophyll_concentration"
+ }
+ }
+ }
+}
+
Variable Encoding
-Encoding metadata specifies how array data is stored in the target dataset and
-includes storage data type, packing, chunking, and compression.
-Encoding metadata for a given variable is provided by the encoding
setting.
-Since the encoding is often shared by multiple variables the wildcard
-variable name *
can often be of help.
+Encoding metadata specifies how array data is stored in the target dataset and includes
+storage data type, packing, chunking, and compression. Encoding metadata for a given
+variable is provided by the encoding
setting. Since the encoding is often shared by
+multiple variables the wildcard variable name *
can often be of help.
+
+Verify encoding is as expected
+To verify that zappend
uses the expected encoding for your variables, create a
+target dataset for testing from your first slice dataset and open it using
+ds = xarray.open_zarr(target_dir, decode_cf=False)
. Then inspect dataset ds
+using the Python console or Jupyter Notebook (attribute ds.<var>.encoding
).
+You can also inspect the Zarr directly by opening the <target_dir>/<var>/.zarray
+or <target_dir>/.zmetadata
metadata JSON files.
+
Chunking
By default, the chunking of the coordinate variable corresponding to the append
-dimension will be its dimension in the first slice dataset. Often, this will
-be one or a small number. Since xarray
loads coordinates eagerly when opening
-a dataset, this can lead to performance issues if the target dataset is served
-from object storage such as S3. This is because, a separate HTTP request is
-required for every single chunk. It is therefore very advisable to set the
-chunks of that variable to a larger number using the chunks
setting.
-For other variables, the chunking within the append dimension may stay small
-if desired:
-{
- "variables": {
- "time": {
- "dims": ["time"],
- "encoding": {
- "chunks": [1024]
- }
- },
- "chl": {
- "dims": ["time", "y", "x"],
- "encoding": {
- "chunks": [1, 2048, 2048]
- }
- }
- }
-}
-
-Missing Values
-This section is a work in progress.
-Compression
-This section is a work in progress.
+dimension will be the size of that dimension in the first slice dataset. Often, this will be one or
+a small number. Since xarray
loads coordinates eagerly when opening a dataset, this
+can lead to performance issues if the target dataset is served from object storage such
+as S3. This is because a separate HTTP request is required for every single chunk. It
+is therefore highly advisable to set the chunks of that variable to a larger number using
+the chunks
setting. For other variables, the chunking within the append dimension may
+stay small if desired:
+{
+ "variables": {
+ "time": {
+ "dims": ["time"],
+ "encoding": {
+ "chunks": [1024]
+ }
+ },
+ "chl": {
+ "dims": ["time", "y", "x"],
+ "encoding": {
+ "chunks": [1, 2048, 2048]
+ }
+ }
+ }
+}
+
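The cost described above can be estimated with back-of-the-envelope arithmetic, assuming one HTTP request per chunk and a hypothetical dataset of 10,000 time steps:

```python
# Rough sketch of why larger time chunks help when the target lives in
# object storage: one HTTP request is needed per chunk of the eagerly
# loaded "time" coordinate. The step count below is a hypothetical example.
n_time_steps = 10_000

# Ceiling division: number of chunks (= requests) needed to read "time".
requests_small_chunks = -(-n_time_steps // 1)     # chunks=[1]
requests_large_chunks = -(-n_time_steps // 1024)  # chunks=[1024]

print(requests_small_chunks, requests_large_chunks)  # 10000 10
```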
+Missing Data
+To indicate missing data in a variable data array, a dedicated no-data or missing value
+can be specified by the fill_value
setting. The value is given in a variable's storage
+type and storage units; see the next section, Data Packing.
+{
+ "variables": {
+ "chl": {
+ "encoding": {
+ "fill_value": -999
+ }
+ }
+ }
+}
+
+If the fill_value
is not specified, the default is NaN
(given as string "NaN"
+in JSON) if the storage data type is floating point; it is None
(null
in JSON)
+if the storage data type is integer, which effectively means that no fill value is used.
+You can also explicitly set fill_value
to null
(None
in Python) to not use one.
+Setting the fill_value
for a variable can be important for saving storage space and
+improving data I/O performance in many cases, because zappend
does not write empty
+array chunks - chunks that comprise missing data only, i.e.,
+slice.to_zarr(target_dir, write_empty_chunks=False, ...)
.
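For illustration, the substitution that fill_value implies on reading can be sketched in plain Python. The values below are hypothetical; xarray performs this decoding itself when a dataset is opened with decode_cf=True:

```python
import math

# Hypothetical encoded chunk in storage units: -999 marks missing samples,
# matching the fill_value from the example configuration above.
FILL_VALUE = -999
storage_values = [12, -999, 7, -999]

# On decoding, fill values are replaced by NaN in the in-memory array;
# this list comprehension reproduces that substitution for illustration.
decoded = [math.nan if v == FILL_VALUE else float(v) for v in storage_values]

print(decoded)  # [12.0, nan, 7.0, nan]
```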
Data Packing
-This section is a work in progress.
-
--
-
The target encoding should also allow for packing floating point data into
- integer data with fewer bits using scaling factor and offset.
-
--
-
If the target exists, the slice will be appended. Check if the slice to be
- appended is last. If not, refuse to append (alternative: insert but this is
- probably difficult or error prone).
-
-- Slices are appended in the order they are provided.
-
+Data packing refers to a simple lossy data compression method where 32- or 64-bit
+floating point values are linearly scaled so that their value range can be fully or
+partially represented by a lower precision integer data type. Packed values usually
+also give higher compression rates when using a compressor
; see the next section.
+Data packing is specified using the scale_factor
and add_offset
settings together
+with the storage data type setting dtype
. The settings should be given as a triple:
+{
+ "variables": {
+ "chl": {
+ "encoding": {
+ "dtype": "int16",
+ "scale_factor": 0.005,
+ "add_offset": 0.0
+ }
+ }
+ }
+}
+
+The in-memory value in its physical units for a given encoded value in storage is
+computed according to
+memory_value = scale_factor * storage_value + add_offset
+
+Hence, the encoded value is computed from an in-memory value in physical units as
+storage_value = (memory_value - add_offset) / scale_factor
+
+You can compute scale_factor
and add_offset
from a given data range in physical units
+according to
+ add_offset = memory_value_min
+ scale_factor = (memory_value_max - memory_value_min) / (2 ** num_bits - 1)
+
+with num_bits
being the number of bits for the integer type to be used.
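The formulas above can be checked with a small, self-contained sketch. The chlorophyll value range used here is an assumption chosen for illustration, not taken from any real dataset:

```python
# Sketch applying the packing formulas above for an int16 storage type.
# The physical value range is an assumed example.
num_bits = 16
memory_value_min, memory_value_max = 0.0, 25.0

add_offset = memory_value_min
scale_factor = (memory_value_max - memory_value_min) / (2 ** num_bits - 1)

def pack(memory_value):
    # storage_value = (memory_value - add_offset) / scale_factor
    return round((memory_value - add_offset) / scale_factor)

def unpack(storage_value):
    # memory_value = scale_factor * storage_value + add_offset
    return scale_factor * storage_value + add_offset

# The round-trip error is bounded by half the quantization step.
value = 3.14
assert abs(unpack(pack(value)) - value) <= scale_factor / 2
```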
+Compression
+Data compression is specified by the compressor
setting, optionally paired with the
+filters
setting:
+{
+ "variables": {
+ "chl": {
+ "encoding": {
+ "compressor": {},
+ "filters": []
+ }
+ }
+ }
+}
+
+By default, zappend uses the default blosc compressor of Zarr, if not
+specified. To explicitly disable compression you must set the compressor
to None
+(null
in JSON).
+The usage of compressors and filters is best explained in dedicated sections of the
+Zarr Tutorial, namely
+Compressors and
+Filters.
Data I/O
This section is a work in progress.
@@ -924,37 +986,36 @@ Transactions
Slice Data Types
This section is a work in progress.
Logging
-The zappend
logging configuration follows exactly the
-Python dictionary schema of the Python module logging.config
.
-The logger used by the zappend
tool is named zappend
.
-Note that you can also configure the logger of other Python modules, e.g.,
+
The zappend
logging configuration follows exactly the dictionary schema of the
+Python module logging.config
. The logger used by the zappend
tool is named
+zappend
. Note that you can also configure the logger of other Python modules, e.g.,
xarray
or dask
here.
Given here is an example that logs zappend
's output to the console using
the INFO level:
-
{
- "logging": {
- "version": 1,
- "formatters": {
- "normal": {
- "format": "%(asctime)s %(levelname)s %(message)s",
- "style": "%"
- }
- },
- "handlers": {
- "console": {
- "class": "logging.StreamHandler",
- "formatter": "normal"
- }
- },
- "loggers": {
- "zappend": {
- "level": "INFO",
- "handlers": ["console"]
- }
- }
- }
-}
-
+{
+ "logging": {
+ "version": 1,
+ "formatters": {
+ "normal": {
+ "format": "%(asctime)s %(levelname)s %(message)s",
+ "style": "%"
+ }
+ },
+ "handlers": {
+ "console": {
+ "class": "logging.StreamHandler",
+ "formatter": "normal"
+ }
+ },
+ "loggers": {
+ "zappend": {
+ "level": "INFO",
+ "handlers": ["console"]
+ }
+ }
+ }
+}
+
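The "logging" dictionary shown above can be passed directly to Python's standard logging.config.dictConfig(), which defines the schema it follows. A standalone sketch, outside any zappend configuration file:

```python
import logging
import logging.config

# The same structure as the "logging" setting above, fed straight into
# the standard library's dictConfig().
config = {
    "version": 1,
    "formatters": {
        "normal": {
            "format": "%(asctime)s %(levelname)s %(message)s",
            "style": "%",
        }
    },
    "handlers": {
        "console": {
            "class": "logging.StreamHandler",
            "formatter": "normal",
        }
    },
    "loggers": {
        "zappend": {
            "level": "INFO",
            "handlers": ["console"],
        }
    },
}

logging.config.dictConfig(config)
logger = logging.getLogger("zappend")
logger.info("logging configured")  # emitted to the console at INFO level
```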
diff --git a/intro/index.html b/index.html
similarity index 89%
rename from intro/index.html
rename to index.html
index b4bba72..872fe8f 100755
--- a/intro/index.html
+++ b/index.html
@@ -10,19 +10,19 @@
-
+
-
+
- Introduction - zappend
+ zappend
-
+
@@ -41,9 +41,9 @@
-
+
-
+
@@ -78,7 +78,7 @@