Commit 4ddfdcf

chore: update python dependencies

roeap committed Aug 5, 2024
1 parent f924f9c
Showing 6 changed files with 782 additions and 791 deletions.
24 changes: 6 additions & 18 deletions .pre-commit-config.yaml
@@ -16,32 +16,20 @@ repos:
       - id: commitizen
         stages: [commit-msg]

-  - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: "v0.5.6"
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.5.6
     hooks:
       - id: ruff
-        args: ["--config", "pyproject.toml"]
-
-  - repo: https://github.com/psf/black
-    rev: 24.8.0
-    hooks:
-      - id: black
-        args: ["--config", "pyproject.toml"]
+        types_or: [python, pyi]
+        args: [--fix]
+      - id: ruff-format
+        types_or: [python, pyi, jupyter]

   - repo: https://github.com/pre-commit/mirrors-prettier
     rev: v4.0.0-alpha.8
     hooks:
       - id: prettier

-  - repo: https://github.com/asottile/pyupgrade
-    rev: v3.17.0
-    hooks:
-      - id: pyupgrade
-        exclude: setup.py
-        entry: pyupgrade --py38-plus
-        types:
-          - python
-
   - repo: https://github.com/python-poetry/poetry
     rev: "1.8.0"
     hooks:
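Taken together, this hunk consolidates linting and formatting under ruff (now published from the astral-sh org): the lint hook with `--fix` plus the new `ruff-format` hook replace the separate black and pyupgrade hooks, with pyupgrade-style rewrites covered by ruff's UP rules when those are enabled in pyproject.toml. As a rough sketch, and assuming the pinned `ruff` binary is installed (e.g. `pip install ruff==0.5.6`), the two hooks behave like:

import subprocess

# `id: ruff` with `args: [--fix]`: lint and apply safe autofixes.
subprocess.run(["ruff", "check", "--fix", "."], check=True)

# `id: ruff-format`: format Python, stub, and notebook files,
# standing in for the removed black hook.
subprocess.run(["ruff", "format", "."], check=True)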
18 changes: 10 additions & 8 deletions examples/object_store.ipynb
@@ -10,8 +10,8 @@
 "\n",
 "import numpy as np\n",
 "import pyarrow as pa\n",
-"import pyarrow.fs as fs\n",
 "import pyarrow.dataset as ds\n",
+"import pyarrow.fs as fs\n",
 "import pyarrow.parquet as pq\n",
 "\n",
 "from object_store.arrow import ArrowFileSystemHandler\n",
@@ -25,7 +25,7 @@
 "pq.write_table(table.slice(0, 5), \"data/data1.parquet\", filesystem=store)\n",
 "pq.write_table(table.slice(5, 10), \"data/data2.parquet\", filesystem=store)\n",
 "\n",
-"dataset = ds.dataset(\"data\", format=\"parquet\", filesystem=store)\n"
+"dataset = ds.dataset(\"data\", format=\"parquet\", filesystem=store)"
 ]
},
{
@@ -38,8 +38,8 @@
 "\n",
 "import numpy as np\n",
 "import pyarrow as pa\n",
-"import pyarrow.fs as fs\n",
 "import pyarrow.dataset as ds\n",
+"import pyarrow.fs as fs\n",
 "import pyarrow.parquet as pq\n",
 "\n",
 "from object_store.arrow import ArrowFileSystemHandler\n",
@@ -66,7 +66,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from object_store import ObjectStore, ObjectMeta\n",
+"from object_store import ObjectMeta, ObjectStore\n",
 "\n",
 "# we use an in-memory store for demonstration purposes.\n",
 "# data will not be persisted and is not shared across store instances\n",
@@ -126,7 +126,7 @@
 " file_visitor=file_visitor,\n",
 ")\n",
 "\n",
-"len(visited_paths)\n"
+"len(visited_paths)"
 ]
},
{
@@ -137,7 +137,7 @@
 "source": [
 "partitioning = ds.partitioning(pa.schema([(\"c\", pa.int64())]), flavor=\"hive\")\n",
 "dataset_part = ds.dataset(\"/partitioned\", format=\"parquet\", filesystem=store, partitioning=partitioning)\n",
-"dataset_part.schema\n"
+"dataset_part.schema"
 ]
},
{
@@ -159,10 +159,12 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"import os\n",
+"\n",
+"import pyarrow.fs as pa_fs\n",
+"from object_store.arrow import ArrowFileSystemHandler\n",
 "\n",
 "from object_store import ClientOptions\n",
-"import os\n",
-"from object_store.arrow import ArrowFileSystemHandler\n",
 "\n",
 "storage_options = {\n",
 " \"account_name\": os.environ[\"AZURE_STORAGE_ACCOUNT_NAME\"],\n",
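For context, the notebook wires the Rust-backed handler into pyarrow roughly as follows; the "memory://" URL is an assumption here, standing in for the in-memory store the notebook cells construct:

import pyarrow.dataset as ds
import pyarrow.fs as fs

from object_store.arrow import ArrowFileSystemHandler

# Wrap the handler in a pyarrow filesystem. "memory://" is an assumed
# URL mirroring the notebook's in-memory store.
store = fs.PyFileSystem(ArrowFileSystemHandler("memory://"))

# Read back the parquet files written with pq.write_table(..., filesystem=store).
dataset = ds.dataset("data", format="parquet", filesystem=store)
print(dataset.schema)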
47 changes: 47 additions & 0 deletions object-store/python/object_store/_internal.pyi
@@ -15,9 +15,11 @@ class ObjectMeta:
     @property
     def size(self) -> int:
         """The size in bytes of the object"""
+
     @property
     def location(self) -> Path:
         """The full path to the object"""
+
     @property
     def last_modified(self) -> int:
         """The last modified time"""
@@ -28,6 +30,7 @@ class ListResult:
     @property
     def common_prefixes(self) -> list[Path]:
         """Prefixes that are common (like directories)"""
+
     @property
     def objects(self) -> list[ObjectMeta]:
         """Object metadata for the listing"""
@@ -41,12 +44,15 @@ class ClientOptions:
         Default is based on the version of this crate
         """
+
     @property
     def default_content_type(self) -> str | None:
         """Set the default CONTENT_TYPE for uploads"""
+
     @property
     def proxy_url(self) -> str | None:
         """Set an HTTP proxy to use for requests"""
+
     @property
     def allow_http(self) -> bool:
         """Sets what protocol is allowed.
@@ -55,6 +61,7 @@
         * false (default): Only HTTPS is allowed
         * true: HTTP and HTTPS are allowed
         """
+
     @property
     def allow_insecure(self) -> bool:
         """Allows connections to invalid SSL certificates
@@ -69,16 +76,19 @@
         introduces significant vulnerabilities, and should only be used
         as a last resort or for testing.
         """
+
     @property
     def timeout(self) -> int:
         """Set a request timeout (seconds)
         The timeout is applied from when the request starts connecting until the
         response body has finished
         """
+
     @property
     def connect_timeout(self) -> int:
         """Set a timeout (seconds) for only the connect phase of a Client"""
+
     @property
     def pool_idle_timeout(self) -> int:
         """Set the pool max idle timeout (seconds)
@@ -87,17 +97,20 @@
         Default is 90 seconds
         """
+
     @property
     def pool_max_idle_per_host(self) -> int:
         """Set the maximum number of idle connections per host
         Default is no limit"""
+
     @property
     def http2_keep_alive_interval(self) -> int:
         """Sets the interval at which HTTP2 Ping frames are sent to keep a connection alive.
         Default is disabled
         """
+
     @property
     def http2_keep_alive_timeout(self) -> int:
         """Sets a timeout for receiving an acknowledgement of the keep-alive ping.
@@ -107,6 +120,7 @@
         Default is disabled
         """
+
     @property
     def http2_keep_alive_while_idle(self) -> bool:
         """Enable HTTP2 keep alive pings for idle connections
@@ -116,9 +130,11 @@
         Default is disabled
         """
+
     @property
     def http1_only(self) -> bool:
         """Only use http1 connections"""
+
     @property
     def http2_only(self) -> bool:
         """Only use http2 connections"""
@@ -131,32 +147,42 @@ class ObjectStore:
     ) -> None: ...
     def get(self, location: Path) -> bytes:
         """Return the bytes that are stored at the specified location."""
+
     async def get_async(self, location: Path) -> bytes:
         """Return the bytes that are stored at the specified location."""
+
     def get_range(self, location: Path, start: int, length: int) -> bytes:
         """Return the bytes that are stored at the specified location in the given byte range."""
+
     async def get_range_async(self, location: Path, start: int, length: int) -> bytes:
         """Return the bytes that are stored at the specified location in the given byte range."""
+
     def put(self, location: Path, bytes: bytes) -> None:
         """Save the provided bytes to the specified location."""
+
     async def put_async(self, location: Path, bytes: bytes) -> None:
         """Save the provided bytes to the specified location."""
+
     def list(self, prefix: Path | None) -> list[ObjectMeta]:
         """List all the objects with the given prefix.
         Prefixes are evaluated on a path segment basis, i.e. `foo/bar/` is a prefix
         of `foo/bar/x` but not of `foo/bar_baz/x`.
         """
+
     async def list_async(self, prefix: Path | None) -> list[ObjectMeta]:
         """List all the objects with the given prefix.
         Prefixes are evaluated on a path segment basis, i.e. `foo/bar/` is a prefix
         of `foo/bar/x` but not of `foo/bar_baz/x`.
         """
+
     def head(self, location: Path) -> ObjectMeta:
         """Return the metadata for the specified location"""
+
     async def head_async(self, location: Path) -> ObjectMeta:
         """Return the metadata for the specified location"""
+
     def list_with_delimiter(self, prefix: Path | None) -> ListResult:
         """List objects with the given prefix and an implementation specific
         delimiter. Returns common prefixes (directories) in addition to object
@@ -165,6 +191,7 @@
         Prefixes are evaluated on a path segment basis, i.e. `foo/bar/` is a prefix
         of `foo/bar/x` but not of `foo/bar_baz/x`.
         """
+
     async def list_with_delimiter_async(self, prefix: Path | None) -> ListResult:
         """List objects with the given prefix and an implementation specific
         delimiter. Returns common prefixes (directories) in addition to object
@@ -173,30 +200,37 @@
         Prefixes are evaluated on a path segment basis, i.e. `foo/bar/` is a prefix
         of `foo/bar/x` but not of `foo/bar_baz/x`.
         """
+
     def delete(self, location: Path) -> None:
         """Delete the object at the specified location."""
+
     async def delete_async(self, location: Path) -> None:
         """Delete the object at the specified location."""
+
     def copy(self, src: Path, dst: Path) -> None:
         """Copy an object from one path to another in the same object store.
         If there exists an object at the destination, it will be overwritten.
         """
+
     async def copy_async(self, src: Path, dst: Path) -> None:
         """Copy an object from one path to another in the same object store.
         If there exists an object at the destination, it will be overwritten.
         """
+
     def copy_if_not_exists(self, src: Path, dst: Path) -> None:
         """Copy an object from one path to another, only if destination is empty.
         Will return an error if the destination already has an object.
         """
+
     async def copy_if_not_exists_async(self, src: Path, dst: Path) -> None:
         """Copy an object from one path to another, only if destination is empty.
         Will return an error if the destination already has an object.
         """
+
     def rename(self, src: Path, dst: Path) -> None:
         """Move an object from one path to another in the same object store.
@@ -205,6 +239,7 @@
         If there exists an object at the destination, it will be overwritten.
         """
+
     async def rename_async(self, src: Path, dst: Path) -> None:
         """Move an object from one path to another in the same object store.
@@ -213,11 +248,13 @@
         If there exists an object at the destination, it will be overwritten.
         """
+
     def rename_if_not_exists(self, src: Path, dst: Path) -> None:
         """Move an object from one path to another in the same object store.
         Will return an error if the destination already has an object.
         """
+
     async def rename_if_not_exists_async(self, src: Path, dst: Path) -> None:
         """Move an object from one path to another in the same object store.
@@ -263,27 +300,33 @@ class ArrowFileSystemHandler:
         If the destination exists and is a directory, an error is returned. Otherwise, it is replaced.
         """
+
     def create_dir(self, path: str, *, recursive: bool = True) -> None:
         """Create a directory and subdirectories.
         This function succeeds if the directory already exists.
         """
+
     def delete_dir(self, path: str) -> None:
         """Delete a directory and its contents, recursively."""
+
     def delete_file(self, path: str) -> None:
         """Delete a file."""
+
     def equals(self, other) -> bool: ...
     def delete_dir_contents(self, path: str, *, accept_root_dir: bool = False, missing_dir_ok: bool = False) -> None:
         """Delete a directory's contents, recursively.
         Like delete_dir, but doesn't delete the directory itself.
         """
+
     def get_file_info(self, paths: list[str]) -> list[fs.FileInfo]:
         """Get info for the given files.
         A non-existing or unreachable file returns a FileStat object and has a FileType of value NotFound.
         An exception indicates a truly exceptional condition (low-level I/O error, etc.).
         """
+
     def get_file_info_selector(
         self, base_dir: str, allow_not_found: bool = False, recursive: bool = False
     ) -> list[fs.FileInfo]:
@@ -292,16 +335,20 @@
         A non-existing or unreachable file returns a FileStat object and has a FileType of value NotFound.
         An exception indicates a truly exceptional condition (low-level I/O error, etc.).
         """
+
     def move_file(self, src: str, dest: str) -> None:
         """Move / rename a file or directory.
         If the destination exists: - if it is a non-empty directory, an error is returned - otherwise,
         if it has the same type as the source, it is replaced - otherwise, behavior is
         unspecified (implementation-dependent).
         """
+
     def normalize_path(self, path: str) -> str:
         """Normalize filesystem path."""
+
     def open_input_file(self, path: str) -> ObjectInputFile:
         """Open an input file for random access reading."""
+
     def open_output_stream(self, path: str, metadata: dict[str, str] | None = None) -> ObjectOutputStream:
         """Open an output stream for sequential writing."""
2 changes: 1 addition & 1 deletion object-store/tests/test_arrow.py
@@ -34,7 +34,7 @@ def test_file_info(file_systems: tuple[fs.PyFileSystem, fs.SubTreeFileSystem], t
     info = store.get_file_info(file_path)
     arrow_info = arrow_fs.get_file_info(file_path)

-    assert type(info) == type(arrow_info)
+    assert type(info) is type(arrow_info)
     assert info.path == arrow_info.path
     assert info.type == arrow_info.type
     assert info.size == arrow_info.size
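The one-line change swaps `==` for `is` when comparing types (ruff rule E721): `is` checks that both operands are the very same class object rather than routing through `__eq__`. A small illustration:

class Widget: ...

a, b = Widget(), Widget()

# type(a) is type(b) compares class identity directly; == would defer
# to the metaclass __eq__, which can be looser.
assert type(a) is type(b)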