-
Notifications
You must be signed in to change notification settings - Fork 83
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Reimplement download_all using a queue
- Loading branch information
Showing
9 changed files
with
446 additions
and
80 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,6 +52,7 @@ dev = [ | |
|
||
test = [ | ||
"pytest", | ||
"pytest-httpserver", | ||
"vermin", | ||
] | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
import asyncio | ||
import hashlib | ||
import os | ||
import tempfile | ||
from pathlib import Path | ||
from typing import NamedTuple | ||
|
||
import pytest | ||
from pytest_httpserver import HTTPServer | ||
|
||
from twitchdl.http import TaskError, TaskSuccess, download_all | ||
|
||
# One mebibyte in bytes; used to size the generated test files.
MiB = 2**20
|
||
|
||
class File(NamedTuple):
    """An in-memory test file together with its identity on the test server."""

    data: bytes  # raw file content
    hash: str  # SHA-256 hex digest of `data`
    path: str  # server request path ("/" followed by the digest)
|
||
|
||
def generate_test_file(size: int) -> "File":
    """Create an in-memory file of `size` random bytes.

    The SHA-256 hex digest of the content doubles as the server path,
    giving each generated file a unique, content-derived URL.
    """
    data = os.urandom(size)
    # `digest` rather than `hash`: avoid shadowing the builtin.
    digest = hashlib.sha256(data).hexdigest()
    return File(data, digest, f"/{digest}")
|
||
|
||
def hash_file(path: Path) -> str:
    """Return the SHA-256 hex digest of the file at `path`.

    Reads in fixed-size chunks so arbitrarily large files never have to
    fit in memory at once. (The previous version called `f.read()` with
    no size, which slurped the whole file in one go and made the chunk
    loop pointless.)
    """
    digest = hashlib.sha256()  # `digest`, not `hash`: avoid shadowing the builtin
    with open(path, "rb") as f:
        # iter(callable, sentinel) yields 64 KiB chunks until EOF (b"").
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
|
||
|
||
@pytest.fixture
def temp_dir():
    """Yield a throwaway directory as a Path; deleted when the test ends."""
    tmp = tempfile.TemporaryDirectory()
    try:
        yield Path(tmp.name)
    finally:
        tmp.cleanup()
|
||
|
||
def test_success(httpserver: HTTPServer, temp_dir: Path):
    """All downloads succeed: every task reports TaskSuccess and each
    target file matches the served content byte-for-byte."""
    count = 10
    workers = 5
    file_size = 1 * MiB

    files = [generate_test_file(file_size) for _ in range(count)]
    for f in files:
        httpserver.expect_request(f.path).respond_with_data(f.data)  # type: ignore

    sources = [httpserver.url_for(f.path) for f in files]
    targets = [temp_dir / f.hash for f in files]

    result = asyncio.run(download_all(zip(sources, targets), workers))
    # Bind the task list up front so the per-task loop variable below does
    # not shadow the overall result (consistent with the other tests here).
    results = result.results
    assert result.ok
    assert len(results) == count

    for index, (file, source, target, task_result) in enumerate(
        zip(files, sources, targets, results)
    ):
        assert isinstance(task_result, TaskSuccess)
        assert task_result.ok
        assert not task_result.existing
        assert task_result.task_id == index
        assert task_result.size == file_size
        assert task_result.url == source
        assert task_result.target == target

        assert target.exists()
        assert os.path.getsize(target) == file_size
        assert file.hash == hash_file(target)
|
||
|
||
def test_allow_failures(httpserver: HTTPServer, temp_dir: Path):
    """One task gets a 404 but failures are allowed: the run as a whole
    still reports ok, the failed task yields a TaskError with no file on
    disk, and every other task downloads its file intact."""
    count = 10
    workers = 5
    file_size = 1 * MiB
    failing_index = 5

    files = [generate_test_file(file_size) for _ in range(count)]
    for index, f in enumerate(files):
        if index == failing_index:
            httpserver.expect_request(f.path).respond_with_data("not found", status=404)  # type: ignore
        else:
            httpserver.expect_request(f.path).respond_with_data(f.data)  # type: ignore

    sources = [httpserver.url_for(f.path) for f in files]
    targets = [temp_dir / f.hash for f in files]

    outcome = asyncio.run(download_all(zip(sources, targets), workers))
    assert outcome.ok
    assert len(outcome.results) == count

    for index, (file, source, target, task) in enumerate(
        zip(files, sources, targets, outcome.results)
    ):
        if index == failing_index:
            # The 404 task must fail without leaving a file behind.
            assert not target.exists()
            assert isinstance(task, TaskError)
            assert not task.ok
            assert task.task_id == index
            assert task.url == source
            assert task.target == target
            continue

        assert target.exists()
        assert os.path.getsize(target) == file_size
        assert file.hash == hash_file(target)
        assert isinstance(task, TaskSuccess)
        assert task.ok
        assert not task.existing
        assert task.task_id == index
        assert task.size == file_size
        assert task.url == source
        assert task.target == target
|
||
|
||
def test_dont_allow_failures(httpserver: HTTPServer, temp_dir: Path):
    """One task gets a 404 and failures are NOT allowed: the overall run
    reports not-ok, while per-task results mirror the allow-failures case
    (a TaskError for the 404, TaskSuccess with intact files elsewhere)."""
    count = 10
    workers = 5
    file_size = 1 * MiB
    failing_index = 5

    files = [generate_test_file(file_size) for _ in range(count)]
    for index, f in enumerate(files):
        if index == failing_index:
            httpserver.expect_request(f.path).respond_with_data("not found", status=404)  # type: ignore
        else:
            httpserver.expect_request(f.path).respond_with_data(f.data)  # type: ignore

    sources = [httpserver.url_for(f.path) for f in files]
    targets = [temp_dir / f.hash for f in files]

    outcome = asyncio.run(download_all(zip(sources, targets), workers, allow_failures=False))
    assert not outcome.ok
    assert len(outcome.results) == count

    for index, (file, source, target, task) in enumerate(
        zip(files, sources, targets, outcome.results)
    ):
        if index == failing_index:
            # The 404 task must fail without leaving a file behind.
            assert not target.exists()
            assert isinstance(task, TaskError)
            assert not task.ok
            assert task.task_id == index
            assert task.url == source
            assert task.target == target
            continue

        assert target.exists()
        assert os.path.getsize(target) == file_size
        assert file.hash == hash_file(target)
        assert isinstance(task, TaskSuccess)
        assert task.ok
        assert not task.existing
        assert task.task_id == index
        assert task.size == file_size
        assert task.url == source
        assert task.target == target
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -349,7 +349,6 @@ def download( | |
max_workers=max_workers, | ||
cache_dir=cache_dir, | ||
) | ||
|
||
download(list(ids), options) | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.