Skip to content

Commit

Permalink
refactor: use monorepo structure
Browse files Browse the repository at this point in the history
  • Loading branch information
alejandrodnm committed Oct 9, 2024
1 parent 57df731 commit 0ade028
Show file tree
Hide file tree
Showing 116 changed files with 4,494 additions and 422 deletions.
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ RUN set -eux; \
ENV PIP_BREAK_SYSTEM_PACKAGES=1
COPY requirements-test.txt /build/requirements-test.txt
RUN pip install -r /build/requirements-test.txt
COPY projects/pgai/requirements.txt /build/requirements-pgai.txt
RUN pip install -r /build/requirements-pgai.txt
RUN rm -r /build

WORKDIR /pgai
WORKDIR /pgai
15 changes: 11 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -82,22 +82,30 @@ test-server:
vectorizer:
@./build.py vectorizer

.PHONY: test-vectorizer
test-vectorizer:
@cd projects/pgai && pytest

.PHONY: test
test:
test: test-vectorizer
@./build.py test

.PHONY: lint-sql
lint-sql:
@./build.py lint-sql

.PHONY: lint-py
lint-py:
lint-py: type-check-py
@./build.py lint-py

.PHONY: lint
lint:
lint: type-check-py
@./build.py lint

.PHONY: type-check-py
type-check-py:
pyright

.PHONY: format-py
format-py:
@./build.py format-py
Expand Down Expand Up @@ -146,4 +154,3 @@ docker-shell:
.PHONY: psql-shell
psql-shell:
@docker exec -it -u postgres pgai /bin/bash -c "set -e; if [ -f .env ]; then set -a; source .env; set +a; fi; psql"

74 changes: 41 additions & 33 deletions build.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/usr/bin/env python3
import os
import platform
import subprocess
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
Expand Down Expand Up @@ -69,19 +69,19 @@ def project_dir() -> Path:


def sql_dir() -> Path:
return project_dir().joinpath("sql").resolve()
return project_extension_dir().joinpath("sql").resolve()


def src_dir() -> Path:
return project_dir().joinpath("src").resolve()
def projects_dir() -> Path:
return project_dir().joinpath("projects").resolve()


def src_extension_dir() -> Path:
return src_dir().joinpath("extension").resolve()
def project_extension_dir() -> Path:
return projects_dir().joinpath("extension").resolve()


def src_vectorizer_dir() -> Path:
return src_dir().joinpath("vectorizer").resolve()
def project_pgai_dir() -> Path:
return projects_dir().joinpath("pgai").resolve()


def incremental_sql_dir() -> Path:
Expand Down Expand Up @@ -135,7 +135,7 @@ def output_sql_file() -> Path:


def tests_dir() -> Path:
return project_dir().joinpath("tests")
return project_dir().joinpath("projects/extension/tests")


def where_am_i() -> str:
Expand Down Expand Up @@ -307,7 +307,7 @@ def python_install_dir() -> Path:
def install_old_py_deps() -> None:
# this is necessary for versions prior to 0.4.0
# we will deprecate these versions and then get rid of this function
old_reqs_file = src_extension_dir().joinpath("old_requirements.txt").resolve()
old_reqs_file = project_extension_dir().joinpath("old_requirements.txt").resolve()
if old_reqs_file.exists():
env = {k: v for k, v in os.environ.items()}
env["PIP_BREAK_SYSTEM_PACKAGES"] = "1"
Expand All @@ -316,7 +316,7 @@ def install_old_py_deps() -> None:
shell=True,
check=True,
env=env,
cwd=str(src_extension_dir()),
cwd=str(project_extension_dir()),
)


Expand Down Expand Up @@ -363,7 +363,7 @@ def build_init_py() -> None:
# function just ensures that you can't screw up the current version. The
# only place you have to update the version when starting a new release is
# in the versions() function.
init_py = src_extension_dir().joinpath("ai", "__init__.py").resolve()
init_py = project_extension_dir().joinpath("ai", "__init__.py").resolve()
content = init_py.read_text()
lines = []
for line in content.splitlines(keepends=True):
Expand All @@ -390,28 +390,28 @@ def install_py() -> None:
): # delete package info if exists
shutil.rmtree(d)
subprocess.run(
f'pip3 install -v --no-deps --compile -t "{version_target_dir}" "{src_extension_dir()}"',
f'pip3 install -v --no-deps --compile -t "{version_target_dir}" "{project_extension_dir()}"',
check=True,
shell=True,
env=os.environ,
cwd=str(src_extension_dir()),
cwd=str(project_extension_dir()),
)
else:
version_target_dir.mkdir(exist_ok=True)
subprocess.run(
f'pip3 install -v --compile -t "{version_target_dir}" "{src_extension_dir()}"',
f'pip3 install -v --compile -t "{version_target_dir}" "{project_extension_dir()}"',
check=True,
shell=True,
env=os.environ,
cwd=str(src_extension_dir()),
cwd=str(project_extension_dir()),
)


def clean_py() -> None:
d = src_extension_dir().joinpath("build")
d = project_extension_dir().joinpath("build")
if d.exists():
shutil.rmtree(d, ignore_errors=True)
d = src_extension_dir().joinpath("pgai.egg-info")
d = project_extension_dir().joinpath("pgai.egg-info")
if d.exists():
shutil.rmtree(d, ignore_errors=True)

Expand All @@ -420,13 +420,13 @@ def uninstall_py() -> None:
shutil.rmtree(python_install_dir(), ignore_errors=True)


def build_vectorizer_init_py() -> None:
# vectorizer/__init__.py is checked in to version control. So, all the previous
def build_pgai_init_py() -> None:
# pgai/__init__.py is checked in to version control. So, all the previous
# versions will have the file with the correct version already in it. This
# function just ensures that you can't screw up the current version. The
# only place you have to update the version when starting a new release is
# in the versions() function.
init_py = src_vectorizer_dir().joinpath("vectorizer", "__init__.py").resolve()
init_py = project_pgai_dir().joinpath("pgai", "__init__.py").resolve()
content = init_py.read_text()
lines = []
for line in content.splitlines(keepends=True):
Expand All @@ -438,32 +438,32 @@ def build_vectorizer_init_py() -> None:


def install_vectorizer() -> None:
build_vectorizer_init_py()
build_pgai_init_py()
subprocess.run(
f'pip3 install -v --compile "{src_vectorizer_dir()}"',
f'pip3 install -v --compile "{project_pgai_dir()}"',
check=True,
shell=True,
env=os.environ,
cwd=str(src_vectorizer_dir()),
cwd=str(project_pgai_dir()),
)


def clean_vectorizer() -> None:
d = src_vectorizer_dir().joinpath("build")
d = project_pgai_dir().joinpath("build")
if d.exists():
shutil.rmtree(d, ignore_errors=True)
d = src_vectorizer_dir().joinpath("vectorizer.egg-info")
d = project_pgai_dir().joinpath("vectorizer.egg-info")
if d.exists():
shutil.rmtree(d, ignore_errors=True)


def uninstall_vectorizer() -> None:
subprocess.run(
f'pip3 uninstall -v -y vectorizer',
"pip3 uninstall -v -y vectorizer",
check=True,
shell=True,
env=os.environ,
cwd=str(src_vectorizer_dir()),
cwd=str(project_pgai_dir()),
)


Expand Down Expand Up @@ -497,7 +497,7 @@ def clean() -> None:

def test_server() -> None:
if where_am_i() == "host":
cmd = "docker exec -it -w /pgai/tests/vectorizer pgai fastapi dev server.py"
cmd = "docker exec -it -w /projects/extension/tests/vectorizer pgai fastapi dev server.py"
subprocess.run(cmd, shell=True, check=True, env=os.environ, cwd=project_dir())
else:
cmd = "fastapi dev server.py"
Expand Down Expand Up @@ -542,7 +542,9 @@ def lint_sql() -> None:


def lint_py() -> None:
subprocess.run(f"ruff check {src_dir()}", shell=True, check=True, env=os.environ)
subprocess.run(
f"ruff check {projects_dir()}", shell=True, check=True, env=os.environ
)


def lint() -> None:
Expand All @@ -552,12 +554,12 @@ def lint() -> None:

def format_py() -> None:
subprocess.run(
f"ruff format --diff {src_dir()}", shell=True, check=True, env=os.environ
f"ruff format --diff {projects_dir()}", shell=True, check=True, env=os.environ
)


def docker_build() -> None:
if platform.machine().lower() in {'i386', 'i686', 'x86_64'}:
if platform.machine().lower() in {"i386", "i686", "x86_64"}:
rust_flags = "--build-arg RUSTFLAGS='-C target-feature=+avx2,+fma'"
else:
rust_flags = ""
Expand All @@ -578,15 +580,21 @@ def docker_build_vectorizer() -> None:
check=True,
env=os.environ,
text=True,
cwd=src_vectorizer_dir(),
cwd=project_pgai_dir(),
)


def docker_run() -> None:
# Set TESTCONTAINERS_HOST_OVERRIDE when running on MacOS.
env_var = ""
if platform.system() == "Darwin":
env_var = "-e TESTCONTAINERS_HOST_OVERRIDE=host.docker.internal"
cmd = " ".join(
[
"docker run -d --name pgai -p 127.0.0.1:5432:5432 -e POSTGRES_HOST_AUTH_METHOD=trust",
"-v /var/run/docker.sock:/var/run/docker.sock",
f"--mount type=bind,src={project_dir()},dst=/pgai",
env_var, # Include the environment variable if on macOS
"pgai",
"-c shared_preload_libraries='timescaledb, pgextwlist'",
"-c extwlist.extensions='ai,vector'",
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
6 changes: 3 additions & 3 deletions tests/conftest.py → projects/extension/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,16 @@ def create_test_db(cur: psycopg.Cursor) -> None:
@pytest.fixture(scope="session", autouse=True)
def set_up_test_db() -> None:
# create a test user and test database owned by the test user
with psycopg.connect(f"postgres://postgres@127.0.0.1:5432/postgres", autocommit=True) as con:
with psycopg.connect("postgres://postgres@127.0.0.1:5432/postgres", autocommit=True) as con:
with con.cursor() as cur:
create_test_user(cur)
create_test_db(cur)
# grant some things to the test user in the test database
with psycopg.connect(f"postgres://postgres@127.0.0.1:5432/test", autocommit=True) as con:
with psycopg.connect("postgres://postgres@127.0.0.1:5432/test", autocommit=True) as con:
with con.cursor() as cur:
cur.execute("grant execute on function pg_read_binary_file(text) to test")
cur.execute("grant pg_read_server_files to test")
# use the test user to create the extension in the test database
with psycopg.connect(f"postgres://test@127.0.0.1:5432/test") as con:
with psycopg.connect("postgres://test@127.0.0.1:5432/test") as con:
with con.cursor() as cur:
cur.execute("create extension ai cascade")
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def dump_db() -> None:
def restore_db() -> None:
with psycopg.connect(db_url(user=USER, dbname="dst")) as con:
with con.cursor() as cur:
cur.execute(f"create extension ai cascade")
cur.execute("create extension ai cascade")
cmd = " ".join([
"psql",
f'''-d "{db_url(USER, "dst")}"''',
Expand Down Expand Up @@ -145,4 +145,3 @@ def test_dump_restore():
assert dst == src
after_dst() # make sure we can USE the restored db
assert count_vectorizers() == 2

File renamed without changes
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import json
import os
import subprocess
import json

import psycopg
from psycopg.rows import namedtuple_row
import pytest
from psycopg.rows import namedtuple_row

# skip tests in this module if disabled
enable_vectorizer_tests = os.getenv("ENABLE_VECTORIZER_TESTS")
Expand Down Expand Up @@ -249,7 +249,7 @@ def test_vectorizer_timescaledb():
assert actual == 3

# bob should have select on the source table
cur.execute(f"select has_table_privilege('bob', 'website.blog', 'select')")
cur.execute("select has_table_privilege('bob', 'website.blog', 'select')")
actual = cur.fetchone()[0]
assert actual

Expand Down Expand Up @@ -375,7 +375,7 @@ def test_vectorizer_timescaledb():
cur2.execute("begin transaction")
# lock 1 row from the queue
cur2.execute(f"select * from {vec.queue_schema}.{vec.queue_table} where title = 'how to grill a steak' for update")
locked = cur2.fetchone()
cur2.fetchone()
# check that vectorizer queue depth still gets the correct count
cur.execute("select ai.vectorizer_queue_pending(%s)", (vectorizer_id,))
actual = cur.fetchone()[0]
Expand Down Expand Up @@ -528,7 +528,7 @@ def test_drop_vectorizer():
assert actual == 0

# does the func that backed the trigger exist? (it should not)
cur.execute(f"""
cur.execute("""
select count(*)
from pg_proc
where oid = %s
Expand All @@ -537,7 +537,7 @@ def test_drop_vectorizer():
assert actual == 0

# does the timescaledb job exist? (it should not)
cur.execute(f"""
cur.execute("""
select count(*)
from timescaledb_information.jobs
where job_id = %s
Expand Down Expand Up @@ -625,7 +625,7 @@ def index_creation_tester(cur: psycopg.Cursor, vectorizer_id: int) -> None:
cur.execute(f"insert into {vectorizer.queue_schema}.{vectorizer.queue_table}(id) select generate_series(1, 5)")

# should NOT create index
cur.execute(f"""
cur.execute("""
select ai._vectorizer_should_create_vector_index(v)
from ai.vectorizer v
where v.id = %s
Expand All @@ -650,7 +650,7 @@ def index_creation_tester(cur: psycopg.Cursor, vectorizer_id: int) -> None:
cur.execute(f"delete from {vectorizer.queue_schema}.{vectorizer.queue_table}")

# SHOULD create index
cur.execute(f"""
cur.execute("""
select ai._vectorizer_should_create_vector_index(v)
from ai.vectorizer v
where v.id = %s
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from .cli import run


if __name__ == "__main__":
run()
Loading

0 comments on commit 0ade028

Please sign in to comment.