diff --git a/notebooks/experimental/Profiling.ipynb b/notebooks/experimental/Profiling.ipynb new file mode 100644 index 00000000000..a003d21b00c --- /dev/null +++ b/notebooks/experimental/Profiling.ipynb @@ -0,0 +1,93 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "a810d2e7-86f4-4764-a153-5a65ab704889", + "metadata": {}, + "outputs": [], + "source": [ + "# syft absolute\n", + "import syft as sy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d3b2e87-0488-45bb-9395-d2027815556a", + "metadata": {}, + "outputs": [], + "source": [ + "# Works only for in-memory python workers with uvicorn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a38eedb-849b-4cc3-8077-4a347ab8db34", + "metadata": {}, + "outputs": [], + "source": [ + "canada_datasite = sy.orchestra.launch(\n", + " \"canada-datasite\", port=\"auto\", dev_mode=True, profile=True, profile_interval=0.001\n", + ")\n", + "\n", + "italy_datasite = sy.orchestra.launch(\n", + " \"italy-datasite\", port=\"auto\", dev_mode=True, profile=True, profile_interval=0.001\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "300b2180-d611-465c-8d13-c02fedf8a959", + "metadata": {}, + "outputs": [], + "source": [ + "do_canada_client = canada_datasite.login(\n", + " email=\"info@openmined.org\", password=\"changethis\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abb6481b-22c0-4cce-a8c9-4f0cea63b564", + "metadata": {}, + "outputs": [], + "source": [ + "%%pyinstrument\n", + "do_italy_client = italy_datasite.login(email=\"info@openmined.org\", password=\"changethis\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c030558-0a16-4194-9397-0261ecb40b9b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/packages/syft/setup.cfg b/packages/syft/setup.cfg index 9d579446775..aa5379a76e4 100644 --- a/packages/syft/setup.cfg +++ b/packages/syft/setup.cfg @@ -94,6 +94,7 @@ dev = %(test_plugins)s %(telemetry)s bandit==1.7.8 + pyinstrument==4.6.2 debugpy==1.8.2 importlib-metadata==7.1.0 isort==5.13.2 diff --git a/packages/syft/src/syft/orchestra.py b/packages/syft/src/syft/orchestra.py index ebe74b85d31..d6556ce5b30 100644 --- a/packages/syft/src/syft/orchestra.py +++ b/packages/syft/src/syft/orchestra.py @@ -186,6 +186,9 @@ def deploy_to_python( background_tasks: bool = False, debug: bool = False, migrate: bool = False, + profile: bool = False, + profile_interval: float = 0.001, + profile_dir: str | None = None, ) -> ServerHandle: worker_classes = { ServerType.DATASITE: Datasite, @@ -215,6 +218,9 @@ def deploy_to_python( "background_tasks": background_tasks, "debug": debug, "migrate": migrate, + "profile": profile, + "profile_interval": profile_interval, + "profile_dir": profile_dir, } if port: @@ -325,6 +331,10 @@ def launch( background_tasks: bool = False, debug: bool = False, migrate: bool = False, + # Profiling Related Input for in-memory fastapi server + profile: bool = False, + profile_interval: float = 0.001, + profile_dir: str | None = None, ) -> ServerHandle: if dev_mode is True: thread_workers = True @@ -363,6 +373,9 @@ def launch( background_tasks=background_tasks, debug=debug, migrate=migrate, + profile=profile, + profile_interval=profile_interval, + profile_dir=profile_dir, ) display( SyftInfo( diff --git a/packages/syft/src/syft/server/routes.py b/packages/syft/src/syft/server/routes.py index e4d6906ae7f..92238d3e316 100644 --- a/packages/syft/src/syft/server/routes.py +++ b/packages/syft/src/syft/server/routes.py @@ -2,7 +2,10 @@ import base64 import binascii from collections.abc import AsyncGenerator +from collections.abc import Callable +from datetime import datetime import logging +from pathlib import Path from typing import Annotated # third party @@ -34,12 +37,13 @@ from ..util.telemetry import TRACE_MODE from .credentials import SyftVerifyKey from .credentials import UserLoginCredentials +from .uvicorn_settings import UvicornSettings from .worker import Worker logger = logging.getLogger(__name__) -def make_routes(worker: Worker) -> APIRouter: +def make_routes(worker: Worker, settings: UvicornSettings | None = None) -> APIRouter: if TRACE_MODE: # third party try: @@ -49,6 +53,34 @@ def make_routes(worker: Worker) -> APIRouter: except Exception as e: logger.error("Failed to import opentelemetry", exc_info=e) + def _handle_profile( + request: Request, handler_func: Callable, *args: list, **kwargs: dict + ) -> Response: + if not settings: + raise Exception("Server Settings are required to enable profiling") + # third party + from pyinstrument import Profiler # Lazy Load + + profiles_dir = Path(settings.profile_dir or Path.cwd()) / "profiles" + profiles_dir.mkdir(parents=True, exist_ok=True) + + with Profiler( + interval=settings.profile_interval, async_mode="enabled" + ) as profiler: + response = handler_func(*args, **kwargs) + + timestamp = datetime.now().strftime("%d-%m-%Y-%H:%M:%S") + url_path = request.url.path.replace("/api/v2", "").replace("/", "-") + profile_output_path = ( + profiles_dir / f"{settings.name}-{timestamp}{url_path}.html" + ) + profiler.write_html(profile_output_path) + + logger.info( + f"Request to {request.url.path} took {profiler.last_session.duration:.2f} seconds" + ) + return response + router = APIRouter() async def get_body(request: Request) -> bytes: @@ -165,6 +197,13 @@ def syft_new_api( kind=trace.SpanKind.SERVER, ): return handle_syft_new_api(user_verify_key, communication_protocol) + elif settings and settings.profile: + return _handle_profile( + request, + handle_syft_new_api, + user_verify_key, + communication_protocol, + ) else: return handle_syft_new_api(user_verify_key, communication_protocol) @@ -188,6 +227,8 @@ def syft_new_api_call( kind=trace.SpanKind.SERVER, ): return handle_new_api_call(data) + elif settings and settings.profile: + return _handle_profile(request, handle_new_api_call, data) else: return handle_new_api_call(data) @@ -255,6 +296,8 @@ def login( kind=trace.SpanKind.SERVER, ): return handle_login(email, password, worker) + elif settings and settings.profile: + return _handle_profile(request, handle_login, email, password, worker) else: return handle_login(email, password, worker) @@ -269,6 +312,8 @@ def register( kind=trace.SpanKind.SERVER, ): return handle_register(data, worker) + elif settings and settings.profile: + return _handle_profile(request, handle_register, data, worker) else: return handle_register(data, worker) diff --git a/packages/syft/src/syft/server/uvicorn.py b/packages/syft/src/syft/server/uvicorn.py index 953d19a4c2e..be50efff117 100644 --- a/packages/syft/src/syft/server/uvicorn.py +++ b/packages/syft/src/syft/server/uvicorn.py @@ -1,5 +1,6 @@ # stdlib from collections.abc import Callable +from datetime import datetime import logging import multiprocessing import multiprocessing.synchronize @@ -15,8 +16,8 @@ # third party from fastapi import APIRouter from fastapi import FastAPI -from pydantic_settings import BaseSettings -from pydantic_settings import SettingsConfigDict +from fastapi import Request +from fastapi import Response import requests from starlette.middleware.cors import CORSMiddleware import uvicorn @@ -34,6 +35,7 @@ from .server import ServerType from .utils import get_named_server_uid from .utils import remove_temp_dir_for_server +from .uvicorn_settings import UvicornSettings if os_name() == "macOS": # needed on MacOS to prevent [__NSCFConstantString initialize] may have been in @@ -43,26 +45,8 @@ WAIT_TIME_SECONDS = 20 -class AppSettings(BaseSettings): - name: str - server_type: ServerType = ServerType.DATASITE - server_side_type: ServerSideType = ServerSideType.HIGH_SIDE - processes: int = 1 - reset: bool = False - dev_mode: bool = False - enable_warnings: bool = False - in_memory_workers: bool = True - queue_port: int | None = None - create_producer: bool = False - n_consumers: int = 0 - association_request_auto_approval: bool = False - background_tasks: bool = False - - model_config = SettingsConfigDict(env_prefix="SYFT_", env_parse_none_str="None") - - def app_factory() -> FastAPI: - settings = AppSettings() + settings = UvicornSettings() worker_classes = { ServerType.DATASITE: Datasite, @@ -75,21 +59,49 @@ def app_factory() -> FastAPI: ) worker_class = worker_classes[settings.server_type] - kwargs = settings.model_dump() + worker_kwargs = settings.model_dump() + # Remove Profiling inputs + worker_kwargs.pop("profile") + worker_kwargs.pop("profile_interval") + worker_kwargs.pop("profile_dir") if settings.dev_mode: print( f"WARN: private key is based on server name: {settings.name} in dev_mode. " "Don't run this in production." ) - worker = worker_class.named(**kwargs) + worker = worker_class.named(**worker_kwargs) else: - worker = worker_class(**kwargs) + worker = worker_class(**worker_kwargs) app = FastAPI(title=settings.name) - router = make_routes(worker=worker) + router = make_routes(worker=worker, settings=settings) api_router = APIRouter() api_router.include_router(router) app.include_router(api_router, prefix="/api/v2") + + # Register middlewares + _register_middlewares(app, settings) + + return app + + +def _register_middlewares(app: FastAPI, settings: UvicornSettings) -> None: + _register_cors_middleware(app) + + # As currently sync routes are not supported in pyinstrument + # we are not registering the profiler middleware for sync routes + # as currently most of our routes are sync routes in syft (routes.py) + # ex: syft_new_api, syft_new_api_call, login, register + # we should either convert these routes to async or + # wait until pyinstrument supports sync routes + # The reason we cannot our sync routes to async is because + # we have blocking IO operations, like the requests library, like if one route calls to + # itself, it will block the event loop and the server will hang + # if settings.profile: + # _register_profiler(app, settings) + + +def _register_cors_middleware(app: FastAPI) -> None: app.add_middleware( CORSMiddleware, allow_origins=["*"], @@ -97,7 +109,55 @@ def app_factory() -> FastAPI: allow_methods=["*"], allow_headers=["*"], ) - return app + + +def _register_profiler(app: FastAPI, settings: UvicornSettings) -> None: + # third party + from pyinstrument import Profiler + + profiles_dir = ( + Path.cwd() / "profiles" + if settings.profile_dir is None + else Path(settings.profile_dir) / "profiles" + ) + + @app.middleware("http") + async def profile_request( + request: Request, call_next: Callable[[Request], Response] + ) -> Response: + with Profiler( + interval=settings.profile_interval, async_mode="enabled" + ) as profiler: + response = await call_next(request) + + # Profile File Name - Datasite Name - Timestamp - URL Path + timestamp = datetime.now().strftime("%d-%m-%Y-%H:%M:%S") + profiles_dir.mkdir(parents=True, exist_ok=True) + url_path = request.url.path.replace("/api/v2", "").replace("/", "-") + profile_output_path = ( + profiles_dir / f"{settings.name}-{timestamp}{url_path}.html" + ) + + # Write the profile to a HTML file + profiler.write_html(profile_output_path) + + print( + f"Request to {request.url.path} took {profiler.last_session.duration:.2f} seconds" + ) + + return response + + +def _load_pyinstrument_jupyter_extension() -> None: + try: + # third party + from IPython import get_ipython + + ipython = get_ipython() # noqa: F821 + ipython.run_line_magic("load_ext", "pyinstrument") + print("Pyinstrument Jupyter extension loaded") + except Exception as e: + print(f"Error loading pyinstrument jupyter extension: {e}") def attach_debugger() -> None: @@ -152,7 +212,7 @@ def run_uvicorn( attach_debugger() # Set up all kwargs as environment variables so that they can be accessed in the app_factory function. - env_prefix = AppSettings.model_config.get("env_prefix", "") + env_prefix = UvicornSettings.model_config.get("env_prefix", "") for key, value in kwargs.items(): key_with_prefix = f"{env_prefix}{key.upper()}" os.environ[key_with_prefix] = str(value) @@ -198,6 +258,10 @@ def serve_server( association_request_auto_approval: bool = False, background_tasks: bool = False, debug: bool = False, + # Profiling inputs + profile: bool = False, + profile_interval: float = 0.001, + profile_dir: str | None = None, ) -> tuple[Callable, Callable]: starting_uvicorn_event = multiprocessing.Event() @@ -205,6 +269,13 @@ def serve_server( if dev_mode: enable_autoreload() + # Load the Pyinstrument Jupyter extension if profile is enabled. + if profile: + _load_pyinstrument_jupyter_extension() + if profile_dir is None: + profile_dir = str(Path.cwd()) + print("Profiling Output Directory: ", profile_dir) + server_process = multiprocessing.Process( target=run_uvicorn, kwargs={ @@ -225,6 +296,9 @@ def serve_server( "background_tasks": background_tasks, "debug": debug, "starting_uvicorn_event": starting_uvicorn_event, + "profile": profile, + "profile_interval": profile_interval, + "profile_dir": profile_dir, }, ) diff --git a/packages/syft/src/syft/server/uvicorn_settings.py b/packages/syft/src/syft/server/uvicorn_settings.py new file mode 100644 index 00000000000..0936a33dea9 --- /dev/null +++ b/packages/syft/src/syft/server/uvicorn_settings.py @@ -0,0 +1,30 @@ +# third party +from pydantic_settings import BaseSettings +from pydantic_settings import SettingsConfigDict + +# relative +from ..abstract_server import ServerSideType +from ..abstract_server import ServerType + + +class UvicornSettings(BaseSettings): + name: str + server_type: ServerType = ServerType.DATASITE + server_side_type: ServerSideType = ServerSideType.HIGH_SIDE + processes: int = 1 + reset: bool = False + dev_mode: bool = False + enable_warnings: bool = False + in_memory_workers: bool = True + queue_port: int | None = None + create_producer: bool = False + n_consumers: int = 0 + association_request_auto_approval: bool = False + background_tasks: bool = False + + # Profiling inputs + profile: bool = False + profile_interval: float = 0.001 + profile_dir: str | None = None + + model_config = SettingsConfigDict(env_prefix="SYFT_", env_parse_none_str="None")