From 1966c2ae79f255ebd94c54e72a4bad2df12e6258 Mon Sep 17 00:00:00 2001 From: Samuel Howard Date: Wed, 15 May 2024 19:28:30 -0500 Subject: [PATCH] Add an option to limit the size of query results for efficiency --- README.md | 2 ++ makefile | 4 ++-- pyproject.toml | 2 +- src/multi_bible_search/bible_search_adapter.py | 6 ++++-- src/multi_bible_search/multi_bible_search.c | 9 ++++++--- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index ab40740..395381d 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,8 @@ The results of each query are ranked by the number of keyword occurrences. If a query is made with no matches, say "notawordinthebible," the result of the query will be a list of length 0. +The maximum number of results may be specified with the optional `max_results` parameter. + ### Preloading an Index Versions are automatically loaded as needed, but you may wish to preload a version for the sake of speed. diff --git a/makefile b/makefile index b4f76ec..1097901 100644 --- a/makefile +++ b/makefile @@ -1,8 +1,8 @@ build: py -m build -install: dist/multi_bible_search-2.0.0.tar.gz - pip install --force-reinstall ./dist/multi_bible_search-2.0.0.tar.gz +install: dist/multi_bible_search-2.0.1.tar.gz + pip install --force-reinstall ./dist/multi_bible_search-2.0.1.tar.gz copy venv\\Lib\\site-packages\\multi_bible_search\\*.pyd src\\multi_bible_search\\ full: build install diff --git a/pyproject.toml b/pyproject.toml index 71d6d2d..64571da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "multi_bible_search" -version = "2.0.0" +version = "2.0.1" authors = [ { name="Samuel Howard" }, ] diff --git a/src/multi_bible_search/bible_search_adapter.py b/src/multi_bible_search/bible_search_adapter.py index 85068d0..c313ec7 100644 --- a/src/multi_bible_search/bible_search_adapter.py +++ b/src/multi_bible_search/bible_search_adapter.py @@ -1,6 +1,7 @@ import bz2 import json import os +import sys from typing import List, Union from .multi_bible_search import BibleSearch as cBibleSearch @@ -78,17 +79,18 @@ def unload_version(self, version: str) -> None: else: raise Exception(f"Invalid version {version}") - def search(self, query: str, version="KJV") -> List[str]: + def search(self, query: str, version: str = "KJV", max_results: int = sys.maxsize) -> List[str]: """ Search for a passage in the Bible. :param query: The search query string. :param version: The version to search. + :param max_results: The maximum number of results to retrieve. :return: List of match references (e.g., `["John 11:35", "Matthew 1:7", ...]`). """ # Load the version if it is not already loaded if version not in self.__loaded: self.load(version) - return self.__c_search.search(query, version) + return self.__c_search.search(query, version, max_results) def internal_index_size(self) -> int: """ diff --git a/src/multi_bible_search/multi_bible_search.c b/src/multi_bible_search/multi_bible_search.c index d2280ea..91e2201 100644 --- a/src/multi_bible_search/multi_bible_search.c +++ b/src/multi_bible_search/multi_bible_search.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -264,7 +265,7 @@ static PyObject* rtranslate(long reference) { // Tokenizes a given string based on spaces char **tokenize(const char *input_string, int *num_tokens, int *len_tokens) { // Allocate memory for token array - char **tokens = calloc(strlen(input_string), sizeof(char *)); + char **tokens = calloc(strlen(input_string) + 1, sizeof(char *)); if (tokens == NULL) { // Handle memory allocation failure return NULL; @@ -596,8 +597,10 @@ PyObject *SearchObject_search(SearchObject *self, PyObject *args) { char *query1, // The query string *version, // The version to query **tokens; // The tokenized form of the query + // Maximum number of results to return to Python + Py_ssize_t max_results = PY_SSIZE_T_MAX; - if (!PyArg_ParseTuple(args, "ss", &query1, &version)) { + if (!PyArg_ParseTuple(args, "ss|n", &query1, &version, &max_results)) { PyObject *exception_type = PyExc_RuntimeError; PyObject *exception_value = PyUnicode_FromString("Bad search arguments!\n"); PyObject *exception_traceback = NULL; @@ -716,7 +719,7 @@ PyObject *SearchObject_search(SearchObject *self, PyObject *args) { // Rank the results, storing the length of the deduplicated portion of the array result_count = rank(token_result_list, token_result_list_len, num_tokens); - for (size_t i = 0; i < result_count && i < token_result_list_len; i++) { + for (size_t i = 0; i < result_count && i < token_result_list_len && i < max_results; i++) { // Translate the reference and add it to the Python list str_ref = rtranslate(token_result_list[i]); // Make sure the result isn't None. Basically another double check of the Python side of things.