diff --git a/mindsdb_sdk/agents.py b/mindsdb_sdk/agents.py
index 73161b2..682b96c 100644
--- a/mindsdb_sdk/agents.py
+++ b/mindsdb_sdk/agents.py
@@ -8,6 +8,7 @@
 from mindsdb_sdk.knowledge_bases import KnowledgeBase
 from mindsdb_sdk.models import Model
 from mindsdb_sdk.skills import Skill
+from mindsdb_sdk.utils.mind import S3Config
 from mindsdb_sdk.utils.objects_collection import CollectionBase
 
 _DEFAULT_LLM_MODEL = 'gpt-4o'
@@ -333,6 +334,25 @@ def add_file(self, name: str, file_path: str, description: str, knowledge_base:
         """
         self.add_files(name, [file_path], description, knowledge_base)
 
+    def add_s3_config(self, name: str, s3_config: S3Config, knowledge_base: str = None):
+        """
+        Add an S3 data source configuration to a knowledge base on behalf of an agent.
+
+        :param name: Name of the agent
+        :param s3_config: Configuration describing which S3 buckets and files to use
+        :param knowledge_base: Name of an existing knowledge base to insert into; a default one is created if omitted
+        """
+        # TODO: Validate S3 config.
+        agent = self.get(name)
+        if knowledge_base is not None:
+            kb = self.knowledge_bases.get(knowledge_base)
+        else:
+            # The random suffix keeps the default knowledge base name unique.
+            kb_name = f'{name.lower()}_s3_{uuid4().hex}_kb'
+            kb = self._create_default_knowledge_base(agent, kb_name)
+
+        kb.insert_s3_config(s3_config)
+
     def add_webpages(
             self,
             name: str,
diff --git a/mindsdb_sdk/connectors/rest_api.py b/mindsdb_sdk/connectors/rest_api.py
index 7d99465..cd9d58c 100644
--- a/mindsdb_sdk/connectors/rest_api.py
+++ b/mindsdb_sdk/connectors/rest_api.py
@@ -10,6 +10,8 @@
 from mindsdb_sdk import __about__
 from sseclient import SSEClient
 
+from mindsdb_sdk.utils.mind import S3Config
+
 
 def _try_relogin(fnc):
     @wraps(fnc)
@@ -428,3 +430,31 @@ def insert_webpages_into_knowledge_base(self, project: str, knowledge_base_name:
         _raise_for_status(r)
 
         return r.json()
+
+    @_try_relogin
+    def insert_s3_config_into_knowledge_base(self, project: str, knowledge_base_name: str, s3_config: S3Config):
+        """
+        Send an S3 configuration to a knowledge base through the REST API.
+
+        :param project: Name of the project the knowledge base belongs to
+        :param knowledge_base_name: Name of the knowledge base to update
+        :param s3_config: S3 configuration to include in the request body
+        :return: Decoded JSON body of the server response
+        """
+        s3_data = dict(s3_config)
+        # The body is JSON-encoded by `requests`, which cannot serialize datetime objects.
+        update_from_last = s3_data.get('update_from_last')
+        if update_from_last is not None:
+            s3_data['update_from_last'] = update_from_last.isoformat()
+
+        data = {
+            's3': [s3_data]
+        }
+        r = self.session.put(
+            self.url + f'/api/projects/{project}/knowledge_bases/{knowledge_base_name}',
+            json={
+                'knowledge_base': data
+            }
+        )
+        _raise_for_status(r)
+        return r.json()
diff --git a/mindsdb_sdk/knowledge_bases.py b/mindsdb_sdk/knowledge_bases.py
index 3919046..34d581a5 100644
--- a/mindsdb_sdk/knowledge_bases.py
+++ b/mindsdb_sdk/knowledge_bases.py
@@ -15,6 +15,7 @@
 from .tables import Table
 from .query import Query
 from .databases import Database
+from .utils.mind import S3Config
 
 
 class KnowledgeBase(Query):
@@ -134,6 +135,14 @@ def insert_webpages(self, urls: List[str], crawl_depth: int = 1, filters: List[s
         """
         self.api.insert_webpages_into_knowledge_base(self.project.name, self.name, urls, crawl_depth=crawl_depth, filters=filters)
 
+    def insert_s3_config(self, s3_config: S3Config):
+        """
+        Send an S3 configuration to this knowledge base through the API.
+
+        :param s3_config: Configuration describing which S3 buckets and files to use
+        """
+        self.api.insert_s3_config_into_knowledge_base(self.project.name, self.name, s3_config)
+
     def insert(self, data: Union[pd.DataFrame, Query, dict]):
         """
         Insert data to knowledge base
diff --git a/mindsdb_sdk/utils/mind.py b/mindsdb_sdk/utils/mind.py
index 478eaaa..a345d1a 100644
--- a/mindsdb_sdk/utils/mind.py
+++ b/mindsdb_sdk/utils/mind.py
@@ -1,5 +1,7 @@
+from datetime import datetime
+
 from pydantic import BaseModel, Field
-from typing import List, Optional
+from typing import List, Optional, Union
 from uuid import uuid4
 
 import requests
@@ -53,6 +55,28 @@ class FileConfig(DataSourceConfig):
 
     # TODO: Configure Vector storage. Use defaults for now.
 
+
+class S3Config(DataSourceConfig):
+    """
+    Configuration used to find and filter a collection of files in AWS S3 buckets.
+
+    :param buckets: A list of bucket names or regexes to match bucket names to.
+    :param files: A list of filenames or regexes to match filenames to.
+    :param aws_access_key_id: Access Key for AWS, defaults to boto defaults (e.g. viewing .aws/credentials).
+    :param aws_secret_access_key: Secret Key for AWS, defaults to boto defaults (e.g. viewing .aws/credentials).
+    :param region_name: Region for AWS, defaults to boto defaults (e.g. viewing .aws/credentials).
+    :param aws_session_token: AWS session token for temporary credentials.
+    :param update_from_last: Optional datetime to filter files by their last modified date; only files modified after this date will be considered.
+    """
+
+    buckets: List[str] = []
+    files: List[str] = []
+    aws_access_key_id: Union[str, None] = None
+    aws_secret_access_key: Union[str, None] = None
+    region_name: Union[str, None] = None
+    aws_session_token: Union[str, None] = None
+    update_from_last: Union[datetime, None] = None
+
 
 class WebConfig(DataSourceConfig):
     """