Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding S3Config / S3 Knowledge Base connector #160

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions mindsdb_sdk/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from mindsdb_sdk.knowledge_bases import KnowledgeBase
from mindsdb_sdk.models import Model
from mindsdb_sdk.skills import Skill
from mindsdb_sdk.utils.mind import S3Config
from mindsdb_sdk.utils.objects_collection import CollectionBase

_DEFAULT_LLM_MODEL = 'gpt-4o'
Expand Down Expand Up @@ -333,6 +334,17 @@ def add_file(self, name: str, file_path: str, description: str, knowledge_base:
"""
self.add_files(name, [file_path], description, knowledge_base)

def add_s3_config(self, name: str, s3_config: S3Config, knowledge_base: str = None):
    """
    Add an S3 data source configuration to a knowledge base used by an agent.

    :param name: Name of the agent; it is looked up with ``self.get``.
    :param s3_config: S3 configuration to insert into the knowledge base.
    :param knowledge_base: Name of an existing knowledge base to insert into.
        When omitted, a new default knowledge base is created for the agent.
    """
    # TODO: Validate s3 Config
    agent = self.get(name)
    if knowledge_base is not None:
        kb = self.knowledge_bases.get(knowledge_base)
    else:
        # Tag the generated knowledge base as S3-backed (not "web") so its
        # name reflects the data source; the random hex suffix avoids clashes.
        kb_name = f'{name.lower()}_s3_{uuid4().hex}_kb'
        kb = self._create_default_knowledge_base(agent, kb_name)

    kb.insert_s3_config(s3_config)

def add_webpages(
self,
name: str,
Expand Down
16 changes: 16 additions & 0 deletions mindsdb_sdk/connectors/rest_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from mindsdb_sdk import __about__
from sseclient import SSEClient

from mindsdb_sdk.utils.mind import S3Config


def _try_relogin(fnc):
@wraps(fnc)
Expand Down Expand Up @@ -428,3 +430,17 @@ def insert_webpages_into_knowledge_base(self, project: str, knowledge_base_name:
_raise_for_status(r)

return r.json()

@_try_relogin
def insert_s3_config_into_knowledge_base(self, project: str, knowledge_base_name: str, s3_config: S3Config):
    """
    Send an S3 configuration to a knowledge base with a PUT request.

    :param project: Name of the project that owns the knowledge base.
    :param knowledge_base_name: Name of the knowledge base to update.
    :param s3_config: S3 configuration to send; its fields are placed under
        the ``s3`` key of the request body.
    :return: Decoded JSON body of the response.
    """
    from datetime import datetime

    # The request body is JSON-encoded and datetime objects (such as
    # ``update_from_last``) are not JSON serializable, so send them as
    # ISO 8601 strings instead of failing with a TypeError.
    s3_payload = {
        key: value.isoformat() if isinstance(value, datetime) else value
        for key, value in dict(s3_config).items()
    }
    r = self.session.put(
        self.url + f'/api/projects/{project}/knowledge_bases/{knowledge_base_name}',
        json={
            'knowledge_base': {'s3': [s3_payload]},
        },
    )
    # Error handling for non-success responses is delegated to the shared helper.
    _raise_for_status(r)
    return r.json()
4 changes: 4 additions & 0 deletions mindsdb_sdk/knowledge_bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from .tables import Table
from .query import Query
from .databases import Database
from .utils.mind import S3Config


class KnowledgeBase(Query):
Expand Down Expand Up @@ -134,6 +135,9 @@ def insert_webpages(self, urls: List[str], crawl_depth: int = 1, filters: List[s
"""
self.api.insert_webpages_into_knowledge_base(self.project.name, self.name, urls, crawl_depth=crawl_depth, filters=filters)

def insert_s3_config(self, s3_config: S3Config):
    """
    Insert an S3 configuration into this knowledge base.

    Delegates to the API connector, identifying the knowledge base by its
    project name and its own name.

    :param s3_config: S3 configuration to send for this knowledge base
    """
    send_config = self.api.insert_s3_config_into_knowledge_base
    project_name = self.project.name
    kb_name = self.name
    send_config(project_name, kb_name, s3_config)

def insert(self, data: Union[pd.DataFrame, Query, dict]):
"""
Insert data to knowledge base
Expand Down
24 changes: 23 additions & 1 deletion mindsdb_sdk/utils/mind.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from datetime import datetime

from pydantic import BaseModel, Field
from typing import List, Optional
from typing import List, Optional, Union
from uuid import uuid4

import requests
Expand Down Expand Up @@ -53,6 +55,26 @@ class FileConfig(DataSourceConfig):

# TODO: Configure Vector storage. Use defaults for now.

class S3Config(DataSourceConfig):
    """
    Configuration used to locate and filter a collection of files in AWS S3 buckets.

    :param buckets: Bucket names, or regexes that bucket names are matched against.
    :param files: Filenames, or regexes that filenames are matched against.
    :param aws_access_key_id: AWS access key; when not set, the boto defaults apply (e.g. .aws/credentials).
    :param aws_secret_access_key: AWS secret key; when not set, the boto defaults apply (e.g. .aws/credentials).
    :param region_name: AWS region; when not set, the boto defaults apply (e.g. .aws/credentials).
    :param aws_session_token: AWS session token for temporary credentials.
    :param update_from_last: Optional cutoff datetime; only files last modified after it are considered.
    """

    # Which buckets and files to match.
    buckets: List[str] = []
    files: List[str] = []

    # Credentials and region; all optional.
    aws_access_key_id: Optional[str] = None
    aws_secret_access_key: Optional[str] = None
    region_name: Optional[str] = None
    aws_session_token: Optional[str] = None

    # Last-modified cutoff for incremental updates.
    update_from_last: Optional[datetime] = None

class WebConfig(DataSourceConfig):
"""
Expand Down
Loading