Merge branch 'dev' into vm-updates
andhreljaKern authored Nov 6, 2024
2 parents a96d2a1 + 58a37b1 commit 17e1c97
Showing 81 changed files with 712 additions and 2,764 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,4 +1,4 @@
FROM kernai/refinery-parent-images:v1.18.1-common
FROM kernai/refinery-parent-images:v1.19.0-common

WORKDIR /app

36 changes: 36 additions & 0 deletions alembic/versions/05bbef1eec3f_new_diff_columns.py
@@ -0,0 +1,36 @@
"""New diff columns
Revision ID: 05bbef1eec3f
Revises: f8c313f63a36
Create Date: 2024-10-28 07:46:38.865170
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = '05bbef1eec3f'
down_revision = 'f8c313f63a36'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('pipeline_logs', sa.Column('scope_dict_diff_new', sa.JSON(), nullable=True), schema='cognition')
op.add_column('pipeline_logs', sa.Column('record_dict_diff_new', sa.JSON(), nullable=True), schema='cognition')
op.add_column('message', sa.Column('scope_dict_diff_new', sa.JSON(), nullable=True), schema='cognition')

# note that migration of field values is done in cognition
# after the release another one is needed to remove the old columns

# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('pipeline_logs', 'record_dict_diff_new', schema='cognition')
op.drop_column('pipeline_logs', 'scope_dict_diff_new', schema='cognition')
op.drop_column('message', 'scope_dict_diff_new', schema='cognition')
# ### end Alembic commands ###
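
The comments in upgrade() describe a two-step rollout: the new JSON columns are added here, the cognition service copies the field values over, and a later revision drops the old columns. A minimal sketch of that follow-up revision, assuming the legacy columns are named scope_dict_diff and record_dict_diff (their names are not part of this diff) and using a placeholder revision ID:

"""Drop legacy diff columns (hypothetical follow-up)
Revision ID: 000000000000
Revises: 05bbef1eec3f
"""
from alembic import op
import sqlalchemy as sa

revision = '000000000000'  # placeholder; a real ID would come from `alembic revision`
down_revision = '05bbef1eec3f'
branch_labels = None
depends_on = None


def upgrade():
    # assumed legacy column names; adjust to the actual schema
    op.drop_column('pipeline_logs', 'scope_dict_diff', schema='cognition')
    op.drop_column('pipeline_logs', 'record_dict_diff', schema='cognition')
    op.drop_column('message', 'scope_dict_diff', schema='cognition')


def downgrade():
    op.add_column('pipeline_logs', sa.Column('scope_dict_diff', sa.JSON(), nullable=True), schema='cognition')
    op.add_column('pipeline_logs', sa.Column('record_dict_diff', sa.JSON(), nullable=True), schema='cognition')
    op.add_column('message', sa.Column('scope_dict_diff', sa.JSON(), nullable=True), schema='cognition')
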
28 changes: 28 additions & 0 deletions alembic/versions/1118c7327b96_added_order_number_for_strategies.py
@@ -0,0 +1,28 @@
"""Added order number for strategies
Revision ID: 1118c7327b96
Revises: 414c990688f3
Create Date: 2024-10-10 15:15:29.164393
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = '1118c7327b96'
down_revision = '414c990688f3'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('strategy', sa.Column('order', sa.Integer(), nullable=True), schema='cognition')
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('strategy', 'order', schema='cognition')
# ### end Alembic commands ###
100 changes: 100 additions & 0 deletions alembic/versions/11675e102ac4_add_file_caching.py
@@ -0,0 +1,100 @@
"""add file caching
Revision ID: 11675e102ac4
Revises: 1118c7327b96
Create Date: 2024-10-09 15:37:46.744638
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = '11675e102ac4'
down_revision = '1118c7327b96'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('file_reference',
sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False),
sa.Column('organization_id', postgresql.UUID(as_uuid=True), nullable=True),
sa.Column('hash', sa.String(), nullable=True),
sa.Column('minio_path', sa.String(), nullable=True),
sa.Column('bucket', sa.String(), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=True),
sa.Column('created_by', postgresql.UUID(as_uuid=True), nullable=True),
sa.Column('file_size_bytes', sa.BigInteger(), nullable=True),
sa.Column('content_type', sa.String(), nullable=True),
sa.Column('original_file_name', sa.String(), nullable=True),
sa.Column('state', sa.String(), nullable=True),
sa.Column('meta_data', sa.JSON(), nullable=True),
sa.ForeignKeyConstraint(['created_by'], ['user.id'], ondelete='SET NULL'),
sa.ForeignKeyConstraint(['organization_id'], ['organization.id'], ondelete='CASCADE'),
sa.PrimaryKeyConstraint('id'),
sa.UniqueConstraint('organization_id', 'hash', 'file_size_bytes', name='unique_file_reference'),
schema='cognition'
)
op.create_index(op.f('ix_cognition_file_reference_created_by'), 'file_reference', ['created_by'], unique=False, schema='cognition')
op.create_index(op.f('ix_cognition_file_reference_hash'), 'file_reference', ['hash'], unique=False, schema='cognition')
op.create_index(op.f('ix_cognition_file_reference_organization_id'), 'file_reference', ['organization_id'], unique=False, schema='cognition')
op.create_table('file_extraction',
sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False),
sa.Column('organization_id', postgresql.UUID(as_uuid=True), nullable=True),
sa.Column('file_reference_id', postgresql.UUID(as_uuid=True), nullable=True),
sa.Column('extraction_key', sa.String(), nullable=True),
sa.Column('minio_path', sa.String(), nullable=True),
sa.Column('bucket', sa.String(), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=True),
sa.Column('created_by', postgresql.UUID(as_uuid=True), nullable=True),
sa.Column('state', sa.String(), nullable=True),
sa.ForeignKeyConstraint(['created_by'], ['user.id'], ondelete='SET NULL'),
sa.ForeignKeyConstraint(['file_reference_id'], ['cognition.file_reference.id'], ondelete='CASCADE'),
sa.ForeignKeyConstraint(['organization_id'], ['organization.id'], ondelete='CASCADE'),
sa.PrimaryKeyConstraint('id'),
sa.UniqueConstraint('organization_id', 'file_reference_id', 'extraction_key', name='unique_file_extraction'),
schema='cognition'
)
op.create_index(op.f('ix_cognition_file_extraction_created_by'), 'file_extraction', ['created_by'], unique=False, schema='cognition')
op.create_index(op.f('ix_cognition_file_extraction_file_reference_id'), 'file_extraction', ['file_reference_id'], unique=False, schema='cognition')
op.create_index(op.f('ix_cognition_file_extraction_organization_id'), 'file_extraction', ['organization_id'], unique=False, schema='cognition')
op.create_table('file_transformation',
sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False),
sa.Column('organization_id', postgresql.UUID(as_uuid=True), nullable=True),
sa.Column('file_extraction_id', postgresql.UUID(as_uuid=True), nullable=True),
sa.Column('transformation_key', sa.String(), nullable=True),
sa.Column('minio_path', sa.String(), nullable=True),
sa.Column('bucket', sa.String(), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=True),
sa.Column('created_by', postgresql.UUID(as_uuid=True), nullable=True),
sa.Column('state', sa.String(), nullable=True),
sa.ForeignKeyConstraint(['created_by'], ['user.id'], ondelete='SET NULL'),
sa.ForeignKeyConstraint(['file_extraction_id'], ['cognition.file_extraction.id'], ondelete='CASCADE'),
sa.ForeignKeyConstraint(['organization_id'], ['organization.id'], ondelete='CASCADE'),
sa.PrimaryKeyConstraint('id'),
sa.UniqueConstraint('organization_id', 'file_extraction_id', 'transformation_key', name='unique_file_transformation'),
schema='cognition'
)
op.create_index(op.f('ix_cognition_file_transformation_created_by'), 'file_transformation', ['created_by'], unique=False, schema='cognition')
op.create_index(op.f('ix_cognition_file_transformation_file_extraction_id'), 'file_transformation', ['file_extraction_id'], unique=False, schema='cognition')
op.create_index(op.f('ix_cognition_file_transformation_organization_id'), 'file_transformation', ['organization_id'], unique=False, schema='cognition')
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index(op.f('ix_cognition_file_transformation_organization_id'), table_name='file_transformation', schema='cognition')
op.drop_index(op.f('ix_cognition_file_transformation_file_extraction_id'), table_name='file_transformation', schema='cognition')
op.drop_index(op.f('ix_cognition_file_transformation_created_by'), table_name='file_transformation', schema='cognition')
op.drop_table('file_transformation', schema='cognition')
op.drop_index(op.f('ix_cognition_file_extraction_organization_id'), table_name='file_extraction', schema='cognition')
op.drop_index(op.f('ix_cognition_file_extraction_file_reference_id'), table_name='file_extraction', schema='cognition')
op.drop_index(op.f('ix_cognition_file_extraction_created_by'), table_name='file_extraction', schema='cognition')
op.drop_table('file_extraction', schema='cognition')
op.drop_index(op.f('ix_cognition_file_reference_organization_id'), table_name='file_reference', schema='cognition')
op.drop_index(op.f('ix_cognition_file_reference_hash'), table_name='file_reference', schema='cognition')
op.drop_index(op.f('ix_cognition_file_reference_created_by'), table_name='file_reference', schema='cognition')
op.drop_table('file_reference', schema='cognition')
# ### end Alembic commands ###
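
The three new tables form a chain — file_reference → file_extraction → file_transformation — and the unique_file_reference constraint on (organization_id, hash, file_size_bytes) is what lets repeated uploads of the same content be served from the cache instead of stored again. A minimal sketch of such a lookup, assuming a plain SQLAlchemy session; the helper name, the 'CREATED' state value, and the column subset filled in here are illustrative and not part of this diff:

import uuid
from datetime import datetime

from sqlalchemy import text
from sqlalchemy.orm import Session


def get_or_create_file_reference(session: Session, org_id: str, content_hash: str,
                                 size_bytes: int, minio_path: str) -> str:
    # hypothetical helper: reuse an existing upload if the same content
    # (organization_id, hash, file_size_bytes) was cached before
    row = session.execute(
        text(
            "SELECT id FROM cognition.file_reference "
            "WHERE organization_id = :org AND hash = :hash AND file_size_bytes = :size"
        ),
        {"org": org_id, "hash": content_hash, "size": size_bytes},
    ).first()
    if row:
        return str(row[0])

    new_id = str(uuid.uuid4())
    session.execute(
        text(
            "INSERT INTO cognition.file_reference "
            "(id, organization_id, hash, file_size_bytes, minio_path, created_at, state) "
            "VALUES (:id, :org, :hash, :size, :path, :now, 'CREATED')"  # state value assumed
        ),
        {"id": new_id, "org": org_id, "hash": content_hash, "size": size_bytes,
         "path": minio_path, "now": datetime.utcnow()},
    )
    session.commit()
    return new_id
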
103 changes: 103 additions & 0 deletions alembic/versions/414c990688f3_remove_refinery_token_table.py
@@ -0,0 +1,103 @@
"""Remove refinery token table
Revision ID: 414c990688f3
Revises: 3e59ce51739c
Create Date: 2024-09-09 09:25:36.796509
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = '414c990688f3'
down_revision = '3e59ce51739c'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
# generated code
op.drop_index('ix_personal_access_token_project_id', table_name='personal_access_token')
op.drop_index('ix_personal_access_token_user_id', table_name='personal_access_token')
op.drop_table('personal_access_token')

op.drop_index('ix_cognition_project_refinery_question_project_id', table_name='project', schema='cognition')
op.drop_index('ix_cognition_project_refinery_references_project_id', table_name='project', schema='cognition')
op.drop_index('ix_cognition_project_refinery_relevance_project_id', table_name='project', schema='cognition')
op.drop_constraint('project_refinery_references_project_id_fkey', 'project', schema='cognition', type_='foreignkey')
op.drop_constraint('project_refinery_question_project_id_fkey', 'project', schema='cognition', type_='foreignkey')
op.drop_constraint('project_refinery_relevance_project_id_fkey', 'project', schema='cognition', type_='foreignkey')
op.drop_column('project', 'refinery_references_project_id', schema='cognition')
op.drop_column('project', 'refinery_synchronization_interval_option', schema='cognition')
op.drop_column('project', 'refinery_question_project_id', schema='cognition')
op.drop_column('project', 'refinery_relevance_project_id', schema='cognition')
op.drop_column('project', 'execute_query_enrichment_if_source_code', schema='cognition')

op.drop_index('ix_cognition_refinery_synchronization_task_cognition_project_id', table_name='refinery_synchronization_task', schema='cognition')
op.drop_index('ix_cognition_refinery_synchronization_task_created_by', table_name='refinery_synchronization_task', schema='cognition')
op.drop_index('ix_cognition_refinery_synchronization_task_refinery_project_id', table_name='refinery_synchronization_task', schema='cognition')
op.drop_table('refinery_synchronization_task', schema='cognition')

# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###

# ------------------------ pat remove ------------------------
op.create_table('personal_access_token',
sa.Column('id', postgresql.UUID(), autoincrement=False, nullable=False),
sa.Column('project_id', postgresql.UUID(), autoincrement=False, nullable=True),
sa.Column('user_id', postgresql.UUID(), autoincrement=False, nullable=True),
sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column('scope', sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column('created_at', postgresql.TIMESTAMP(), autoincrement=False, nullable=True),
sa.Column('expires_at', postgresql.TIMESTAMP(), autoincrement=False, nullable=True),
sa.Column('last_used', postgresql.TIMESTAMP(), autoincrement=False, nullable=True),
sa.Column('token', sa.VARCHAR(), autoincrement=False, nullable=True),
sa.ForeignKeyConstraint(['project_id'], ['project.id'], name='personal_access_token_project_id_fkey', ondelete='CASCADE'),
sa.ForeignKeyConstraint(['user_id'], ['user.id'], name='personal_access_token_user_id_fkey', ondelete='CASCADE'),
sa.PrimaryKeyConstraint('id', name='personal_access_token_pkey')
)
op.create_index('ix_personal_access_token_user_id', 'personal_access_token', ['user_id'], unique=False)
op.create_index('ix_personal_access_token_project_id', 'personal_access_token', ['project_id'], unique=False)

# ------------------------ cognition table fields ------------------------

op.add_column('project', sa.Column('execute_query_enrichment_if_source_code', sa.VARCHAR(), autoincrement=False, nullable=True), schema='cognition')
op.add_column('project', sa.Column('refinery_relevance_project_id', postgresql.UUID(), autoincrement=False, nullable=True), schema='cognition')
op.add_column('project', sa.Column('refinery_question_project_id', postgresql.UUID(), autoincrement=False, nullable=True), schema='cognition')
op.add_column('project', sa.Column('refinery_synchronization_interval_option', sa.VARCHAR(), autoincrement=False, nullable=True), schema='cognition')
op.add_column('project', sa.Column('refinery_references_project_id', postgresql.UUID(), autoincrement=False, nullable=True), schema='cognition')
op.create_foreign_key('project_refinery_relevance_project_id_fkey', 'project', 'project', ['refinery_relevance_project_id'], ['id'], source_schema='cognition', ondelete='SET NULL')
op.create_foreign_key('project_refinery_question_project_id_fkey', 'project', 'project', ['refinery_question_project_id'], ['id'], source_schema='cognition', ondelete='SET NULL')
op.create_foreign_key('project_refinery_references_project_id_fkey', 'project', 'project', ['refinery_references_project_id'], ['id'], source_schema='cognition', ondelete='SET NULL')
op.create_index('ix_cognition_project_refinery_relevance_project_id', 'project', ['refinery_relevance_project_id'], unique=False, schema='cognition')
op.create_index('ix_cognition_project_refinery_references_project_id', 'project', ['refinery_references_project_id'], unique=False, schema='cognition')
op.create_index('ix_cognition_project_refinery_question_project_id', 'project', ['refinery_question_project_id'], unique=False, schema='cognition')

# ------------------------ sync table ------------------------

op.create_table('refinery_synchronization_task',
sa.Column('id', postgresql.UUID(), autoincrement=False, nullable=False),
sa.Column('cognition_project_id', postgresql.UUID(), autoincrement=False, nullable=True),
sa.Column('refinery_project_id', postgresql.UUID(), autoincrement=False, nullable=True),
sa.Column('created_by', postgresql.UUID(), autoincrement=False, nullable=True),
sa.Column('created_at', postgresql.TIMESTAMP(), autoincrement=False, nullable=True),
sa.Column('finished_at', postgresql.TIMESTAMP(), autoincrement=False, nullable=True),
sa.Column('state', sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column('logs', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True),
sa.Column('num_records_created', sa.INTEGER(), autoincrement=False, nullable=True),
sa.ForeignKeyConstraint(['cognition_project_id'], ['cognition.project.id'], name='refinery_synchronization_task_cognition_project_id_fkey', ondelete='CASCADE'),
sa.ForeignKeyConstraint(['created_by'], ['user.id'], name='refinery_synchronization_task_created_by_fkey', ondelete='CASCADE'),
sa.ForeignKeyConstraint(['refinery_project_id'], ['project.id'], name='refinery_synchronization_task_refinery_project_id_fkey', ondelete='CASCADE'),
sa.PrimaryKeyConstraint('id', name='refinery_synchronization_task_pkey'),
schema='cognition'
)
op.create_index('ix_cognition_refinery_synchronization_task_refinery_project_id', 'refinery_synchronization_task', ['refinery_project_id'], unique=False, schema='cognition')
op.create_index('ix_cognition_refinery_synchronization_task_created_by', 'refinery_synchronization_task', ['created_by'], unique=False, schema='cognition')
op.create_index('ix_cognition_refinery_synchronization_task_cognition_project_id', 'refinery_synchronization_task', ['cognition_project_id'], unique=False, schema='cognition')

# ### end Alembic commands ###
28 changes: 28 additions & 0 deletions alembic/versions/c626887031f6_add.py
@@ -0,0 +1,28 @@
"""add
Revision ID: c626887031f6
Revises: 11675e102ac4
Create Date: 2024-10-15 13:53:26.632068
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'c626887031f6'
down_revision = '11675e102ac4'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column('file_reference', sa.Column('last_used', sa.DateTime(), nullable=True), schema='cognition')
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_column('file_reference', 'last_used', schema='cognition')
# ### end Alembic commands ###
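
Together with the created_at column from the file caching migration above, last_used makes recency-based cleanup of the cache possible. A small sketch of such an eviction job, assuming the 30-day cutoff and the helper name (neither appears in this diff); the ON DELETE CASCADE constraints on file_extraction and file_transformation remove dependent rows automatically:

from datetime import datetime, timedelta

from sqlalchemy import text
from sqlalchemy.orm import Session


def evict_stale_file_references(session: Session, max_age_days: int = 30) -> int:
    # hypothetical eviction job; the 30-day cutoff is an assumption
    cutoff = datetime.utcnow() - timedelta(days=max_age_days)
    result = session.execute(
        text("DELETE FROM cognition.file_reference WHERE last_used < :cutoff"),
        {"cutoff": cutoff},
    )
    session.commit()
    return result.rowcount  # number of cache entries removed
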
58 changes: 58 additions & 0 deletions alembic/versions/f8c313f63a36_rename_llm_logs.py
@@ -0,0 +1,58 @@
"""rename llm logs
Revision ID: f8c313f63a36
Revises: c626887031f6
Create Date: 2024-10-15 16:01:26.391244
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = 'f8c313f63a36'
down_revision = 'c626887031f6'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('file_transformation_llm_logs',
sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False),
sa.Column('file_transformation_id', postgresql.UUID(as_uuid=True), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=True),
sa.Column('finished_at', sa.DateTime(), nullable=True),
sa.Column('model_used', sa.String(), nullable=True),
sa.Column('input', sa.String(), nullable=True),
sa.Column('output', sa.String(), nullable=True),
sa.Column('error', sa.String(), nullable=True),
sa.ForeignKeyConstraint(['file_transformation_id'], ['cognition.file_transformation.id'], ondelete='CASCADE'),
sa.PrimaryKeyConstraint('id'),
schema='cognition'
)
op.create_index(op.f('ix_cognition_file_transformation_llm_logs_file_transformation_id'), 'file_transformation_llm_logs', ['file_transformation_id'], unique=False, schema='cognition')
op.drop_index('ix_cognition_markdown_llm_logs_markdown_file_id', table_name='markdown_llm_logs', schema='cognition')
op.drop_table('markdown_llm_logs', schema='cognition')
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('markdown_llm_logs',
sa.Column('id', postgresql.UUID(), autoincrement=False, nullable=False),
sa.Column('markdown_file_id', postgresql.UUID(), autoincrement=False, nullable=True),
sa.Column('created_at', postgresql.TIMESTAMP(), autoincrement=False, nullable=True),
sa.Column('finished_at', postgresql.TIMESTAMP(), autoincrement=False, nullable=True),
sa.Column('model_used', sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column('input', sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column('output', sa.VARCHAR(), autoincrement=False, nullable=True),
sa.Column('error', sa.VARCHAR(), autoincrement=False, nullable=True),
sa.ForeignKeyConstraint(['markdown_file_id'], ['cognition.markdown_file.id'], name='markdown_llm_logs_markdown_file_id_fkey', ondelete='CASCADE'),
sa.PrimaryKeyConstraint('id', name='markdown_llm_logs_pkey'),
schema='cognition'
)
op.create_index('ix_cognition_markdown_llm_logs_markdown_file_id', 'markdown_llm_logs', ['markdown_file_id'], unique=False, schema='cognition')
op.drop_index(op.f('ix_cognition_file_transformation_llm_logs_file_transformation_id'), table_name='file_transformation_llm_logs', schema='cognition')
op.drop_table('file_transformation_llm_logs', schema='cognition')
# ### end Alembic commands ###
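
Note that the upgrade drops markdown_llm_logs without copying its rows into file_transformation_llm_logs. If the historical logs needed to be preserved, an INSERT ... SELECT could be run inside upgrade() before the drop_table call; the sketch below is hypothetical and only applies if old markdown files can be mapped to file transformations, which this diff does not establish:

# hypothetical addition to upgrade(), placed before op.drop_table('markdown_llm_logs', ...)
op.execute(
    """
    INSERT INTO cognition.file_transformation_llm_logs
        (id, file_transformation_id, created_at, finished_at, model_used, input, output, error)
    SELECT m.id, ft.id, m.created_at, m.finished_at, m.model_used, m.input, m.output, m.error
    FROM cognition.markdown_llm_logs m
    JOIN cognition.file_transformation ft
        ON ft.id = m.markdown_file_id  -- placeholder join; the real mapping is unknown
    """
)
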