OPS-6367 enable advanced prompt overwrite for agent
ronny-panknin-flaconi committed Nov 22, 2024
1 parent 95446af commit 48bf368
Showing 2 changed files with 308 additions and 0 deletions.
68 changes: 68 additions & 0 deletions main.tf
@@ -270,6 +270,74 @@ resource "aws_bedrockagent_agent" "this" {
  foundation_model = var.agent_model_id
  instruction      = var.agent_instructions

  prompt_override_configuration {
    # KNOWLEDGE_BASE_RESPONSE_GENERATION step
    prompt_configurations {
      base_prompt_template = var.knowledge_base_response_generation_prompt_creation_mode == "OVERRIDDEN" ? var.knowledge_base_response_generation_prompt_template : null

      inference_configuration {
        max_length     = var.knowledge_base_response_generation_prompt_creation_mode == "OVERRIDDEN" ? var.knowledge_base_response_generation_max_length : null
        stop_sequences = var.knowledge_base_response_generation_prompt_creation_mode == "OVERRIDDEN" ? var.knowledge_base_response_generation_stop_sequences : null
        temperature    = var.knowledge_base_response_generation_prompt_creation_mode == "OVERRIDDEN" ? var.knowledge_base_response_generation_temperature : null
        top_k          = var.knowledge_base_response_generation_prompt_creation_mode == "OVERRIDDEN" ? var.knowledge_base_response_generation_top_k : null
        top_p          = var.knowledge_base_response_generation_prompt_creation_mode == "OVERRIDDEN" ? var.knowledge_base_response_generation_top_p : null
      }

      parser_mode          = var.knowledge_base_response_generation_parser_mode
      prompt_creation_mode = var.knowledge_base_response_generation_prompt_creation_mode
      prompt_state         = var.knowledge_base_response_generation_prompt_creation_mode == "OVERRIDDEN" ? var.knowledge_base_response_generation_prompt_state : null
      prompt_type          = "KNOWLEDGE_BASE_RESPONSE_GENERATION"
    }

    # PRE_PROCESSING step
    prompt_configurations {
      base_prompt_template = var.pre_processing_prompt_creation_mode == "OVERRIDDEN" ? var.pre_processing_prompt_template : null

      inference_configuration {
        max_length     = var.pre_processing_prompt_creation_mode == "OVERRIDDEN" ? var.pre_processing_max_length : null
        stop_sequences = var.pre_processing_prompt_creation_mode == "OVERRIDDEN" ? var.pre_processing_stop_sequences : null
        temperature    = var.pre_processing_prompt_creation_mode == "OVERRIDDEN" ? var.pre_processing_temperature : null
        top_k          = var.pre_processing_prompt_creation_mode == "OVERRIDDEN" ? var.pre_processing_top_k : null
        top_p          = var.pre_processing_prompt_creation_mode == "OVERRIDDEN" ? var.pre_processing_top_p : null
      }

      parser_mode          = var.pre_processing_parser_mode
      prompt_creation_mode = var.pre_processing_prompt_creation_mode
      prompt_state         = var.pre_processing_prompt_creation_mode == "OVERRIDDEN" ? var.pre_processing_prompt_state : null
      prompt_type          = "PRE_PROCESSING"
    }

    # ORCHESTRATION step
    prompt_configurations {
      base_prompt_template = var.orchestration_prompt_creation_mode == "OVERRIDDEN" ? var.orchestration_prompt_template : null

      inference_configuration {
        max_length     = var.orchestration_prompt_creation_mode == "OVERRIDDEN" ? var.orchestration_max_length : null
        stop_sequences = var.orchestration_prompt_creation_mode == "OVERRIDDEN" ? var.orchestration_stop_sequences : null
        temperature    = var.orchestration_prompt_creation_mode == "OVERRIDDEN" ? var.orchestration_temperature : null
        top_k          = var.orchestration_prompt_creation_mode == "OVERRIDDEN" ? var.orchestration_top_k : null
        top_p          = var.orchestration_prompt_creation_mode == "OVERRIDDEN" ? var.orchestration_top_p : null
      }

      parser_mode          = var.orchestration_parser_mode
      prompt_creation_mode = var.orchestration_prompt_creation_mode
      prompt_state         = var.orchestration_prompt_creation_mode == "OVERRIDDEN" ? var.orchestration_prompt_state : null
      prompt_type          = "ORCHESTRATION"
    }

    # POST_PROCESSING step
    prompt_configurations {
      base_prompt_template = var.post_processing_prompt_creation_mode == "OVERRIDDEN" ? var.post_processing_prompt_template : null

      inference_configuration {
        max_length     = var.post_processing_prompt_creation_mode == "OVERRIDDEN" ? var.post_processing_max_length : null
        stop_sequences = var.post_processing_prompt_creation_mode == "OVERRIDDEN" ? var.post_processing_stop_sequences : null
        temperature    = var.post_processing_prompt_creation_mode == "OVERRIDDEN" ? var.post_processing_temperature : null
        top_k          = var.post_processing_prompt_creation_mode == "OVERRIDDEN" ? var.post_processing_top_k : null
        top_p          = var.post_processing_prompt_creation_mode == "OVERRIDDEN" ? var.post_processing_top_p : null
      }

      parser_mode          = var.post_processing_parser_mode
      prompt_creation_mode = var.post_processing_prompt_creation_mode
      prompt_state         = var.post_processing_prompt_creation_mode == "OVERRIDDEN" ? var.post_processing_prompt_state : null
      prompt_type          = "POST_PROCESSING"
    }
  }

  depends_on = [
    aws_bedrockagent_knowledge_base.this
  ]
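
For reference, a minimal sketch of how a consumer might call this module with only the orchestration prompt overridden. The module source, agent inputs, and template text here are hypothetical, and required inputs unrelated to prompts are omitted; only the prompt variable names come from this change:

module "bedrock_agent" {
  # Hypothetical source path; substitute the real module address.
  source = "./modules/bedrock-agent"

  agent_model_id     = "anthropic.claude-3-sonnet-20240229-v1:0"
  agent_instructions = "You are a helpful shop assistant."

  # Override only the ORCHESTRATION step; the other three steps keep the
  # defaults declared in variables.tf.
  orchestration_prompt_creation_mode = "OVERRIDDEN"
  orchestration_prompt_template      = "Plan the next steps for: {question}"
  orchestration_temperature          = 0.2
}

Note that when a step's prompt_creation_mode is set to "DEFAULT", every conditional for that step in main.tf resolves to null, so Bedrock falls back to its built-in prompt template and inference settings.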
240 changes: 240 additions & 0 deletions variables.tf
@@ -108,6 +108,246 @@ variable "oss_additional_roles_arns" {
  default = []
}

# KNOWLEDGE_BASE_RESPONSE_GENERATION Configuration
variable "knowledge_base_response_generation_prompt_template" {
  description = "Prompt template for knowledge base response generation."
  type        = string
  default     = <<EOF
You are a helpful assistant. Answer the following question using the context provided:
Question: {question}
Context: {context}
Your response should be thoughtful, detailed, and relevant to the provided context.
EOF
}

variable "knowledge_base_response_generation_parser_mode" {
  description = "Parser mode for knowledge base response generation."
  type        = string
  default     = "DEFAULT"
}

variable "knowledge_base_response_generation_prompt_creation_mode" {
  description = "Prompt creation mode for knowledge base response generation."
  type        = string
  default     = "OVERRIDDEN"
}

variable "knowledge_base_response_generation_prompt_state" {
  description = "Prompt state for knowledge base response generation."
  type        = string
  default     = "ENABLED"
}

variable "knowledge_base_response_generation_max_length" {
  description = "Maximum number of tokens to allow in the generated response."
  type        = number
  default     = 512
}

variable "knowledge_base_response_generation_stop_sequences" {
  description = "List of stop sequences that will stop generation."
  type        = list(string)
  default     = ["END"]
}

variable "knowledge_base_response_generation_temperature" {
  description = "Likelihood of the model selecting higher-probability options while generating a response."
  type        = number
  default     = 0.7
}

variable "knowledge_base_response_generation_top_k" {
  description = "Number of top most-likely candidates from which the model chooses the next token."
  type        = number
  default     = 50
}

variable "knowledge_base_response_generation_top_p" {
  description = "Top percentage of the probability distribution of next tokens, from which the model chooses the next token."
  type        = number
  default     = 0.9
}

# PRE_PROCESSING Configuration
variable "pre_processing_prompt_template" {
  description = "Prompt template for pre-processing."
  type        = string
  default     = <<EOF
You are preparing the input. Extract relevant context and pre-process the following question:
Question: {question}
Context: {context}
Pre-processing should focus on extracting the core information.
EOF
}

variable "pre_processing_parser_mode" {
  description = "Parser mode for pre-processing."
  type        = string
  default     = "DEFAULT" # Change to OVERRIDDEN if necessary
}

variable "pre_processing_prompt_creation_mode" {
  description = "Prompt creation mode for pre-processing."
  type        = string
  default     = "OVERRIDDEN"
}

variable "pre_processing_prompt_state" {
  description = "Prompt state for pre-processing."
  type        = string
  default     = "ENABLED"
}

variable "pre_processing_max_length" {
  description = "Maximum number of tokens to allow in the generated response."
  type        = number
  default     = 512
}

variable "pre_processing_stop_sequences" {
  description = "List of stop sequences that will stop generation."
  type        = list(string)
  default     = ["END"]
}

variable "pre_processing_temperature" {
  description = "Likelihood of the model selecting higher-probability options while generating a response."
  type        = number
  default     = 0.7
}

variable "pre_processing_top_k" {
  description = "Number of top most-likely candidates from which the model chooses the next token."
  type        = number
  default     = 50
}

variable "pre_processing_top_p" {
  description = "Top percentage of the probability distribution of next tokens, from which the model chooses the next token."
  type        = number
  default     = 0.9
}

# ORCHESTRATION Configuration
variable "orchestration_prompt_template" {
  description = "Prompt template for orchestration."
  type        = string
  default     = <<EOF
You are orchestrating the flow of the agent. Based on the question and context, determine the next steps in the process:
Question: {question}
Context: {context}
Plan the next steps to follow the best strategy.
EOF
}

variable "orchestration_parser_mode" {
  description = "Parser mode for orchestration."
  type        = string
  default     = "DEFAULT"
}

variable "orchestration_prompt_creation_mode" {
  description = "Prompt creation mode for orchestration."
  type        = string
  default     = "OVERRIDDEN"
}

variable "orchestration_prompt_state" {
  description = "Prompt state for orchestration."
  type        = string
  default     = "ENABLED"
}

variable "orchestration_max_length" {
  description = "Maximum number of tokens to allow in the generated response."
  type        = number
  default     = 512
}

variable "orchestration_stop_sequences" {
  description = "List of stop sequences that will stop generation."
  type        = list(string)
  default     = ["END"]
}

variable "orchestration_temperature" {
  description = "Likelihood of the model selecting higher-probability options while generating a response."
  type        = number
  default     = 0.7
}

variable "orchestration_top_k" {
  description = "Number of top most-likely candidates from which the model chooses the next token."
  type        = number
  default     = 50
}

variable "orchestration_top_p" {
  description = "Top percentage of the probability distribution of next tokens, from which the model chooses the next token."
  type        = number
  default     = 0.9
}

# POST_PROCESSING Configuration
variable "post_processing_prompt_template" {
  description = "Prompt template for post-processing."
  type        = string
  default     = <<EOF
You are performing post-processing. Review the agent's output and refine the response for clarity and relevance:
Response: {response}
Context: {context}
Ensure the output is polished and aligns with the context.
EOF
}

variable "post_processing_parser_mode" {
  description = "Parser mode for post-processing."
  type        = string
  default     = "DEFAULT"
}

variable "post_processing_prompt_creation_mode" {
  description = "Prompt creation mode for post-processing."
  type        = string
  default     = "OVERRIDDEN"
}

variable "post_processing_prompt_state" {
  description = "Prompt state for post-processing."
  type        = string
  default     = "DISABLED"
}

variable "post_processing_max_length" {
  description = "Maximum number of tokens to allow in the generated response."
  type        = number
  default     = 512
}

variable "post_processing_stop_sequences" {
  description = "List of stop sequences that will stop generation."
  type        = list(string)
  default     = ["END"]
}

variable "post_processing_temperature" {
  description = "Likelihood of the model selecting higher-probability options while generating a response."
  type        = number
  default     = 0.7
}

variable "post_processing_top_k" {
  description = "Number of top most-likely candidates from which the model chooses the next token."
  type        = number
  default     = 50
}

variable "post_processing_top_p" {
  description = "Top percentage of the probability distribution of next tokens, from which the model chooses the next token."
  type        = number
  default     = 0.9
}
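
One possible hardening, not part of this commit: each *_prompt_creation_mode, *_parser_mode, and *_prompt_state variable could carry a validation block so that a typo fails at plan time instead of at the Bedrock API. A sketch for one of them:

variable "post_processing_prompt_creation_mode" {
  description = "Prompt creation mode for post-processing."
  type        = string
  default     = "OVERRIDDEN"

  # Suggested guard; Bedrock accepts only these two values.
  validation {
    condition     = contains(["DEFAULT", "OVERRIDDEN"], var.post_processing_prompt_creation_mode)
    error_message = "post_processing_prompt_creation_mode must be \"DEFAULT\" or \"OVERRIDDEN\"."
  }
}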

variable "tags" {
description = "A map of tags to assign to the customization job and custom model."
type = map(string)
