From 3fc1b848b945f96a0112ce411da3615c76797867 Mon Sep 17 00:00:00 2001 From: Sergio Bayona Date: Tue, 7 May 2024 10:58:44 -0500 Subject: [PATCH] removed examples that need to be converted to Ruby --- docs/examples/classification.md | 108 ------------------ docs/examples/content_moderation.md | 68 ----------- docs/examples/query_decomposition.md | 125 -------------------- docs/examples/self_correction.md | 164 --------------------------- docs/examples/validated_citations.md | 159 -------------------------- 5 files changed, 624 deletions(-) delete mode 100644 docs/examples/classification.md delete mode 100644 docs/examples/content_moderation.md delete mode 100644 docs/examples/query_decomposition.md delete mode 100644 docs/examples/self_correction.md delete mode 100644 docs/examples/validated_citations.md diff --git a/docs/examples/classification.md b/docs/examples/classification.md deleted file mode 100644 index 9af0a39..0000000 --- a/docs/examples/classification.md +++ /dev/null @@ -1,108 +0,0 @@ -# Text Classification - -This tutorial showcases how to implement text classification tasks—specifically, single-label and multi-label classifications—using the OpenAI API. - -!!! tips "Motivation" - - Text classification is a common problem in many NLP applications, such as spam detection or support ticket categorization. The goal is to provide a systematic way to handle these cases using OpenAI's GPT models. - -## Single-Label Classification - -### Defining the Structures - -For single-label classification, we first define an **`enum`** for possible labels and a Zod schema for the output. 
- -```ts -import Instructor from "@/instructor" -import OpenAI from "openai" -import { z } from "zod" - -enum CLASSIFICATION_LABELS { - "SPAM" = "SPAM", - "NOT_SPAM" = "NOT_SPAM" -} - -const SimpleClassificationSchema = z.object({ - class_label: z.nativeEnum(CLASSIFICATION_LABELS) -}) - -type SimpleClassification = z.infer - -``` - -### Classifying Text - -The function **`classify`** will perform the single-label classification. - -```js -const oai = new OpenAI({ - apiKey: process.env.OPENAI_API_KEY ?? undefined, - organization: process.env.OPENAI_ORG_ID ?? undefined -}) - -const client = Instructor({ - client: oai, - mode: "FUNCTIONS" -}) - -async function classify(data: string): Promise { - const classification = await client.chat.completions.create({ - messages: [{ role: "user", content: `"Classify the following text: ${data}` }], - model: "gpt-3.5-turbo", - response_model: { schema: SimpleClassificationSchema }, - max_retries: 3 - }) - - return classification -} - -const classification = await createClassification( - "Hello there I'm a nigerian prince and I want to give you money" -) - -console.log({ classification }) -// { class_label: 'SPAM' } -``` - -## Multi-Label Classification - -### Defining the Structures - -For multi-label classification, we introduce a new enum class and a different Zod schema to handle multiple labels. - -```ts -enum MULTI_CLASSIFICATION_LABELS { - "BILLING" = "billing", - "GENERAL_QUERY" = "general_query", - "HARDWARE" = "hardware" -} - -const MultiClassificationSchema = z.object({ - predicted_labels: z.array(z.nativeEnum(MULTI_CLASSIFICATION_LABELS)) -}) - -type MultiClassification = z.infer -``` - -### Classifying Text - -The function **`multi_classify`** is responsible for multi-label classification. 
- -```ts -async function multi_classify(data: string): Promise { - const classification = await client.chat.completions.create({ - messages: [{ role: "user", content: `"Classify the following support ticket: ${data}` }], - model: "gpt-3.5-turbo", - response_model: { schema: MultiClassificationSchema }, - max_retries: 3 - }) - return classification -} - -const classification = await createClassification( - "My account is locked and I can't access my billing info. Phone is also broken" -) - -console.log({ classification }) -// { predicted_labels: [ 'billing', 'hardware' ] } -``` \ No newline at end of file diff --git a/docs/examples/content_moderation.md b/docs/examples/content_moderation.md deleted file mode 100644 index 4dfb854..0000000 --- a/docs/examples/content_moderation.md +++ /dev/null @@ -1,68 +0,0 @@ -# OpenAI Moderation - -## Overview - -This example uses OpenAI's moderation endpoint to check content compliance with OpenAI's usage policies. It can identify and filter harmful content that violates the policies. - -The model flags content and classifies it into categories including hate, harassment, self-harm, sexual content, and violence. Each category has subcategories for detailed classification. - -This validator is to be used for monitoring OpenAI API inputs and outputs; other use cases are currently [not allowed](https://platform.openai.com/docs/guides/moderation/overview). - -## Incorporating OpenAI moderation validation - -The following code defines a schema to validate content using OpenAI's Moderation endpoint. Zod's `.superRefine()` is used to apply OpenAI's moderation after the field value has been computed. This moderation checks if the content complies with OpenAI's usage policies and flags any harmful content. Here's how it works: - -1. Initialize the OpenAI client and extend it with `Instructor`. This is not strictly necessary for this example, but it is always recommended in order to leverage the full `Instructor` functionality. - -2. 
Define a Zod schema for our content, then super refine our `message` field with `moderationValidator(client)`. This means that after `message` is computed, it will be passed to `moderationValidator()` for validation. - -```ts -import Instructor from "@/instructor"; -import OpenAI from "openai"; -import { z } from "zod"; -import { moderationValidator } from "@/dsl/validator" - -const oai = new OpenAI({ - apiKey: process.env.OPENAI_API_KEY ?? undefined, - organization: process.env.OPENAI_ORG_ID ?? undefined, -}); - -const client = Instructor({ - client: oai, - mode: "FUNCTIONS", -}); - -const Response = z.object({ - message: z.string().superRefine(moderationValidator(client)) -}) - -try { - await Response.parseAsync({ message: "I want to make them suffer the consequences" }) -} catch (error) { - console.log(error) -} -// ZodError: [ -// { -// "code": "custom", -// "message": "Moderation error, `I want to make them suffer the consequences` was flagged for violence", -// "path": [ -// "message" -// ] -// } -// ] - -try { - await Response.parseAsync({ message: "I want to hurt myself." }) -} catch (error) { - console.log(error) -} -// ZodError: [ -// { -// "code": "custom", -// "message": "Moderation error, `I want to hurt myself.` was flagged for self-harm, self-harm/intent", -// "path": [ -// "message" -// ] -// } -// ] -``` diff --git a/docs/examples/query_decomposition.md b/docs/examples/query_decomposition.md deleted file mode 100644 index d1ec216..0000000 --- a/docs/examples/query_decomposition.md +++ /dev/null @@ -1,125 +0,0 @@ -# Planning and Executing a Query Plan - -This example demonstrates how to use the OpenAI Function Call ChatCompletion model to plan and execute a query plan in a question-answering system. By breaking down a complex question into smaller sub-questions with defined dependencies, the system can systematically gather the necessary information to answer the main question. - -!!! 
tips "Motivation" - - The goal of this example is to showcase how query planning can be used to handle complex questions, facilitate iterative information gathering, automate workflows, and optimize processes. By leveraging the OpenAI Function Call model, you can design and execute a structured plan to find answers effectively. - - **Use Cases:** - - * Complex question answering - * Iterative information gathering - * Workflow automation - * Process optimization - -With the OpenAI Function Call model, you can customize the planning process and integrate it into your specific application to meet your unique requirements. - -## Defining the structures - -Let's define the necessary models to represent the query plan and the queries. - -```ts -import Instructor from "@/instructor" -import OpenAI from "openai" -import { z } from "zod" - -const QueryTypeSchema = z.enum(["SINGLE", "MERGE_MULTIPLE_RESPONSES"]); - -const QuerySchema = z.object({ - id: z.number(), - question: z.string(), - dependencies: z.array(z.number()).optional(), - node_type: QueryTypeSchema.default("SINGLE") -}) - -const QueryPlanSchema = z.object({ - query_graph: z.array(QuerySchema) -}) -``` - -## Planning a Query Plan - -Now, let's demonstrate how to plan and execute a query plan using the defined models and the OpenAI API. - -```ts -const oai = new OpenAI({ - apiKey: process.env.OPENAI_API_KEY ?? undefined, - organization: process.env.OPENAI_ORG_ID ?? undefined -}) - -const client = Instructor({ - client: oai, - mode: "FUNCTIONS", -}) - -const createQueryPlan = async (question: string): Promise => { - const queryPlan: QueryPlan = await client.chat.completions.create({ - messages: [ - { - "role": "system", - "content": "You are a world class query planning algorithm capable of breaking apart questions into its dependency queries such that the answers can be used to inform the parent question. 
Do not answer the questions, simply provide a correct compute graph with good specific questions to ask and relevant dependencies. Before you call the function, think step-by-step to get a better understanding of the problem.", - }, - { - "role": "user", - "content": `Consider: ${question}\nGenerate the correct query plan.`, - }, - ], - model: "gpt-4-turbo", - response_model: { schema: QueryPlanSchema }, - max_tokens: 1000, - temperature: 0.0, - max_retries: 2, - }) - - return queryPlan || undefined -} - -const queryPlan = await createQueryPlan( - "What is the difference in populations of Canada and the Jason's home country?" -) - -console.log({ queryPlan: JSON.stringify(queryPlan) }) -``` - -!!! warning "No RAG" - - While we build the query plan in this example, we do not propose a method to actually answer the question. You can implement your own answer function that perhaps makes a retrieval and calls OpenAI for retrieval-augmented generation. That step would also make use of function calls but goes beyond the scope of this example. - -```json -{ - "query_graph": [ - { - "id": 1, - "question": "What is the population of Canada?", - "dependencies": [], - "node_type": "SINGLE" - }, - { - "id": 2, - "question": "What is the name of Jason's home country?", - "dependencies": [], - "node_type": "SINGLE" - }, - { - "id": 3, - "question": "What is the population of {country}?", - "dependencies": [2], - "node_type": "SINGLE" - }, - { - "id": 4, - "question": "What is the difference in population between Canada and {country}?", - "dependencies": [1, 3], - "node_type": "MERGE_MULTIPLE_RESPONSES" - } - ] -} - -``` - -In the above code, we define a `createQueryPlan` function that takes a question as input and generates a query plan using the OpenAI API. - -## Conclusion - -In this example, we demonstrated how to use the OpenAI Function Call `ChatCompletion` model to plan and execute a query plan using a question-answering system. 
We defined the necessary structures using Zod and created a query planner function. diff --git a/docs/examples/self_correction.md b/docs/examples/self_correction.md deleted file mode 100644 index e5f4af4..0000000 --- a/docs/examples/self_correction.md +++ /dev/null @@ -1,164 +0,0 @@ -# Self-Correction with `LLMValidator` - -## Introduction - -This guide demonstrates how to use `LLMValidator` for implementing self-healing. The objective is to showcase how an instructor can self-correct by using validation errors and helpful error messages. - -## Setup - -Import required modules to create a zod model - -```ts -import { z } from "zod" -``` - -## Defining Models - -Before building validation logic, define a basic Zod model named `QuestionAnswer`. -We'll use this model to generate a response without validation to see the output. - -```ts -const QuestionAnswer = z.object({ - question: z.string(), - answer: z.string() -}) -``` - -## Generating a Response - -Here we coerce the model to generate a response that is objectionable. - -```ts -import { LLMValidator } from "@/dsl/validator" -import Instructor from "@/instructor" -import OpenAI from "openai" - -const openAi = new OpenAI({ apiKey: process.env.OPENAI_API_KEY ?? "" }) - -const instructor = Instructor({ - client: openAi, - mode: "TOOLS" -}) - -const question = "What is the meaning of life?" -const context = "According to the devil the meaning of live is to live a life of sin and debauchery." - -await instructor.chat.completions.create({ - model: "gpt-4", - max_retries: 0, - response_model: { schema: QuestionAnswer, name: "Question and Answer" }, - messages: [ - { - role: "system", - content: - "You are a system that answers questions based on the context. answer exactly what the question asks using the context." 
- }, - { - role: "user", - content: `using the context: ${context}\n\nAnswer the following question: ${question}` - } - ] - }) -``` - -### Output Before Validation - -While it calls out the objectionable content, it doesn't provide any details on how to correct it. - -```json -{ - "question": "What is the meaning of life?", - "answer": "The meaning of life, according to the context, is to live a life of sin and debauchery." -} -``` - -## Adding Custom Validation - -By adding a validator to the `answer` field, we can try to catch the issue and correct it. -Let's integrate `LLMValidator` into the model and see the error message. It's important to note that you can use all of Zod's validators as you normally would; they raise a `ZodError` with a helpful error message, which will be used as part of the self-correction prompt. - -```typescript -const QuestionAnswer = z.object({ - question: z.string(), - answer: z.string().superRefine( - LLMValidator(instructor, statement, { - model: "gpt-4" - }) - ) -}) - -try { - await instructor.chat.completions.create({ - model: "gpt-4", - max_retries: 0, - response_model: { schema: QuestionAnswer, name: "Question and Answer" }, - messages: [ - { - role: "system", - content: - "You are a system that answers questions based on the context. answer exactly what the question asks using the context." - }, - { - role: "user", - content: `using the context: ${context}\n\nAnswer the following question: ${question}` - } - ] - }) -} catch (e as ZodError[]) { - console.error(e[0].message) -} -``` - -### Output After Validation - -Now, we throw a validation error stating that the answer is objectionable and provide a helpful error message. 
- -```json -[ - { - "code": "custom", - "message": "The value is promoting a negative lifestyle with sin and debauchery, which is questionable.", - "path": [ - "answer" - ] - } -] -``` - -## Retrying with Corrections - -By adding the `max_retries` parameter, we can retry the request with corrections and use the error message to correct the output. - -```ts -try { - await instructor.chat.completions.create({ - model: "gpt-4", - max_retries: 2, - response_model: { schema: QuestionAnswer, name: "Question and Answer" }, - messages: [ - { - role: "system", - content: - "You are a system that answers questions based on the context. answer exactly what the question asks using the context." - }, - { - role: "user", - content: `using the context: ${context}\n\nAnswer the following question: ${question}` - } - ] - }) -} catch (e as ZodError[]) { - console.error(e[0].message) -} -``` - -### Final Output - -Now, we get a valid response that is not objectionable! - -```json -{ - "question": "What is the meaning of life?", - "answer": "The meaning of life is a subjective and complex question, often explored in religious, philosophical, and moral contexts. 
Different individuals and cultures have different beliefs and interpretations regarding the purpose and meaning of life.", -} -``` \ No newline at end of file diff --git a/docs/examples/validated_citations.md b/docs/examples/validated_citations.md deleted file mode 100644 index e86054c..0000000 --- a/docs/examples/validated_citations.md +++ /dev/null @@ -1,159 +0,0 @@ -# Example: Answering Questions with Validated Citations - -For the full code example check out [examples/validated_citations/index.ts](https://github.com/instructor-ai/instructor-js/blob/main/examples/validated_citations/index.ts) - -## Overview - -This example demonstrates how to use Instructor-js with Zod validators to ensure that every statement made by the Language Model (LM) is backed by a direct quote from the provided context, preventing hallucinations and ensuring citation accuracy. It defines TypeScript functions and Zod schemas to encapsulate the information of individual facts and the entire answer. - - -## Data Structures - -### The `Fact` Schema - -The `Fact` schema encapsulates a single statement or fact. It contains two properties: - -- `fact`: A string representing the body of the fact or statement. -- `substring_quote`: A list of strings. Each string is a direct quote from the context that supports the `fact`. - -#### Validation Method: `createFactWithContext` - -This method dynamically creates a Zod schema for Fact with context-dependent validation. It validates the sources (`substring_quote`) using regex to find the span of each substring quote within the given context. If a span is not found, the quote is removed from the list. 
-```ts hl_lines="6 8-13" -import Instructor from "@/instructor" -import { z } from "zod" - - -function createFactWithContext(dynamicContext: string) { - return z.object({ - statement: z.string(), - substring_quote: z.array(z.string()).transform((quotes) => { - return quotes.flatMap((quote) => { - const spans = getSpans(quote, dynamicContext); - return spans.map(span => dynamicContext.substring(span[0], span[1])); - }); - }) - }); -} - -function getSpans(quote: string, context: string): Array<[number, number]> { - const matches: any = []; - // Example regex search for simplicity; adjust according to your actual implementation - const regex = new RegExp(quote, 'g'); - let match; - - while ((match = regex.exec(context)) !== null) { - matches.push([match.index, regex.lastIndex]); - } - return matches.length > 0 ? matches : []; -} -``` - -### The `QuestionAnswer` Schema - -This schema encapsulates the question and its corresponding answer. It exists to provide a structure for responses from the OpenAI API call. It contains two properties: - -- `question`: The question asked. -- `answer`: A list of `Fact` objects that make up the answer. - -```ts hl_lines="5-8" -const QuestionAnswer = z.object({ - question: z.string(), - answer: z.array(z.object({ - statement: z.string(), - substring_quote: z.array(z.string()), // Basic structure without dynamic context validation - })) -}); -type QuestionAnswerType = z.infer -``` - -#### Validation Method: `createQuestionAnswerWithContext` - -This method dynamically generates a Zod schema for QuestionAnswer with context-sensitive validation, ensuring each Fact object in the answer list has at least one valid source. If a `Fact` object has no valid sources, it is removed from the `answer` list. 
- -```ts hl_lines="5-8" -function createQuestionAnswerWithContext(dynamicContext: string) { - const FactSchemaWithContext = createFactSchemaWithContext(dynamicContext); - - return z.object({ - question: z.string(), - answer: z.array(FactSchemaWithContext).transform((answers) => { - // Filter out any Facts that, after validation, have no valid quotes - return answers.filter(fact => fact.substring_quote.length > 0); - }) - }); -} -``` - -## Function to Ask AI a Question - -### The `askAI` Function - -This function takes a string `question` and a string `context` and returns a `QuestionAnswer` object. It uses the OpenAI API with the dynamic Zod schema for validation. - - -```ts hl_lines="5 6 14" -import Instructor from "@/instructor" -import OpenAI from "openai" -import { z } from "zod" - -const oai = new OpenAI({ - apiKey: process.env.OPENAI_API_KEY ?? undefined, - organization: process.env.OPENAI_ORG_ID ?? undefined -}) - -const client = Instructor({ - client: oai, - mode: "FUNCTIONS" -}) - -async function askAI(question: string, context: string): Promise { - const response = await client.chat.completions.create({ - model: "gpt-3.5-turbo-0613", - temperature: 0, - response_model: { schema: QuestionAnswer, name: "Question and Answer" }, - messages: [ - { role: "system", content: "You are a world class algorithm to answer questions with correct and exact citations." }, - { role: "user", content: context }, - { role: "user", content: `Question: ${question}` }, - ], - }); - const QuestionAnswerWithContext = createQuestionAnswerWithContext(context); - const parsedResponse = QuestionAnswerWithContext.parse(response); - - return parsedResponse; -} -``` - -## Example - - -Here's an example of using these classes and functions to ask a question and validate the answer. - -```ts -const question = "Where did he go to school?" -const context = `My name is Jason Liu, and I grew up in Toronto Canada but I was born in China. 
-I went to an arts high school but in university I studied Computational Mathematics and physics. - As part of coop I worked at many companies including Stitchfix, Facebook. - I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.` -``` - -The output would be a `QuestionAnswer` object containing validated facts and their sources. - -```ts -{ - question: "Where did Jason Liu go to school?", - answer: [ - { - statement: "Jason Liu went to an arts high school.", - substring_quote: [ "arts high school" ], - }, - { - statement: "Jason Liu studied Computational Mathematics and physics in university.", - substring_quote: [ "Computational Mathematics and physics" ], - } - ], -} -``` - -This ensures that every piece of information in the answer has been validated against the context.