Merge branch 'main' of https://github.com/OWASP-BLT/BLT

OWASP-BLT · Jun 30, 2024 · 730fa31 · 730fa31
2 parents 93661e6 + 733a330
commit 730fa31
Show file tree

Hide file tree

Showing 36 changed files with 4,563 additions and 544 deletions.
diff --git a/blt/.env.example b/blt/.env.example
@@ -9,3 +9,10 @@ CALLBACK_URL_FOR_GITHUB=http://127.0.0.1:8000/
 CALLBACK_URL_FOR_GOOGLE=http://127.0.0.1:8000/
 CALLBACK_URL_FOR_FACEBOOK=http://127.0.0.1:8000/
 
+OPENAI_API_KEY=openai_api_key
+
+# LangChain settings are only needed to trace the chatbot in LangSmith; they are optional for the project.
+LANGCHAIN_API_KEY=langchain_api_key
+LANGCHAIN_TRACING_V2=true
+LANGCHAIN_PROJECT=default
+LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
diff --git a/blt/urls.py b/blt/urls.py
@@ -34,6 +34,7 @@
 )
 from website.views import (  # TODO(b) IssueView,; TODO(b): REMOVE like_issue2 etc
     AllIssuesView,
+    AutoLabel,
     CompanySettings,
     ContributorStatsView,
     CreateHunt,
@@ -72,6 +73,7 @@
     UserProfileDetailsView,
     UserProfileDetailView,
     change_bid_status,
+    chatbot_conversation,
     contributors_view,
     deletions,
     dislike_issue2,
@@ -506,6 +508,7 @@
     path("change_bid_status/", change_bid_status, name="change_bid_status"),
     path("fetch-current-bid/", fetch_current_bid, name="fetch_current_bid"),
     path("Submitpr/", submit_pr, name="submit_pr"),
+    path("issue-auto-label/", AutoLabel, name="AutoLabel"),
     re_path(
         r"^trademarks/query=(?P<slug>[\w\s]+)",
         website.views.trademark_detailview,
@@ -526,6 +529,7 @@
         ContributorStatsView.as_view(today=True),
         name="today-contributor-stats",
     ),
+    path("api/chatbot/conversation/", chatbot_conversation, name="chatbot_conversation"),
 ]
 
 if settings.DEBUG:

diff --git a/company/templates/company/company_analytics.html b/company/templates/company/company_analytics.html
@@ -65,17 +65,17 @@
             </div>
         </div>
         <div class="flex flex-col lg:flex-row gap-y-5 mt-4">
-            <div class="flex flex-col px-8  gap-5 ">
+            <div class="flex flex-col ps-8 gap-5 ">
                 <div class="flex flex-col lg:flex-row gap-5 bg-[#F3F5F7]">
-                    <div class="bg-white p-5 rounded-xl lg:w-[60vh]">
+                    <div class="bg-white p-5 rounded-xl lg:w-[56vh]">
                         <div class="header-container">
                             <h3 class="section-header">Bug Reported Type</h3>
                         </div>
                         <div class="pie-chart w-full h-[90%]">
                             <canvas id="bugTypePie" height="220px" width="660px"></canvas>
                         </div>
                     </div>
-                    <div class="bg-white p-5 rounded-xl lg:w-[60vh]">
+                    <div class="bg-white p-5 rounded-xl lg:w-[56vh]">
                         <div class="header-container">
                             <h3 class="section-header">Reported on Domains</h3>
                         </div>
@@ -84,7 +84,7 @@ <h3 class="section-header">Reported on Domains</h3>
                         </div>
                     </div>
                 </div>
-                <div class="bg-white p-5 rounded-xl w-full lg:w-[123vh]">
+                <div class="bg-white p-5 rounded-xl w-full lg:w-[115vh]">
                     <h3 class="section-header">Monthly Reports</h3>
                     <div class="year-selector">
                         <svg width="24"
@@ -109,7 +109,7 @@ <h3 class="section-header">Monthly Reports</h3>
                         <canvas id="myChart" height="220px" width="660px"></canvas>
                     </div>
                 </div>
-                <div class="p-5 bg-white rounded-xl flex lg:flex-row lg:w-[123vh]">
+                <div class="p-5 bg-white rounded-xl flex lg:flex-row lg:w-[115vh]">
                     <div class="w-full">
                         <div class="header-container">
                             <h3 class="section-header">Total Reported</h3>

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -6,7 +6,9 @@ authors = ["OWASP BLT <[email protected]>"]
 license = "AGPLv3"
 
 [tool.poetry.dependencies]
+
 python = "3.11.2"
+python-dotenv = "^1.0.1"
 toml = "^0.10.2"
 Django = "^5.0.6"
 dj-database-url = "^2.2.0"
@@ -15,7 +17,7 @@ beautifulsoup4 = "^4.12.3"
 colorthief = "^0.2.1"
 django-email-obfuscator = "^0.1.5"
 django-gravatar2 = "^1.4.4"
-django-import-export = "^4.0.9"
+django-import-export = "^4.1.0"
 django-annoying = "^0.10.6"
 dj-rest-auth = "^5.0.2"
 tweepy = "^4.8.0"
@@ -29,7 +31,7 @@ psycopg2-binary = "^2.9.9"
 boto = "^2.49.0"
 django-cors-headers = "^4.4.0"
 protobuf = "^4.25.3"
-django-storages = {extras = ["google"], version = "^1.14.3"}
+django-storages = { extras = ["google"], version = "^1.14.3" }
 django-timedeltafield = "^0.7.10"
 EasyProcess = "^1.1"
 "giturlparse.py" = "^0.0.5"
@@ -61,13 +63,23 @@ django-filter = "^22.1"
 webdriver-manager = "^4.0.1"
 pillow = "^10.2.0"
 chromedriver-autoinstaller = "^0.6.4"
-sentry-sdk = "^2.6.0"
+sentry-sdk = "^2.7.1"
 bitcash = "^1.0.2"
+pydantic = "^2.7.3"
+pydantic_core = "^2.18.4"
+langchain = "^0.2.1"
+langchain-community = "^0.2.1"
+langchain-core = "^0.2.3"
+langchain-openai = "^0.1.8"
+unstructured = "^0.14.4"
+Markdown = "^3.6"
+faiss-cpu = "^1.8.0"
+openai = "^1.35.7"
 
 [tool.poetry.group.dev.dependencies]
 black = "^24.2.0"
 isort = "^5.13.2"
-ruff = "^0.4.10"
+ruff = "^0.5.0"
 
 [tool.isort]
 known_first_party = ["blt"]

diff --git a/website/bot.py b/website/bot.py
@@ -0,0 +1,177 @@
+import tempfile
+from pathlib import Path
+
+import openai
+from django.core.files.base import ContentFile
+from django.core.files.storage import default_storage
+from dotenv import find_dotenv, load_dotenv
+from langchain.chains import ConversationalRetrievalChain
+from langchain.memory import ConversationSummaryMemory
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import (
+    DirectoryLoader,
+    Docx2txtLoader,
+    PyPDFLoader,
+    TextLoader,
+    UnstructuredMarkdownLoader,
+)
+from langchain_community.vectorstores import FAISS
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+from openai import OpenAI
+
+load_dotenv(find_dotenv(), override=True)
+
+
+def is_api_key_valid(api_key):
+    """Check an OpenAI API key by requesting a one-token completion.
+
+    Args:
+        api_key: The key handed to the ``OpenAI`` client.
+
+    Returns:
+        ``True`` when the completion request succeeds. ``False`` when the
+        request raises ``openai.APIConnectionError``, ``openai.RateLimitError``
+        or ``openai.APIError``; the error is printed in each case.
+    """
+    probe_client = OpenAI(api_key=api_key)
+    try:
+        # The cheapest possible request: one prompt word, one generated token.
+        probe_client.completions.create(
+            model="gpt-3.5-turbo-instruct",
+            prompt="Hello",
+            max_tokens=1,
+        )
+    except openai.APIConnectionError as conn_err:
+        print(f"Failed to connect to OpenAI API: {conn_err}")
+        return False
+    except openai.RateLimitError as limit_err:
+        print(f"OpenAI API rate limit exceeded: {limit_err}")
+        return False
+    except openai.APIError as api_err:
+        print(f"OpenAI API error: {api_err}")
+        return False
+    else:
+        return True
+
+
+def load_document(file_path):
+    """Load a single document with the loader registered for its extension.
+
+    Supported extensions are ``.pdf``, ``.docx``, ``.txt`` and ``.md``. The
+    extension is matched case-insensitively, so ``REPORT.PDF`` is handled the
+    same way as ``report.pdf``.
+
+    Args:
+        file_path: Path (``str`` or ``Path``) of the file to load.
+
+    Returns:
+        Whatever the selected loader's ``load()`` returns.
+
+    Raises:
+        ValueError: If the file extension has no registered loader.
+    """
+    loaders = {
+        ".pdf": PyPDFLoader,
+        ".docx": Docx2txtLoader,
+        ".txt": TextLoader,
+        ".md": UnstructuredMarkdownLoader,
+    }
+
+    file_path = Path(file_path)
+    extension = file_path.suffix
+    # Normalise only the lookup key; the error message keeps the suffix as given.
+    Loader = loaders.get(extension.lower())
+
+    if Loader is None:
+        raise ValueError(f"Unsupported file format: {extension}")
+
+    return Loader(file_path).load()
+
+
+def load_directory(dir_path):
+    """Load the contents of ``dir_path`` through ``DirectoryLoader``.
+
+    Args:
+        dir_path: Directory handed to ``DirectoryLoader``.
+
+    Returns:
+        The result of ``DirectoryLoader(dir_path).load()``.
+    """
+    directory_loader = DirectoryLoader(dir_path)
+    return directory_loader.load()
+
+
+def split_document(chunk_size, chunk_overlap, document):
+    """Split ``document`` into chunks with ``RecursiveCharacterTextSplitter``.
+
+    Args:
+        chunk_size: Passed to the splitter as ``chunk_size``.
+        chunk_overlap: Passed to the splitter as ``chunk_overlap``.
+        document: Documents handed to ``split_documents``.
+
+    Returns:
+        The result of the splitter's ``split_documents`` call.
+    """
+    splitter_options = {
+        "chunk_size": chunk_size,
+        "chunk_overlap": chunk_overlap,
+        # Lengths are measured with the built-in len().
+        "length_function": len,
+    }
+    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(document)
+
+
+def get_temp_db_path(db_folder_path):
+    """Create a temporary directory containing a sub-folder for the index.
+
+    Args:
+        db_folder_path: Relative folder path of the index (e.g. ``Path("faiss_index")``).
+
+    Returns:
+        A 3-tuple ``(temp_dir, db_folder_str, temp_db_path)``: the
+        ``tempfile.TemporaryDirectory`` object (the caller is responsible for
+        calling ``cleanup()``), ``db_folder_path`` as a string, and the created
+        ``<temp_dir>/<db_folder_path>`` directory as a ``Path``.
+    """
+    scratch_dir = tempfile.TemporaryDirectory()
+    local_index_dir = Path(scratch_dir.name).joinpath(db_folder_path)
+    local_index_dir.mkdir(parents=True, exist_ok=True)
+    return scratch_dir, str(db_folder_path), local_index_dir
+
+
+def embed_documents_and_save(embed_docs):
+    """Embed ``embed_docs`` into the FAISS index kept in Django's default storage.
+
+    If the stored ``faiss_index`` folder holds any files, they are downloaded
+    to a temporary directory, loaded, and extended with ``embed_docs``;
+    otherwise a new index is built from ``embed_docs``. The resulting index is
+    saved locally and uploaded back, replacing the files previously stored in
+    that folder. The temporary directory is removed even if a step fails.
+
+    Args:
+        embed_docs: Documents passed to ``FAISS.add_documents`` or
+            ``FAISS.from_documents``.
+
+    Returns:
+        The FAISS vector store that now includes ``embed_docs``.
+    """
+    db_folder_path = Path("faiss_index")
+
+    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+
+    # Temporary directory for local operations
+    temp_dir, db_folder_str, temp_db_path = get_temp_db_path(db_folder_path)
+
+    try:
+        # listdir() returns a (directories, files) tuple, which is truthy even
+        # when both lists are empty, so the file list itself must be tested.
+        if default_storage.exists(db_folder_str):
+            stored_files = default_storage.listdir(db_folder_str)[1]
+        else:
+            stored_files = []
+
+        if stored_files:
+            # Download all files from the storage folder to the temp directory
+            for file_name in stored_files:
+                with default_storage.open(db_folder_path / file_name, "rb") as f:
+                    content = f.read()
+                with open(temp_db_path / file_name, "wb") as temp_file:
+                    temp_file.write(content)
+
+            # NOTE(review): allow_dangerous_deserialization=True opts in to loading
+            # pickled data; this presumes the storage backend is trusted - confirm.
+            db = FAISS.load_local(temp_db_path, embeddings, allow_dangerous_deserialization=True)
+            # Add new documents to the index
+            db.add_documents(embed_docs)
+        else:
+            # Create a new FAISS index if none is stored yet
+            db = FAISS.from_documents(embed_docs, embeddings)
+
+        # Save the updated FAISS index back to the temp directory
+        db.save_local(temp_db_path)
+
+        # Remove the previously stored files before uploading the new ones
+        for file_name in stored_files:
+            default_storage.delete(db_folder_path / file_name)
+
+        # Upload the updated files back to Django's storage
+        for file in temp_db_path.rglob("*"):
+            if file.is_file():
+                with open(file, "rb") as f:
+                    content = f.read()
+                default_storage.save(
+                    str(db_folder_path / file.relative_to(temp_db_path)), ContentFile(content)
+                )
+    finally:
+        # Always remove the scratch directory, including on failure.
+        temp_dir.cleanup()
+
+    return db
+
+
+def load_vector_store():
+    """Load the FAISS index stored under ``faiss_index`` in Django's default storage.
+
+    The stored files are copied into a temporary directory, loaded with
+    ``FAISS.load_local``, and the temporary directory is removed afterwards,
+    including when downloading or loading raises.
+
+    Returns:
+        The loaded FAISS vector store, or ``None`` when the storage folder does
+        not exist or contains no files.
+    """
+    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+    db_folder_path = Path("faiss_index")
+
+    temp_dir, db_folder_str, temp_db_path = get_temp_db_path(db_folder_path)
+
+    try:
+        if not default_storage.exists(db_folder_str):
+            return None
+
+        # listdir() returns (directories, files); only the files matter here.
+        stored_files = default_storage.listdir(db_folder_str)[1]
+        if not stored_files:
+            return None
+
+        # Download all files from the storage folder to the temp directory
+        for file_name in stored_files:
+            with default_storage.open(db_folder_path / file_name, "rb") as f:
+                content = f.read()
+            with open(temp_db_path / file_name, "wb") as temp_file:
+                temp_file.write(content)
+
+        # NOTE(review): allow_dangerous_deserialization=True opts in to loading
+        # pickled data; this presumes the storage backend is trusted - confirm.
+        return FAISS.load_local(temp_db_path, embeddings, allow_dangerous_deserialization=True)
+    finally:
+        # Runs on every exit path so the scratch directory never leaks.
+        temp_dir.cleanup()
+
+
+def conversation_chain(vector_store):
+    """Build the retrieval chain that answers questions about the BLT application.
+
+    Args:
+        vector_store: Vector store whose ``as_retriever`` supplies the context
+            documents (the three most similar documents per query).
+
+    Returns:
+        A ``(crc, memory)`` tuple: the ``ConversationalRetrievalChain`` and the
+        ``ConversationSummaryMemory`` attached to it.
+    """
+    # from_messages() expects a sequence of messages. Passing the bare
+    # ("human", text) tuple would be iterated as two separate strings, producing
+    # an extra human message containing only the word "human", so the single
+    # (role, template) pair is wrapped in a list.
+    prompt = ChatPromptTemplate.from_messages(
+        [
+            (
+                "human",
+                (
+                    "You are an assistant specifically designed for answering questions about "
+                    "the OWASP Bug Logging Tool (BLT) application. Use the following pieces of "
+                    "retrieved context to answer the question. If the user's question is not "
+                    "related to the BLT application or if the context does not provide enough "
+                    "information to answer the question, respond with 'Please ask a query related "
+                    "to the BLT Application.' Ensure your response is concise and does not exceed "
+                    "three sentences.\nQuestion: {question}\nContext: {context}\nAnswer:"
+                ),
+            )
+        ]
+    )
+    llm = ChatOpenAI(model_name="gpt-3.5-turbo-0125", temperature=0.5)
+    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
+    memory = ConversationSummaryMemory(
+        llm=llm, return_messages=True, memory_key="chat_history", max_token_limit=1000
+    )
+
+    crc = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        retriever=retriever,
+        memory=memory,
+        chain_type="stuff",
+        combine_docs_chain_kwargs={"prompt": prompt},
+    )
+    return crc, memory
diff --git a/website/documents/BltAboutUs.md b/website/documents/BltAboutUs.md
@@ -0,0 +1,76 @@
+### Detailed Description of the "About Us" UI Component (URL path: "https://blt.owasp.org/about/") for the OWASP Bug Logging Tool (BLT) Application
+
+#### 1. Component Overview
+The "About Us" UI component in the OWASP Bug Logging Tool (BLT) application provides users with comprehensive information about the BLT project, its purpose, functionalities, and how it benefits users. This section aims to inform users about the initiative's goals, how it operates, and the incentives for participating in bug reporting.
+
+#### 2. User Interaction
+Users interact with the About Us component through the following steps:
+1. **Accessing the Component**: Users navigate to the "About Us" section via the sidebar menu.
+2. **Reading Information**: Users read through the detailed information provided to understand the BLT initiative, its goals, and how they can participate.
+3. **Navigating Links**: Users may click on embedded links to access related pages or external resources for more detailed information.
+
+#### 3. Key Elements
+- **Navigation Sidebar**: Contains links to various sections of the BLT application, including Issues, Companies, Scoreboard, Users, Teams, Bug Bounties, and more.
+- **About BLT Section**: Provides a detailed description of the BLT project, emphasizing its open-source nature, non-commercial goals, and community-driven approach.
+- **User Benefits**: Describes the incentives for users, including points, money from sponsored bug hunts, leaderboard recognition, and tips for finding bugs.
+- **How It Works**: Provides step-by-step instructions for testers on how to participate, including creating a user account, describing bugs, attaching screenshots, and submitting information.
+- **Organization Benefits**: Explains the advantages for organizations in maintaining a bug-free website and how they can participate in the bug bounty program.
+- **Additional Information**: Includes links to terms and conditions, privacy policy, and other relevant documentation.
+
+#### 4. Visual Design
+- **Layout**: The layout features a left sidebar for navigation and a main content area displaying the detailed information. The text is organized into sections with clear headings for easy reading.
+- **Color Scheme**: The design uses a consistent color scheme with red, white, and grey tones, matching the overall BLT branding. Red is used for headings and links to draw attention.
+- **Typography**: Modern, readable fonts are used for headings, body text, and links, ensuring clarity and ease of reading.
+- **Visual Cues**: Headings and sections are clearly delineated, and links are highlighted to indicate interactivity.
+
+#### 5. Accessibility Features
+- **Keyboard Navigation**: All interactive elements can be accessed and operated via keyboard shortcuts, allowing users with mobility impairments to navigate and use the component.
+- **Screen Reader Compatibility**: The text and links are labeled clearly to be compatible with screen readers, aiding visually impaired users in understanding and interacting with the component.
+- **High Contrast**: Text and interactive elements have high contrast against the background, making it easier for users with visual impairments to read the content.
+- **Descriptive Labels**: All interactive elements have clear and descriptive labels to ensure users understand their purpose and functionality.
+
+#### 6. Error Handling
+The About Us component includes mechanisms to handle errors and provide feedback to users:
+- **Error Messages**: If an error occurs while loading the information or navigating links, clear and concise error messages are displayed to inform the user and provide steps to resolve the issue.
+- **Fallback Content**: If the main content fails to load, the page provides fallback messages or placeholders, ensuring that the user experience is not significantly disrupted.
+- **Input Validation**: Ensures that users enter valid input before submitting queries or interactions.
+
+#### 7. Performance
+The component is designed with several features to enhance performance and user experience:
+- **Optimized Loading**: The page is optimized to load quickly, allowing users to access the information without delay.
+- **Responsive Design**: The layout is fully responsive, adapting to different screen sizes and devices to ensure a consistent and accessible experience across desktops, tablets, and mobile devices.
+- **Efficient Data Retrieval**: Uses efficient data retrieval techniques to fetch and display information quickly, minimizing wait times and enhancing user satisfaction.
+- **Scalability**: The component is designed to handle a large amount of textual information, maintaining performance and user experience even with extensive content.
+
+
+
+About Us page content:
+About BLT
+BLT is 100% free to use, Open Source and a non-commercial, not for profit initiative. All prize money goes directly to the bug hunter.
+Software code allows us to buy a gift for Mom or Dad on amazon.com in 7 seconds, watch our favorite “House of Cards” episode on Netflix or read Yelp reviews about a new restaurant.
+
+When we can’t access the information we’re looking for on the internet within seconds, we are not happy.
+
+This is where you come into the picture.
+
+BLT wants you to identify the software (and hardware) bugs that delay downloads, freeze screens, create payloads that deliver malware to websites and generate other issues.
+
+What’s in it for you?
+Points.
+Money if you join a BLT Sponsored Bug Hunt.
+Jackpot money listed on the Leaderboard.
+Money if someone tips you for finding a bug through the tip button.
+Experience to add to your résumé or portfolio.
+How it Works
+Testers
+Create a User Account to log into BLT.
+Describe the software or hardware bug you found.
+Attach a screenshot of the bug.
+Submit the information.
+Win money through company-sponsored Bug Bounties, tips or the Grand Prize/Jackpot. We may also have "heists" where each bug is worth a specific amount based on what the company sets.
+If you participate in BLT’s sponsored Bug Bounties, you could win prize money known as tips.
+Organizations
+We want everyone to love your website.
+You want to keep your customers happy by giving them a consistent bug-free user experience. BLT offers monthly Bug Bounties through 4 different subscription plans to help you achieve this.
+
+BLT is 100% free to use, Open Source and a non-commercial, not for profit initiative.