From 50cfb36f87edaa9398254afb461f6a1bc9a29577 Mon Sep 17 00:00:00 2001 From: Darshan Acharya Date: Mon, 26 Feb 2024 14:36:34 -0800 Subject: [PATCH 1/4] use fire and improve cli input --- src/count.py | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/src/count.py b/src/count.py index 99ebd67..41610dc 100644 --- a/src/count.py +++ b/src/count.py @@ -1,7 +1,9 @@ import requests import time -import os +import os import sys +from typing import Optional +import fire # Constants @@ -22,6 +24,7 @@ def safe_request(url, headers, max_retries=MAX_RETRIES, timeout=10): sleep_time = max(reset_time - int(time.time()), 1) print(f"Rate limit exceeded. Waiting for {sleep_time} seconds.") time.sleep(sleep_time) + # Failed to fetch data after retries else: print(f"Request failed with status code {response.status_code}. Attempt {attempt + 1} of {max_retries}.") time.sleep(2 ** attempt) @@ -29,7 +32,6 @@ def safe_request(url, headers, max_retries=MAX_RETRIES, timeout=10): print(f"Request exception: {e}. Attempt {attempt + 1} of {max_retries}.") # Exponential backoff time.sleep(2 ** attempt) - # Failed to fetch data after retries return None def read_repos(file_path): @@ -37,7 +39,7 @@ def read_repos(file_path): repos = [line.strip() for line in file.readlines()] return repos -def get_commits_count(repo): +def get_commits_count(repo) -> Optional[int]: start_date = "2023-01-01T00:00:00Z" end_date = "2024-12-31T23:59:59Z" commits_url = f"{GITHUB_API}/repos/{repo}/commits?since={start_date}&until={end_date}&per_page=1" @@ -47,15 +49,14 @@ def get_commits_count(repo): last_page_url = response.links['last']['url'] last_page_number = int(last_page_url.split('=')[-1]) return last_page_number - elif response.json(): + elif response.json(): # Check if there's at least one commit in the specified range return 1 # No commits in the specified date range return 0 - return "Error" + return None - -def get_bugs_count(repo): +def get_bugs_count(repo) -> Optional[int]: bug_count = 0 page = 1 while True: @@ -66,30 +67,21 @@ def get_bugs_count(repo): if not issues: break for issue in issues: - # Get the date part of the timestamp created_at = issue['created_at'][:10] if "2023-01-01" <= created_at <= "2024-12-31": bug_count += 1 page += 1 else: - return "Error" + return None return bug_count - - def write_output(repos_info, output_file): with open(output_file, 'w') as file: for repo, commits_count, bugs_count in repos_info: file.write(f"{repo.replace('/', ',')} , {commits_count} , {bugs_count}\n") -def main(): - if len(sys.argv) < 2: - print("Usage: python script.py input_file_path") - sys.exit(1) - - file_path = sys.argv[1] - output_file = 'output4.csv' - repos = read_repos(file_path) +def main(input_file_path, output_file_path): + repos = read_repos(input_file_path) repos_info = [] for repo in repos: @@ -98,8 +90,8 @@ def main(): bugs_count = get_bugs_count(repo) repos_info.append((repo, commits_count, bugs_count)) - write_output(repos_info, output_file) - print("Finished. Output written to", output_file) + write_output(repos_info, output_file_path) + print("Finished. Output written to", output_file_path) if __name__ == "__main__": - main() \ No newline at end of file + fire.Fire(main) From 00199d0f8547a4860421b9824dffcf6989a79186 Mon Sep 17 00:00:00 2001 From: Darshan Acharya Date: Mon, 26 Feb 2024 14:38:27 -0800 Subject: [PATCH 2/4] remove format.py --- src/format.py | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 src/format.py diff --git a/src/format.py b/src/format.py deleted file mode 100644 index 43335e0..0000000 --- a/src/format.py +++ /dev/null @@ -1,17 +0,0 @@ -import csv - -input_filename = 'output3.csv' -output_filename = 'converted_analysis.csv' - -with open(input_filename, 'r', newline='') as infile, open(output_filename, 'w', newline='') as outfile: - reader = csv.reader(infile) - writer = csv.writer(outfile) - - for row in reader: - merged_column = row[0] + '/' + row[1] - - new_row = [merged_column] + row[2:] - - writer.writerow(new_row) - -print(f'Data merged and saved to {output_filename}') From e490801b8da06104ab76ef624ec3de2562dbd790 Mon Sep 17 00:00:00 2001 From: Darshan Acharya Date: Sat, 16 Mar 2024 11:30:08 -0700 Subject: [PATCH 3/4] add plot for repos --- src/plot.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 src/plot.py diff --git a/src/plot.py b/src/plot.py new file mode 100644 index 0000000..17d85fd --- /dev/null +++ b/src/plot.py @@ -0,0 +1,28 @@ +import pandas as pd +import matplotlib.pyplot as plt +import seaborn as sns +import fire + +def generate_charts(df_path, bug_chart_path, commit_chart_path): + df = pd.read_csv(df_path) + top_20_by_bug_count = df.nlargest(20, 'BugCount') + top_20_by_commit_count = df.nlargest(20, 'CommitCount') + + plt.figure(figsize=(10, 8)) + sns.barplot(x='BugCount', y='RepoName', data=top_20_by_bug_count, palette='viridis') + plt.title('Top 20 Repositories by Bug Count') + plt.xlabel('Bug Count') + plt.ylabel('Repository') + plt.tight_layout() + plt.savefig(bug_chart_path) + + plt.figure(figsize=(10, 8)) + sns.barplot(x='CommitCount', y='RepoName', data=top_20_by_commit_count, palette='viridis') + plt.title('Top 20 Repositories by Commit Count') + plt.xlabel('Commit Count') + plt.ylabel('Repository') + plt.tight_layout() + plt.savefig(commit_chart_path) + +if __name__ == '__main__': + fire.Fire(generate_charts) From 708ae2d7f84677aa176540cc1dc00724564537a3 Mon Sep 17 00:00:00 2001 From: Darshan Acharya Date: Sun, 17 Mar 2024 12:38:47 -0700 Subject: [PATCH 4/4] update to histogram plot --- src/plot.py | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/src/plot.py b/src/plot.py index 17d85fd..776f291 100644 --- a/src/plot.py +++ b/src/plot.py @@ -4,25 +4,27 @@ import fire def generate_charts(df_path, bug_chart_path, commit_chart_path): - df = pd.read_csv(df_path) - top_20_by_bug_count = df.nlargest(20, 'BugCount') - top_20_by_commit_count = df.nlargest(20, 'CommitCount') - - plt.figure(figsize=(10, 8)) - sns.barplot(x='BugCount', y='RepoName', data=top_20_by_bug_count, palette='viridis') - plt.title('Top 20 Repositories by Bug Count') - plt.xlabel('Bug Count') - plt.ylabel('Repository') - plt.tight_layout() - plt.savefig(bug_chart_path) - - plt.figure(figsize=(10, 8)) - sns.barplot(x='CommitCount', y='RepoName', data=top_20_by_commit_count, palette='viridis') - plt.title('Top 20 Repositories by Commit Count') - plt.xlabel('Commit Count') - plt.ylabel('Repository') - plt.tight_layout() - plt.savefig(commit_chart_path) + """Generates and saves histograms for bug counts and commit counts. + + Args: + df_path (str): Path to the CSV file containing the dataset. + bug_chart_path (str): Path where the bug count histogram will be saved. + commit_chart_path (str): Path where the commit count histogram will be saved. + """ + try: + df = pd.read_csv(df_path) + except Exception as e: + print(f"Failed to read {df_path}: {e}") + return + + for column, chart_path in [('BugCount', bug_chart_path), ('CommitCount', commit_chart_path)]: + plt.figure(figsize=(12, 8)) + sns.histplot(data=df, x=column, bins=20, kde=True, color='black') + plt.xlabel(column) + plt.ylabel('Frequency') + plt.tight_layout() + plt.savefig(chart_path, format='pdf') + plt.close() if __name__ == '__main__': fire.Fire(generate_charts)