Skip to content

Commit

Permalink
Merge pull request #1354 from Codium-ai/tr/3-way-prs
Browse files Browse the repository at this point in the history
Tr/3 way prs
  • Loading branch information
mrT23 authored Nov 12, 2024
2 parents e0c1540 + 0657770 commit b07f96d
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 33 deletions.
30 changes: 25 additions & 5 deletions pr_agent/algo/git_patch_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0,


def decode_if_bytes(original_file_str):
if isinstance(original_file_str, bytes):
if isinstance(original_file_str, (bytes, bytearray)):
try:
return original_file_str.decode('utf-8')
except UnicodeDecodeError:
Expand Down Expand Up @@ -61,23 +61,26 @@ def process_patch_lines(patch_str, original_file_str, patch_extra_lines_before,
patch_lines = patch_str.splitlines()
extended_patch_lines = []

is_valid_hunk = True
start1, size1, start2, size2 = -1, -1, -1, -1
RE_HUNK_HEADER = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
try:
for line in patch_lines:
for i,line in enumerate(patch_lines):
if line.startswith('@@'):
match = RE_HUNK_HEADER.match(line)
# identify hunk header
if match:
# finish processing previous hunk
if start1 != -1 and patch_extra_lines_after > 0:
if is_valid_hunk and (start1 != -1 and patch_extra_lines_after > 0):
delta_lines = [f' {line}' for line in original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after]]
extended_patch_lines.extend(delta_lines)

section_header, size1, size2, start1, start2 = extract_hunk_headers(match)

if patch_extra_lines_before > 0 or patch_extra_lines_after > 0:
is_valid_hunk = check_if_hunk_lines_matches_to_file(i, original_lines, patch_lines, start1)

if is_valid_hunk and (patch_extra_lines_before > 0 or patch_extra_lines_after > 0):
def _calc_context_limits(patch_lines_before):
extended_start1 = max(1, start1 - patch_lines_before)
extended_size1 = size1 + (start1 - extended_start1) + patch_extra_lines_after
Expand Down Expand Up @@ -138,7 +141,7 @@ def _calc_context_limits(patch_lines_before):
return patch_str

# finish processing last hunk
if start1 != -1 and patch_extra_lines_after > 0:
if start1 != -1 and patch_extra_lines_after > 0 and is_valid_hunk:
delta_lines = original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after]
# add space at the beginning of each extra line
delta_lines = [f' {line}' for line in delta_lines]
Expand All @@ -148,6 +151,23 @@ def _calc_context_limits(patch_lines_before):
return extended_patch_str


def check_if_hunk_lines_matches_to_file(i, original_lines, patch_lines, start1):
"""
Check if the hunk lines match the original file content. We saw cases where the hunk header line doesn't match the original file content, and then
extending the hunk with extra lines before the hunk header can cause the hunk to be invalid.
"""
is_valid_hunk = True
try:
if i + 1 < len(patch_lines) and patch_lines[i + 1][0] == ' ': # an existing line in the file
if patch_lines[i + 1].strip() != original_lines[start1 - 1].strip():
is_valid_hunk = False
get_logger().error(
f"Invalid hunk in PR, line {start1} in hunk header doesn't match the original file content")
except:
pass
return is_valid_hunk


def extract_hunk_headers(match):
res = list(match.groups())
for i in range(len(res)):
Expand Down
62 changes: 34 additions & 28 deletions pr_agent/git_providers/github_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,24 @@ def get_diff_files(self) -> list[FilePatchInfo]:
if avoid_load:
original_file_content_str = ""
else:
original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha)
# The base.sha will point to the current state of the base branch (including parallel merges), not the original base commit when the PR was created
# We can fix this by finding the merge base commit between the PR head and base branches
# Note that The pr.head.sha is actually correct as is - it points to the latest commit in your PR branch.
# This SHA isn't affected by parallel merges to the base branch since it's specific to your PR's branch.
repo = self.repo_obj
pr = self.pr
try:
compare = repo.compare(pr.base.sha, pr.head.sha)
merge_base_commit = compare.merge_base_commit
except Exception as e:
get_logger().error(f"Failed to get merge base commit: {e}")
merge_base_commit = pr.base
if merge_base_commit.sha != pr.base.sha:
get_logger().info(
f"Using merge base commit {merge_base_commit.sha} instead of base commit "
f"{pr.base.sha} for {file.filename}")
original_file_content_str = self._get_pr_file_content(file, merge_base_commit.sha)

if not patch:
patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)

Expand Down Expand Up @@ -283,8 +300,7 @@ def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_
relevant_line_in_file,
absolute_position)
if position == -1:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
subject_type = "FILE"
else:
subject_type = "LINE"
Expand All @@ -296,20 +312,17 @@ def publish_inline_comments(self, comments: list[dict], disable_fallback: bool =
# publish all comments in a single message
self.pr.create_review(commit=self.last_commit_id, comments=comments)
except Exception as e:
if get_settings().config.verbosity_level >= 2:
get_logger().error(f"Failed to publish inline comments")
get_logger().info(f"Initially failed to publish inline comments as committable")

if (getattr(e, "status", None) == 422
and get_settings().github.publish_inline_comments_fallback_with_verification and not disable_fallback):
if (getattr(e, "status", None) == 422 and not disable_fallback):
pass # continue to try _publish_inline_comments_fallback_with_verification
else:
raise e # will end up with publishing the comments one by one

try:
self._publish_inline_comments_fallback_with_verification(comments)
except Exception as e:
if get_settings().config.verbosity_level >= 2:
get_logger().error(f"Failed to publish inline code comments fallback, error: {e}")
get_logger().error(f"Failed to publish inline code comments fallback, error: {e}")
raise e

def _publish_inline_comments_fallback_with_verification(self, comments: list[dict]):
Expand All @@ -334,11 +347,9 @@ def _publish_inline_comments_fallback_with_verification(self, comments: list[dic
for comment in fixed_comments_as_one_liner:
try:
self.publish_inline_comments([comment], disable_fallback=True)
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"Published invalid comment as a single line comment: {comment}")
get_logger().info(f"Published invalid comment as a single line comment: {comment}")
except:
if get_settings().config.verbosity_level >= 2:
get_logger().error(f"Failed to publish invalid comment as a single line comment: {comment}")
get_logger().error(f"Failed to publish invalid comment as a single line comment: {comment}")

def _verify_code_comment(self, comment: dict):
is_verified = False
Expand Down Expand Up @@ -396,8 +407,7 @@ def _try_fix_invalid_inline_comments(self, invalid_comments: list[dict]) -> list
if fixed_comment != comment:
fixed_comments.append(fixed_comment)
except Exception as e:
if get_settings().config.verbosity_level >= 2:
get_logger().error(f"Failed to fix inline comment, error: {e}")
get_logger().error(f"Failed to fix inline comment, error: {e}")
return fixed_comments

def publish_code_suggestions(self, code_suggestions: list) -> bool:
Expand All @@ -412,16 +422,14 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
relevant_lines_end = suggestion['relevant_lines_end']

if not relevant_lines_start or relevant_lines_start == -1:
if get_settings().config.verbosity_level >= 2:
get_logger().exception(
f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}")
get_logger().exception(
f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}")
continue

if relevant_lines_end < relevant_lines_start:
if get_settings().config.verbosity_level >= 2:
get_logger().exception(f"Failed to publish code suggestion, "
f"relevant_lines_end is {relevant_lines_end} and "
f"relevant_lines_start is {relevant_lines_start}")
get_logger().exception(f"Failed to publish code suggestion, "
f"relevant_lines_end is {relevant_lines_end} and "
f"relevant_lines_start is {relevant_lines_start}")
continue

if relevant_lines_end > relevant_lines_start:
Expand All @@ -445,8 +453,7 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
self.publish_inline_comments(post_parameters_list)
return True
except Exception as e:
if get_settings().config.verbosity_level >= 2:
get_logger().error(f"Failed to publish code suggestion, error: {e}")
get_logger().error(f"Failed to publish code suggestion, error: {e}")
return False

def edit_comment(self, comment, body: str):
Expand Down Expand Up @@ -505,6 +512,7 @@ def publish_file_comments(self, file_comments: list) -> bool:
elif self.deployment_type == 'user':
same_comment_creator = self.github_user_id == existing_comment['user']['login']
if existing_comment['subject_type'] == 'file' and comment['path'] == existing_comment['path'] and same_comment_creator:

headers, data_patch = self.pr._requester.requestJsonAndCheck(
"PATCH", f"{self.base_url}/repos/{self.repo}/pulls/comments/{existing_comment['id']}", input={"body":comment['body']}
)
Expand All @@ -516,8 +524,7 @@ def publish_file_comments(self, file_comments: list) -> bool:
)
return True
except Exception as e:
if get_settings().config.verbosity_level >= 2:
get_logger().error(f"Failed to publish diffview file summary, error: {e}")
get_logger().error(f"Failed to publish diffview file summary, error: {e}")
return False

def remove_initial_comment(self):
Expand Down Expand Up @@ -805,8 +812,7 @@ def generate_link_to_relevant_line_number(self, suggestion) -> str:
link = f"{self.base_url_html}/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}R{absolute_position}"
return link
except Exception as e:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"Failed adding line link, error: {e}")
get_logger().info(f"Failed adding line link, error: {e}")

return ""

Expand Down
2 changes: 2 additions & 0 deletions pr_agent/tools/pr_code_suggestions.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,8 @@ async def _prepare_prediction(self, model: str) -> dict:
model,
add_line_numbers_to_hunks=True,
disable_extra_lines=False)
self.patches_diff_list = [self.patches_diff]
self.patches_diff_no_line_number = self.remove_line_numbers([self.patches_diff])[0]

if self.patches_diff:
get_logger().debug(f"PR diff", artifact=self.patches_diff)
Expand Down

0 comments on commit b07f96d

Please sign in to comment.