From 719604f405a7faf4b46c8a5c7a1bc349c79e217f Mon Sep 17 00:00:00 2001 From: Animesh Kumar Date: Fri, 24 Apr 2020 17:00:56 +0530 Subject: [PATCH 1/3] [metadata] Remove metadata from empty enriched items This commit updates the enricher to prevent addition of metadata fields to empty enriched items. Signed-off-by: Animesh Kumar --- grimoire_elk/enriched/enrich.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/grimoire_elk/enriched/enrich.py b/grimoire_elk/enriched/enrich.py index b9c036658..15757a5d3 100644 --- a/grimoire_elk/enriched/enrich.py +++ b/grimoire_elk/enriched/enrich.py @@ -91,6 +91,10 @@ def metadata(func): @functools.wraps(func) def decorator(self, *args, **kwargs): eitem = func(self, *args, **kwargs) + + if not eitem: + return eitem + metadata = { 'metadata__gelk_version': self.gelk_version, 'metadata__gelk_backend_name': self.__class__.__name__, @@ -388,6 +392,8 @@ def enrich_items(self, ocean_backend, events=False): if not events: rich_item = self.get_rich_item(item) + if not rich_item: + continue data_json = json.dumps(rich_item) bulk_json += '{"index" : {"_id" : "%s" } }\n' % \ (item[self.get_field_unique_id()]) From 0574e55d9590d705ec527af4a58c38968f8d8d51 Mon Sep 17 00:00:00 2001 From: Animesh Kumar Date: Fri, 24 Apr 2020 17:07:35 +0530 Subject: [PATCH 2/3] [github] Remove metadata from non-enriched items This commit prevents addition of repository label and metadata filter raw to items which are not to be enriched. This results in creation of empty items which are not uploaded to ElasticSearch. Tests have been added accordingly. 
Signed-off-by: Animesh Kumar --- grimoire_elk/enriched/github.py | 1 + grimoire_elk/enriched/github2.py | 3 + tests/data/github.json | 225 +++++++++++++++++++++++++++++++ tests/data/github2.json | 225 +++++++++++++++++++++++++++++++ tests/test_github.py | 9 +- tests/test_github2.py | 9 +- 6 files changed, 464 insertions(+), 8 deletions(-) diff --git a/grimoire_elk/enriched/github.py b/grimoire_elk/enriched/github.py index e51e77970..0e24d2057 100644 --- a/grimoire_elk/enriched/github.py +++ b/grimoire_elk/enriched/github.py @@ -226,6 +226,7 @@ def get_rich_item(self, item): else: logger.error("[github] rich item not defined for GitHub category {}".format( item['category'])) + return rich_item self.add_repository_labels(rich_item) self.add_metadata_filter_raw(rich_item) diff --git a/grimoire_elk/enriched/github2.py b/grimoire_elk/enriched/github2.py index cb10c977a..91ec029c4 100644 --- a/grimoire_elk/enriched/github2.py +++ b/grimoire_elk/enriched/github2.py @@ -228,6 +228,7 @@ def get_rich_item(self, item): else: logger.error("[github] rich item not defined for GitHub category {}".format( item['category'])) + return rich_item self.add_repository_labels(rich_item) self.add_metadata_filter_raw(rich_item) @@ -386,6 +387,8 @@ def enrich_items(self, ocean_backend): eitems = [] eitem = self.get_rich_item(item) + if not eitem: + continue items_to_enrich.append(eitem) if item['category'] == ISSUE_TYPE: eitems = self.enrich_issue(item, eitem) diff --git a/tests/data/github.json b/tests/data/github.json index c56720dc6..2d72cba1a 100644 --- a/tests/data/github.json +++ b/tests/data/github.json @@ -3276,5 +3276,230 @@ "timestamp": 1579025969.338453, "updated_on": 1550852463.0, "uuid": "ce0a349fd6c16890c489c4eb3c2fe958910c034c" + }, + { + "backend_name": "GitHub", + "backend_version": "0.24.0", + "category": "message", + "classified_fields_filtered": [ + "user_data", + "merged_by_data", + "assignee_data", + "assignees_data", + "requested_reviewers_data", + 
"comments_data.user_data", + "comments_data.reactions_data.user_data", + "reviews_data.user_data", + "review_comments_data.user_data", + "review_comments_data.reactions_data.user_data" + ], + "data": { + "assignee": null, + "assignees": [], + "author_association": "MEMBER", + "body": "Add also the cache support for users.\n\nThe users in an issue are: \"user\", \"assignee\". Both of them have now extra user data retrieved from GitHub User API.\n", + "closed_at": "2016-01-26T19:26:43Z", + "comments": 3, + "comments_data": [ + { + "author_association": "MEMBER", + "body": "Reviewed it. @acs I miss the code where you add the information about who made a comment or a change.\n", + "created_at": "2016-01-22T12:17:18Z", + "html_url": "https://github.com/chaoss/grimoirelab-perceval/pull/7#issuecomment-173902372", + "id": 173902372, + "issue_url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/7", + "node_id": "MDEyOklzc3VlQ29tbWVudDE3MzkwMjM3Mg==", + "reactions": { + "+1": 0, + "-1": 0, + "confused": 0, + "eyes": 0, + "heart": 0, + "hooray": 0, + "laugh": 0, + "rocket": 0, + "total_count": 0, + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/comments/173902372/reactions" + }, + "reactions_data": [], + "updated_at": "2016-01-22T12:17:18Z", + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/comments/173902372", + "user": { + "avatar_url": "https://avatars3.githubusercontent.com/u/833352?v=4", + "events_url": "https://api.github.com/users/sduenas/events{/privacy}", + "followers_url": "https://api.github.com/users/sduenas/followers", + "following_url": "https://api.github.com/users/sduenas/following{/other_user}", + "gists_url": "https://api.github.com/users/sduenas/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/sduenas", + "id": 833352, + "login": "sduenas", + "node_id": "MDQ6VXNlcjgzMzM1Mg==", + "organizations_url": "https://api.github.com/users/sduenas/orgs", + "received_events_url": 
"https://api.github.com/users/sduenas/received_events", + "repos_url": "https://api.github.com/users/sduenas/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/sduenas/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/sduenas/subscriptions", + "type": "User", + "url": "https://api.github.com/users/sduenas" + } + }, + { + "author_association": "MEMBER", + "body": "@sduenas all review addressed except adding user data for people doing comments and changes. I am not sure we should complete user data also for this actors. The resulting JSON could be huge and with a lot of duplicates. What do you think?\n", + "created_at": "2016-01-26T04:45:20Z", + "html_url": "https://github.com/chaoss/grimoirelab-perceval/pull/7#issuecomment-174822113", + "id": 174822113, + "issue_url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/7", + "node_id": "MDEyOklzc3VlQ29tbWVudDE3NDgyMjExMw==", + "reactions": { + "+1": 0, + "-1": 0, + "confused": 0, + "eyes": 0, + "heart": 0, + "hooray": 0, + "laugh": 0, + "rocket": 0, + "total_count": 0, + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/comments/174822113/reactions" + }, + "reactions_data": [], + "updated_at": "2016-01-26T04:45:20Z", + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/comments/174822113", + "user": { + "avatar_url": "https://avatars0.githubusercontent.com/u/209533?v=4", + "events_url": "https://api.github.com/users/acs/events{/privacy}", + "followers_url": "https://api.github.com/users/acs/followers", + "following_url": "https://api.github.com/users/acs/following{/other_user}", + "gists_url": "https://api.github.com/users/acs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/acs", + "id": 209533, + "login": "acs", + "node_id": "MDQ6VXNlcjIwOTUzMw==", + "organizations_url": "https://api.github.com/users/acs/orgs", + "received_events_url": 
"https://api.github.com/users/acs/received_events", + "repos_url": "https://api.github.com/users/acs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/acs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/acs/subscriptions", + "type": "User", + "url": "https://api.github.com/users/acs" + } + }, + { + "author_association": "MEMBER", + "body": "@acs you're right. We can keep these data away for the moment.\n\nCan you check the code that I added? Do you think it's better than your proposal?\n", + "created_at": "2016-01-26T12:43:46Z", + "html_url": "https://github.com/chaoss/grimoirelab-perceval/pull/7#issuecomment-174995322", + "id": 174995322, + "issue_url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/7", + "node_id": "MDEyOklzc3VlQ29tbWVudDE3NDk5NTMyMg==", + "reactions": { + "+1": 0, + "-1": 0, + "confused": 0, + "eyes": 0, + "heart": 0, + "hooray": 0, + "laugh": 0, + "rocket": 0, + "total_count": 0, + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/comments/174995322/reactions" + }, + "reactions_data": [], + "updated_at": "2016-01-26T12:43:46Z", + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/comments/174995322", + "user": { + "avatar_url": "https://avatars3.githubusercontent.com/u/833352?v=4", + "events_url": "https://api.github.com/users/sduenas/events{/privacy}", + "followers_url": "https://api.github.com/users/sduenas/followers", + "following_url": "https://api.github.com/users/sduenas/following{/other_user}", + "gists_url": "https://api.github.com/users/sduenas/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/sduenas", + "id": 833352, + "login": "sduenas", + "node_id": "MDQ6VXNlcjgzMzM1Mg==", + "organizations_url": "https://api.github.com/users/sduenas/orgs", + "received_events_url": "https://api.github.com/users/sduenas/received_events", + "repos_url": "https://api.github.com/users/sduenas/repos", + 
"site_admin": false, + "starred_url": "https://api.github.com/users/sduenas/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/sduenas/subscriptions", + "type": "User", + "url": "https://api.github.com/users/sduenas" + } + } + ], + "comments_url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/7/comments", + "created_at": "2016-01-21T19:57:25Z", + "events_url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/7/events", + "html_url": "https://github.com/chaoss/grimoirelab-perceval/pull/7", + "id": 128006801, + "labels": [], + "labels_url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/7/labels{/name}", + "locked": false, + "milestone": null, + "node_id": "MDExOlB1bGxSZXF1ZXN0NTY4MDA0OTg=", + "number": 7, + "pull_request": { + "diff_url": "https://github.com/chaoss/grimoirelab-perceval/pull/7.diff", + "html_url": "https://github.com/chaoss/grimoirelab-perceval/pull/7", + "patch_url": "https://github.com/chaoss/grimoirelab-perceval/pull/7.patch", + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/pulls/7" + }, + "reactions": { + "+1": 0, + "-1": 0, + "confused": 0, + "eyes": 0, + "heart": 0, + "hooray": 0, + "laugh": 0, + "rocket": 0, + "total_count": 0, + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/7/reactions" + }, + "reactions_data": [], + "repository_url": "https://api.github.com/repos/chaoss/grimoirelab-perceval", + "state": "closed", + "title": "[github] Add to issues user data from the GitHub User API (email, company ...)", + "updated_at": "2016-01-26T19:26:43Z", + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/7", + "user": { + "avatar_url": "https://avatars0.githubusercontent.com/u/209533?v=4", + "events_url": "https://api.github.com/users/acs/events{/privacy}", + "followers_url": "https://api.github.com/users/acs/followers", + "following_url": "https://api.github.com/users/acs/following{/other_user}", + 
"gists_url": "https://api.github.com/users/acs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/acs", + "id": 209533, + "login": "acs", + "node_id": "MDQ6VXNlcjIwOTUzMw==", + "organizations_url": "https://api.github.com/users/acs/orgs", + "received_events_url": "https://api.github.com/users/acs/received_events", + "repos_url": "https://api.github.com/users/acs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/acs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/acs/subscriptions", + "type": "User", + "url": "https://api.github.com/users/acs" + } + }, + "origin": "https://github.com/chaoss/grimoirelab-perceval", + "perceval_version": "0.12.23", + "search_fields": { + "item_id": "128006801", + "owner": "chaoss", + "repo": "grimoirelab-perceval" + }, + "tag": "https://github.com/chaoss/grimoirelab-perceval", + "timestamp": 1571652244.817805, + "updated_on": 1453836403.0, + "uuid": "f18b8ff1f44b5e4e9c816844f9eca59973101ffa" } ] \ No newline at end of file diff --git a/tests/data/github2.json b/tests/data/github2.json index fff09bb3e..30ee9e91b 100644 --- a/tests/data/github2.json +++ b/tests/data/github2.json @@ -2812,5 +2812,230 @@ "timestamp": 1571652317.403738, "updated_on": 1451980476.0, "uuid": "7a9c57530aa603c51bf41b52360abdfb8458d3ee" + }, + { + "backend_name": "GitHub", + "backend_version": "0.24.0", + "category": "message", + "classified_fields_filtered": [ + "user_data", + "merged_by_data", + "assignee_data", + "assignees_data", + "requested_reviewers_data", + "comments_data.user_data", + "comments_data.reactions_data.user_data", + "reviews_data.user_data", + "review_comments_data.user_data", + "review_comments_data.reactions_data.user_data" + ], + "data": { + "assignee": null, + "assignees": [], + "author_association": "MEMBER", + "body": "Add also the cache support for users.\n\nThe users in an issue are: \"user\", \"assignee\". 
Both of them have now extra user data retrieved from GitHub User API.\n", + "closed_at": "2016-01-26T19:26:43Z", + "comments": 3, + "comments_data": [ + { + "author_association": "MEMBER", + "body": "Reviewed it. @acs I miss the code where you add the information about who made a comment or a change.\n", + "created_at": "2016-01-22T12:17:18Z", + "html_url": "https://github.com/chaoss/grimoirelab-perceval/pull/7#issuecomment-173902372", + "id": 173902372, + "issue_url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/7", + "node_id": "MDEyOklzc3VlQ29tbWVudDE3MzkwMjM3Mg==", + "reactions": { + "+1": 0, + "-1": 0, + "confused": 0, + "eyes": 0, + "heart": 0, + "hooray": 0, + "laugh": 0, + "rocket": 0, + "total_count": 0, + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/comments/173902372/reactions" + }, + "reactions_data": [], + "updated_at": "2016-01-22T12:17:18Z", + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/comments/173902372", + "user": { + "avatar_url": "https://avatars3.githubusercontent.com/u/833352?v=4", + "events_url": "https://api.github.com/users/sduenas/events{/privacy}", + "followers_url": "https://api.github.com/users/sduenas/followers", + "following_url": "https://api.github.com/users/sduenas/following{/other_user}", + "gists_url": "https://api.github.com/users/sduenas/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/sduenas", + "id": 833352, + "login": "sduenas", + "node_id": "MDQ6VXNlcjgzMzM1Mg==", + "organizations_url": "https://api.github.com/users/sduenas/orgs", + "received_events_url": "https://api.github.com/users/sduenas/received_events", + "repos_url": "https://api.github.com/users/sduenas/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/sduenas/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/sduenas/subscriptions", + "type": "User", + "url": "https://api.github.com/users/sduenas" + } + }, + { + 
"author_association": "MEMBER", + "body": "@sduenas all review addressed except adding user data for people doing comments and changes. I am not sure we should complete user data also for this actors. The resulting JSON could be huge and with a lot of duplicates. What do you think?\n", + "created_at": "2016-01-26T04:45:20Z", + "html_url": "https://github.com/chaoss/grimoirelab-perceval/pull/7#issuecomment-174822113", + "id": 174822113, + "issue_url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/7", + "node_id": "MDEyOklzc3VlQ29tbWVudDE3NDgyMjExMw==", + "reactions": { + "+1": 0, + "-1": 0, + "confused": 0, + "eyes": 0, + "heart": 0, + "hooray": 0, + "laugh": 0, + "rocket": 0, + "total_count": 0, + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/comments/174822113/reactions" + }, + "reactions_data": [], + "updated_at": "2016-01-26T04:45:20Z", + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/comments/174822113", + "user": { + "avatar_url": "https://avatars0.githubusercontent.com/u/209533?v=4", + "events_url": "https://api.github.com/users/acs/events{/privacy}", + "followers_url": "https://api.github.com/users/acs/followers", + "following_url": "https://api.github.com/users/acs/following{/other_user}", + "gists_url": "https://api.github.com/users/acs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/acs", + "id": 209533, + "login": "acs", + "node_id": "MDQ6VXNlcjIwOTUzMw==", + "organizations_url": "https://api.github.com/users/acs/orgs", + "received_events_url": "https://api.github.com/users/acs/received_events", + "repos_url": "https://api.github.com/users/acs/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/acs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/acs/subscriptions", + "type": "User", + "url": "https://api.github.com/users/acs" + } + }, + { + "author_association": "MEMBER", + "body": "@acs you're right. 
We can keep these data away for the moment.\n\nCan you check the code that I added? Do you think it's better than your proposal?\n", + "created_at": "2016-01-26T12:43:46Z", + "html_url": "https://github.com/chaoss/grimoirelab-perceval/pull/7#issuecomment-174995322", + "id": 174995322, + "issue_url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/7", + "node_id": "MDEyOklzc3VlQ29tbWVudDE3NDk5NTMyMg==", + "reactions": { + "+1": 0, + "-1": 0, + "confused": 0, + "eyes": 0, + "heart": 0, + "hooray": 0, + "laugh": 0, + "rocket": 0, + "total_count": 0, + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/comments/174995322/reactions" + }, + "reactions_data": [], + "updated_at": "2016-01-26T12:43:46Z", + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/comments/174995322", + "user": { + "avatar_url": "https://avatars3.githubusercontent.com/u/833352?v=4", + "events_url": "https://api.github.com/users/sduenas/events{/privacy}", + "followers_url": "https://api.github.com/users/sduenas/followers", + "following_url": "https://api.github.com/users/sduenas/following{/other_user}", + "gists_url": "https://api.github.com/users/sduenas/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/sduenas", + "id": 833352, + "login": "sduenas", + "node_id": "MDQ6VXNlcjgzMzM1Mg==", + "organizations_url": "https://api.github.com/users/sduenas/orgs", + "received_events_url": "https://api.github.com/users/sduenas/received_events", + "repos_url": "https://api.github.com/users/sduenas/repos", + "site_admin": false, + "starred_url": "https://api.github.com/users/sduenas/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/sduenas/subscriptions", + "type": "User", + "url": "https://api.github.com/users/sduenas" + } + } + ], + "comments_url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/7/comments", + "created_at": "2016-01-21T19:57:25Z", + "events_url": 
"https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/7/events", + "html_url": "https://github.com/chaoss/grimoirelab-perceval/pull/7", + "id": 128006801, + "labels": [], + "labels_url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/7/labels{/name}", + "locked": false, + "milestone": null, + "node_id": "MDExOlB1bGxSZXF1ZXN0NTY4MDA0OTg=", + "number": 7, + "pull_request": { + "diff_url": "https://github.com/chaoss/grimoirelab-perceval/pull/7.diff", + "html_url": "https://github.com/chaoss/grimoirelab-perceval/pull/7", + "patch_url": "https://github.com/chaoss/grimoirelab-perceval/pull/7.patch", + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/pulls/7" + }, + "reactions": { + "+1": 0, + "-1": 0, + "confused": 0, + "eyes": 0, + "heart": 0, + "hooray": 0, + "laugh": 0, + "rocket": 0, + "total_count": 0, + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/7/reactions" + }, + "reactions_data": [], + "repository_url": "https://api.github.com/repos/chaoss/grimoirelab-perceval", + "state": "closed", + "title": "[github] Add to issues user data from the GitHub User API (email, company ...)", + "updated_at": "2016-01-26T19:26:43Z", + "url": "https://api.github.com/repos/chaoss/grimoirelab-perceval/issues/7", + "user": { + "avatar_url": "https://avatars0.githubusercontent.com/u/209533?v=4", + "events_url": "https://api.github.com/users/acs/events{/privacy}", + "followers_url": "https://api.github.com/users/acs/followers", + "following_url": "https://api.github.com/users/acs/following{/other_user}", + "gists_url": "https://api.github.com/users/acs/gists{/gist_id}", + "gravatar_id": "", + "html_url": "https://github.com/acs", + "id": 209533, + "login": "acs", + "node_id": "MDQ6VXNlcjIwOTUzMw==", + "organizations_url": "https://api.github.com/users/acs/orgs", + "received_events_url": "https://api.github.com/users/acs/received_events", + "repos_url": "https://api.github.com/users/acs/repos", + "site_admin": 
false, + "starred_url": "https://api.github.com/users/acs/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/acs/subscriptions", + "type": "User", + "url": "https://api.github.com/users/acs" + } + }, + "origin": "https://github.com/chaoss/grimoirelab-perceval", + "perceval_version": "0.12.23", + "search_fields": { + "item_id": "128006801", + "owner": "chaoss", + "repo": "grimoirelab-perceval" + }, + "tag": "https://github.com/chaoss/grimoirelab-perceval", + "timestamp": 1571652244.817805, + "updated_on": 1453836403.0, + "uuid": "ff9eca5e9c9973101ffa18b8ff1f44b5e4816844" } ] \ No newline at end of file diff --git a/tests/test_github.py b/tests/test_github.py index 0a67a1ee8..9ec73eff6 100644 --- a/tests/test_github.py +++ b/tests/test_github.py @@ -61,7 +61,7 @@ def test_raw_to_enrich(self): self.assertGreater(result['raw'], 0) self.assertGreater(result['enrich'], 0) - self.assertEqual(result['raw'], result['enrich']) + self.assertEqual(result['raw'] - 1, result['enrich']) enrich_backend = self.connectors[self.connector][2]() @@ -150,7 +150,8 @@ def test_enrich_repo_labels(self): for item in self.items: eitem = enrich_backend.get_rich_item(item) - self.assertIn(REPO_LABELS, eitem) + if eitem: + self.assertIn(REPO_LABELS, eitem) def test_raw_to_enrich_sorting_hat(self): """Test enrich with SortingHat""" @@ -158,7 +159,7 @@ def test_raw_to_enrich_sorting_hat(self): result = self._test_raw_to_enrich(sortinghat=True) self.assertGreater(result['raw'], 0) self.assertGreater(result['enrich'], 0) - self.assertEqual(result['raw'], result['enrich']) + self.assertEqual(result['raw'] - 1, result['enrich']) enrich_backend = self.connectors[self.connector][2]() @@ -298,7 +299,7 @@ def test_raw_to_enrich_anonymized(self): self.assertGreater(result['raw'], 0) self.assertGreater(result['enrich'], 0) - self.assertEqual(result['raw'], result['enrich']) + self.assertEqual(result['raw'] - 1, result['enrich']) enrich_backend = 
self.connectors[self.connector][2]() diff --git a/tests/test_github2.py b/tests/test_github2.py index 02fa7b489..c8d750817 100644 --- a/tests/test_github2.py +++ b/tests/test_github2.py @@ -48,15 +48,15 @@ def test_items_to_raw(self): result = self._test_items_to_raw() - self.assertEqual(result['items'], 7) - self.assertEqual(result['raw'], 7) + self.assertEqual(result['items'], 8) + self.assertEqual(result['raw'], 8) def test_raw_to_enrich(self): """Test whether the raw index is properly enriched""" result = self._test_raw_to_enrich() - self.assertEqual(result['raw'], 6) + self.assertEqual(result['raw'], 7) self.assertEqual(result['enrich'], 11) enrich_backend = self.connectors[self.connector][2]() @@ -146,7 +146,8 @@ def test_enrich_repo_labels(self): for item in self.items: eitem = enrich_backend.get_rich_item(item) - self.assertIn(REPO_LABELS, eitem) + if eitem: + self.assertIn(REPO_LABELS, eitem) def test_raw_to_enrich_sorting_hat(self): """Test enrich with SortingHat""" From 0036e06b3b4abdc6d9afaacf4cfcb34d0aa7ea76 Mon Sep 17 00:00:00 2001 From: Animesh Kumar Date: Fri, 24 Apr 2020 17:14:35 +0530 Subject: [PATCH 3/3] [gitlab] Remove metadata from non-enriched items This commit prevents addition of repository label and metadata filter raw to items which are not to be enriched. This results in creation of empty items which are not uploaded to ElasticSearch. Tests have been added accordingly. 
Signed-off-by: Animesh Kumar --- grimoire_elk/enriched/gitlab.py | 3 +- tests/data/gitlab.json | 74 ++++++++++++++++++++++++++++++++- tests/test_gitlab.py | 9 ++-- 3 files changed, 80 insertions(+), 6 deletions(-) diff --git a/grimoire_elk/enriched/gitlab.py b/grimoire_elk/enriched/gitlab.py index f5f51a045..ef8eea4f2 100644 --- a/grimoire_elk/enriched/gitlab.py +++ b/grimoire_elk/enriched/gitlab.py @@ -148,7 +148,8 @@ def get_rich_item(self, item): elif item['category'] == 'merge_request': rich_item = self.__get_rich_merge(item) else: - logger.error("[gerrit] rich item not defined for GitLab category {}".format(item['category'])) + logger.error("[gitlab] rich item not defined for GitLab category {}".format(item['category'])) + return rich_item self.add_repository_labels(rich_item) self.add_metadata_filter_raw(rich_item) diff --git a/tests/data/gitlab.json b/tests/data/gitlab.json index afd21fd0f..0164d1348 100644 --- a/tests/data/gitlab.json +++ b/tests/data/gitlab.json @@ -11229,5 +11229,77 @@ "tag": "https://gitlab.com/fdroid/fdroiddata", "timestamp": 1537198073.82518, "updated_on": 1526489019.747, - "uuid": "11a8636d4c423c4ef45720828ca37ab23e90c31e"} + "uuid": "11a8636d4c423c4ef45720828ca37ab23e90c31e" + }, + { + "backend_name": "GitLab", + "backend_version": "0.5.0", + "category": "message", + "data": { + "assignee": null, + "assignees": [], + "author": { + "avatar_url": "https://secure.gravatar.com/avatar/b8c8a858811dfece044c3818e21bf4f3?s=80&d=identicon", + "id": 1, + "name": "Timothy Engler", + "state": "active", + "username": "redfish64", + "web_url": "https://gitlab.com/redfish64" + }, + "award_emoji_data": [], + "closed_at": null, + "confidential": false, + "created_at": "2017-03-18T09:52:55.303Z", + "description": "Feeder (com.nononsenseapps.feeder) unnecessary require Android 7.0\n\nPlease compile it with lower minSDK.", + "discussion_locked": null, + "downvotes": 0, + "due_date": null, + "id": 2, + "iid": 2, + "labels": [], + "milestone": null, + 
"notes_data": [ + { + "attachment": null, + "author": { + "avatar_url": "https://secure.gravatar.com/avatar/b8c8a858811dfece044c3818e21bf4f3?s=80&d=identicon", + "id": 1, + "name": "Timothy Engler", + "state": "active", + "username": "redfish64", + "web_url": "https://gitlab.com/redfish64" + }, + "award_emoji_data": [], + "body": "https://github.com/spacecowboy/Feeder/issues/13", + "created_at": "2017-03-18T18:36:39.757Z", + "id": 1, + "noteable_id": 4756532, + "noteable_iid": 641, + "noteable_type": "Issue", + "system": false, + "updated_at": "2017-03-18T18:36:39.757Z" + } + ], + "project_id": 1, + "state": "closed", + "time_stats": { + "human_time_estimate": null, + "human_total_time_spent": null, + "time_estimate": 0, + "total_time_spent": 0 + }, + "title": "Feeder (com.nononsenseapps.feeder) unnecessary require Android 7.0", + "updated_at": "2017-03-18T18:36:39.764Z", + "upvotes": 0, + "user_notes_count": 2, + "web_url": "https://gitlab.com/fdroid/fdroiddata/-/issues/641", + "weight": null + }, + "origin": "https://gitlab.com/fdroid/fdroiddata", + "perceval_version": "0.11.10", + "tag": "https://gitlab.com/fdroid/fdroiddata", + "timestamp": 1536860519.370875, + "updated_on": 1489862199.764, + "uuid": "0777f3d8ed29fbfa61d897c9c9eefb519c1bce7d" + } ] \ No newline at end of file diff --git a/tests/test_gitlab.py b/tests/test_gitlab.py index e811f2bb2..ea7f4334d 100644 --- a/tests/test_gitlab.py +++ b/tests/test_gitlab.py @@ -57,7 +57,7 @@ def test_raw_to_enrich(self): self.assertGreater(result['raw'], 0) self.assertGreater(result['enrich'], 0) - self.assertEqual(result['raw'], result['enrich']) + self.assertEqual(result['raw'] - 1, result['enrich']) enrich_backend = self.connectors[self.connector][2]() @@ -161,7 +161,8 @@ def test_enrich_repo_labels(self): for item in self.items: eitem = enrich_backend.get_rich_item(item) - self.assertIn(REPO_LABELS, eitem) + if eitem: + self.assertIn(REPO_LABELS, eitem) def test_raw_to_enrich_sorting_hat(self): """Test enrich 
with SortingHat""" @@ -169,7 +170,7 @@ def test_raw_to_enrich_sorting_hat(self): result = self._test_raw_to_enrich(sortinghat=True) self.assertGreater(result['raw'], 0) self.assertGreater(result['enrich'], 0) - self.assertEqual(result['raw'], result['enrich']) + self.assertEqual(result['raw'] - 1, result['enrich']) enrich_backend = self.connectors[self.connector][2]() @@ -240,7 +241,7 @@ def test_raw_to_enrich_anonymized(self): self.assertGreater(result['raw'], 0) self.assertGreater(result['enrich'], 0) - self.assertEqual(result['raw'], result['enrich']) + self.assertEqual(result['raw'] - 1, result['enrich']) enrich_backend = self.connectors[self.connector][2]()