Skip to content

Commit

Permalink
Refactor meta analyzer to always fetch integrity data
Browse files Browse the repository at this point in the history
The API server no longer specifies whether only the latest version, only integrity metadata, or both should be fetched.

Instead, we now always try to fetch both, relying on caching to prevent unnecessary repetitive calls to remote repositories.

To further reduce the number of remote calls, we now also cache the case where a given repository does not contain the analyzed component. Previously, only successful retrievals were cached.

Additionally, the MDC is now populated with:

* The PURL of the component being analyzed
* The identifier of the repository being used

Signed-off-by: nscuro <[email protected]>
  • Loading branch information
nscuro committed Aug 14, 2024
1 parent c5c0020 commit f9fd919
Show file tree
Hide file tree
Showing 10 changed files with 304 additions and 276 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
syntax = "proto3";

Check failure on line 1 in proto/src/main/proto/org/dependencytrack/repometaanalysis/v1/repo_meta_analysis.proto

View workflow job for this annotation

GitHub Actions / Buf

Previously present enum "FetchMeta" was deleted from file.

// Public API for Hyades repository meta analysis.
// Public API for DependencyTrack repository meta analysis.
package org.dependencytrack.repometaanalysis.v1;

import "google/protobuf/timestamp.proto";
Expand All @@ -11,14 +11,8 @@ option java_package = "org.dependencytrack.proto.repometaanalysis.v1";
message AnalysisCommand {

Check failure on line 11 in proto/src/main/proto/org/dependencytrack/repometaanalysis/v1/repo_meta_analysis.proto

View workflow job for this annotation

GitHub Actions / Buf

Previously present field "2" with name "fetch_meta" on message "AnalysisCommand" was deleted.
// The component that shall be analyzed.
Component component = 1;
FetchMeta fetch_meta = 2;
}

enum FetchMeta{
FETCH_META_UNSPECIFIED = 0;
FETCH_META_INTEGRITY_DATA = 1;
FETCH_META_LATEST_VERSION = 2;
FETCH_META_INTEGRITY_DATA_AND_LATEST_VERSION = 3;
reserved 2; // fetch_meta; removed in 5.6.0
}

message AnalysisResult {
Expand All @@ -34,8 +28,33 @@ message AnalysisResult {
// When the latest version was published.
optional google.protobuf.Timestamp published = 4;

// Integrity metadata of the component.
optional IntegrityMeta integrity_meta = 5;

// When the latest version information was fetched from the repository.
optional google.protobuf.Timestamp fetched_at = 6;
}

message IntegrityMeta {
// The MD5 hash of the component.
optional string md5 = 1;

// The SHA1 hash of the component.
optional string sha1 = 2;

// The SHA256 hash of the component.
optional string sha256 = 3;

// The SHA512 hash of the component.
optional string sha512 = 4;

// When the component was published.
optional google.protobuf.Timestamp current_version_last_modified = 5;

// URL from where the information was sourced.
string meta_source_url = 6;

Check failure on line 54 in proto/src/main/proto/org/dependencytrack/repometaanalysis/v1/repo_meta_analysis.proto

View workflow job for this annotation

GitHub Actions / Buf

Field "6" with name "meta_source_url" on message "IntegrityMeta" changed cardinality from "optional with explicit presence" to "optional with implicit presence".

// When the integrity metadata was fetched.
google.protobuf.Timestamp fetched_at = 7;
}

message Component {
Expand All @@ -47,14 +66,3 @@ message Component {
optional bool internal = 2;
optional string uuid = 3;
}

// Integrity metadata of a component.
message IntegrityMeta {
// The MD5 hash of the component.
optional string md5 = 1;
// The SHA1 hash of the component.
optional string sha1 = 2;
// The SHA256 hash of the component.
optional string sha256 = 3;
// The SHA512 hash of the component.
optional string sha512 = 4;
// When the current version of the component was last modified.
optional google.protobuf.Timestamp current_version_last_modified = 5;
// Complete URL from which the integrity metadata of the component is fetched.
optional string meta_source_url = 6;
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ public Topology topology(final RepositoryAnalyzerFactory analyzerFactory,
.withName(processorNameConsume(KafkaTopic.REPO_META_ANALYSIS_COMMAND)))
.filter((key, scanCommand) -> scanCommand.hasComponent() && isValidPurl(scanCommand.getComponent().getPurl()),
Named.as("filter_components_with_valid_purl"))
// TODO: This repartition is no longer required as of API server 5.6.0.
// Remove this in Hyades v0.7.0 and consume from REPO_META_ANALYSIS_COMMAND directly instead.
// Re-key to PURL coordinates WITHOUT VERSION. As we are fetching data for packages,
// but not specific package versions, including the version here would make our caching
// largely ineffective. We want events for the same package to be sent to the same partition.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ public class IntegrityMeta implements Serializable {

private String metaSourceUrl;

private final Date fetchedAt = new Date();

/**
 * @return the current value of the {@code md5} field
 */
public String getMd5() {
return md5;
}
Expand Down Expand Up @@ -82,4 +84,9 @@ public String getMetaSourceUrl() {
/**
 * Replaces the stored meta source URL.
 *
 * @param metaSourceUrl the value to assign to the {@code metaSourceUrl} field
 */
public void setMetaSourceUrl(String metaSourceUrl) {
this.metaSourceUrl = metaSourceUrl;
}

/**
 * @return the {@code fetchedAt} date; the field is final and is assigned
 *         {@code new Date()} by its initializer
 */
public Date getFetchedAt() {
return fetchedAt;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,12 @@ public class MetaModel implements Serializable {
private Component component;
private String latestVersion;
private Date publishedTimestamp;
private String repositoryIdentifier;
private final Date fetchedAt = new Date();

public MetaModel(){
public MetaModel() {
}

public MetaModel(final Component component) {
this.component = component;
}
Expand All @@ -54,4 +57,17 @@ public Date getPublishedTimestamp() {
/**
 * Replaces the stored published timestamp.
 *
 * @param publishedTimestamp the value to assign to the {@code publishedTimestamp} field
 */
public void setPublishedTimestamp(final Date publishedTimestamp) {
this.publishedTimestamp = publishedTimestamp;
}

/**
 * @return the current value of the {@code repositoryIdentifier} field
 */
public String getRepositoryIdentifier() {
return repositoryIdentifier;
}

/**
 * Replaces the stored repository identifier.
 *
 * @param repositoryIdentifier the value to assign to the {@code repositoryIdentifier} field
 */
public void setRepositoryIdentifier(final String repositoryIdentifier) {
this.repositoryIdentifier = repositoryIdentifier;
}

/**
 * @return the {@code fetchedAt} date; the field is final and is assigned
 *         {@code new Date()} by its initializer
 */
public Date getFetchedAt() {
return fetchedAt;
}

}
Loading

0 comments on commit f9fd919

Please sign in to comment.