Skip to content

Commit

Permalink
Add JVM cataloger (#3217)
Browse files Browse the repository at this point in the history
* add jvm cataloger

Signed-off-by: Alex Goodman <[email protected]>

* simplify version selection

Signed-off-by: Alex Goodman <[email protected]>

* CPEs from JVM cataloger should be declared

Signed-off-by: Alex Goodman <[email protected]>

* ensure package overlap is enabled for sensitive use cases

Signed-off-by: Alex Goodman <[email protected]>

* more permissive glob

Signed-off-by: Alex Goodman <[email protected]>

---------

Signed-off-by: Alex Goodman <[email protected]>
  • Loading branch information
wagoodman authored Sep 23, 2024
1 parent 7815d8e commit 01de99b
Show file tree
Hide file tree
Showing 25 changed files with 4,268 additions and 78 deletions.
5 changes: 5 additions & 0 deletions cmd/syft/internal/options/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,11 @@ func (cfg *Catalog) PostLoad() error {
return fmt.Errorf("bad scope value %q", cfg.Scope)
}

// the binary package exclusion code depends on the file overlap relationships being created upstream in processing
if !cfg.Relationships.PackageFileOwnershipOverlap && cfg.Package.ExcludeBinaryOverlapByOwnership {
return fmt.Errorf("cannot enable exclude-binary-overlap-by-ownership without enabling package-file-ownership-overlap")
}

return nil
}

Expand Down
8 changes: 8 additions & 0 deletions cmd/syft/internal/options/catalog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ func TestCatalog_PostLoad(t *testing.T) {
assert.Empty(t, options.Catalogers)
},
},
{
name: "must have package overlap flag when pruning binaries by overlap",
options: Catalog{
Package: packageConfig{ExcludeBinaryOverlapByOwnership: true},
Relationships: relationshipsConfig{PackageFileOwnershipOverlap: false},
},
wantErr: assert.Error,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion internal/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ package internal
const (
// JSONSchemaVersion is the current schema version output by the JSON encoder
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
JSONSchemaVersion = "16.0.16"
JSONSchemaVersion = "16.0.17"
)
91 changes: 69 additions & 22 deletions internal/relationship/exclude_binaries_by_file_ownership_overlap.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,55 +25,102 @@ var (
binaryMetadataTypes = []string{
reflect.TypeOf(pkg.ELFBinaryPackageNoteJSONPayload{}).Name(),
reflect.TypeOf(pkg.BinarySignature{}).Name(),
reflect.TypeOf(pkg.JavaVMInstallation{}).Name(),
}
)

// ExcludeBinariesByFileOwnershipOverlap iterates over the relationships of the SBOM behind accessor and, for each
// relationship the file-ownership-overlap check flags, deletes the flagged package from s.Artifacts.Packages and
// reassigns s.Relationships to the result of RemoveRelationshipsByID for that package ID.
func ExcludeBinariesByFileOwnershipOverlap(accessor sbomsync.Accessor) {
accessor.WriteToSBOM(func(s *sbom.SBOM) {
for _, r := range s.Relationships {
if excludeBinaryByFileOwnershipOverlap(r, s.Artifacts.Packages) {
s.Artifacts.Packages.Delete(r.To.ID())
s.Relationships = RemoveRelationshipsByID(s.Relationships, r.To.ID())
// an empty ID means this relationship does not call for any package to be removed
if idToRemove := excludeByFileOwnershipOverlap(r, s.Artifacts.Packages); idToRemove != "" {
s.Artifacts.Packages.Delete(idToRemove)
s.Relationships = RemoveRelationshipsByID(s.Relationships, idToRemove)
}
}
})
}

// excludeBinaryByFileOwnershipOverlap will remove packages from a collection given the following properties are true
// 1) the relationship between packages is OwnershipByFileOverlap
// 2) the parent is an "os" package
// 3) the child is a synthetic package generated by the binary cataloger
// 4) the package names are identical
// This was implemented as a way to help resolve: https://github.com/anchore/syft/issues/931
func excludeBinaryByFileOwnershipOverlap(r artifact.Relationship, c *pkg.Collection) bool {
// excludeByFileOwnershipOverlap returns the ID of the package that should be removed because it is overridden by a
// more authoritative package, such as an OS package or a package from a cataloger with more specific information
// being raised up. An empty ID is returned when the relationship does not call for any removal.
func excludeByFileOwnershipOverlap(r artifact.Relationship, c *pkg.Collection) artifact.ID {
// only ownership-by-file-overlap relationships are candidates
if artifact.OwnershipByFileOverlapRelationship != r.Type {
return false
return ""
}

// the "from" side of the relationship is treated as the parent and must exist in the collection
parent := c.Package(r.From.ID())
if parent == nil {
return false
}

parentInExclusion := slices.Contains(osCatalogerTypes, parent.Type)
if !parentInExclusion {
return false
return ""
}

// the "to" side of the relationship is treated as the child and must exist in the collection
child := c.Package(r.To.ID())
if child == nil {
return false
return ""
}

// the OS rule is consulted first; the JVM rule only runs when the OS rule selects nothing
if idToRemove := identifyOverlappingOSRelationship(parent, child); idToRemove != "" {
return idToRemove
}

if idToRemove := identifyOverlappingJVMRelationship(parent, child); idToRemove != "" {
return idToRemove
}

return ""
}

// identifyOverlappingJVMRelationship handles binary -> binary package relationships in which one side carries
// JavaVMInstallation metadata. It returns the ID of the side that does not carry that metadata (the package to
// remove), or an empty ID when either package is not of a binary cataloger type, the child has no metadata,
// neither side is a JVM installation, or both sides are.
func identifyOverlappingJVMRelationship(parent *pkg.Package, child *pkg.Package) artifact.ID {
	bothBinary := slices.Contains(binaryCatalogerTypes, parent.Type) && slices.Contains(binaryCatalogerTypes, child.Type)
	if !bothBinary || child.Metadata == nil {
		return ""
	}

	var (
		sawJVM    bool
		candidate artifact.ID
	)
	for _, p := range []*pkg.Package{parent, child} {
		if _, isJVM := p.Metadata.(pkg.JavaVMInstallation); isJVM {
			sawJVM = true
			continue
		}
		// anything that is not a JVM installation is the removal candidate; the child wins when neither side is
		candidate = p.ID()
	}

	if !sawJVM {
		return ""
	}
	return candidate
}

// identifyOverlappingOSRelationship indicates the package ID to remove if this is an OS pkg -> bin pkg relationship.
// The child's ID is returned when the parent has an OS cataloger type and the child is either of a binary cataloger
// type or carries metadata whose type name is listed in binaryMetadataTypes; otherwise an empty ID is returned.
// This was implemented as a way to help resolve: https://github.com/anchore/syft/issues/931
func identifyOverlappingOSRelationship(parent *pkg.Package, child *pkg.Package) artifact.ID {
// the parent must be one of the OS package types
if !slices.Contains(osCatalogerTypes, parent.Type) {
return ""
}

// a child of a binary cataloger type is selected regardless of its metadata
if slices.Contains(binaryCatalogerTypes, child.Type) {
return true
return child.ID()
}

// without metadata there is nothing further to match on
if child.Metadata == nil {
return false
return ""
}

// otherwise the child is selected only when its metadata type name is a known binary metadata type
childMetadataType := reflect.TypeOf(child.Metadata)
if !slices.Contains(binaryMetadataTypes, reflect.TypeOf(child.Metadata).Name()) {
return ""
}

return slices.Contains(binaryMetadataTypes, childMetadataType.Name())
return child.ID()
}
184 changes: 131 additions & 53 deletions internal/relationship/exclude_binaries_by_file_ownership_overlap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,17 @@ package relationship
import (
"testing"

"github.com/stretchr/testify/assert"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
)

func TestExclude(t *testing.T) {
func TestExcludeByFileOwnershipOverlap(t *testing.T) {
packageA := pkg.Package{Name: "package-a", Type: pkg.ApkPkg}
packageB := pkg.Package{Name: "package-a", Type: pkg.PythonPkg}
packageC := pkg.Package{Name: "package-a", Type: pkg.BinaryPkg}
packageD := pkg.Package{Name: "package-d", Type: pkg.BinaryPkg}
packageE := pkg.Package{Name: "package-e", Type: pkg.RpmPkg, Metadata: pkg.ELFBinaryPackageNoteJSONPayload{Type: "rpm"}}
packageF := pkg.Package{Name: "package-f", Type: pkg.RpmPkg, Metadata: pkg.BinarySignature{}}
for _, p := range []*pkg.Package{&packageA, &packageB, &packageC, &packageD, &packageE, &packageF} {
packageB := pkg.Package{Name: "package-b", Type: pkg.BinaryPkg, Metadata: pkg.JavaVMInstallation{}}
packageC := pkg.Package{Name: "package-c", Type: pkg.BinaryPkg, Metadata: pkg.ELFBinaryPackageNoteJSONPayload{Type: "rpm"}}
for _, p := range []*pkg.Package{&packageA, &packageB, &packageC} {
p := p
p.SetID()
}
Expand All @@ -26,73 +25,152 @@ func TestExclude(t *testing.T) {
shouldExclude bool
}{
{
name: "no exclusions from os -> python",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageB,
},
packages: pkg.NewCollection(packageA, packageB),
shouldExclude: false,
},
{
name: "exclusions from os -> binary",
// prove that OS -> bin exclusions are wired
name: "exclusions from os -> elf binary (as RPM)",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageC,
From: packageA, // OS
To: packageC, // ELF binary
},
packages: pkg.NewCollection(packageA, packageC),
shouldExclude: true,
},
{
name: "exclusions from os -> elf binary (as RPM)",
// prove that bin -> JVM exclusions are wired
name: "exclusions from binary -> binary with JVM metadata",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageE,
From: packageB, // binary with JVM metadata
To: packageC, // binary
},
packages: pkg.NewCollection(packageA, packageE),
packages: pkg.NewCollection(packageC, packageB),
shouldExclude: true,
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
actualExclude := excludeByFileOwnershipOverlap(test.relationship, test.packages)
didExclude := actualExclude != ""
if !didExclude && test.shouldExclude {
t.Errorf("expected to exclude relationship %+v", test.relationship)
}
})

}
}

func TestIdentifyOverlappingOSRelationship(t *testing.T) {
packageA := pkg.Package{Name: "package-a", Type: pkg.ApkPkg} // OS package
packageB := pkg.Package{Name: "package-b", Type: pkg.BinaryPkg}
packageC := pkg.Package{Name: "package-c", Type: pkg.BinaryPkg, Metadata: pkg.BinarySignature{}}
packageD := pkg.Package{Name: "package-d", Type: pkg.PythonPkg} // Language package
packageE := pkg.Package{Name: "package-e", Type: pkg.BinaryPkg, Metadata: pkg.ELFBinaryPackageNoteJSONPayload{}}

for _, p := range []*pkg.Package{&packageA, &packageB, &packageC, &packageD, &packageE} {
p.SetID()
}

tests := []struct {
name string
parent *pkg.Package
child *pkg.Package
expectedID artifact.ID
}{
{
name: "exclusions from os -> binary (masquerading as RPM)",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageF,
},
packages: pkg.NewCollection(packageA, packageF),
shouldExclude: true,
name: "OS -> binary without metadata",
parent: &packageA,
child: &packageB,
expectedID: packageB.ID(), // OS package to binary package, should return child ID
},
{
name: "no exclusions from python -> binary",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageB,
To: packageC,
},
packages: pkg.NewCollection(packageB, packageC),
shouldExclude: false,
name: "OS -> binary with binary metadata",
parent: &packageA,
child: &packageC,
expectedID: packageC.ID(), // OS package to binary package with binary metadata, should return child ID
},
{
name: "no exclusions for different package names",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageD,
},
packages: pkg.NewCollection(packageA, packageD),
shouldExclude: false,
name: "OS -> non-binary package",
parent: &packageA,
child: &packageD,
expectedID: "", // OS package to non-binary package, no exclusion
},
{
name: "OS -> binary with ELF metadata",
parent: &packageA,
child: &packageE,
expectedID: packageE.ID(), // OS package to binary package with ELF metadata, should return child ID
},
{
name: "non-OS parent",
parent: &packageD, // non-OS package
child: &packageC,
expectedID: "", // non-OS parent, no exclusion
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
if !excludeBinaryByFileOwnershipOverlap(test.relationship, test.packages) && test.shouldExclude {
t.Errorf("expected to exclude relationship %+v", test.relationship)
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
resultID := identifyOverlappingOSRelationship(tt.parent, tt.child)
assert.Equal(t, tt.expectedID, resultID)
})
}
}

// TestIdentifyOverlappingJVMRelationship checks which package ID (if any) identifyOverlappingJVMRelationship
// selects for removal across combinations of parent/child package types and metadata, including both directions
// of a JVM installation overlapping another binary package.
func TestIdentifyOverlappingJVMRelationship(t *testing.T) {
	packageA := pkg.Package{Name: "package-a", Type: pkg.BinaryPkg}
	packageB := pkg.Package{Name: "package-b", Type: pkg.BinaryPkg, Metadata: pkg.BinarySignature{}}
	packageC := pkg.Package{Name: "package-c", Type: pkg.BinaryPkg, Metadata: pkg.JavaVMInstallation{}}
	packageD := pkg.Package{Name: "package-d", Type: pkg.PythonPkg}
	packageE := pkg.Package{Name: "package-e", Type: pkg.BinaryPkg}

	for _, p := range []*pkg.Package{&packageA, &packageB, &packageC, &packageD, &packageE} {
		p.SetID()
	}

	tests := []struct {
		name       string
		parent     *pkg.Package
		child      *pkg.Package
		expectedID artifact.ID
	}{
		{
			name:       "binary -> binary with JVM installation",
			parent:     &packageA,
			child:      &packageC,
			expectedID: packageA.ID(), // JVM found, return BinaryPkg ID
		},
		{
			// the JVM package as the parent: the non-JVM child is the one to remove
			name:       "binary with JVM installation -> binary with binary signature",
			parent:     &packageC,
			child:      &packageB,
			expectedID: packageB.ID(), // JVM found on the parent, return the signature package ID
		},
		{
			name:       "binary -> binary with binary signature",
			parent:     &packageA,
			child:      &packageB,
			expectedID: "", // binary signatures only found, no exclusion
		},
		{
			name:       "binary -> python (non-binary child)",
			parent:     &packageA,
			child:      &packageD,
			expectedID: "", // non-binary child, no exclusion
		},
		{
			name:       "no JVM or signature in binary -> binary",
			parent:     &packageA,
			child:      &packageE,
			expectedID: "", // no JVM or binary signature, no exclusion
		},
		{
			name:       "non-binary parent",
			parent:     &packageD,
			child:      &packageC,
			expectedID: "", // non-binary parent, no exclusion
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			resultID := identifyOverlappingJVMRelationship(tt.parent, tt.child)
			assert.Equal(t, tt.expectedID, resultID)
		})
	}
}
1 change: 1 addition & 0 deletions internal/task/package_tasks.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ func DefaultPackageTaskFactories() PackageTaskFactories {
newSimplePackageTaskFactory(binary.NewELFPackageCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "binary", "elf-package"),
newSimplePackageTaskFactory(githubactions.NewActionUsageCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, "github", "github-actions"),
newSimplePackageTaskFactory(githubactions.NewWorkflowUsageCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, "github", "github-actions"),
newSimplePackageTaskFactory(java.NewJvmDistributionCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "java", "jvm", "jdk", "jre"),
newPackageTaskFactory(
func(cfg CatalogingFactoryConfig) pkg.Cataloger {
return kernel.NewLinuxKernelCataloger(cfg.PackagesConfig.LinuxKernel)
Expand Down
Loading

0 comments on commit 01de99b

Please sign in to comment.