Skip to content

Commit

Permalink
Merge pull request #12 from coder/mafredri/feat-fake-and-faster-repro…
Browse files Browse the repository at this point in the history
…ducible-builds

- util: Adds `NewReproducibleTar()` that unsets timestamps in the generated archive
- snapshot: Adds NewReproducibleSnapshotter that utilises NewReproducibleTar implementation 
- commands: Adds FakeExecuteCommand interface and corresponding implementations for COPY and RUN
- executor: Adds DoCacheProbe() that attempts to build an image completely from the build cache, failing if any directives are missing from the cache.
  • Loading branch information
johnstcn authored Jun 10, 2024
2 parents 9d0d559 + 9d76d2f commit 0a73fcd
Show file tree
Hide file tree
Showing 8 changed files with 561 additions and 10 deletions.
9 changes: 8 additions & 1 deletion pkg/commands/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ limitations under the License.

package commands

import v1 "github.com/google/go-containerregistry/pkg/v1"
import (
"github.com/GoogleContainerTools/kaniko/pkg/dockerfile"
v1 "github.com/google/go-containerregistry/pkg/v1"
)

type Cached interface {
Layer() v1.Layer
Expand All @@ -29,3 +32,7 @@ type caching struct {
// Layer returns the layer stored in this caching value.
func (c caching) Layer() v1.Layer {
return c.layer
}

// FakeExecuteCommand is implemented by commands that offer a "fake" execution
// path in addition to their regular one.
type FakeExecuteCommand interface {
	// FakeExecuteCommand performs the fake execution using the given image
	// config and build args, returning an error if it cannot be completed.
	FakeExecuteCommand(config *v1.Config, buildArgs *dockerfile.BuildArgs) error
}
23 changes: 23 additions & 0 deletions pkg/commands/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,29 @@ func (cr *CachingCopyCommand) ExecuteCommand(config *v1.Config, buildArgs *docke
return nil
}

// FakeExecuteCommand records the cached layer of this caching COPY command
// without extracting anything to the filesystem.
//
// It returns an error if no cached image is attached to the command, if the
// image's layers cannot be retrieved, or if the image does not contain exactly
// one layer. config and buildArgs are not used by this implementation.
func (cr *CachingCopyCommand) FakeExecuteCommand(config *v1.Config, buildArgs *dockerfile.BuildArgs) error {
	logrus.Infof("Found cached layer, faking extraction to filesystem")

	if cr.img == nil {
		return errors.Errorf("cached command image is nil %v", cr.String())
	}

	layers, err := cr.img.Layers()
	if err != nil {
		return errors.Wrap(err, "retrieve image layers")
	}

	if len(layers) != 1 {
		return errors.Errorf("expected %d layers but got %d", 1, len(layers))
	}

	cr.layer = layers[0]
	// Nothing is extracted during a fake run, so the list is explicitly empty.
	// NOTE(review): kept as a non-nil empty slice in case consumers treat a nil
	// file list differently from an empty one — confirm before changing.
	cr.extractedFiles = []string{}

	return nil
}

func (cr *CachingCopyCommand) FilesUsedFromContext(config *v1.Config, buildArgs *dockerfile.BuildArgs) ([]string, error) {
return copyCmdFilesUsedFromContext(config, buildArgs, cr.cmd, cr.fileContext)
}
Expand Down
23 changes: 23 additions & 0 deletions pkg/commands/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,29 @@ func (cr *CachingRunCommand) ExecuteCommand(config *v1.Config, buildArgs *docker
return nil
}

// FakeExecuteCommand records the cached layer of this caching RUN command
// without extracting anything to the filesystem.
//
// It returns an error if no cached image is attached to the command, if the
// image's layers cannot be retrieved, or if the image does not contain exactly
// one layer. config and buildArgs are not used by this implementation.
func (cr *CachingRunCommand) FakeExecuteCommand(config *v1.Config, buildArgs *dockerfile.BuildArgs) error {
	logrus.Infof("Found cached layer, faking extraction to filesystem")

	if cr.img == nil {
		return errors.Errorf("command image is nil %v", cr.String())
	}

	layers, err := cr.img.Layers()
	if err != nil {
		return errors.Wrap(err, "retrieving image layers")
	}

	if len(layers) != 1 {
		return errors.Errorf("expected %d layers but got %d", 1, len(layers))
	}

	cr.layer = layers[0]
	// Nothing is extracted during a fake run, so the list is explicitly empty.
	// NOTE(review): kept as a non-nil empty slice in case consumers treat a nil
	// file list differently from an empty one — confirm before changing.
	cr.extractedFiles = []string{}

	return nil
}

func (cr *CachingRunCommand) FilesToSnapshot() []string {
f := cr.extractedFiles
logrus.Debugf("%d files extracted by caching run command", len(f))
Expand Down
234 changes: 232 additions & 2 deletions pkg/executor/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,12 @@ func newStageBuilder(args *dockerfile.BuildArgs, opts *config.KanikoOptions, sta
return nil, err
}
l := snapshot.NewLayeredMap(hasher)
snapshotter := snapshot.NewSnapshotter(l, config.RootDir)
var snapshotter snapShotter
if !opts.Reproducible {
snapshotter = snapshot.NewSnapshotter(l, config.RootDir)
} else {
snapshotter = snapshot.NewReproducibleSnapshotter(l, config.RootDir)
}

digest, err := sourceImage.Digest()
if err != nil {
Expand Down Expand Up @@ -444,6 +449,93 @@ func (s *stageBuilder) build() error {
return nil
}

// probeCache builds a stage entirely from the build cache.
//
// Commands that implement commands.FakeExecuteCommand (such as the caching
// COPY and RUN commands) are faked instead of executed. Of the remaining
// commands only *commands.UserCommand is executed for real; any other command
// aborts the probe with an error.
//
// After each command a layer is appended to the stage image unless the
// snapshot is skipped: commands implementing commands.Cached contribute their
// cached layer directly, all others go through a regular snapshot.
func (s *stageBuilder) probeCache() error {
	// Seed the composite key with the cache key already recorded for the base
	// image digest, falling back to the digest itself.
	var compositeKey *CompositeCache
	if cacheKey, ok := s.digestToCacheKey[s.baseImageDigest]; ok {
		compositeKey = NewCompositeCache(cacheKey)
	} else {
		compositeKey = NewCompositeCache(s.baseImageDigest)
	}

	// Apply optimizations to the instructions.
	if err := s.optimize(*compositeKey, s.cf.Config); err != nil {
		return errors.Wrap(err, "failed to optimize instructions")
	}

	for index, command := range s.cmds {
		if command == nil {
			continue
		}

		// If the command uses files from the context, add them.
		files, err := command.FilesUsedFromContext(&s.cf.Config, s.args)
		if err != nil {
			return errors.Wrap(err, "failed to get files used from context")
		}

		if s.opts.Cache {
			*compositeKey, err = s.populateCompositeKey(command, files, *compositeKey, s.args, s.cf.Config.Env)
			if err != nil {
				return err
			}
		}

		logrus.Info(command.String())

		if fake, ok := command.(commands.FakeExecuteCommand); ok {
			if err := fake.FakeExecuteCommand(&s.cf.Config, s.args); err != nil {
				return errors.Wrap(err, "failed to execute fake command")
			}
		} else {
			// Only USER may be executed for real; everything else must be fakeable.
			if _, isUser := command.(*commands.UserCommand); !isUser {
				return errors.Errorf("uncached command %T is not supported in fake build", command)
			}
			if err := command.ExecuteCommand(&s.cf.Config, s.args); err != nil {
				return errors.Wrap(err, "failed to execute command")
			}
		}
		// Queried unconditionally, right after execution, as in the regular flow.
		files = command.FilesToSnapshot()

		if !s.shouldTakeSnapshot(index, command.MetadataOnly()) && !s.opts.ForceBuildMetadata {
			logrus.Debugf("fakeBuild: skipping snapshot for [%v]", command.String())
			continue
		}

		// The layer is read only now, after the (fake) execution has run.
		if cached, ok := command.(commands.Cached); ok {
			if err := s.saveLayerToImage(cached.Layer(), command.String()); err != nil {
				return errors.Wrap(err, "failed to save layer")
			}
			continue
		}

		tarPath, err := s.takeSnapshot(files, command.ShouldDetectDeletedFiles())
		if err != nil {
			return errors.Wrap(err, "failed to take snapshot")
		}

		if err := s.saveSnapshotToImage(command.String(), tarPath); err != nil {
			return errors.Wrap(err, "failed to save snapshot to image")
		}
	}

	return nil
}

func (s *stageBuilder) takeSnapshot(files []string, shdDelete bool) (string, error) {
var snapshot string
var err error
Expand Down Expand Up @@ -787,7 +879,9 @@ func DoBuild(opts *config.KanikoOptions) (v1.Image, error) {
return nil, err
}
if opts.Reproducible {
sourceImage, err = mutate.Canonical(sourceImage)
// If this option is enabled, we will use the canonical
// snapshotter to avoid having to modify the layers here.
sourceImage, err = mutateCanonicalWithoutLayerEdit(sourceImage)
if err != nil {
return nil, err
}
Expand All @@ -797,6 +891,7 @@ func DoBuild(opts *config.KanikoOptions) (v1.Image, error) {
return nil, err
}
}

timing.DefaultRun.Stop(t)
return sourceImage, nil
}
Expand Down Expand Up @@ -833,6 +928,141 @@ func DoBuild(opts *config.KanikoOptions) (v1.Image, error) {
return nil, err
}

// DoCacheProbe builds the Dockerfile relying entirely on the build cache:
// every stage is processed with stageBuilder.probeCache instead of a real
// build.
//
// It returns the canonicalized image of the final stage. An error is returned
// if parsing or stage setup fails, if a stage cannot be probed (for example
// because a layer is missing from the build cache), if opts.CustomPlatform is
// set but is not of the form <os>/<arch>, or if no stage is marked as final.
func DoCacheProbe(opts *config.KanikoOptions) (v1.Image, error) {
	digestToCacheKey := make(map[string]string)
	stageIdxToDigest := make(map[string]string)

	stages, metaArgs, err := dockerfile.ParseStages(opts)
	if err != nil {
		return nil, err
	}

	kanikoStages, err := dockerfile.MakeKanikoStages(opts, stages, metaArgs)
	if err != nil {
		return nil, err
	}
	stageNameToIdx := ResolveCrossStageInstructions(kanikoStages)

	fileContext, err := util.NewFileContextFromDockerfile(opts.DockerfilePath, opts.SrcContext)
	if err != nil {
		return nil, err
	}

	// Some stages may refer to other random images, not previous stages
	if err := fetchExtraStages(kanikoStages, opts); err != nil {
		return nil, err
	}
	crossStageDependencies, err := CalculateDependencies(kanikoStages, opts, stageNameToIdx)
	if err != nil {
		return nil, err
	}
	logrus.Infof("Built cross stage deps: %v", crossStageDependencies)

	// Build args are carried over from one stage builder to the next.
	var args *dockerfile.BuildArgs

	for _, stage := range kanikoStages {
		sb, err := newStageBuilder(
			args, opts, stage,
			crossStageDependencies,
			digestToCacheKey,
			stageIdxToDigest,
			stageNameToIdx,
			fileContext)
		if err != nil {
			return nil, err
		}

		args = sb.args
		if err := sb.probeCache(); err != nil {
			return nil, errors.Wrap(err, "error fake building stage")
		}

		reviewConfig(stage, &sb.cf.Config)

		sourceImage, err := mutate.Config(sb.image, sb.cf.Config)
		if err != nil {
			return nil, err
		}

		configFile, err := sourceImage.ConfigFile()
		if err != nil {
			return nil, err
		}
		if opts.CustomPlatform == "" {
			configFile.OS = runtime.GOOS
			configFile.Architecture = runtime.GOARCH
		} else {
			// Split once and validate, so a platform without "/" produces an
			// error instead of an index-out-of-range panic.
			platform := strings.Split(opts.CustomPlatform, "/")
			if len(platform) < 2 {
				return nil, errors.Errorf("invalid custom platform %q: expected <os>/<arch>", opts.CustomPlatform)
			}
			configFile.OS = platform[0]
			configFile.Architecture = platform[1]
		}
		sourceImage, err = mutate.ConfigFile(sourceImage, configFile)
		if err != nil {
			return nil, err
		}

		d, err := sourceImage.Digest()
		if err != nil {
			return nil, err
		}
		stageIdxToDigest[fmt.Sprintf("%d", sb.stage.Index)] = d.String()
		logrus.Infof("Mapping stage idx %v to digest %v", sb.stage.Index, d.String())

		digestToCacheKey[d.String()] = sb.finalCacheKey
		logrus.Infof("Mapping digest %v to cachekey %v", d.String(), sb.finalCacheKey)

		if stage.Final {
			sourceImage, err = mutateCanonicalWithoutLayerEdit(sourceImage)
			if err != nil {
				return nil, err
			}

			return sourceImage, nil
		}
	}

	// Reaching this point means no stage was marked final; report it instead
	// of handing the caller a nil image together with a nil error.
	return nil, errors.New("no final stage found while probing the build cache")
}

// mutateCanonicalWithoutLayerEdit returns image with a canonicalized config
// file: the Created timestamp of the image and of every history entry is reset
// to the zero time, history authors are cleared, and the Container,
// Config.Hostname and DockerVersion fields are emptied.
//
// Derived from mutate.Canonical with the layer de/compress step stripped out;
// only the config file is replaced here.
func mutateCanonicalWithoutLayerEdit(image v1.Image) (v1.Image, error) {
	ocf, err := image.ConfigFile()
	if err != nil {
		return nil, fmt.Errorf("getting config file: %w", err)
	}

	// The deep copy already carries every field over (architecture, OS,
	// config, history metadata), so only the fields to strip are touched below.
	cfg := ocf.DeepCopy()

	// Strip away timestamps from the config file.
	zero := v1.Time{Time: time.Time{}}
	cfg.Created = zero

	for i := range cfg.History {
		cfg.History[i].Created = zero
		// Explicitly drop the Author field, which hinders reproducibility.
		cfg.History[i].Author = ""
	}

	cfg.Container = ""
	cfg.Config.Hostname = ""
	cfg.DockerVersion = ""

	return mutate.ConfigFile(image, cfg)
}

// filesToSave returns all the files matching the given pattern in deps.
// If a file is a symlink, it also returns the target file.
func filesToSave(deps []string) ([]string, error) {
Expand Down
Loading

0 comments on commit 0a73fcd

Please sign in to comment.