// Package s3 brings S3 file handling to afero.
package s3

import (
	"context"
	"errors"
	"fmt"
	"io"
	"mime"
	"os"
	"path"
	"path/filepath"
	"strings"
	"time"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/feature/s3/manager"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	"github.com/spf13/afero"
)

// File represents a file in S3.
type File struct {
	fs   *Fs    // Parent file system
	name string // Name of the file

	cachedInfo os.FileInfo // File info cached for later use

	streamRead       io.ReadCloser // streamRead is the underlying stream we are reading from
	streamReadOffset int64         // streamReadOffset is the offset of the read-only stream

	streamWrite         io.WriteCloser // streamWrite is the underlying stream we are writing to
	streamWriteErr      error          // streamWriteErr is the error to report when a write fails
	streamWriteCloseErr chan error     // streamWriteCloseErr is the channel carrying the underlying upload error

	readdirContinuationToken *string // readdirContinuationToken is used to perform file listing across calls
	readdirNotTruncated      bool    // readdirNotTruncated is set once the listing is complete and no pages remain
	// Note: readdirNotTruncated could probably be dropped; the continuation token alone may be enough.
}

// NewFile initializes a File object.
func NewFile(fs *Fs, name string) *File {
	return &File{
		fs:   fs,
		name: name,
	}
}

// Name returns the filename, i.e. the S3 path without the bucket name.
func (f *File) Name() string { return f.name }

// Readdir reads the contents of the directory associated with file and
// returns a slice of up to n FileInfo values, as would be returned
// by ListObjects, in directory order. Subsequent calls on the same file
// will yield further FileInfos.
//
// If n > 0, Readdir returns at most n FileInfo structures. In this case, if
// Readdir returns an empty slice, it will return a non-nil error
// explaining why. At the end of a directory, the error is io.EOF.
//
// If n <= 0, Readdir returns all the FileInfo from the directory in
// a single slice. In this case, if Readdir succeeds (reads all
// the way to the end of the directory), it returns the slice and a
// nil error. If it encounters an error before the end of the
// directory, Readdir returns the FileInfo read until that point
// and a non-nil error.
func (f *File) Readdir(n int) ([]os.FileInfo, error) {
	if f.readdirNotTruncated {
		return nil, io.EOF
	}
	if n <= 0 {
		return f.ReaddirAll()
	}
	// ListObjects treats a leading slash as part of the object key, so it is
	// stripped; the root of the bucket is listed with an empty prefix.
	// A trailing slash is needed to list the contents of a directory.
	name := strings.TrimPrefix(f.Name(), "/")
	if name != "" && !strings.HasSuffix(name, "/") {
		name += "/"
	}
	output, err := f.fs.client.ListObjectsV2(context.Background(), &s3.ListObjectsV2Input{
		ContinuationToken: f.readdirContinuationToken,
		Bucket:            aws.String(f.fs.bucket),
		Prefix:            &name,
		Delimiter:         aws.String("/"),
		MaxKeys:           int32(n),
	})
	if err != nil {
		return nil, err
	}
	f.readdirContinuationToken = output.NextContinuationToken
	if !output.IsTruncated {
		f.readdirNotTruncated = true
	}
	fis := make([]os.FileInfo, 0, len(output.CommonPrefixes)+len(output.Contents))
	for _, subfolder := range output.CommonPrefixes {
		fis = append(fis, NewFileInfo(path.Base("/"+*subfolder.Prefix), true, 0, time.Unix(0, 0)))
	}
	for k := range output.Contents {
		fileObject := &output.Contents[k]
		if strings.HasSuffix(*fileObject.Key, "/") {
			// S3 includes <name>/ in the Contents listing for <name>
			continue
		}
		fis = append(fis, NewFileInfo(path.Base("/"+*fileObject.Key), false, fileObject.Size, *fileObject.LastModified))
	}
	return fis, nil
}
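
// A minimal usage sketch of the paginated listing above, with a hypothetical
// s3fs value and path (error handling elided):
//
//	dir, _ := s3fs.Open("/photos")
//	for {
//		infos, err := dir.Readdir(100)
//		for _, info := range infos {
//			fmt.Println(info.Name(), info.IsDir(), info.Size())
//		}
//		if errors.Is(err, io.EOF) {
//			break // the listing is exhausted
//		}
//		if err != nil {
//			// handle the listing error
//		}
//	}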

// ReaddirAll returns the FileInfo of every entry in the directory.
func (f *File) ReaddirAll() ([]os.FileInfo, error) {
	var fileInfos []os.FileInfo
	for {
		infos, err := f.Readdir(100)
		fileInfos = append(fileInfos, infos...)
		if err != nil {
			if errors.Is(err, io.EOF) {
				break
			}
			return nil, err
		}
	}
	return fileInfos, nil
}

// Readdirnames reads and returns a slice of names from the directory f.
//
// If n > 0, Readdirnames returns at most n names. In this case, if
// Readdirnames returns an empty slice, it will return a non-nil error
// explaining why. At the end of a directory, the error is io.EOF.
//
// If n <= 0, Readdirnames returns all the names from the directory in
// a single slice. In this case, if Readdirnames succeeds (reads all
// the way to the end of the directory), it returns the slice and a
// nil error. If it encounters an error before the end of the
// directory, Readdirnames returns the names read until that point and
// a non-nil error.
func (f *File) Readdirnames(n int) ([]string, error) {
	fi, err := f.Readdir(n)
	if err != nil {
		return nil, err
	}
	// nolint: makezero // we know the exact length
	names := make([]string, len(fi))
	for i, info := range fi {
		_, names[i] = path.Split(info.Name())
	}
	return names, nil
}

// Stat returns the FileInfo structure describing the file.
// If there is an error, it will be of type *PathError.
func (f *File) Stat() (os.FileInfo, error) {
	info, err := f.fs.Stat(f.Name())
	if err == nil {
		f.cachedInfo = info
	}
	return info, err
}

// Sync is a no-op.
func (f *File) Sync() error {
	return nil
}

// Truncate would change the size of the file without changing the I/O
// offset, but it is not implemented: it always returns ErrNotImplemented.
func (f *File) Truncate(int64) error {
	return ErrNotImplemented
}

// WriteString is like Write, but writes the contents of string s rather than
// a slice of bytes.
func (f *File) WriteString(s string) (int, error) {
	// nolint: gocritic // can't use f.WriteString because we are implementing it
	return f.Write([]byte(s))
}

// Close closes the File, rendering it unusable for I/O.
// It returns an error, if any.
func (f *File) Close() error {
	// Closing a reading stream
	if f.streamRead != nil {
		// We try to close the Reader
		defer func() {
			f.streamRead = nil
		}()
		return f.streamRead.Close()
	}
	// Closing a writing stream
	if f.streamWrite != nil {
		defer func() {
			f.streamWrite = nil
			f.streamWriteCloseErr = nil
		}()
		// We try to close the Writer
		if err := f.streamWrite.Close(); err != nil {
			return err
		}
		// And more importantly, we wait for the actual upload performed in the
		// goroutine to finish. There may be up to 2*5 = 10 MB of data waiting
		// to be flushed before Close returns, which can be rather slow.
		err := <-f.streamWriteCloseErr
		close(f.streamWriteCloseErr)
		return err
	}
	// Or maybe we don't have anything to close
	return nil
}
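
// Because the upload only completes when Close returns, its error must always
// be checked. A minimal sketch, with hypothetical s3fs, key and data values:
//
//	f, _ := s3fs.OpenFile("/report.csv", os.O_WRONLY|os.O_CREATE, 0o644)
//	if _, err := f.Write(data); err != nil {
//		// the upload goroutine reported a failure
//	}
//	if err := f.Close(); err != nil {
//		// the upload failed; the object may not have been created
//	}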

// Read reads up to len(p) bytes from the File.
// It returns the number of bytes read and an error, if any.
// EOF is signaled by a zero count with err set to io.EOF.
func (f *File) Read(p []byte) (int, error) {
	if f.streamRead == nil {
		return 0, io.EOF
	}
	n, err := f.streamRead.Read(p)
	// A reader may return n > 0 together with an error (typically io.EOF),
	// so the offset is advanced for every byte actually read.
	f.streamReadOffset += int64(n)
	return n, err
}

// ReadAt reads len(p) bytes from the file starting at byte offset off.
// It returns the number of bytes read and the error, if any.
// ReadAt always returns a non-nil error when n < len(p).
// At end of file, that error is io.EOF.
func (f *File) ReadAt(p []byte, off int64) (n int, err error) {
	_, err = f.Seek(off, io.SeekStart)
	if err != nil {
		return
	}
	n, err = f.Read(p)
	return n, err
}

// Seek sets the offset for the next Read or Write on the file to offset,
// interpreted according to whence: io.SeekStart means relative to the origin
// of the file, io.SeekCurrent means relative to the current offset, and
// io.SeekEnd means relative to the end.
// It returns the new offset and an error, if any.
// The behavior of Seek on a file opened with O_APPEND is not specified.
func (f *File) Seek(offset int64, whence int) (int64, error) {
	// Write seek is not supported
	if f.streamWrite != nil {
		return 0, ErrNotSupported
	}
	// Read seek has its own implementation
	if f.streamRead != nil {
		return f.seekRead(offset, whence)
	}
	// Not having a stream
	return 0, afero.ErrFileClosed
}

func (f *File) seekRead(offset int64, whence int) (int64, error) {
	startByte := int64(0)
	switch whence {
	case io.SeekStart:
		startByte = offset
	case io.SeekCurrent:
		startByte = f.streamReadOffset + offset
	case io.SeekEnd:
		// Per the io.Seeker contract the offset is added to the size;
		// it is usually negative when seeking from the end.
		startByte = f.cachedInfo.Size() + offset
	}
	if err := f.streamRead.Close(); err != nil {
		return 0, fmt.Errorf("couldn't close previous stream: %w", err)
	}
	f.streamRead = nil
	if startByte < 0 {
		return startByte, ErrInvalidSeek
	}
	return startByte, f.openReadStream(startByte)
}
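
// A short sketch of random access through Seek (hypothetical sizes; the file
// must be open for reading and cachedInfo must be populated):
//
//	tail := make([]byte, 16)
//	if _, err := f.Seek(-16, io.SeekEnd); err != nil { // 16 bytes before the end
//		return err
//	}
//	_, err := io.ReadFull(f, tail) // reads the last 16 bytes of the object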

// Write writes len(p) bytes to the File.
// It returns the number of bytes written and an error, if any.
// Write returns a non-nil error when n != len(p).
func (f *File) Write(p []byte) (int, error) {
	if f.streamWrite == nil {
		return 0, afero.ErrFileClosed
	}
	n, err := f.streamWrite.Write(p)
	// A failure here is only the pipe's "read/write on closed pipe" error;
	// report the underlying upload error instead when there is one.
	if err != nil && f.streamWriteErr != nil {
		return 0, f.streamWriteErr
	}
	return n, err
}

func (f *File) openWriteStream() error {
	if f.streamWrite != nil {
		return ErrAlreadyOpened
	}
	reader, writer := io.Pipe()
	f.streamWriteCloseErr = make(chan error)
	f.streamWrite = writer
	uploader := manager.NewUploader(f.fs.client)
	uploader.Concurrency = 1
	go func() {
		input := &s3.PutObjectInput{
			Bucket: aws.String(f.fs.bucket),
			Key:    aws.String(f.name),
			Body:   reader,
		}
		if f.fs.FileProps != nil {
			applyFileWriteProps(input, f.fs.FileProps)
		}
		// If no Content-Type was specified, we'll guess one from the extension
		if input.ContentType == nil {
			if contentType := mime.TypeByExtension(filepath.Ext(f.name)); contentType != "" {
				input.ContentType = aws.String(contentType)
			}
		}
		_, err := uploader.Upload(context.Background(), input)
		if err != nil {
			f.streamWriteErr = err
			_ = f.streamWrite.Close()
		}
		// The channel itself is closed by Close() once this error is received.
		f.streamWriteCloseErr <- err
	}()
	return nil
}
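
// The pipe-plus-goroutine pattern above is the usual way to stream a write of
// unknown length to S3: the caller writes into one end of an io.Pipe while the
// uploader consumes the other. A standalone sketch of the same idea, with
// hypothetical uploader, ctx, bucket and key values:
//
//	pr, pw := io.Pipe()
//	done := make(chan error, 1)
//	go func() {
//		_, err := uploader.Upload(ctx, &s3.PutObjectInput{
//			Bucket: aws.String(bucket),
//			Key:    aws.String(key),
//			Body:   pr,
//		})
//		done <- err
//	}()
//	// ... producer writes to pw ...
//	pw.Close()    // signals EOF to the uploader
//	err := <-done // waits for the upload to finish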

func (f *File) openReadStream(startAt int64) error {
	if f.streamRead != nil {
		return ErrAlreadyOpened
	}
	var streamRange *string
	if startAt > 0 {
		// HTTP byte ranges are inclusive, so the last byte is Size()-1
		streamRange = aws.String(fmt.Sprintf("bytes=%d-%d", startAt, f.cachedInfo.Size()-1))
	}
	resp, err := f.fs.client.GetObject(context.Background(), &s3.GetObjectInput{
		Bucket: aws.String(f.fs.bucket),
		Key:    aws.String(f.name),
		Range:  streamRange,
	})
	if err != nil {
		return err
	}
	f.streamReadOffset = startAt
	f.streamRead = resp.Body
	return nil
}
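
// A note on the Range header built above: byte ranges are inclusive and
// zero-based, so "bytes=100-199" returns exactly 100 bytes, and the open-ended
// form reads to the end of the object. A hypothetical request for everything
// from offset 100 onward:
//
//	resp, err := client.GetObject(ctx, &s3.GetObjectInput{
//		Bucket: aws.String(bucket),
//		Key:    aws.String(key),
//		Range:  aws.String("bytes=100-"),
//	})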

// WriteAt writes len(p) bytes to the file starting at byte offset off.
// It returns the number of bytes written and an error, if any.
// WriteAt returns a non-nil error when n != len(p).
func (f *File) WriteAt(p []byte, off int64) (n int, err error) {
	_, err = f.Seek(off, io.SeekStart)
	if err != nil {
		return
	}
	n, err = f.Write(p)
	return n, err
}