#!/usr/bin/env python
#
# Copyright 2011-2012 BloomReach, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
'''
Zinc: Simple and scalable versioned data storage
Requirements: Python 2.5, Boto 1.3 (or 2.0+ for large file multipart upload support), lzop (for LZO compression)
Authors: Joshua Levy, Srinath Sridhar, Kazuyuki Tanimura, Nishant Deshpande
'''
#
# Revision history:
#
# 0.3.29 Add zinc diff command.
# 0.3.28 Fix: wildcard matching bug
# 0.3.27 Zinc locate multiple files with --recursive
# 0.3.26 Add exponential retry wait time
# 0.3.25 Wildcard support for track and untrack commands, ignore trailing slash of scope
# 0.3.24 Second robustness fix. Fix for bug introduced in 0.3.22 (only manifests on retry).
# 0.3.23 Second Fix: mtime float comparison
# 0.3.22 Be robust to IncompleteRead errors from current version of boto.
# 0.3.21 Fix: mtime float comparison and stringify bug
# 0.3.20 Add verbosity=0 (minimal) option and print stack trace on verbosity=1 (default)
# 0.3.19 Retry downloading for S3 exceptions
# 0.3.18 Add track/untrack commands, checkout --mode option, and status --full option
# 0.3.17 Zinc log command now shows the log from the checked out revision instead of tip.
# 0.3.16 Open source with Apache License.
# 0.3.15 Error for nested working directories. Fix error with copy to local file in local dir.
# 0.3.14 Very minor fixes.
# 0.3.13 Change signal handling; ignore SIGPIPE for better handling of socket errors.
# 0.3.12 Fix: Ignore .tmp-copy directories.
# 0.3.11 Turn on LZO compression by default.
# 0.3.10 Add --no-cache option.
# 0.3.9 Add --mod-all option.
# 0.3.8 Minor cleanup and docs.
# 0.3.7 Fix: Issue where unexpected item in cache aborts an update.
# 0.3.6 Adjust output of "locate" and "status".
# 0.3.5 Read S3 access keys from .s3cfg and environment.
# 0.3.4 Support LZO compression. Repo version 0.3.
#
from __future__ import with_statement
import sys, re, os, subprocess, shutil, hashlib, binascii, random, time, logging, optparse, functools, ConfigParser, calendar, cStringIO, errno, httplib, rfc822
from datetime import datetime
from contextlib import contextmanager
import boto
from boto.s3.connection import S3Connection # XXX seems to be needed to initialize boto module correctly
# Version of this code.
ZINC_VERSION = "0.3.29"
# Version of this repository implementation.
REPO_VERSION = "0.3"
REPO_VERSIONS_SUPPORTED = ["0.2", "0.3"]
BOTO_VERSION = boto.Version
##
## Setup
##
# Directory for temporary files. This should only be used for small files
# (repository file copies are always made to working directory).
TMP_DIR = "/tmp"
# Bits of randomness used in revision ids.
REV_BITS = 64
# Format for displayed dates.
DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S UTC'
UNKNOWN_USER = "unknown"
EMPTY_MESSAGE = ""
EMPTY_FILEPATH = "/dev/null"
# Used when keeping a copy of a previous file (as with revert).
BACKUP_SUFFIX = ".orig"
# Name prefix used when creating a temporary directory.
TMP_DIR_PREFIX = ".tmp-copy"
# Used in output to indicate that path preceding is the scope.
SCOPE_SEP = "//"
# Max file size to upload without S3 multipart upload
S3_SINGLEPART_MAX_SIZE = 4000000000
# Chunk size when using S3 multipart upload
S3_MULTIPART_CHUNK_SIZE = 50 << 20
# Environment variable names for S3 credentials.
S3_ACCESS_KEY_NAME = "S3_ACCESS_KEY"
S3_SECRET_KEY_NAME = "S3_SECRET_KEY"
# Metadata header name we use for our own MD5 content hash. We use our own header since S3's etag is not really a reliable MD5 hash.
ZINC_S3_MD5_NAME = "zinc-md5"
INVALID_MD5 = "---"
INVALID_SIZE = -1
INVALID_MTIME = -1.0
# Default scheme for file storage.
SCHEME_DEFAULT = "lzo"
# Compression level for LZO.
LZO_LEVEL = 7
# Diff command
DIFF_COMMAND='diff -u --label %(from_label)s %(from_path)s --label %(to_label)s %(to_path)s'
def log_setup(verbosity=1):
'''Logging verbosity: 0 = minimal, 1 = normal, 2 = verbose, 3 = debug.'''
global log
log = logging.Logger("logger")
# For convenient access elsewhere
log.verbosity = verbosity
log.stream = sys.stderr
log_handler = logging.StreamHandler(log.stream)
log.addHandler(log_handler)
if verbosity == 3:
log_handler.setFormatter(logging.Formatter(' (%(levelname).1s)%(filename)s:%(lineno)-4d %(message)s', DATETIME_FORMAT))
log.setLevel(logging.DEBUG)
elif verbosity == 2:
log_handler.setFormatter(logging.Formatter('%(message)s', DATETIME_FORMAT))
log.setLevel(logging.DEBUG)
elif verbosity == 0:
log_handler.setFormatter(logging.Formatter('%(message)s', DATETIME_FORMAT))
log.setLevel(logging.WARN)
else:
log_handler.setFormatter(logging.Formatter('%(message)s', DATETIME_FORMAT))
log.setLevel(logging.INFO)
# Always run initially to facilitate use as a library.
log_setup()
def general_setup():
# TODO fix deprecations about __new__ etc
import warnings
warnings.simplefilter("ignore", DeprecationWarning)
log_setup()
##
## Exceptions
##
class Failure(RuntimeError):
def __init__(self, message, suppressable=False):
self.suppressable = suppressable
RuntimeError.__init__(self, message)
class InvalidArgument(Failure):
pass
class InvalidOperation(Failure):
pass
class FileExists(InvalidOperation):
pass
class ShellError(InvalidOperation):
pass
class BadState(Failure):
pass
class MissingFile(Failure):
pass
##
## Basic utilities
##
class _Enum(object):
key_dict = {}
value_dict = {}
@classmethod
def has_key(cls, key):
return key in cls.key_dict
@classmethod
def has_value(cls, value):
return value in cls.value_dict
@classmethod
def value_for(cls, key):
    if key in cls.key_dict:
return cls.key_dict[key]
else:
raise InvalidArgument("parse error: not a valid key for %s: %s" % (cls.__name__, key))
@classmethod
def key_for(cls, value):
if value in cls.value_dict:
return cls.value_dict[value]
else:
raise InvalidArgument("parse error: not a valid value for %s: %s" % (cls.__name__, value))
@classmethod
def check_value(cls, value):
cls.key_for(value)
return value
@classmethod
def values(cls):
return cls.value_dict.keys()
def enum(name, **enum_dict):
'''
Create an enum class. Accepts name for the class and key-value pairs for each
value. The returned type has the given keys as class variables, and some static
utility methods. By convention, use uppercase for the keys, to avoid name
conflicts.
'''
new_type = type(name, (_Enum,), enum_dict)
new_type.key_dict = enum_dict
new_type.value_dict = dict((v, k) for (k, v) in enum_dict.iteritems())
return new_type
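# Illustrative usage of enum() (a sketch; "Color" and its keys are made-up
# names, not part of this module):
#
#   Color = enum("Color", RED="red", GREEN="green")
#   Color.RED               # -> "red" (keys become class attributes)
#   Color.key_for("red")    # -> "RED"
#   Color.value_for("RED")  # -> "red"
#   Color.has_value("red")  # -> True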
def fail(message=None, prefix="error", exc_info=None, status=1, suppress=False):
'''Fatal error.'''
if suppress:
if message:
log.info("warning: %s", message)
sys.exit(0)
else:
if message:
log.error("%s: %s", prefix, message)
if exc_info and log.verbosity >= 1:
log.info("", exc_info=exc_info)
sys.exit(status)
def log_calls_with(severity):
'''Create a decorator to log calls and return values of any function, for debugging.'''
def decorator(fn):
@functools.wraps(fn)
def wrap(*params, **kwargs):
call_str = "%s(%s)" % (fn.__name__, ", ".join([repr(p) for p in params] + ["%s=%s" % (k, repr(v)) for (k, v) in kwargs.items()]))
# TODO Extract line number from caller and use that in logging.
log.log(severity, ">> %s", call_str)
ret = fn(*params, **kwargs)
log.log(severity, "<< %s: %s", call_str, repr(ret))
return ret
return wrap
return decorator
# Convenience decorators for logging.
log_calls_info = log_calls_with(logging.INFO)
log_calls = log_calls_with(logging.DEBUG)
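# Illustrative usage of the logging decorators (a sketch; "add" is a made-up
# function, not part of this module):
#
#   @log_calls
#   def add(a, b):
#     return a + b
#
#   add(1, 2) logs ">> add(1, 2)" and "<< add(1, 2): 3" at DEBUG level.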
def split_uri(uri_str):
'''Split "prefix://some/path" into ("prefix", "some/path").'''
m = re.match("([a-zA-Z0-9]+)://(\S*)", uri_str)
if not m:
raise InvalidArgument("invalid URI: '%s'" % uri_str)
return (m.group(1), m.group(2))
_WHITESPACE_RE = re.compile("\s+")
def split_whitespace(str):
'''Split on whitespace.'''
return re.split(_WHITESPACE_RE, str)
def parse_rev_range(rev_str):
'''Parse a revision or revision range of the form xxx or xxx:yyy or xxx: or :yyy.'''
l = [s.strip() for s in rev_str.split(":")]
if len(l) == 2:
return (l[0] if l[0] else None, l[1] if l[1] else None)
elif len(l) == 1:
return (l[0], None)
else:
raise InvalidArgument("invalid revision range: '%s'" % rev_str)
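# Examples of parse_rev_range (illustrative; the revision ids are made up):
#
#   parse_rev_range("abc123:def456")  # -> ("abc123", "def456")
#   parse_rev_range("abc123:")        # -> ("abc123", None)
#   parse_rev_range(":def456")        # -> (None, "def456")
#   parse_rev_range("abc123")         # -> ("abc123", None)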
def determine_rev_range(options, config):
'''
  Take option parameters and return a (start, end) rev range tuple.
  If the user specifies the rev option, simply return the user's rev range.
  Otherwise, try to return the checked-out rev for the scope; if the scope is
  not checked out, return ("tip", None).
'''
rev = "tip"
if options.work_dir:
rev = WorkingDir(options.work_dir, config).get_checkout_rev(options.scope) or rev
return parse_rev_range(options.rev) if options.rev else (rev, None)
def join_path(*elements):
'''Join path elements: a, b, c -> a/b/c. Also handles case where initial path is empty.'''
if elements and elements[0] == "":
return join_path(*elements[1:])
else:
return "/".join(elements)
_RANDOM = random.Random()
_RANDOM.seed()
def new_uid(bits=64):
  '''
  Return a unique id: in practice, a random value with at least the specified
  bits of randomness. Uses only lowercase letters and digits, so ids are safe
  in filenames, even on case-insensitive filesystems.
  '''
  # log2(36) ~= 5.17 bits per character; dividing by 5.16 is slightly conservative.
  # Choose characters independently (with replacement) so each one contributes full entropy.
  return "".join(_RANDOM.choice("0123456789abcdefghijklmnopqrstuvwxyz") for i in xrange(int(bits / 5.16) + 1))
def group_pairs(pairs):
out = {}
for (x, y) in pairs:
if x in out:
out[x].append(y)
else:
out[x] = [y]
return out
def setattrs(self, others):
  '''Set the given mapping of names to values as attributes on self; useful for storing extra keyword arguments as instance variables.'''
for (k, v) in others.iteritems():
setattr(self, k, v)
##
## File utilities
##
def make_all_dirs(path):
  '''Ensure the given local directory, and all its parent directories, exist.'''
if not os.path.isdir(path):
os.makedirs(path)
def make_parent_dirs(path):
'''Ensure parent directories of a file are created as needed.'''
dir = os.path.dirname(path)
if dir and dir != "/":
make_all_dirs(dir)
def set_file_mtime(path, mtime, atime=None):
'''Set access and modification times on a file.'''
if not atime:
atime = mtime
f = file(path, 'a')
try:
os.utime(path, (atime, mtime))
finally:
f.close()
def move_to_backup(path, backup_suffix=BACKUP_SUFFIX):
if backup_suffix and os.path.exists(path):
shutil.move(path, path + backup_suffix)
def parent_dirs(path):
'''
Iterate upward through all possible parent directories of a path: "/a/b/c"
yields [("/a/b/c", ""), ("/a/b", "c"), ("/a", "b/c"), ("/", "a/b/c")].
'''
root = "/" if path.startswith("/") else ""
path = path.strip("/")
elements = path.split("/")
right_elements = []
while len(elements) > 0:
parent = "/".join(elements)
rel_path = "/".join(right_elements)
yield (root + parent, rel_path)
right_elements.insert(0, elements.pop())
yield (root, path)
def new_temp_filename(prefix=None, based_on=None, with_nonce=True):
'''
Return a new, temporary filename. If based_on is provided, return a path
adjacent to the given path, but starting with prefix; otherwise return a name
from the global temporary directory. If with_nonce is true, include a random
string to ensure uniqueness. XXX: This is not secure and shouldn't be used for
privileged execution.
'''
if based_on:
if not prefix:
prefix = ".tmp"
# Note dirname(based_on) will be empty if based_on is a path with no path component.
temp_path = "%s/%s.%s" % (os.path.dirname(based_on) or ".", prefix, os.path.basename(based_on))
else:
temp_path = "%s/zinc-tmp" % TMP_DIR
if prefix:
temp_path = "%s.%s" % (temp_path, prefix)
if with_nonce:
temp_path = "%s.%s" % (temp_path, new_uid())
return temp_path
@contextmanager
def atomic_output_file(dest_path, make_parents=False, backup_suffix=None, suffix=".partial.%s"):
'''
A context manager for convenience in writing a file in an atomic way. Set up
a temporary name, then rename it after the operation is done, optionally making
a backup of the previous file, if present.
'''
tmp_path = ("%s" + suffix) % (dest_path, new_uid())
if make_parents:
make_parent_dirs(tmp_path)
yield tmp_path
if not os.path.isfile(tmp_path):
raise BadState("failure in writing file: %s (temp location %s)" % (dest_path, tmp_path))
move_to_backup(dest_path, backup_suffix=backup_suffix)
shutil.move(tmp_path, dest_path)
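# Illustrative usage of atomic_output_file (a sketch; the paths are made up):
#
#   with atomic_output_file("out/data.txt", make_parents=True) as tmp_path:
#     with open(tmp_path, "wb") as f:
#       f.write("hello")
#   # On exit, the temp file is renamed to "out/data.txt", so readers never
#   # observe a partially written file.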
@contextmanager
def temp_output_file(prefix=None, based_on=None, with_nonce=True):
'''
A context manager for convenience in using a temporary file, which is deleted when exiting the context.
'''
tmp_path = new_temp_filename(prefix=prefix, based_on=based_on, with_nonce=with_nonce)
yield tmp_path
try:
os.remove(tmp_path)
except OSError, e:
pass
@log_calls
def copyfile_atomic(source_path, dest_path, make_parents=False, backup_suffix=None):
'''Copy file on local filesystem in an atomic way, so partial copies never exist. Preserves timestamps.'''
with atomic_output_file(dest_path, make_parents=make_parents, backup_suffix=backup_suffix) as tmp_path:
# TODO catch IOError and re-raise as MissingFile
shutil.copyfile(source_path, tmp_path)
set_file_mtime(tmp_path, os.path.getmtime(source_path))
def movefile(source_path, dest_path, make_parents=False, backup_suffix=None):
  '''Move a file, optionally creating parent directories and keeping a backup of any existing destination.'''
# TODO catch IOError and re-raise as MissingFile
if make_parents:
make_parent_dirs(dest_path)
move_to_backup(dest_path, backup_suffix=backup_suffix)
shutil.move(source_path, dest_path)
def read_string_from_file(path):
'''Read entire contents of file into a string.'''
with open(path, "rb") as f:
value = f.read()
return value
def write_string_to_file(path, string, make_parents=False, backup_suffix=BACKUP_SUFFIX):
'''Write entire file with given string contents. Keeps backup by default.'''
with atomic_output_file(path, make_parents=make_parents, backup_suffix=backup_suffix) as tmp_path:
with open(tmp_path, "wb") as f:
f.write(string)
def file_md5(path, blocksize=2**23):
'''Compute MD5 of file, in hex format.'''
md5 = hashlib.md5()
size = os.path.getsize(path)
  if size > blocksize:
log.debug("computing MD5 of file '%s' (size %s)", path, size)
with open(path, "rb") as f:
while True:
data = f.read(blocksize)
if not data:
break
md5.update(data)
value = binascii.hexlify(md5.digest())
return value
def strip_prefix(prefix, value):
'''Strip prefix from string value, or None if value does not start with prefix.'''
return value[len(prefix):] if value.startswith(prefix) else None
def list_files_recursive(path):
  '''
  Yield all filenames under the given path, as paths relative to that path.
  '''
for root, dirnames, filenames in os.walk(path):
for filename in filenames:
val = strip_prefix(path + "/", join_path(root, filename))
assert val
yield val
def list_dirs_recursive(path):
  '''
  Yield all subdirectories under the given path, as paths relative to that path, bottom to top.
  '''
for root, dirnames, filenames in os.walk(path, topdown=False):
for dirname in dirnames:
val = strip_prefix(path + "/", join_path(root, dirname))
assert val
yield val
@contextmanager
def temp_output_dir(output_dir, temp_dir=None, backup_suffix=None):
'''
If we want to write several files to a given output directory, and we would
like all to appear at nearly the same time, we can put all files in a temporary
location (by default a subdirectory of the output directory), and then move
them to the final desired location at the end. This does not provide perfect
transactionality, but helps to minimize chances of files being in an
inconsistent state following an interruption.
'''
try:
if not temp_dir:
temp_dir = join_path(output_dir, "%s.%s" % (TMP_DIR_PREFIX, new_uid()))
make_all_dirs(temp_dir)
yield temp_dir
# Now copy over all files, preserving paths. Note target may contain some
# of the same subdirectories, so we have to do this file by file.
for path in list_files_recursive(temp_dir):
src = join_path(temp_dir, path)
dest = join_path(output_dir, path)
log.debug("moving from temp dir: %s -> %s", src, dest)
movefile(src, dest, make_parents=True, backup_suffix=backup_suffix)
finally:
try:
for dir in list_dirs_recursive(temp_dir):
os.rmdir(join_path(temp_dir, dir))
os.rmdir(temp_dir)
except Exception, e:
log.warn("error cleaning up temporary directory: %s: %s", temp_dir, e)
def shell_command(command, wait=True):
'''
Call a shell command, translating common exceptions. Command is a list of
items to pass to the shell, e.g. ['ls', '-l'].
'''
try:
log.debug("shell command: %s", " ".join(command))
p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False)
if wait:
p.wait()
else:
return p
except subprocess.CalledProcessError, e:
raise ShellError("shell command '%s' returned status %s" % (" ".join(command), e.returncode))
except OSError, e:
raise ShellError("shell command '%s' failed: %s" % (" ".join(command), e))
def expand_wildcards(wildcards, candidates, recursive=False):
  '''
  Expand shell-style wildcards: wildcards is a list of paths that may contain
  '*' or '?', and candidates is the universe of paths to match against.
  Return a list of concrete (non-wildcard) paths. Unless recursive is set,
  wildcards do not match across '/' directory boundaries.
  '''
out = []
match_char = '.' if recursive else '[^/]'
for wildcard in wildcards:
if '*' in wildcard or '?' in wildcard:
pattern = r'^' + re.escape(wildcard).replace('\\*', match_char + '*').replace('\\?', match_char) + r'$'
out.extend([candidate for candidate in candidates if re.search(pattern, candidate) is not None])
else:
out.append(wildcard)
return out
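# Examples of expand_wildcards (illustrative; the paths are made up):
#
#   expand_wildcards(["a/*.txt"], ["a/x.txt", "a/b/y.txt"])
#   # -> ["a/x.txt"] ('*' does not cross '/' by default)
#   expand_wildcards(["a/*.txt"], ["a/x.txt", "a/b/y.txt"], recursive=True)
#   # -> ["a/x.txt", "a/b/y.txt"]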
# Colorize text for display in a terminal.
def color_text(text, color):
colors = {
'clear': '\033[0m',
'black': '\033[30m',
'red': '\033[31m',
'green': '\033[32m',
'yellow': '\033[33m',
'blue': '\033[34m',
'purple': '\033[35m',
'cyan': '\033[36m',
'under': '\033[4m',
}
return "".join([colors[color], text, colors['clear']])
# Show a diff between two files in the terminal.
def print_diff(from_path, to_path, from_label=None, to_label=None):
if not from_label:
from_label = from_path
if not to_label:
to_label = to_path
command = [piece % {"from_label": from_label, "from_path": from_path, "to_label": to_label, "to_path": to_path} for piece in DIFF_COMMAND.split()]
p = shell_command(command, wait=False)
  # Color the output only when stdout is a terminal.
use_colors = sys.stdout.isatty()
for line in p.stdout:
if use_colors:
if line.startswith('+'):
line = color_text(line, 'green')
elif line.startswith('-'):
line = color_text(line, 'red')
elif line.startswith('@@'):
line = color_text(line, 'cyan')
sys.stdout.write(line)
sys.stdout.write("\n")
##
## Date and time utilities
##
def current_repo_time():
'''
Time to be used for including in manifests. In the future, could be taken
from some service to ensure consistency. We use an integer, so the displayed
time is always as accurate as the recorded time, in case the user specifies
times on the command line.
'''
return int(time.time())
def format_seconds(seconds):
return datetime.utcfromtimestamp(seconds).strftime(DATETIME_FORMAT)
DATETIME_FORMATS = [
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %I:%M:%S%p',
'%Y-%m-%d %H:%M',
'%Y-%m-%d %I:%M%p',
'%Y-%m-%d',
'%a %b %d %H:%M:%S %Y',
'%a %b %d %I:%M:%S%p %Y',
'%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
'%b %d %H:%M:%S %Y',
'%b %d %I:%M:%S%p %Y',
'%b %d %H:%M:%S',
'%b %d %I:%M:%S%p',
'%b %d %H:%M',
'%b %d %I:%M%p',
'%b %d %Y',
'%b %d',
'%H:%M:%S',
'%I:%M:%S%p',
'%H:%M',
'%I:%M%p',
]
# Raw date format is seconds since epoch
_DATETIME_SECONDS_RE = re.compile(r"^([0-9]{9,10})$")  # anchored, so trailing garbage is not treated as raw seconds
def parse_timezone(tz_str):
'''Parse a timezone suffix, as it appears on a date, returning offset in minutes.'''
try:
tz_str = tz_str.lower()
if tz_str[0] in "+-" and len(tz_str) == 5 and tz_str[1:].isdigit():
sign = 1 if (tz_str[0] == "+") else -1
hours = int(tz_str[1:3])
minutes = int(tz_str[3:5])
return -sign * (60 * hours + minutes)
elif tz_str == "gmt" or tz_str == "utc":
return 0
else:
return None
except (ValueError, IndexError):
return None
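# Examples of parse_timezone (illustrative):
#
#   parse_timezone("+0530")  # -> -330 (minutes to add to the parsed time to get UTC)
#   parse_timezone("UTC")    # -> 0
#   parse_timezone("EST")    # -> None (named zones other than GMT/UTC are not handled)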
def parse_datetime(datetime_str, formats=DATETIME_FORMATS, tz_offset=None, assume_utc=False):
'''Parse a date in one of various formats, returning seconds since epoch.'''
if _DATETIME_SECONDS_RE.match(datetime_str):
return int(datetime_str)
  # Unfortunately, strptime doesn't handle timezones well, so we do it ourselves.
# If the last token looks like a timezone, put the offset into tz_offset.
if tz_offset is None:
bits = split_whitespace(datetime_str)
if len(bits) > 1:
tz_offset = parse_timezone(bits[-1])
if tz_offset is not None:
datetime_str = " ".join(bits[:-1])
datetime_str = datetime_str.strip()
for format in formats:
try:
tuple = time.strptime(datetime_str, format)
if tz_offset is not None:
# Specified offset
seconds = calendar.timegm(tuple)
seconds += tz_offset * 60
elif assume_utc:
# Unspecified offset, using UTC
seconds = calendar.timegm(tuple)
else:
# Unspecified offset, using local time
seconds = time.mktime(tuple)
except (ValueError, OverflowError):
continue
break
else:
raise InvalidArgument("could not parse date: '%s'" % datetime_str)
return seconds
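# Examples of parse_datetime (illustrative):
#
#   parse_datetime("1330837200")                          # -> 1330837200 (raw seconds pass through)
#   parse_datetime("2012-03-04 05:00:00 UTC")             # -> 1330837200
#   parse_datetime("2012-03-04 05:00", assume_utc=True)   # -> 1330837200 (no explicit zone)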
_DATESPEC_RE1 = re.compile("([<>])\s*{(.*)}")
_DATESPEC_RE1A = re.compile("([<>])\s*(.*)")
_DATESPEC_RE2 = re.compile("{(.*)}\s*to\s*{(.*)}")
_DATESPEC_RE2A = re.compile("(.*)\s+to\s+(.*)")
def parse_datespec(datespec_str, assume_utc=False):
'''
Parse a date spec and return a filter function that returns true if the
supplied time satisfies the date spec.
Formats:
< [date expression]
> [date expression]
[date expression] to [date expression]
'''
  datespec_str = datespec_str.strip()
  filter = None
  m = _DATESPEC_RE1.match(datespec_str) or _DATESPEC_RE1A.match(datespec_str)
  if m:
    direction = m.group(1)
    time = parse_datetime(m.group(2), assume_utc=assume_utc)
    if direction == "<":
      filter = lambda t: t <= time
    else:
      filter = lambda t: t >= time
  else:
    m = _DATESPEC_RE2.match(datespec_str) or _DATESPEC_RE2A.match(datespec_str)
    if m:
      # Pass assume_utc through to both endpoints of the range.
      time1, time2 = parse_datetime(m.group(1), assume_utc=assume_utc), parse_datetime(m.group(2), assume_utc=assume_utc)
      filter = lambda t: time1 <= t and t <= time2
  if not m:
    raise InvalidArgument("invalid date spec: '%s'" % datespec_str)
  return filter
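# Illustrative usage of parse_datespec (a sketch; the dates are made up):
#
#   f = parse_datespec("> 2012-01-01", assume_utc=True)
#   f(parse_datetime("2012-06-01", assume_utc=True))  # -> True
#   f(parse_datetime("2011-06-01", assume_utc=True))  # -> False
#
#   g = parse_datespec("2012-01-01 to 2012-02-01", assume_utc=True)
#   g(parse_datetime("2012-01-15", assume_utc=True))  # -> True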
##
## Common tools
##
# Simple serialization for short key/value pairs. We use our own for clarity, and so key order is deterministic.
def shortvals_to_str(shortvals):
'''
Convert [("key1", "val1"), ("key2", "val2")] or {"key1": "val1", "key2": "val2"} to "key1=val1;key2=val2".
Values that are None are skipped.
'''
if type(shortvals) == dict:
shortvals = [(k, shortvals[k]) for k in sorted(shortvals)]
for (k, v) in shortvals:
if ";" in str(k) or "=" in str(k) or (v is not None and (";" in str(v) or "=" in str(v))):
raise InvalidArgument("invalid shortval string: '%s'" % shortvals)
  # XXX avoid str() on floats, since it truncates low-order digits (notably on Linux); use '%f' instead.
return ";".join(["%s=%s" % (str(k), "%f" % v if isinstance(v, float) else str(v)) for (k,v) in shortvals if v is not None])
def shortvals_from_str(shortvals_str):
'''
Convert "key1=val1;key2=val2" to {"key1" : "val1", "key2" : "val2"}.
'''
return dict(pairstr.split("=") for pairstr in shortvals_str.split(";"))
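# Examples of the shortvals format (illustrative):
#
#   shortvals_to_str({"size": 10, "md5": "abc"})   # -> "md5=abc;size=10" (keys sorted)
#   shortvals_from_str("md5=abc;size=10")          # -> {"md5": "abc", "size": "10"}
#
# Note values round-trip as strings; callers must convert types themselves.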
class Values(object):
'''
Simple serialization for multiple named values. Like a property file, but
where values are potentially longer multi-line strings.
'''
# A magic char sequence to indicate start of a new record
RECORD_MAGIC = "%%>>"
VERSION = "znval 1.0"
def __init__(self, values=None, comment=None):
'''
Create a new Values object. First arg may be a dictionary or list of
pairs. Items with value None are skipped. If list of pairs is used, order is
preserved.
'''
if not values:
values = {}
self.values = []
if type(values) == dict:
self.values = [(k, str(values[k])) for k in sorted(values) if values[k] is not None]
elif type(values) == list:
self.values = [(k, str(v)) for (k, v) in values if v is not None]
else:
raise InvalidArgument("invalid values of type %s" % type(values))
self.comment = comment
def from_str(self, str_val):
lines = str_val.splitlines()
cur_rec = None
cur_val = "" # TODO use list
for line in lines:
line = line.strip()
if line.startswith("#"):
continue
if line.startswith(self.RECORD_MAGIC):
if cur_rec:
self.values.append((cur_rec, cur_val[:-1]))
cur_rec = line[len(self.RECORD_MAGIC):]
cur_val = ""
continue
cur_val += line
cur_val += "\n"
    # Append the final record, dropping its trailing newline (skip if the input contained no records).
    if cur_rec is not None:
      self.values.append((cur_rec, cur_val[:-1]))
def to_str(self):
out = "# %s\n" % self.VERSION
if self.comment:
for l in self.comment.splitlines():
out += "# %s\n" % l
for (key, value) in self.values:
assert value is not None
out += self.RECORD_MAGIC + key + "\n"
out += value
out += "\n"
return out
  def clear(self):
    # Note: self.values is a list of pairs, not a dict.
    self.values = []
def values_to_str(values, comment=None):
'''Serialize dictionary or list of pairs in Values format.'''
v = Values(values, comment)
return v.to_str()
def values_from_str(values_str):
'''Deserialize string in Values format into a dictionary.'''
v = Values()
v.from_str(values_str)
return dict((key, value) for (key, value) in v.values)
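# Illustrative example of the Values serialization format (a sketch; the keys
# and contents are made up):
#
#   values_to_str([("message", "first line\nsecond line"), ("user", "alice")])
#
# produces:
#
#   # znval 1.0
#   %%>>message
#   first line
#   second line
#   %%>>user
#   alice
#
# and values_from_str() parses it back into a dictionary of the same pairs.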
class LockService(object):
'''
A lock service. Can acquire locks for any key, to make sure write operations
are globally synchronized across all users.
'''
def acquire(self, key):
raise NotImplementedError()
def release(self, key):
raise NotImplementedError()
  @contextmanager
  def context(self, key):
    result = self.acquire(key)
    try:
      yield result
    finally:
      self.release(key)
class NoLockService(LockService):
  '''
  No-op lock service -- no locking at all is done.
  '''
  def acquire(self, key):
    log.debug("acquiring lock for key: %s", key)
  def release(self, key):
    log.debug("releasing lock for key: %s", key)
##
## Repository
##
# Type of an item. Currently we only support files, but in the future there could be symlinks, etc.
ItemType = enum("ItemType", FILE="file")
# Storage schemes for items.
Scheme = enum("Scheme", RAW="raw", LZO="lzo")
class Config(object):
'''
The Config holds general settings, like the user, the locking service, etc.
as well as per-repository or per-working directory settings.
'''
  def __init__(self, options=None):
    self.lock_service = NoLockService()
    # Defaults read from options (an optparse-style object); missing attributes fall back to defaults.
    self.user = getattr(options, "user", None) or UNKNOWN_USER
    compression = getattr(options, "compression", None)
    self.compression = Scheme.check_value(compression) if compression else SCHEME_DEFAULT
    self.s3_fill_keys(options)
    self.s3_ssl = True if getattr(options, "s3_ssl", False) else False
    self.no_cache = True if getattr(options, "no_cache", False) else False
def merge_in(self, values):
for (k, v) in values.items():
log.debug("config: '%s' = %s", k, repr(v))
setattr(self, k, v)
def s3_fill_keys(self, options):
'''Fill in S3 keys into config, from any available sources.'''
self.s3_access_key = None
self.s3_secret_key = None
keys = None
if hasattr(options, "s3_access_key") and hasattr(options, "s3_secret_key"):
log.debug("using S3 keys from options")
keys = (options.s3_access_key, options.s3_secret_key)
if not keys:
keys = Config.s3_keys_from_env()
    if not keys and not getattr(options, "no_s3cfg", False):
keys = Config.s3_keys_from_s3cmd()
if keys:
self.s3_access_key = keys[0]
self.s3_secret_key = keys[1]
@staticmethod
def s3_keys_from_env():
'''Retrieve S3 access keys from the environment, or None if not present.'''
env = os.environ
if S3_ACCESS_KEY_NAME in env and S3_SECRET_KEY_NAME in env:
keys = (env[S3_ACCESS_KEY_NAME], env[S3_SECRET_KEY_NAME])
log.debug("read S3 keys from environment")
return keys
else:
return None
@staticmethod
def s3_keys_from_s3cmd():
'''Retrieve S3 access key settings from s3cmd's config file, if present; otherwise return None.'''
s3cfg_path = None
try:
s3cfg_path = "%s/.s3cfg" % os.environ["HOME"]
if not os.path.exists(s3cfg_path):
return None
config = ConfigParser.ConfigParser()
config.read(s3cfg_path)
keys = config.get("default", "access_key"), config.get("default", "secret_key")
log.debug("read S3 keys from $HOME/.s3cfg file")
return keys
except Exception, e:
log.info("could not read S3 keys from $HOME/.s3cfg file; skipping (%s)", e)
return None
@staticmethod
def s3store_copyfile_retries():
'''Retrieve the number of retries for S3Store.s3_copyfile_atomic from
environment variable. If not present, return default.
'''
return int(os.getenv('BR_ZINC_S3STORE_COPYFILE_RETRIES', 3))
# Tracking states for files within a scope.
# ToBeTracked means the file exists locally and is tracked, but is not yet registered in the manifest.
FileState = enum("FileState", Tracked="tracked", Untracked="untracked", ToBeTracked="to_be_tracked")
FileStateTrackedList = [FileState.Tracked, FileState.ToBeTracked] # for convenience
class Fingerprint(object):
'''
The fingerprint of an item. Contains modification time (mtime), size, and/or
MD5. A Fingerprint may be incomplete, and not (initially) include a hash of
file contents, for efficiency. Generally, if an item's type, mtime, and size
are unchanged, we assume it is unchanged. This is not perfectly correct since
modification time has only one-second resolution on many filesystems, but it is
reasonably safe.
'''
DIFFER = "differ"
SAME = "match"
UNCERTAIN = "uncertain"
EPS = 0.00001
def __init__(self, type=ItemType.FILE, mtime=None, size=None, md5=None, state=FileState.Tracked, **others):
self.type = type
self.mtime = mtime
self.size = size
self.md5 = md5
self.state = state
    # For future compatibility: preserve unknown attributes, so fields added by newer versions pass through unchanged.
    setattrs(self, others)
def __repr__(self):
return self.to_str()
@staticmethod
def of_file(local_path, md5=None, compute_md5=False, state=FileState.Tracked):
'''Get a Fingerprint for the item at the local path given. Uses supplied MD5 or computes MD5 if requested.'''
fp = Fingerprint()
fp.read_from_file(local_path, md5=md5, compute_md5=compute_md5, state=state)
return fp
def read_from_file(self, path, md5=None, compute_md5=False, state=FileState.Tracked):
    if not os.path.exists(path):
      raise InvalidOperation("file not found: '%s'" % path)
    if not os.path.isfile(path):
      raise NotImplementedError("only plain files are supported: '%s'" % path)
self.type = ItemType.FILE
self.size = os.path.getsize(path)
self.mtime = os.path.getmtime(path)
if md5:
self.md5 = md5
elif compute_md5:
self.md5 = file_md5(path)
else:
self.md5 = None
self.state = state