forked from anusharanganathan/RecordSilo
-
Notifications
You must be signed in to change notification settings - Fork 0
RecordSilo provides a local, pairtree-based disc Silo for storing digital objects. It uses a JSON manifest and a simple datastore. It does not provide indexing (but would suit being deployed alongside CouchDB).
License
dataflow/RecordSilo
Folders and files
Name | Name | Last commit message | Last commit date | |
---|---|---|---|---|
Repository files navigation
Usage Example: Creating a Silo and creating an object inside it >>> from recordsilo import Silo >>> >>> # store_directory will be created if it doesn't already exist ... s = Silo("removeme") >>> >>> # Get an object - this will be created if it doesn't already exist ... p = s.get_item("doi://10.2302/1232") Adding files >>> # Add some files - the content of the files will be the string values following the names. ... # The 'True' declaration flags that this file is metadata of some sort >>> p.put_stream("mods.xml", "<modsColl..... />", True) >>> p.put_stream("TXT.txt", "Textual item\n") >>> >>> # Add some binary files ... with open("/home/ben/important.rdf", "rb") as file: ... p.put_stream("important.rdf", file) ... >>> with open("/home/ben/Desktop/GDO.pdf", "rb") as file: ... p.put_stream("GDO.pdf", file) Versions >>> p.versions ['1'] >>> p.files ['mods.xml', 'TXT.txt', 'important.rdf', 'GDO.pdf'] >>> p.currentversion '1' Adding a new version >>> # Tries to autogen the next version. Uses the version cursor (p.currentversion) as a starting point >>> p.increment_version() '2' >>> p.put_stream("ERR.err", "Something we don't want") >>> p.put_stream("PDF", "Binary filehandle, something that supports .read()") >>> >>> p.del_stream("ERR.err") >>> p.versions ['1', '2'] >>> p.files ['PDF'] >>> p.currentversion '2' The version cursor >>> p.set_version_cursor("1") True >>> >>> p.currentversion '1' >>> p.files ['mods.xml', 'TXT.txt', 'important.rdf', 'GDO.pdf'] >>> p.set_version_cursor("2") True >>> p.files ['PDF'] >>> Cloning versions >>> p.clone_version("1", "temp") 'temp' >>> p.currentversion 'temp' >>> p.put_stream("some.doc", "Binary filehandle, something that supports .read()") >>> def print_mods(p): ... with p.get_stream("mods.xml") as mods: ... print mods.read() ... >>> print_mods(p) <AlternateMods/> >>> p.files ['mods.xml', 'TXT.txt', 'important.rdf', 'GDO.pdf', 'some.doc'] >>> p.set_version_cursor("1") True >>> print_mods(p) <modsColl..... 
/> >>> p.files ['mods.xml', 'TXT.txt', 'important.rdf', 'GDO.pdf'] Cloning versions - using symlinks for files in clone rather than do a hard copy >>> p.clone_version_delta('temp', 'temp2') 'temp2' Renaming versions >>> p.versions ['1', '2', 'temp'] >>> p.rename_version("temp", "3") '3' >>> p.versions ['1', '2', '3'] The JSON manifest: >>> p {'files': {'1': ['mods.xml', 'TXT.txt', 'important.rdf', 'GDO.pdf'], '3': ['mods.xml', 'TXT.txt', 'important.rdf', 'GDO.pdf', 'some.doc'], '2': ['PDF']}, 'versions': ['1', '2', '3'], 'item_id': 'doi://10.2302/1232', 'currentversion': '3', 'metadata_files': {'1': ['mods.xml', 'TXT.txt', 'important.rdf', 'GDO.pdf'], '3': ['mods.xml', 'TXT.txt', 'important.rdf', 'GDO.pdf', 'some.doc'], '2': ['PDF']}, 'date': u'2010-02-02T16:51:45.890655', 'version_dates': {'1': '2010-02-02T16:51:45.890655', '3': '2010-02-02T16:51:45.890655', '2': '2010-02-02T16:51:47.634459'}} Deleting >>> p.set_version_cursor("2") True >>> p.files ['PDF'] >>> p.del_stream("PDF") >>> p.files [] >>> p.del_version("2") >>> p.set_version_cursor("2") ERROR:RecordSilo:Version 2 does not exist False >>> Cloning while versioning up >>> p.increment_version(date=None, clone_previous_version=True) '4' >>> p.currentversion '4' >>> p.files ['mods.xml', 'TXT.txt', 'important.rdf', 'GDO.pdf', 'some.doc'] Cloning while versioning up, using symlinks for files in clone rather than do a hard copy >>> p.increment_version_delta(date=None, clone_previous_version=True) '5' >>> p.currentversion '5' >>> p.files ['mods.xml', 'TXT.txt', 'important.rdf', 'GDO.pdf', 'some.doc'] Dates >>> p.versions ['1', '3', '4'] >>> p.currentversion '4' >>> p.date '2010-02-02T17:28:20.926703' >>> p.set_version_cursor("1") True >>> p.date u'2010-02-02T16:51:45.890655' The JSON metadata >>> p.metadata {} >>> p.metadata['dc:title'] = "Shakespearean sonnet" >>> p.metadata {'dc:title': 'Shakespearean sonnet'} >>> p.sync() Syncing to and reverting from disc >>> # Metadata can be any data structure expressible in 
JSON >>> p.metadata['listandDictdata'] = {'foo':['bar', 'baa', 'baaaaa'], 'sna':'fu'} >>> p.metadata {'listandDictdata': {'foo': ['bar', 'baa', 'baaaaa'], 'sna': 'fu'}, 'dc:title': 'Shakespearean sonnet'} >>> The JSON manifest >>> p {'files': {'1': []}, 'versions': ['1'], 'date': '2010-02-02T17:39:50.282308', 'currentversion': '1', 'metadata_files': {'1': []}, 'item_id': '7', 'version_dates': {'1': '2010-02-02T17:39:50.282308'}, 'metadata': {'listandDictdata': {'foo': ['bar', 'baa', 'baaaaa'], 'sna': 'fu'}, 'dc:title': 'Shakespearean sonnet'}} >>> # Sync the JSON manifest to disc >>> p.sync() >>> p.metadata['rubbish'] = "Whoops" >>> p.metadata {'listandDictdata': {'foo': ['bar', 'baa', 'baaaaa'], 'sna': 'fu'}, 'dc:title': 'Shakespearean sonnet', 'rubbish': 'Whoops'} >>> # Reverting the manifest from the disc version. >>> p.revert() >>> p.metadata {'listandDictdata': {'foo': ['bar', 'baa', 'baaaaa'], 'sna': 'fu'}, 'dc:title': 'Shakespearean sonnet'} NB all stream writes cause a sync()
About
RecordSilo provides a local, pairtree-based disc Silo for storing digital objects. It uses a JSON manifest and a simple datastore. It does not provide indexing (but would suit being deployed alongside CouchDB).
Resources
License
Stars
Watchers
Forks
Releases
No releases published
Packages 0
No packages published
Languages
- Python 100.0%