Source code for acidfs

import contextlib
import fcntl
import io
import logging
import os
import shutil
import sys
import subprocess
import tempfile
import traceback
import transaction
import weakref

log = logging.getLogger(__name__)


[docs]class AcidFS(object): """ An instance of `AcidFS` exposes a transactional filesystem view of a `Git` repository. Instances of `AcidFS` are not threadsafe and should not be shared across threads, greenlets, etc. **Paths** Many methods take a `path` as an argument. All paths use forward slash `/` as a separator, regardless of the path separator of the underlying operating system. The path `/` represents the root folder of the repository. Paths may be relative or absolute: paths beginning with a `/` are absolute with respect to the repository root, paths not beginning with a `/` are relative to the current working directory. The current working directory always starts at the root of the repository. The current working directory can be changed using the :meth:`chdir` and :meth:`cd` methods. **Constructor Arguments** ``repo`` The path to the repository in the real, local filesystem. ``head`` The name of a branch to use as the head for this transaction. Changes made using this instance will be merged to the given head. The default, if omitted, is to use the repository's current head. ``create`` If there is not a Git repository in the indicated directory, should one be created? The default is `True`. ``bare`` If the Git repository is to be created, create it as a bare repository. If the repository is already created or `create` is False, this argument has no effect. ``user_name`` If the Git repository is to be created, set the user name for the repository to this value. This is the same as creating the repository and running `git config user.name "<user_name>"`. ``user_email`` If the Git repository is to be created, set the user email for the repository to this value. This is the same as creating the repository and running `git config user.email "<user_email>"`. ``name`` Name to be used as a sort key when ordering the various databases (datamanagers in the parlance of the transaction package) during a commit. It is exceedingly rare that you would need to use anything other than the default, here. ``path_encoding`` Encode paths with this encoding. The default is `ascii`. """ session = None _cwd = () def __init__(self, repo, head='HEAD', create=True, bare=False, user_name=None, user_email=None, name='AcidFS', path_encoding='ascii'): wdpath = repo dbpath = os.path.join(repo, '.git') if not os.path.exists(dbpath): wdpath = None dbpath = repo if not os.path.exists(os.path.join(dbpath, 'HEAD')): if create: args = ['git', 'init', repo] if bare: args.append('--bare') else: wdpath = repo dbpath = os.path.join(repo, '.git') _check_output(args) if user_name: args = ['git', 'config', 'user.name', user_name] _check_output(args, cwd=dbpath) if user_email: args = ['git', 'config', 'user.email', user_email] _check_output(args, cwd=dbpath) args = ['git', 'config', 'core.quotepath', 'false'] _check_output(args, cwd=dbpath) else: raise ValueError('No database found in %s' % dbpath) self.wd = wdpath self.db = dbpath self.head = head self.name = name self.path_encoding = path_encoding def _session(self): """ Make sure we're in a session. """ if not self.session or self.session.closed: self.session = _Session(self.wd, self.db, self.head, self.name, self.path_encoding) return self.session def _mkpath(self, path): if path == '.': parsed = [] else: parsed = list(filter(None, path.split('/'))) if not path.startswith('/'): parsed = list(self._cwd) + parsed return parsed
[docs] def get_base(self): """ Returns the id of the commit that is the current base for the transaction. """ session = self._session() return session.prev_commit
[docs] def set_base(self, commit): """ Sets the base commit for the current transaction. The `commit` argument may be the SHA1 of a commit or the name of a reference (eg. branch or tag). The current transaction must be clean. If any changes have been made in the transaction, a ConflictError will be raised. """ session = self._session() session.set_base(commit)
[docs] def cwd(self): """ Returns the path to the current working directory in the repository. """ return '/' + '/'.join(self._cwd)
[docs] def chdir(self, path): """ Change the current working directory in repository. """ session = self._session() parsed = self._mkpath(path) obj = session.find(parsed) if not obj: raise _NoSuchFileOrDirectory(path) if not isinstance(obj, _TreeNode): raise _NotADirectory(path) self._cwd = parsed
[docs] @contextlib.contextmanager def cd(self, path): """ A context manager that changes the current working directory only in the scope of the 'with' context. Eg:: import acidfs fs = acidfs.AcidFS('myrepo') with fs.cd('some/folder'): fs.open('a/file') # relative to /some/folder fs.open('another/file') # relative to / """ prev = self._cwd self.chdir(path) yield self._cwd = prev
[docs] def open(self, path, mode='r', buffering=-1, encoding=None, errors=None, newline=None): """ Open a file for reading or writing. Implements the semantics of the `open` function in Python's `io module <http://docs.python.org/library/io.html#io.open>`_, which is the default implementation `in Python 3 <http://docs.python.org/py3k/library/functions.html#open>`_. Opening a file in text mode will return a file-like object which reads or writes unicode strings, while opening a file in binary mode will return a file-like object which reads or writes raw bytes. Because the underlying implementation uses a pipe to a `Git` plumbing command, opening for update (read and write) is not supported, nor is seeking. """ session = self._session() parsed = self._mkpath(path) if 'b' in mode: text = False if 't' in mode: raise ValueError("can't have text and binary mode at once") else: if not buffering: raise ValueError("can't have unbuffered text I/O") text = True if '+' in mode: raise ValueError("Read/write mode is not supported") mode = mode.replace('b', '') mode = mode.replace('t', '') if mode == 'a': mode = 'w' append = True else: append = False if mode == 'x': mode = 'w' exclusive = True else: exclusive = False if buffering < 0: buffer_size = io.DEFAULT_BUFFER_SIZE line_buffering = False elif buffering == 1: buffer_size = io.DEFAULT_BUFFER_SIZE line_buffering = True else: buffer_size = buffering line_buffering = False if mode == 'r': obj = session.find(parsed) if not obj: raise _NoSuchFileOrDirectory(path) if isinstance(obj, _TreeNode): raise _IsADirectory(path) stream = obj.open() if buffering: stream = io.BufferedReader(stream, buffer_size) if text: stream = io.TextIOWrapper( stream, encoding, errors, newline, line_buffering) return stream elif mode == 'w': if not parsed: raise _IsADirectory(path) name = parsed[-1] dirpath = parsed[:-1] obj = session.find(dirpath) if not obj: raise _NoSuchFileOrDirectory(path) if not isinstance(obj, _TreeNode): raise _NotADirectory(path) prev = obj.get(name) if isinstance(prev, _TreeNode): raise _IsADirectory(path) if prev and exclusive: raise _FileExists(path) blob = obj.new_blob(name, prev) if append and prev: shutil.copyfileobj(prev.open(), blob) if buffering: blob = io.BufferedWriter(blob, buffer_size) if text: blob = io.TextIOWrapper( blob, encoding, errors, newline, line_buffering) return blob raise ValueError("Bad mode: %s" % mode)
[docs] def hash(self, path=''): """ Returns the sha1 hash of the object referred to by `path`. If `path` is omitted the current working directory is used. """ session = self._session() obj = session.find(self._mkpath(path)) if not obj: raise _NoSuchFileOrDirectory(path) return obj.hash()
[docs] def listdir(self, path=''): """ Return list of files in indicated directory. If `path` is omitted, the current working directory is used. """ session = self._session() obj = session.find(self._mkpath(path)) if not obj: raise _NoSuchFileOrDirectory(path) if not isinstance(obj, _TreeNode): raise _NotADirectory(path) return list(obj.contents.keys())
[docs] def mkdir(self, path): """ Create a new directory. The parent of the new directory must already exist. """ session = self._session() parsed = self._mkpath(path) name = parsed[-1] parent = session.find(parsed[:-1]) if not parent: raise _NoSuchFileOrDirectory(path) if not isinstance(parent, _TreeNode): raise _NotADirectory(path) if name in parent.contents: raise _FileExists(path) parent.new_tree(name)
[docs] def mkdirs(self, path): """ Create a new directory, including any ancestors which need to be created in order to create the directory with the given `path`. """ session = self._session() parsed = self._mkpath(path) node = session.tree for name in parsed: next_node = node.get(name) if not next_node: next_node = node.new_tree(name) elif not isinstance(next_node, _TreeNode): raise _NotADirectory(path) node = next_node
[docs] def rm(self, path): """ Remove a single file. """ session = self._session() parsed = self._mkpath(path) obj = session.find(parsed) if not obj: raise _NoSuchFileOrDirectory(path) if isinstance(obj, _TreeNode): raise _IsADirectory(path) obj.parent.remove(obj.name)
[docs] def rmdir(self, path): """ Remove a single directory. The directory must be empty. """ session = self._session() parsed = self._mkpath(path) if not parsed: raise ValueError("Can't remove root directory.") obj = session.find(parsed) if not obj: raise _NoSuchFileOrDirectory(path) if not isinstance(obj, _TreeNode): raise _NotADirectory(path) if not obj.empty(): raise _DirectoryNotEmpty(path) obj.parent.remove(obj.name)
[docs] def rmtree(self, path): """ Remove a directory and any of its contents. """ session = self._session() parsed = self._mkpath(path) if not parsed: raise ValueError("Can't remove root directory.") obj = session.find(parsed) if not obj: raise _NoSuchFileOrDirectory(path) if not isinstance(obj, _TreeNode): raise _NotADirectory(path) obj.parent.remove(obj.name)
[docs] def mv(self, src, dst): """ Move a file or directory from `src` path to `dst` path. """ session = self._session() spath = self._mkpath(src) if not spath: raise _NoSuchFileOrDirectory(src) sname = spath[-1] sfolder = session.find(spath[:-1]) if not sfolder or not sname in sfolder: raise _NoSuchFileOrDirectory(src) dpath = self._mkpath(dst) dobj = session.find(dpath) if not dobj: if dpath: dname = dpath[-1] dfolder = session.find(dpath[:-1]) if dfolder: dfolder.set(dname, sfolder.remove(sname)) return raise _NoSuchFileOrDirectory(dst) if isinstance(dobj, _TreeNode): dobj.set(sname, sfolder.remove(sname)) else: dobj.parent.set(dobj.name, sfolder.remove(sname))
[docs] def exists(self, path): """ Returns boolean indicating whether a file or directory exists at the given `path`. """ session = self._session() return bool(session.find(self._mkpath(path)))
[docs] def isdir(self, path): """ Returns boolean indicating whether the given `path` is a directory. """ session = self._session() return isinstance(session.find(self._mkpath(path)), _TreeNode)
[docs] def empty(self, path): """ Returns boolean indicating whether the directory indicated by `path` is empty. """ session = self._session() obj = session.find(self._mkpath(path)) if not obj: raise _NoSuchFileOrDirectory(path) if not isinstance(obj, _TreeNode): raise _NotADirectory(path) return obj.empty()
class ConflictError(Exception): def __init__(self, msg='Unable to merge changes to repository.'): super(ConflictError, self).__init__(msg) class _Session(object): closed = False lockfd = None def __init__(self, wd, db, head, name, path_encoding): self.wd = wd self.db = db self.name = name self.path_encoding = path_encoding self.lock_file = os.path.join(db, 'acidfs.lock') transaction.get().join(self) curhead = open(os.path.join(db, 'HEAD')).read().strip()[16:] if head == curhead: head = 'HEAD' if head == 'HEAD': self.headref = os.path.join(db, 'refs', 'heads', curhead) else: self.headref = os.path.join(db, 'refs', 'heads', head) self.head = head if os.path.exists(self.headref): # Existing head, get head revision self.prev_commit = _check_output( ['git', 'rev-list', '--max-count=1', head], cwd=db).strip() self.tree = _TreeNode.read(db, self.prev_commit, path_encoding) else: # New head, no commits yet self.tree = _TreeNode(db, path_encoding) # empty tree self.prev_commit = None def set_base(self, ref): if self.tree.dirty: raise ConflictError( "Cannot set base when changes already made in transaction.") self.prev_commit = _check_output( ['git', 'rev-list', '--max-count=1', ref], cwd=self.db).strip() self.tree = _TreeNode.read(self.db, self.prev_commit, self.path_encoding) def find(self, path): assert isinstance(path, (list, tuple)) tree = self.tree if tree: return tree.find(path) def abort(self, tx): """ Part of datamanager API. """ self.close() def tpc_begin(self, tx): """ Part of datamanager API. """ def commit(self, tx): """ Part of datamanager API. """ def tpc_vote(self, tx): """ Part of datamanager API. """ if not self.tree.dirty: # Nothing to do return # Write tree to db tree_oid = self.tree.save() if self.prev_commit: parents = [self.prev_commit] else: parents = [] commit_oid = self.mkcommit(tx, tree_oid, parents) # Acquire an exclusive (aka write) lock for merge. self.acquire_lock() # If this is initial commit, there's not really anything to merge if not self.prev_commit: # Make sure there haven't been other commits if os.path.exists(self.headref): # This was to be the initial commit, but somebody got to it # first No idea how to try to resolve that one. Luckily it # will be very rare. raise ConflictError() # New commit is new head self.next_commit = commit_oid return # Find the merge base current = _check_output( ['git', 'rev-list', '--max-count=1', 'HEAD'], cwd=self.db).strip() merge_base = _check_output( ['git', 'merge-base', current, commit_oid], cwd=self.db).strip() # If the merge base is the current commit, it means there have been no # intervening changes and we can just fast forward to the new commit. # This is the most common case. if merge_base == current: self.next_commit = commit_oid return # Darn it, now we have to actually try to merge self.merge(merge_base, current, tree_oid) self.next_commit = self.mkcommit( tx, self.tree.save(), [current, commit_oid], "Merge") def tpc_finish(self, tx): """ Part of datamanager API. """ if not self.tree.dirty: # Nothing to do return # Make our commit the new head if self.head == 'HEAD': # Use git reset to update current head args = ['git', 'reset', self.next_commit] if self.wd: args.append('--hard') cwd = self.wd else: args.append('--soft') cwd = self.db _check_output(args, cwd=cwd) else: # If not updating current head, just write the commit to the ref # file directly. reffile = os.path.join(self.db, 'refs', 'heads', self.head) with open(reffile, 'wb') as f: f.write(self.next_commit) f.write(b'\n') self.close() def tpc_abort(self, tx): """ Part of datamanager API. """ self.close() def sortKey(self): return self.name def close(self): self.closed = True self.release_lock() def acquire_lock(self): assert not self.lockfd self.lockfd = fd = os.open(self.lock_file, os.O_WRONLY | os.O_CREAT) fcntl.lockf(fd, fcntl.LOCK_EX) def release_lock(self): fd = self.lockfd if fd is not None: fcntl.lockf(fd, fcntl.LOCK_UN) os.close(fd) self.lockfd = None def mkcommit(self, tx, tree_oid, parents, message=None): # Prepare metadata for commit if not message: message = tx.description if not message: message = 'AcidFS transaction' gitenv = os.environ.copy() extension = tx._extension # "Official" API despite underscore user = extension.get('acidfs_user') if not user: user = extension.get('user') if not user: user = tx.user if user: if user.startswith(' '): user = user[1:] else: # strip Zope's "path" user = user.split(None, 1) if len(user) == 2: user = user[1] else: user = user[0] if user: gitenv['GIT_AUTHOR_NAME'] = gitenv['GIT_COMMITER_NAME'] = user email = extension.get('acidfs_email') if not email: email = extension.get('email') if email: gitenv['GIT_AUTHOR_EMAIL'] = gitenv['GIT_COMMITTER_EMAIL'] = \ gitenv['EMAIL'] = email # Write commit to db args = ['git', 'commit-tree', tree_oid, '-m', message] for parent in parents: args.append('-p') args.append(parent) return _check_output(args, cwd=self.db, env=gitenv).strip() def merge(self, base_oid, current, tree_oid): """ This attempts to interpret the output of 'git merge-tree', given the current head, the tree we're currently working on, and the nearest common ancestor commit (base_oid). I haven't found any documentation on the format of the output of 'git merge-tree' so this is basically reverse engineered from studying its output in different situations. I try to be as conservative as possible here and bail as soon as I hit anything I'm not 100% sure about. It is far preferable to raise a ConflictError than incorrectly merge. As such, the code below is peppered with assertions using the 'expect' function, which will raise a ConflictError if any of our expectations aren't met. I also attempt to log as much useful debug information as possible in the case of an unmet expectation, so I can go back and take into account more cases as they are encountered. The basic algorithm here is a finite state machine operating on the output of 'git merge-tree' one line at a time. This should be fairly memory efficient for even large changesets, with the caveat there may have been added a large binary file which contains few or no line break characters, which could cause a buffer to get large while scanning through the merge data. One might ask, why not use the porcelain 'git merge' command? One reason is, in the context of the two phase commit protocol, we'd rather do pretty much everything we possibly can in the voting stage, leaving ourselves with nothing to do in the finish phase except updating the head to the commit we just created, and possibly updating the working directory--operations that are guaranteed to work. Since 'git merge' will update the head, we'd prefer to do it during the final phase of the commit, but we can't guarantee it will work. There is not a convenient way to do a merge dry run during the voting phase. Although I can conceive of ways to do the merge during the voting phase and roll back to the previous head if we need to, that feels a little riskier. Doing the merge ourselves, here, also frees us from having to work with a working directory, required by the porcelain 'git merge' command. This means we can use bare repositories and/or have transactions that use a head other than the repositories 'current' head. In general, tranactions will be short and will not have much a of a chance to get very far behind the head, so merges will tend not to be terribly complicated. We should be able to handle the vast majority of cases here, even if there are some rare corner cases the porcelain command might be able to handle that we can't. I think that's a reasonable trade off for the flexibility this approach provides. """ with _popen(['git', 'merge-tree', base_oid, tree_oid, current], cwd=self.db, stdout=subprocess.PIPE) as proc: # Messy finite state machine state = None extra_state = None stream = proc.stdout line = stream.readline() def expect(expectation, *msg): if not expectation: # pragma no cover log.debug("Unmet expectation during merge.") log.debug(''.join(traceback.format_stack())) if msg: log.debug(msg[0], *msg[1:]) if extra_state: log.debug("Extra state: %s", extra_state) raise ConflictError() while line: if state is None: # default, scanning for start of a change if _isalpha(line[0]): # If first column is a letter, then we have the first # line of a change, which describes the change. line = line.strip() if line in (b'added in local', b'removed in local', b'removed in both'): # We don't care about changes to our current tree. # We already know about those. pass elif line == b'added in remote': # The head got a new file, we should grab it state = _MERGE_ADDED_IN_REMOTE extra_state = [] elif line == b'removed in remote': # File got deleted from head, remove it state = _MERGE_REMOVED_IN_REMOTE extra_state = [] elif line == b'changed in both': # File was edited in both branches, see if we can # patch state = _MERGE_CHANGED_IN_BOTH extra_state = [] elif line == b'added in both': state = _MERGE_ADDED_IN_BOTH extra_state = [] else: # pragma NO COVER log.debug("Don't know how to merge: %s", line) raise ConflictError() elif state is _MERGE_ADDED_IN_REMOTE: if _isalpha(line[0]) or line.startswith(b'@'): # Done collecting tree lines, only expecting one expect(len(extra_state) == 1, 'Wrong number of lines') whose, mode, oid, path = _parsetree(extra_state[0]) expect(whose == b'their', 'Unexpected whose: %s', whose) expect(mode == b'100644', 'Unexpected mode: %s', mode) parsed = path.decode('ascii').split('/') folder = self.find(parsed[:-1]) expect(isinstance(folder, _TreeNode), 'Not a folder: %s', path) folder.set(parsed[-1], (b'blob', oid, None)) state = extra_state = None continue else: extra_state.append(line) elif state is _MERGE_REMOVED_IN_REMOTE: if _isalpha(line[0]) or line.startswith(b'@'): # Done collecting tree lines, expect two, one for base, # one for our copy, whose sha1s should match expect(len(extra_state) == 2, 'Wrong number of lines') whose, mode, oid, path = _parsetree(extra_state[0]) expect(whose in (b'our', b'base'), 'Unexpected whose: %s', whose) expect(mode == b'100644', 'Unexpected mode: %s', mode) whose, mode, oid2, path2 = _parsetree(extra_state[1]) expect(whose in (b'our', b'base'), 'Unexpected whose: %s', whose) expect(mode == b'100644', 'Unexpected mode: %s', mode) expect(oid == oid2, "SHA1s don't match") expect(path == path2, "Paths don't match") path = path.decode('ascii').split('/') folder = self.find(path[:-1]) expect(isinstance(folder, _TreeNode), "Not a folder") folder.remove(path[-1]) state = extra_state = None continue else: extra_state.append(line) elif state is _MERGE_CHANGED_IN_BOTH: if line.startswith(b'@'): # Done collecting tree lines, expect three, one for base # and one for each copy expect(len(extra_state) == 3, 'Wrong number of lines') whose, mode, oid, path = _parsetree(extra_state[0]) expect(whose in (b'base', b'our', b'their'), 'Unexpected whose: %s', whose) expect(mode == b'100644', 'Unexpected mode: %s', mode) for extra_line in extra_state[1:]: whose, mode, oid2, path2 = _parsetree(extra_line) expect(whose in (b'base', b'our', b'their'), 'Unexpected whose: %s', whose) expect(mode == b'100644', 'Unexpected mode: %s', mode) expect(path == path2, "Paths don't match") parsed = path.decode('ascii').split('/') folder = self.find(parsed[:-1]) expect(isinstance(folder, _TreeNode), "Not a folder") name = parsed[-1] blob = folder.get(name) expect(isinstance(blob, _Blob), "Not a blob") with _tempfile() as tmp: shutil.copyfileobj(blob.open(), open(tmp, 'wb')) with _popen(['patch', '-s', tmp, '-'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p: f = p.stdin while line and not _isalpha(line[0]): if line[1:9] == b'<<<<<<< ': raise ConflictError() f.write(line) line = stream.readline() newblob = folder.new_blob(name, blob) shutil.copyfileobj(open(tmp, 'rb'), newblob) state = extra_state = None continue else: extra_state.append(line) elif state is _MERGE_ADDED_IN_BOTH: if _isalpha(line[0]) or line.startswith(b'@'): # Done collecting tree lines, expect two, one for base, # one for our copy, whose sha1s should match expect(len(extra_state) == 2, 'Wrong number of lines') whose, mode, oid, path = _parsetree(extra_state[0]) expect(whose in (b'our', b'their'), 'Unexpected whose: %s', whose) expect(mode == b'100644', 'Unexpected mode: %s', mode) whose, mode, oid2, path2 = _parsetree(extra_state[1]) expect(whose in (b'our', b'their'), 'Unexpected whose: %s', whose) expect(mode == b'100644', 'Unexpected mode: %s', mode) expect(path == path2, "Paths don't match") # Either it's the same file or a different file. if oid != oid2: # Different files, can't merge raise ConflictError() # Same file, nothing to do state = extra_state = None continue else: extra_state.append(line) line = stream.readline() class _TreeNode(object): parent = None name = None dirty = False oid = None @classmethod def read(cls, db, oid, path_encoding): node = cls(db, path_encoding) node.oid = oid contents = node.contents with _popen(['git', 'ls-tree', oid], stdout=subprocess.PIPE, cwd=db) as lstree: for line in lstree.stdout.readlines(): mode, type, oid, name = _parsetree(line) name = name.decode(path_encoding) oid = _s(oid) contents[name] = (type, oid, None) return node def __init__(self, db, path_encoding): self.db = db self.path_encoding = path_encoding self.contents = {} def get(self, name): contents = self.contents obj = contents.get(name) if not obj: return None type, oid, obj = obj assert type in (b'tree', b'blob') if not obj: if type == b'tree': obj = _TreeNode.read(self.db, oid, self.path_encoding) else: obj = _Blob(self.db, oid) obj.parent = self obj.name = name contents[name] = (type, oid, obj) return obj def find(self, path): if not path: return self obj = self.get(path[0]) if obj: return obj.find(path[1:]) def new_blob(self, name, prev): obj = _NewBlob(self.db, prev) obj.parent = self obj.name = name self.contents[name] = (b'blob', None, weakref.proxy(obj)) self.set_dirty() return obj def new_tree(self, name): node = _TreeNode(self.db, self.path_encoding) node.parent = self node.name = name self.contents[name] = (b'tree', None, node) self.set_dirty() return node def remove(self, name): entry = self.contents.pop(name) self.set_dirty() return entry def set(self, name, entry): self.contents[name] = entry self.set_dirty() def set_dirty(self): node = self while node and not node.dirty: node.oid = None node.dirty = True node = node.parent def save(self): # Recursively save children, first for name, (type, oid, obj) in list(self.contents.items()): if not obj: continue # Nothing to do if isinstance(obj, _NewBlob): raise ValueError("Cannot commit transaction with open files.") elif type == b'tree' and (obj.dirty or not oid): new_oid = obj.save() self.contents[name] = (b'tree', new_oid, None) # Save tree object out to database with _popen(['git', 'mktree'], cwd=self.db, stdin=subprocess.PIPE, stdout=subprocess.PIPE) as proc: for name, (type, oid, obj) in self.contents.items(): proc.stdin.write(b'100644' if type == b'blob' else b'040000') proc.stdin.write(b' ') proc.stdin.write(type) proc.stdin.write(b' ') proc.stdin.write(_b(oid)) proc.stdin.write(b'\t') proc.stdin.write(name.encode(self.path_encoding)) proc.stdin.write(b'\n') proc.stdin.close() oid = proc.stdout.read().strip() self.oid = _s(oid) return oid def empty(self): return not self.contents def __contains__(self, name): return name in self.contents def hash(self): if not self.oid: self.save() return self.oid def _parsetree(line): return line.strip().split(None, 3) class _Blob(object): def __init__(self, db, oid): self.db = db self.oid = oid def open(self): return _BlobStream(self.db, self.oid) def find(self, path): if not path: return self def hash(self): return self.oid class _NewBlob(io.RawIOBase): def __init__(self, db, prev): self.db = db self.prev = prev self.proc = subprocess.Popen( ['git', 'hash-object', '-w', '--stdin'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=db) def write(self, b): self.proc.stdin.write(b) return len(b) def close(self): if not self.closed: super(_NewBlob, self).close() self.proc.stdin.close() oid = self.proc.stdout.read().strip() self.proc.stdout.close() retcode = self.proc.wait() if retcode != 0: raise subprocess.CalledProcessError( retcode, 'git hash-object -w --stdin') self.parent.contents[self.name] = (b'blob', _s(oid), None) def writable(self): return True def open(self): if self.prev: return self.prev.open() raise _NoSuchFileOrDirectory(_object_path(self)) def hash(self): if self.prev: return self.prev.hash() raise _NoSuchFileOrDirectory(_object_path(self)) def find(self, path): if not path: return self class _BlobStream(io.RawIOBase): def __init__(self, db, oid): self.proc = subprocess.Popen( ['git', 'cat-file', 'blob', oid], stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=db) self.oid = oid def readable(self): return True def read(self, n=-1): return self.proc.stdout.read(n) def readinto(self, b): """ Although the documentation asserts a default implementation of this method can be found in io.RawIOBase, there actually isn't one:: http://docs.python.org/py3k/library/io.html#io.RawIOBase See:: http://bugs.python.org/issue9858 """ return self.proc.stdout.readinto(b) def close(self): if not self.closed: super(_BlobStream, self).close() self.proc.stdout.close() self.proc.stderr.close() retcode = self.proc.wait() if retcode != 0: raise subprocess.CalledProcessError( retcode, 'git cat-file blob %s' % self.oid) def _object_path(obj): path = [] node = obj while node.parent: path.insert(0, node.name) node = node.parent return '/'.join(path) @contextlib.contextmanager def _popen(args, **kw): proc = subprocess.Popen(args, **kw) yield proc for stream in (proc.stdin, proc.stdout, proc.stderr): if stream is not None: stream.close() retcode = proc.wait() if retcode != 0: raise subprocess.CalledProcessError(retcode, repr(args)) @contextlib.contextmanager def _tempfile(): fd, tmp = tempfile.mkstemp('.acidfs-merge') os.close(fd) yield tmp os.remove(tmp) def _NoSuchFileOrDirectory(path): return IOError(2, 'No such file or directory', path) def _IsADirectory(path): return IOError(21, 'Is a directory', path) def _NotADirectory(path): return IOError(20, 'Not a directory', path) def _FileExists(path): return IOError(17, 'File exists', path) def _DirectoryNotEmpty(path): return IOError(39, 'Directory not empty', path) _MERGE_ADDED_IN_REMOTE = object() _MERGE_REMOVED_IN_REMOTE = object() _MERGE_CHANGED_IN_BOTH = object() _MERGE_ADDED_IN_BOTH = object() try: # Python >= 2.7 _check_output = subprocess.check_output except AttributeError: # pragma NO COVER def _check_output(*popenargs, **kwargs): """ Stolen straight from Python 2.7. """ if 'stdout' in kwargs: raise ValueError( 'stdout argument not allowed, it will be overridden.') process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) output, unused_err = process.communicate() retcode = process.poll() if retcode: cmd = kwargs.get("args") if cmd is None: cmd = popenargs[0] raise subprocess.CalledProcessError(retcode, cmd, output=output) return output if sys.version_info[0] == 2: # pragma NO COVER _b = lambda s: s _s = lambda b: b _isalpha = lambda s: s.isalpha() else: # pragma NO COVER def _b(s): if isinstance(s, str): s = bytes(s, 'ascii') return s def _s(b): if isinstance(b, bytes): b = str(b, 'ascii') return b aa, zz, AA, ZZ = ord('a'), ord('z'), ord('A'), ord('Z') def _isalpha(b): return aa <= b <= zz or AA <= b <= ZZ