import contextlib
import errno
import fcntl
import io
import logging
import os
import shutil
import subprocess
import sys
import tempfile
import traceback
import weakref

import transaction
log = logging.getLogger(__name__)
class AcidFS(object):
    """
    An instance of `AcidFS` exposes a transactional filesystem view of a `Git`
    repository.  Instances of `AcidFS` are not threadsafe and should not be
    shared across threads, greenlets, etc.

    **Paths**

    Many methods take a `path` as an argument.  All paths use forward slash
    `/` as a separator, regardless of the path separator of the underlying
    operating system.  The path `/` represents the root folder of the
    repository.  Paths may be relative or absolute: paths beginning with a
    `/` are absolute with respect to the repository root, paths not beginning
    with a `/` are relative to the current working directory.  The current
    working directory always starts at the root of the repository.  The
    current working directory can be changed using the :meth:`chdir` and
    :meth:`cd` methods.

    **Constructor Arguments**

    ``repo``

       The path to the repository in the real, local filesystem.

    ``head``

       The name of a branch to use as the head for this transaction.  Changes
       made using this instance will be merged to the given head.  The
       default, if omitted, is to use the repository's current head.

    ``create``

       If there is not a Git repository in the indicated directory, should
       one be created?  The default is `True`.

    ``bare``

       If the Git repository is to be created, create it as a bare
       repository.  If the repository is already created or `create` is
       False, this argument has no effect.

    ``user_name``

       If the Git repository is to be created, set the user name for the
       repository to this value.  This is the same as creating the repository
       and running `git config user.name "<user_name>"`.

    ``user_email``

       If the Git repository is to be created, set the user email for the
       repository to this value.  This is the same as creating the repository
       and running `git config user.email "<user_email>"`.

    ``name``

       Name to be used as a sort key when ordering the various databases
       (datamanagers in the parlance of the transaction package) during a
       commit.  It is exceedingly rare that you would need to use anything
       other than the default, here.

    ``path_encoding``

       Encode paths with this encoding.  The default is `ascii`.
    """
    # The active `_Session`, created lazily by `_session()`.
    session = None
    # Current working directory as a sequence of path components.
    _cwd = ()

    def __init__(self, repo, head='HEAD', create=True, bare=False,
                 user_name=None, user_email=None, name='AcidFS',
                 path_encoding='ascii'):
        # Assume a non-bare repository first (working dir + '.git' folder),
        # then fall back to treating `repo` itself as the git database.
        wdpath = repo
        dbpath = os.path.join(repo, '.git')
        if not os.path.exists(dbpath):
            wdpath = None
            dbpath = repo
            if not os.path.exists(os.path.join(dbpath, 'HEAD')):
                if create:
                    args = ['git', 'init', repo]
                    if bare:
                        args.append('--bare')
                    else:
                        wdpath = repo
                        dbpath = os.path.join(repo, '.git')
                    _check_output(args)
                    if user_name:
                        args = ['git', 'config', 'user.name', user_name]
                        _check_output(args, cwd=dbpath)
                    if user_email:
                        args = ['git', 'config', 'user.email', user_email]
                        _check_output(args, cwd=dbpath)
                    args = ['git', 'config', 'core.quotepath', 'false']
                    _check_output(args, cwd=dbpath)
                else:
                    raise ValueError('No database found in %s' % dbpath)

        self.wd = wdpath
        self.db = dbpath
        self.head = head
        self.name = name
        self.path_encoding = path_encoding

    def _session(self):
        """
        Make sure we're in a session.

        Returns the current session, creating a new one when there is none
        yet or the previous one has been closed.
        """
        if not self.session or self.session.closed:
            self.session = _Session(self.wd, self.db, self.head, self.name,
                                    self.path_encoding)
        return self.session

    def _mkpath(self, path):
        """
        Split `path` into a list of components, resolving relative paths
        against the current working directory.  Empty components (doubled or
        trailing slashes) are dropped and ``'.'`` alone means the current
        working directory.
        """
        if path == '.':
            parsed = []
        else:
            parsed = [part for part in path.split('/') if part]
        if not path.startswith('/'):
            parsed = list(self._cwd) + parsed
        return parsed

    def get_base(self):
        """
        Returns the id of the commit that is the current base for the
        transaction.
        """
        return self._session().prev_commit

    def set_base(self, commit):
        """
        Sets the base commit for the current transaction.  The `commit`
        argument may be the SHA1 of a commit or the name of a reference (eg.
        branch or tag).  The current transaction must be clean.  If any
        changes have been made in the transaction, a ConflictError will be
        raised.
        """
        self._session().set_base(commit)

    def cwd(self):
        """
        Returns the path to the current working directory in the repository.
        """
        return '/' + '/'.join(self._cwd)

    def chdir(self, path):
        """
        Change the current working directory in repository.

        Raises an `IOError` if `path` does not exist or is not a directory.
        """
        session = self._session()
        parsed = self._mkpath(path)
        obj = session.find(parsed)
        if not obj:
            raise _NoSuchFileOrDirectory(path)
        if not isinstance(obj, _TreeNode):
            raise _NotADirectory(path)
        self._cwd = parsed

    @contextlib.contextmanager
    def cd(self, path):
        """
        A context manager that changes the current working directory only in
        the scope of the 'with' context.  Eg::

            import acidfs

            fs = acidfs.AcidFS('myrepo')
            with fs.cd('some/folder'):
                fs.open('a/file')   # relative to /some/folder
            fs.open('another/file') # relative to /
        """
        prev = self._cwd
        self.chdir(path)
        try:
            yield
        finally:
            # Restore the previous directory even when the body raises.
            self._cwd = prev

    def open(self, path, mode='r', buffering=-1, encoding=None, errors=None,
             newline=None):
        """
        Open a file for reading or writing.

        Implements the semantics of the `open` function in Python's `io module
        <http://docs.python.org/library/io.html#io.open>`_, which is the
        default implementation `in Python 3
        <http://docs.python.org/py3k/library/functions.html#open>`_.  Opening
        a file in text mode will return a file-like object which reads or
        writes unicode strings, while opening a file in binary mode will
        return a file-like object which reads or writes raw bytes.

        Because the underlying implementation uses a pipe to a `Git` plumbing
        command, opening for update (read and write) is not supported, nor is
        seeking.

        Raises `ValueError` for unsupported or contradictory modes and an
        `IOError` when the path cannot be opened as requested.
        """
        session = self._session()
        parsed = self._mkpath(path)

        if 'b' in mode:
            text = False
            if 't' in mode:
                raise ValueError("can't have text and binary mode at once")
        else:
            if not buffering:
                raise ValueError("can't have unbuffered text I/O")
            text = True

        if '+' in mode:
            raise ValueError("Read/write mode is not supported")

        # What is left after dropping the text/binary flags is the basic
        # mode; append and exclusive-create are both handled as a write.
        mode = mode.replace('b', '')
        mode = mode.replace('t', '')
        append = mode == 'a'
        exclusive = mode == 'x'
        if append or exclusive:
            mode = 'w'

        if buffering < 0:
            buffer_size = io.DEFAULT_BUFFER_SIZE
            line_buffering = False
        elif buffering == 1:
            buffer_size = io.DEFAULT_BUFFER_SIZE
            line_buffering = True
        else:
            buffer_size = buffering
            line_buffering = False

        if mode == 'r':
            obj = session.find(parsed)
            if not obj:
                raise _NoSuchFileOrDirectory(path)
            if isinstance(obj, _TreeNode):
                raise _IsADirectory(path)
            stream = obj.open()
            if buffering:
                stream = io.BufferedReader(stream, buffer_size)
            if text:
                stream = io.TextIOWrapper(
                    stream, encoding, errors, newline, line_buffering)
            return stream

        elif mode == 'w':
            if not parsed:
                raise _IsADirectory(path)
            name = parsed[-1]
            dirpath = parsed[:-1]
            obj = session.find(dirpath)
            if not obj:
                raise _NoSuchFileOrDirectory(path)
            if not isinstance(obj, _TreeNode):
                raise _NotADirectory(path)
            prev = obj.get(name)
            if isinstance(prev, _TreeNode):
                raise _IsADirectory(path)
            if prev and exclusive:
                raise _FileExists(path)
            blob = obj.new_blob(name, prev)
            if append and prev:
                # Seed the new blob with the existing content, and close the
                # reader so its git subprocess does not linger.
                with contextlib.closing(prev.open()) as existing:
                    shutil.copyfileobj(existing, blob)
            if buffering:
                blob = io.BufferedWriter(blob, buffer_size)
            if text:
                blob = io.TextIOWrapper(
                    blob, encoding, errors, newline, line_buffering)
            return blob

        raise ValueError("Bad mode: %s" % mode)

    def hash(self, path=''):
        """
        Returns the sha1 hash of the object referred to by `path`.  If `path`
        is omitted the current working directory is used.
        """
        session = self._session()
        obj = session.find(self._mkpath(path))
        if not obj:
            raise _NoSuchFileOrDirectory(path)
        return obj.hash()

    def listdir(self, path=''):
        """
        Return list of files in indicated directory.  If `path` is omitted,
        the current working directory is used.
        """
        session = self._session()
        obj = session.find(self._mkpath(path))
        if not obj:
            raise _NoSuchFileOrDirectory(path)
        if not isinstance(obj, _TreeNode):
            raise _NotADirectory(path)
        return list(obj.contents)

    def mkdir(self, path):
        """
        Create a new directory.  The parent of the new directory must already
        exist.
        """
        session = self._session()
        parsed = self._mkpath(path)
        if not parsed:
            # The root folder always exists; there is no name to create.
            raise _FileExists(path)
        name = parsed[-1]
        parent = session.find(parsed[:-1])
        if not parent:
            raise _NoSuchFileOrDirectory(path)
        if not isinstance(parent, _TreeNode):
            raise _NotADirectory(path)
        if name in parent.contents:
            raise _FileExists(path)
        parent.new_tree(name)

    def mkdirs(self, path):
        """
        Create a new directory, including any ancestors which need to be
        created in order to create the directory with the given `path`.
        """
        session = self._session()
        parsed = self._mkpath(path)
        node = session.tree
        for name in parsed:
            next_node = node.get(name)
            if not next_node:
                next_node = node.new_tree(name)
            elif not isinstance(next_node, _TreeNode):
                raise _NotADirectory(path)
            node = next_node

    def rm(self, path):
        """
        Remove a single file.
        """
        session = self._session()
        obj = session.find(self._mkpath(path))
        if not obj:
            raise _NoSuchFileOrDirectory(path)
        if isinstance(obj, _TreeNode):
            raise _IsADirectory(path)
        obj.parent.remove(obj.name)

    def rmdir(self, path):
        """
        Remove a single directory.  The directory must be empty.
        """
        session = self._session()
        parsed = self._mkpath(path)
        if not parsed:
            raise ValueError("Can't remove root directory.")
        obj = session.find(parsed)
        if not obj:
            raise _NoSuchFileOrDirectory(path)
        if not isinstance(obj, _TreeNode):
            raise _NotADirectory(path)
        if not obj.empty():
            raise _DirectoryNotEmpty(path)
        obj.parent.remove(obj.name)

    def rmtree(self, path):
        """
        Remove a directory and any of its contents.
        """
        session = self._session()
        parsed = self._mkpath(path)
        if not parsed:
            raise ValueError("Can't remove root directory.")
        obj = session.find(parsed)
        if not obj:
            raise _NoSuchFileOrDirectory(path)
        if not isinstance(obj, _TreeNode):
            raise _NotADirectory(path)
        obj.parent.remove(obj.name)

    def mv(self, src, dst):
        """
        Move a file or directory from `src` path to `dst` path.

        If `dst` is an existing directory the source is moved into it under
        its own name; if `dst` is an existing file it is replaced; otherwise
        the parent of `dst` must be an existing directory.
        """
        session = self._session()
        spath = self._mkpath(src)
        if not spath:
            raise _NoSuchFileOrDirectory(src)
        sname = spath[-1]
        sfolder = session.find(spath[:-1])
        # The parent must be a folder; a file used as a path component cannot
        # contain the source.
        if not isinstance(sfolder, _TreeNode) or sname not in sfolder:
            raise _NoSuchFileOrDirectory(src)

        dpath = self._mkpath(dst)
        dobj = session.find(dpath)
        if not dobj:
            if dpath:
                dfolder = session.find(dpath[:-1])
                if isinstance(dfolder, _TreeNode):
                    dfolder.set(dpath[-1], sfolder.remove(sname))
                    return
            raise _NoSuchFileOrDirectory(dst)
        if isinstance(dobj, _TreeNode):
            dobj.set(sname, sfolder.remove(sname))
        else:
            dobj.parent.set(dobj.name, sfolder.remove(sname))

    def exists(self, path):
        """
        Returns boolean indicating whether a file or directory exists at the
        given `path`.
        """
        session = self._session()
        return bool(session.find(self._mkpath(path)))

    def isdir(self, path):
        """
        Returns boolean indicating whether the given `path` is a directory.
        """
        session = self._session()
        return isinstance(session.find(self._mkpath(path)), _TreeNode)

    def empty(self, path):
        """
        Returns boolean indicating whether the directory indicated by `path`
        is empty.
        """
        session = self._session()
        obj = session.find(self._mkpath(path))
        if not obj:
            raise _NoSuchFileOrDirectory(path)
        if not isinstance(obj, _TreeNode):
            raise _NotADirectory(path)
        return obj.empty()
class ConflictError(Exception):
    """
    Raised when changes made in a transaction cannot be reconciled with the
    state of the repository.
    """

    def __init__(self, msg='Unable to merge changes to repository.'):
        Exception.__init__(self, msg)
class _Session(object):
closed = False
lockfd = None
def __init__(self, wd, db, head, name, path_encoding):
self.wd = wd
self.db = db
self.name = name
self.path_encoding = path_encoding
self.lock_file = os.path.join(db, 'acidfs.lock')
transaction.get().join(self)
curhead = open(os.path.join(db, 'HEAD')).read().strip()[16:]
if head == curhead:
head = 'HEAD'
if head == 'HEAD':
self.headref = os.path.join(db, 'refs', 'heads', curhead)
else:
self.headref = os.path.join(db, 'refs', 'heads', head)
self.head = head
if os.path.exists(self.headref):
# Existing head, get head revision
self.prev_commit = _check_output(
['git', 'rev-list', '--max-count=1', head], cwd=db).strip()
self.tree = _TreeNode.read(db, self.prev_commit, path_encoding)
else:
# New head, no commits yet
self.tree = _TreeNode(db, path_encoding) # empty tree
self.prev_commit = None
def set_base(self, ref):
if self.tree.dirty:
raise ConflictError(
"Cannot set base when changes already made in transaction.")
self.prev_commit = _check_output(
['git', 'rev-list', '--max-count=1', ref], cwd=self.db).strip()
self.tree = _TreeNode.read(self.db, self.prev_commit,
self.path_encoding)
def find(self, path):
assert isinstance(path, (list, tuple))
tree = self.tree
if tree:
return tree.find(path)
def abort(self, tx):
"""
Part of datamanager API.
"""
self.close()
def tpc_begin(self, tx):
"""
Part of datamanager API.
"""
def commit(self, tx):
"""
Part of datamanager API.
"""
def tpc_vote(self, tx):
"""
Part of datamanager API.
"""
if not self.tree.dirty:
# Nothing to do
return
# Write tree to db
tree_oid = self.tree.save()
if self.prev_commit:
parents = [self.prev_commit]
else:
parents = []
commit_oid = self.mkcommit(tx, tree_oid, parents)
# Acquire an exclusive (aka write) lock for merge.
self.acquire_lock()
# If this is initial commit, there's not really anything to merge
if not self.prev_commit:
# Make sure there haven't been other commits
if os.path.exists(self.headref):
# This was to be the initial commit, but somebody got to it
# first No idea how to try to resolve that one. Luckily it
# will be very rare.
raise ConflictError()
# New commit is new head
self.next_commit = commit_oid
return
# Find the merge base
current = _check_output(
['git', 'rev-list', '--max-count=1', 'HEAD'], cwd=self.db).strip()
merge_base = _check_output(
['git', 'merge-base', current, commit_oid], cwd=self.db).strip()
# If the merge base is the current commit, it means there have been no
# intervening changes and we can just fast forward to the new commit.
# This is the most common case.
if merge_base == current:
self.next_commit = commit_oid
return
# Darn it, now we have to actually try to merge
self.merge(merge_base, current, tree_oid)
self.next_commit = self.mkcommit(
tx, self.tree.save(), [current, commit_oid], "Merge")
def tpc_finish(self, tx):
"""
Part of datamanager API.
"""
if not self.tree.dirty:
# Nothing to do
return
# Make our commit the new head
if self.head == 'HEAD':
# Use git reset to update current head
args = ['git', 'reset', self.next_commit]
if self.wd:
args.append('--hard')
cwd = self.wd
else:
args.append('--soft')
cwd = self.db
_check_output(args, cwd=cwd)
else:
# If not updating current head, just write the commit to the ref
# file directly.
reffile = os.path.join(self.db, 'refs', 'heads', self.head)
with open(reffile, 'wb') as f:
f.write(self.next_commit)
f.write(b'\n')
self.close()
def tpc_abort(self, tx):
"""
Part of datamanager API.
"""
self.close()
def sortKey(self):
return self.name
def close(self):
self.closed = True
self.release_lock()
def acquire_lock(self):
assert not self.lockfd
self.lockfd = fd = os.open(self.lock_file, os.O_WRONLY | os.O_CREAT)
fcntl.lockf(fd, fcntl.LOCK_EX)
def release_lock(self):
fd = self.lockfd
if fd is not None:
fcntl.lockf(fd, fcntl.LOCK_UN)
os.close(fd)
self.lockfd = None
def mkcommit(self, tx, tree_oid, parents, message=None):
# Prepare metadata for commit
if not message:
message = tx.description
if not message:
message = 'AcidFS transaction'
gitenv = os.environ.copy()
extension = tx._extension # "Official" API despite underscore
user = extension.get('acidfs_user')
if not user:
user = extension.get('user')
if not user:
user = tx.user
if user:
if user.startswith(' '):
user = user[1:]
else:
# strip Zope's "path"
user = user.split(None, 1)
if len(user) == 2:
user = user[1]
else:
user = user[0]
if user:
gitenv['GIT_AUTHOR_NAME'] = gitenv['GIT_COMMITER_NAME'] = user
email = extension.get('acidfs_email')
if not email:
email = extension.get('email')
if email:
gitenv['GIT_AUTHOR_EMAIL'] = gitenv['GIT_COMMITTER_EMAIL'] = \
gitenv['EMAIL'] = email
# Write commit to db
args = ['git', 'commit-tree', tree_oid, '-m', message]
for parent in parents:
args.append('-p')
args.append(parent)
return _check_output(args, cwd=self.db, env=gitenv).strip()
def merge(self, base_oid, current, tree_oid):
"""
This attempts to interpret the output of 'git merge-tree', given the
current head, the tree we're currently working on, and the nearest
common ancestor commit (base_oid).
I haven't found any documentation on the format of the output of
'git merge-tree' so this is basically reverse engineered from studying
its output in different situations. I try to be as conservative as
possible here and bail as soon as I hit anything I'm not 100% sure
about. It is far preferable to raise a ConflictError than incorrectly
merge. As such, the code below is peppered with assertions using the
'expect' function, which will raise a ConflictError if any of our
expectations aren't met. I also attempt to log as much useful debug
information as possible in the case of an unmet expectation, so I can go
back and take into account more cases as they are encountered.
The basic algorithm here is a finite state machine operating on the
output of 'git merge-tree' one line at a time. This should be fairly
memory efficient for even large changesets, with the caveat there may
have been added a large binary file which contains few or no line break
characters, which could cause a buffer to get large while scanning
through the merge data.
One might ask, why not use the porcelain 'git merge' command? One
reason is, in the context of the two phase commit protocol, we'd rather
do pretty much everything we possibly can in the voting stage, leaving
ourselves with nothing to do in the finish phase except updating the
head to the commit we just created, and possibly updating the working
directory--operations that are guaranteed to work. Since 'git merge'
will update the head, we'd prefer to do it during the final phase of the
commit, but we can't guarantee it will work. There is not a convenient
way to do a merge dry run during the voting phase. Although I can
conceive of ways to do the merge during the voting phase and roll back
to the previous head if we need to, that feels a little riskier. Doing
the merge ourselves, here, also frees us from having to work with a
working directory, required by the porcelain 'git merge' command. This
means we can use bare repositories and/or have transactions that use
a head other than the repositories 'current' head.
In general, tranactions will be short and will not have much a of a
chance to get very far behind the head, so merges will tend not to be
terribly complicated. We should be able to handle the vast majority of
cases here, even if there are some rare corner cases the porcelain
command might be able to handle that we can't. I think that's a
reasonable trade off for the flexibility this approach provides.
"""
with _popen(['git', 'merge-tree', base_oid, tree_oid, current],
cwd=self.db, stdout=subprocess.PIPE) as proc:
# Messy finite state machine
state = None
extra_state = None
stream = proc.stdout
line = stream.readline()
def expect(expectation, *msg):
if not expectation: # pragma no cover
log.debug("Unmet expectation during merge.")
log.debug(''.join(traceback.format_stack()))
if msg:
log.debug(msg[0], *msg[1:])
if extra_state:
log.debug("Extra state: %s", extra_state)
raise ConflictError()
while line:
if state is None: # default, scanning for start of a change
if _isalpha(line[0]):
# If first column is a letter, then we have the first
# line of a change, which describes the change.
line = line.strip()
if line in (b'added in local', b'removed in local',
b'removed in both'):
# We don't care about changes to our current tree.
# We already know about those.
pass
elif line == b'added in remote':
# The head got a new file, we should grab it
state = _MERGE_ADDED_IN_REMOTE
extra_state = []
elif line == b'removed in remote':
# File got deleted from head, remove it
state = _MERGE_REMOVED_IN_REMOTE
extra_state = []
elif line == b'changed in both':
# File was edited in both branches, see if we can
# patch
state = _MERGE_CHANGED_IN_BOTH
extra_state = []
elif line == b'added in both':
state = _MERGE_ADDED_IN_BOTH
extra_state = []
else: # pragma NO COVER
log.debug("Don't know how to merge: %s", line)
raise ConflictError()
elif state is _MERGE_ADDED_IN_REMOTE:
if _isalpha(line[0]) or line.startswith(b'@'):
# Done collecting tree lines, only expecting one
expect(len(extra_state) == 1, 'Wrong number of lines')
whose, mode, oid, path = _parsetree(extra_state[0])
expect(whose == b'their', 'Unexpected whose: %s', whose)
expect(mode == b'100644', 'Unexpected mode: %s', mode)
parsed = path.decode('ascii').split('/')
folder = self.find(parsed[:-1])
expect(isinstance(folder, _TreeNode),
'Not a folder: %s', path)
folder.set(parsed[-1], (b'blob', oid, None))
state = extra_state = None
continue
else:
extra_state.append(line)
elif state is _MERGE_REMOVED_IN_REMOTE:
if _isalpha(line[0]) or line.startswith(b'@'):
# Done collecting tree lines, expect two, one for base,
# one for our copy, whose sha1s should match
expect(len(extra_state) == 2, 'Wrong number of lines')
whose, mode, oid, path = _parsetree(extra_state[0])
expect(whose in (b'our', b'base'),
'Unexpected whose: %s', whose)
expect(mode == b'100644', 'Unexpected mode: %s', mode)
whose, mode, oid2, path2 = _parsetree(extra_state[1])
expect(whose in (b'our', b'base'),
'Unexpected whose: %s', whose)
expect(mode == b'100644', 'Unexpected mode: %s', mode)
expect(oid == oid2, "SHA1s don't match")
expect(path == path2, "Paths don't match")
path = path.decode('ascii').split('/')
folder = self.find(path[:-1])
expect(isinstance(folder, _TreeNode), "Not a folder")
folder.remove(path[-1])
state = extra_state = None
continue
else:
extra_state.append(line)
elif state is _MERGE_CHANGED_IN_BOTH:
if line.startswith(b'@'):
# Done collecting tree lines, expect three, one for base
# and one for each copy
expect(len(extra_state) == 3, 'Wrong number of lines')
whose, mode, oid, path = _parsetree(extra_state[0])
expect(whose in (b'base', b'our', b'their'),
'Unexpected whose: %s', whose)
expect(mode == b'100644', 'Unexpected mode: %s', mode)
for extra_line in extra_state[1:]:
whose, mode, oid2, path2 = _parsetree(extra_line)
expect(whose in (b'base', b'our', b'their'),
'Unexpected whose: %s', whose)
expect(mode == b'100644', 'Unexpected mode: %s',
mode)
expect(path == path2, "Paths don't match")
parsed = path.decode('ascii').split('/')
folder = self.find(parsed[:-1])
expect(isinstance(folder, _TreeNode), "Not a folder")
name = parsed[-1]
blob = folder.get(name)
expect(isinstance(blob, _Blob), "Not a blob")
with _tempfile() as tmp:
shutil.copyfileobj(blob.open(), open(tmp, 'wb'))
with _popen(['patch', '-s', tmp, '-'],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE) as p:
f = p.stdin
while line and not _isalpha(line[0]):
if line[1:9] == b'<<<<<<< ':
raise ConflictError()
f.write(line)
line = stream.readline()
newblob = folder.new_blob(name, blob)
shutil.copyfileobj(open(tmp, 'rb'), newblob)
state = extra_state = None
continue
else:
extra_state.append(line)
elif state is _MERGE_ADDED_IN_BOTH:
if _isalpha(line[0]) or line.startswith(b'@'):
# Done collecting tree lines, expect two, one for base,
# one for our copy, whose sha1s should match
expect(len(extra_state) == 2, 'Wrong number of lines')
whose, mode, oid, path = _parsetree(extra_state[0])
expect(whose in (b'our', b'their'), 'Unexpected whose: %s',
whose)
expect(mode == b'100644', 'Unexpected mode: %s', mode)
whose, mode, oid2, path2 = _parsetree(extra_state[1])
expect(whose in (b'our', b'their'),
'Unexpected whose: %s', whose)
expect(mode == b'100644', 'Unexpected mode: %s', mode)
expect(path == path2, "Paths don't match")
# Either it's the same file or a different file.
if oid != oid2:
# Different files, can't merge
raise ConflictError()
# Same file, nothing to do
state = extra_state = None
continue
else:
extra_state.append(line)
line = stream.readline()
class _TreeNode(object):
parent = None
name = None
dirty = False
oid = None
@classmethod
def read(cls, db, oid, path_encoding):
node = cls(db, path_encoding)
node.oid = oid
contents = node.contents
with _popen(['git', 'ls-tree', oid],
stdout=subprocess.PIPE, cwd=db) as lstree:
for line in lstree.stdout.readlines():
mode, type, oid, name = _parsetree(line)
name = name.decode(path_encoding)
oid = _s(oid)
contents[name] = (type, oid, None)
return node
def __init__(self, db, path_encoding):
self.db = db
self.path_encoding = path_encoding
self.contents = {}
def get(self, name):
contents = self.contents
obj = contents.get(name)
if not obj:
return None
type, oid, obj = obj
assert type in (b'tree', b'blob')
if not obj:
if type == b'tree':
obj = _TreeNode.read(self.db, oid, self.path_encoding)
else:
obj = _Blob(self.db, oid)
obj.parent = self
obj.name = name
contents[name] = (type, oid, obj)
return obj
def find(self, path):
if not path:
return self
obj = self.get(path[0])
if obj:
return obj.find(path[1:])
def new_blob(self, name, prev):
obj = _NewBlob(self.db, prev)
obj.parent = self
obj.name = name
self.contents[name] = (b'blob', None, weakref.proxy(obj))
self.set_dirty()
return obj
def new_tree(self, name):
node = _TreeNode(self.db, self.path_encoding)
node.parent = self
node.name = name
self.contents[name] = (b'tree', None, node)
self.set_dirty()
return node
def remove(self, name):
entry = self.contents.pop(name)
self.set_dirty()
return entry
def set(self, name, entry):
self.contents[name] = entry
self.set_dirty()
def set_dirty(self):
node = self
while node and not node.dirty:
node.oid = None
node.dirty = True
node = node.parent
def save(self):
# Recursively save children, first
for name, (type, oid, obj) in list(self.contents.items()):
if not obj:
continue # Nothing to do
if isinstance(obj, _NewBlob):
raise ValueError("Cannot commit transaction with open files.")
elif type == b'tree' and (obj.dirty or not oid):
new_oid = obj.save()
self.contents[name] = (b'tree', new_oid, None)
# Save tree object out to database
with _popen(['git', 'mktree'], cwd=self.db,
stdin=subprocess.PIPE, stdout=subprocess.PIPE) as proc:
for name, (type, oid, obj) in self.contents.items():
proc.stdin.write(b'100644' if type == b'blob' else b'040000')
proc.stdin.write(b' ')
proc.stdin.write(type)
proc.stdin.write(b' ')
proc.stdin.write(_b(oid))
proc.stdin.write(b'\t')
proc.stdin.write(name.encode(self.path_encoding))
proc.stdin.write(b'\n')
proc.stdin.close()
oid = proc.stdout.read().strip()
self.oid = _s(oid)
return oid
def empty(self):
return not self.contents
def __contains__(self, name):
return name in self.contents
def hash(self):
if not self.oid:
self.save()
return self.oid
def _parsetree(line):
return line.strip().split(None, 3)
class _Blob(object):
def __init__(self, db, oid):
self.db = db
self.oid = oid
def open(self):
return _BlobStream(self.db, self.oid)
def find(self, path):
if not path:
return self
def hash(self):
return self.oid
class _NewBlob(io.RawIOBase):
def __init__(self, db, prev):
self.db = db
self.prev = prev
self.proc = subprocess.Popen(
['git', 'hash-object', '-w', '--stdin'],
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT, cwd=db)
def write(self, b):
self.proc.stdin.write(b)
return len(b)
def close(self):
if not self.closed:
super(_NewBlob, self).close()
self.proc.stdin.close()
oid = self.proc.stdout.read().strip()
self.proc.stdout.close()
retcode = self.proc.wait()
if retcode != 0:
raise subprocess.CalledProcessError(
retcode, 'git hash-object -w --stdin')
self.parent.contents[self.name] = (b'blob', _s(oid), None)
def writable(self):
return True
def open(self):
if self.prev:
return self.prev.open()
raise _NoSuchFileOrDirectory(_object_path(self))
def hash(self):
if self.prev:
return self.prev.hash()
raise _NoSuchFileOrDirectory(_object_path(self))
def find(self, path):
if not path:
return self
class _BlobStream(io.RawIOBase):
def __init__(self, db, oid):
self.proc = subprocess.Popen(
['git', 'cat-file', 'blob', oid],
stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=db)
self.oid = oid
def readable(self):
return True
def read(self, n=-1):
return self.proc.stdout.read(n)
def readinto(self, b):
"""
Although the documentation asserts a default implementation of this
method can be found in io.RawIOBase, there actually isn't one::
http://docs.python.org/py3k/library/io.html#io.RawIOBase
See::
http://bugs.python.org/issue9858
"""
return self.proc.stdout.readinto(b)
def close(self):
if not self.closed:
super(_BlobStream, self).close()
self.proc.stdout.close()
self.proc.stderr.close()
retcode = self.proc.wait()
if retcode != 0:
raise subprocess.CalledProcessError(
retcode, 'git cat-file blob %s' % self.oid)
def _object_path(obj):
path = []
node = obj
while node.parent:
path.insert(0, node.name)
node = node.parent
return '/'.join(path)
@contextlib.contextmanager
def _popen(args, **kw):
proc = subprocess.Popen(args, **kw)
yield proc
for stream in (proc.stdin, proc.stdout, proc.stderr):
if stream is not None:
stream.close()
retcode = proc.wait()
if retcode != 0:
raise subprocess.CalledProcessError(retcode, repr(args))
@contextlib.contextmanager
def _tempfile():
fd, tmp = tempfile.mkstemp('.acidfs-merge')
os.close(fd)
yield tmp
os.remove(tmp)
def _NoSuchFileOrDirectory(path):
return IOError(2, 'No such file or directory', path)
def _IsADirectory(path):
return IOError(21, 'Is a directory', path)
def _NotADirectory(path):
return IOError(20, 'Not a directory', path)
def _FileExists(path):
return IOError(17, 'File exists', path)
def _DirectoryNotEmpty(path):
return IOError(39, 'Directory not empty', path)
_MERGE_ADDED_IN_REMOTE = object()
_MERGE_REMOVED_IN_REMOTE = object()
_MERGE_CHANGED_IN_BOTH = object()
_MERGE_ADDED_IN_BOTH = object()
try:
# Python >= 2.7
_check_output = subprocess.check_output
except AttributeError: # pragma NO COVER
def _check_output(*popenargs, **kwargs):
"""
Stolen straight from Python 2.7.
"""
if 'stdout' in kwargs:
raise ValueError(
'stdout argument not allowed, it will be overridden.')
process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
output, unused_err = process.communicate()
retcode = process.poll()
if retcode:
cmd = kwargs.get("args")
if cmd is None:
cmd = popenargs[0]
raise subprocess.CalledProcessError(retcode, cmd, output=output)
return output
if sys.version_info[0] == 2: # pragma NO COVER
_b = lambda s: s
_s = lambda b: b
_isalpha = lambda s: s.isalpha()
else: # pragma NO COVER
def _b(s):
if isinstance(s, str):
s = bytes(s, 'ascii')
return s
def _s(b):
if isinstance(b, bytes):
b = str(b, 'ascii')
return b
aa, zz, AA, ZZ = ord('a'), ord('z'), ord('A'), ord('Z')
def _isalpha(b):
return aa <= b <= zz or AA <= b <= ZZ