Finish, I believe, issue #353 ("Remote Subversion repositories not

fully honoring authz rules").  In doing so, this makes the svn_ra
module the most accurate it has ever been, hopefully without too
terribly much extra cost.

* lib/vclib/svn/svn_ra.py
  (RemoteSubversionRepository.listdir): No longer check access here.
    That's handled down in _get_dirents() now.
  (RemoteSubversionRepository.dirlogs): Don't check dirent access here.
    Just ensure that the returned entries are those which are
    represented in the (filtered) set return from _get_dirents().
    Also, replace the revision metadata stored on each dirent with
    values from the revinfo cache (which does authz sanitizing).
  (RemoteSubversionRepository._get_dirents): Do authz-checking of
    dirents here, and use _get_last_history_rev() to populate a useful
    created_rev.
  (RemoteSubversionRepository._get_last_history_rev): Because
    Subversion's RA layer doesn't consider copy events when
    determining an item's last-committed-rev, compensate for this with
    a bounded log operation which does.  This brings created-rev
    parity with the svn_repos module (at the cost of the extra RA
    work, but with the benefit of, you know, accuracy.
  (RemoteSubversionRepository._revinfo_raw): Leave a TODO about a
    future optimization.
  (RemoteSubversionRepository._log_cb): Set found_unreadable when
    sanitizing an unreadable copyfrom path, too.
  (RemoteSubversionRepository.created_rev): Now just a thin wrapper
    around the beefed-up _get_last_history_rev() function.


git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@2760 8cb11bc2-c004-0410-86c3-e597b4017df7
trunk
cmpilato 2012-06-15 23:34:01 +00:00
parent ada79bc6e7
commit fee21df5d7
1 changed files with 60 additions and 23 deletions

View File

@ -268,7 +268,7 @@ class RemoteSubversionRepository(vclib.Repository):
if self.itemtype(path_parts, rev) != vclib.DIR: # does auth-check
raise vclib.Error("Path '%s' is not a directory." % path)
rev = self._getrev(rev)
entries = [ ]
entries = []
dirents, locks = self._get_dirents(path, rev)
for name in dirents.keys():
entry = dirents[name]
@ -276,8 +276,9 @@ class RemoteSubversionRepository(vclib.Repository):
kind = vclib.DIR
elif entry.kind == core.svn_node_file:
kind = vclib.FILE
if vclib.check_path_access(self, path_parts + [name], kind, rev):
entries.append(vclib.DirEntry(name, kind))
else:
kind = None
entries.append(vclib.DirEntry(name, kind))
return entries
def dirlogs(self, path_parts, rev, entries, options):
@ -288,9 +289,11 @@ class RemoteSubversionRepository(vclib.Repository):
dirents, locks = self._get_dirents(path, rev)
for entry in entries:
entry_path_parts = path_parts + [entry.name]
if not vclib.check_path_access(self, entry_path_parts, entry.kind, rev):
dirent = dirents.get(entry.name, None)
# dirents is authz-sanitized, so ensure the entry is found therein.
if dirent is None:
continue
dirent = dirents[entry.name]
# Get authz-sanitized revision metadata.
entry.date, entry.author, entry.log, revprops, changes = \
self.revinfo(dirent.created_rev)
entry.rev = str(dirent.created_rev)
@ -333,6 +336,10 @@ class RemoteSubversionRepository(vclib.Repository):
revs.sort()
prev = None
for rev in revs:
# Swap out revision info with stuff from the cache (which is
# authz-sanitized).
rev.date, rev.author, rev.log, revprops, changes \
= self.revinfo(rev.number)
rev.prev = prev
prev = rev
revs.reverse()
@ -452,32 +459,73 @@ class RemoteSubversionRepository(vclib.Repository):
def _get_dirents(self, path, rev):
"""Return a 2-type of dirents and locks, possibly reading/writing
from a local cache of that information."""
from a local cache of that information. This functions performs
authz checks, stripping out unreadable dirents."""
dir_url = self._geturl(path)
if path:
key = str(rev) + '/' + path
else:
key = str(rev)
# Ensure that the cache gets filled...
dirents_locks = self._dirent_cache.get(key)
if not dirents_locks:
dirents, locks = list_directory(dir_url, _rev2optrev(rev),
_rev2optrev(rev), 0, self.ctx)
tmp_dirents, locks = list_directory(dir_url, _rev2optrev(rev),
_rev2optrev(rev), 0, self.ctx)
dirents = {}
for name, dirent in tmp_dirents.items():
dirent_parts = _path_parts(path) + [name]
kind = dirent.kind
if (kind == core.svn_node_dir or kind == core.svn_node_file) \
and vclib.check_path_access(self, dirent_parts,
kind == core.svn_node_dir \
and vclib.DIR or vclib.FILE, rev):
dirent.created_rev = self._get_last_history_rev(dirent_parts, rev)
dirents[name] = dirent
dirents_locks = [dirents, locks]
self._dirent_cache[key] = dirents_locks
# ...then return the goodies from the cache.
return dirents_locks[0], dirents_locks[1]
def _get_last_history_rev(self, path_parts, rev):
"""Return the last interesting revision equal to or older than REV
in the history of PATH_PARTS."""
path = self._getpath(path_parts)
url = self._geturl(self._getpath(path_parts))
optrev = _rev2optrev(rev)
# Get the last-changed-rev.
revisions = []
def _info_cb(path, info, pool, retval=revisions):
revisions.append(info.last_changed_rev)
client.svn_client_info(url, optrev, optrev, _info_cb, 0, self.ctx)
return revisions[0]
last_changed_rev = revisions[0]
# Now, this object might not have been directly edited since the
# last-changed-rev, but it might have been the child of a copy.
# To determine this, we'll run a potentially no-op log between
# LAST_CHANGED_REV and REV.
lc = LogCollector(path, 1, None, None)
client_log(url, optrev, _rev2optrev(last_changed_rev), 1, 0,
lc.add_log, self.ctx)
revs = lc.logs
if revs:
revs.sort()
return revs[0].number
else:
return last_changed_rev
def _revinfo_raw(self, rev):
# return 5-tuple (date, author, msg, revprops, changes)
### TODO: This function and its related cache would benefit from
### optimizations such as what the matching svn_repos functions
### have, where the 'changes' information is only fully
### calculated/authz-sanitized when the caller actually needs it.
optrev = _rev2optrev(rev)
revs = []
@ -542,10 +590,11 @@ class RemoteSubversionRepository(vclib.Repository):
if vclib.check_path_access(self, parts, vclib.FILE, revision):
if is_copy and base_path and (base_path != path):
parts = _path_parts(base_path)
if vclib.check_path_access(self, parts, vclib.FILE, base_rev):
if not vclib.check_path_access(self, parts, vclib.FILE, base_rev):
is_copy = 0
base_path = None
base_rev = None
found_unreadable = 1
changes.append(SVNChangedPath(path, revision, pathtype, base_path,
base_rev, action, is_copy,
text_modified, props_modified))
@ -588,19 +637,7 @@ class RemoteSubversionRepository(vclib.Repository):
return old_path
def created_rev(self, path, rev):
# NOTE: We can't use svn_client_propget here because the
# interfaces in that layer strip out the properties not meant for
# human consumption (such as svn:entry:committed-rev, which we are
# using here to get the created revision of PATH@REV).
kind = ra.svn_ra_check_path(self.ra_session, path, rev)
if kind == core.svn_node_none:
raise vclib.ItemNotFound(_path_parts(path))
elif kind == core.svn_node_dir:
props = get_directory_props(self.ra_session, path, rev)
elif kind == core.svn_node_file:
fetched_rev, props = ra.svn_ra_get_file(self.ra_session, path, rev, None)
return int(props.get(core.SVN_PROP_ENTRY_COMMITTED_REV,
SVN_INVALID_REVNUM))
return self._get_last_history_rev(_path_parts(path), rev)
def last_rev(self, path, peg_revision, limit_revision=None):
"""Given PATH, known to exist in PEG_REVISION, find the youngest