viewvc-4intranet/lib/blame.py

593 lines
20 KiB
Python
Raw Normal View History

#!/usr/local/bin/python
# -*-python-*-
#
# Copyright (C) 2000-2002 The ViewCVS Group. All Rights Reserved.
# Copyright (C) 2000 Curt Hagenlocher <curt@hagenlocher.org>
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewCVS
# distribution or at http://viewcvs.sourceforge.net/license-1.html.
#
# Contact information:
# Greg Stein, PO Box 760, Palo Alto, CA, 94302
# gstein@lyra.org, http://viewcvs.sourceforge.net/
#
# -----------------------------------------------------------------------
#
# blame.py: Annotate each line of a CVS file with its author,
# revision #, date, etc.
#
# -----------------------------------------------------------------------
#
# This software is being maintained as part of the ViewCVS project.
# Information is available at:
# http://viewcvs.sourceforge.net/
#
# This file is based on the cvsblame.pl portion of the Bonsai CVS tool,
# developed by Steve Lamm for Netscape Communications Corporation. More
# information about Bonsai can be found at
# http://www.mozilla.org/bonsai.html
#
# cvsblame.pl, in turn, was based on Scott Furman's cvsblame script
#
# -----------------------------------------------------------------------
#
import string
import os
import re
import time
import math
import cgi
import vclib
from vclib.ccvs import rcsparse
class CVSParser(rcsparse.Sink):
# Precompiled regular expressions
trunk_rev = re.compile('^[0-9]+\\.[0-9]+$')
last_branch = re.compile('(.*)\\.[0-9]+')
is_branch = re.compile('^(.*)\\.0\\.([0-9]+)$')
d_command = re.compile('^d(\d+)\\s(\\d+)')
a_command = re.compile('^a(\d+)\\s(\\d+)')
SECONDS_PER_DAY = 86400
def __init__(self):
self.Reset()
def Reset(self):
self.last_revision = {}
self.prev_revision = {}
self.revision_date = {}
self.revision_author = {}
self.revision_branches = {}
self.next_delta = {}
self.prev_delta = {}
self.tag_revision = {}
self.timestamp = {}
self.revision_ctime = {}
self.revision_age = {}
self.revision_log = {}
self.revision_deltatext = {}
self.revision_map = [] # map line numbers to revisions
self.lines_added = {}
self.lines_removed = {}
# Map a tag to a numerical revision number. The tag can be a symbolic
# branch tag, a symbolic revision tag, or an ordinary numerical
# revision number.
def map_tag_to_revision(self, tag_or_revision):
try:
revision = self.tag_revision[tag_or_revision]
match = self.is_branch.match(revision)
if match:
branch = match.group(1) + '.' + match.group(2)
if self.last_revision.get(branch):
return self.last_revision[branch]
else:
return match.group(1)
else:
return revision
except:
return ''
# Construct an ordered list of ancestor revisions to the given
# revision, starting with the immediate ancestor and going back
# to the primordial revision (1.1).
#
# Note: The generated path does not traverse the tree the same way
# that the individual revision deltas do. In particular,
# the path traverses the tree "backwards" on branches.
def ancestor_revisions(self, revision):
ancestors = []
revision = self.prev_revision.get(revision)
while revision:
ancestors.append(revision)
revision = self.prev_revision.get(revision)
return ancestors
# Split deltatext specified by rev to each line.
def deltatext_split(self, rev):
lines = string.split(self.revision_deltatext[rev], '\n')
if lines[-1] == '':
del lines[-1]
return lines
# Extract the given revision from the digested RCS file.
# (Essentially the equivalent of cvs up -rXXX)
def extract_revision(self, revision):
path = []
add_lines_remaining = 0
start_line = 0
count = 0
while revision:
path.append(revision)
revision = self.prev_delta.get(revision)
path.reverse()
path = path[1:] # Get rid of head revision
text = self.deltatext_split(self.head_revision)
# Iterate, applying deltas to previous revision
for revision in path:
adjust = 0
diffs = self.deltatext_split(revision)
self.lines_added[revision] = 0
self.lines_removed[revision] = 0
lines_added_now = 0
lines_removed_now = 0
for command in diffs:
dmatch = self.d_command.match(command)
amatch = self.a_command.match(command)
if add_lines_remaining > 0:
# Insertion lines from a prior "a" command
text.insert(start_line + adjust, command)
add_lines_remaining = add_lines_remaining - 1
adjust = adjust + 1
elif dmatch:
# "d" - Delete command
start_line = string.atoi(dmatch.group(1))
count = string.atoi(dmatch.group(2))
begin = start_line + adjust - 1
del text[begin:begin + count]
adjust = adjust - count
lines_removed_now = lines_removed_now + count
elif amatch:
# "a" - Add command
start_line = string.atoi(amatch.group(1))
count = string.atoi(amatch.group(2))
add_lines_remaining = count
lines_added_now = lines_added_now + count
else:
raise RuntimeError, 'Error parsing diff commands'
self.lines_added[revision] = self.lines_added[revision] + lines_added_now
self.lines_removed[revision] = self.lines_removed[revision] + lines_removed_now
return text
def set_head_revision(self, revision):
self.head_revision = revision
def set_principal_branch(self, branch_name):
self.principal_branch = branch_name
def define_tag(self, name, revision):
# Create an associate array that maps from tag name to
# revision number and vice-versa.
self.tag_revision[name] = revision
def set_comment(self, comment):
self.file_description = comment
def set_description(self, description):
self.rcs_file_description = description
# Construct dicts that represent the topology of the RCS tree
# and other arrays that contain info about individual revisions.
#
# The following dicts are created, keyed by revision number:
# self.revision_date -- e.g. "96.02.23.00.21.52"
# self.timestamp -- seconds since 12:00 AM, Jan 1, 1970 GMT
# self.revision_author -- e.g. "tom"
# self.revision_branches -- descendant branch revisions, separated by spaces,
# e.g. "1.21.4.1 1.21.2.6.1"
# self.prev_revision -- revision number of previous *ancestor* in RCS tree.
# Traversal of this array occurs in the direction
# of the primordial (1.1) revision.
# self.prev_delta -- revision number of previous revision which forms
# the basis for the edit commands in this revision.
# This causes the tree to be traversed towards the
# trunk when on a branch, and towards the latest trunk
# revision when on the trunk.
# self.next_delta -- revision number of next "delta". Inverts prev_delta.
#
# Also creates self.last_revision, keyed by a branch revision number, which
# indicates the latest revision on a given branch,
# e.g. self.last_revision{"1.2.8"} == 1.2.8.5
def define_revision(self, revision, timestamp, author, state,
branches, next):
self.tag_revision[revision] = revision
branch = self.last_branch.match(revision).group(1)
self.last_revision[branch] = revision
#self.revision_date[revision] = date
self.timestamp[revision] = timestamp
# Pretty print the date string
ltime = time.localtime(self.timestamp[revision])
formatted_date = time.strftime("%d %b %Y %H:%M", ltime)
self.revision_ctime[revision] = formatted_date
# Save age
self.revision_age[revision] = ((time.time() - self.timestamp[revision])
/ self.SECONDS_PER_DAY)
# save author
self.revision_author[revision] = author
# ignore the state
# process the branch information
branch_text = ''
for branch in branches:
self.prev_revision[branch] = revision
self.next_delta[revision] = branch
self.prev_delta[branch] = revision
branch_text = branch_text + branch + ''
self.revision_branches[revision] = branch_text
# process the "next revision" information
if next:
self.next_delta[revision] = next
self.prev_delta[next] = revision
is_trunk_revision = self.trunk_rev.match(revision) is not None
if is_trunk_revision:
self.prev_revision[revision] = next
else:
self.prev_revision[next] = revision
# Construct associative arrays containing info about individual revisions.
#
# The following associative arrays are created, keyed by revision number:
# revision_log -- log message
# revision_deltatext -- Either the complete text of the revision,
# in the case of the head revision, or the
# encoded delta between this revision and another.
# The delta is either with respect to the successor
# revision if this revision is on the trunk or
# relative to its immediate predecessor if this
# revision is on a branch.
def set_revision_info(self, revision, log, text):
self.revision_log[revision] = log
self.revision_deltatext[revision] = text
def parse_cvs_file(self, rcs_pathname, opt_rev = None, opt_m_timestamp = None):
# Args in: opt_rev - requested revision
# opt_m - time since modified
# Args out: revision_map
# timestamp
# revision_deltatext
# CheckHidden(rcs_pathname)
try:
rcsfile = open(rcs_pathname, 'rb')
except:
raise RuntimeError, ('error: %s appeared to be under CVS control, ' +
'but the RCS file is inaccessible.') % rcs_pathname
rcsparse.Parser().parse(rcsfile, self)
rcsfile.close()
if opt_rev in [None, '', 'HEAD']:
# Explicitly specified topmost revision in tree
revision = self.head_revision
else:
# Symbolic tag or specific revision number specified.
revision = self.map_tag_to_revision(opt_rev)
if revision == '':
raise RuntimeError, 'error: -r: No such revision: ' + opt_rev
# The primordial revision is not always 1.1! Go find it.
primordial = revision
while self.prev_revision.get(primordial):
primordial = self.prev_revision[primordial]
# Don't display file at all, if -m option is specified and no
# changes have been made in the specified file.
if opt_m_timestamp and self.timestamp[revision] < opt_m_timestamp:
return ''
# Figure out how many lines were in the primordial, i.e. version 1.1,
# check-in by moving backward in time from the head revision to the
# first revision.
line_count = 0
if self.revision_deltatext.get(self.head_revision):
tmp_array = self.deltatext_split(self.head_revision)
line_count = len(tmp_array)
skip = 0
rev = self.prev_revision.get(self.head_revision)
while rev:
diffs = self.deltatext_split(rev)
for command in diffs:
dmatch = self.d_command.match(command)
amatch = self.a_command.match(command)
if skip > 0:
# Skip insertion lines from a prior "a" command
skip = skip - 1
elif dmatch:
# "d" - Delete command
start_line = string.atoi(dmatch.group(1))
count = string.atoi(dmatch.group(2))
line_count = line_count - count
elif amatch:
# "a" - Add command
start_line = string.atoi(amatch.group(1))
count = string.atoi(amatch.group(2))
skip = count
line_count = line_count + count
else:
raise RuntimeError, 'error: illegal RCS file'
rev = self.prev_revision.get(rev)
# Now, play the delta edit commands *backwards* from the primordial
# revision forward, but rather than applying the deltas to the text of
# each revision, apply the changes to an array of revision numbers.
# This creates a "revision map" -- an array where each element
# represents a line of text in the given revision but contains only
# the revision number in which the line was introduced rather than
# the line text itself.
#
# Note: These are backward deltas for revisions on the trunk and
# forward deltas for branch revisions.
# Create initial revision map for primordial version.
self.revision_map = [primordial] * line_count
ancestors = [revision, ] + self.ancestor_revisions(revision)
ancestors = ancestors[:-1] # Remove "1.1"
last_revision = primordial
ancestors.reverse()
for revision in ancestors:
is_trunk_revision = self.trunk_rev.match(revision) is not None
if is_trunk_revision:
diffs = self.deltatext_split(last_revision)
# Revisions on the trunk specify deltas that transform a
# revision into an earlier revision, so invert the translation
# of the 'diff' commands.
for command in diffs:
if skip > 0:
skip = skip - 1
else:
dmatch = self.d_command.match(command)
amatch = self.a_command.match(command)
if dmatch:
start_line = string.atoi(dmatch.group(1))
count = string.atoi(dmatch.group(2))
temp = []
while count > 0:
temp.append(revision)
count = count - 1
self.revision_map = (self.revision_map[:start_line - 1] +
temp + self.revision_map[start_line - 1:])
elif amatch:
start_line = string.atoi(amatch.group(1))
count = string.atoi(amatch.group(2))
del self.revision_map[start_line:start_line + count]
skip = count
else:
raise RuntimeError, 'Error parsing diff commands'
else:
# Revisions on a branch are arranged backwards from those on
# the trunk. They specify deltas that transform a revision
# into a later revision.
adjust = 0
diffs = self.deltatext_split(revision)
for command in diffs:
if skip > 0:
skip = skip - 1
else:
dmatch = self.d_command.match(command)
amatch = self.a_command.match(command)
if dmatch:
start_line = string.atoi(dmatch.group(1))
count = string.atoi(dmatch.group(2))
del self.revision_map[start_line + adjust - 1:start_line + adjust - 1 + count]
adjust = adjust - count
elif amatch:
start_line = string.atoi(amatch.group(1))
count = string.atoi(amatch.group(2))
skip = count
temp = []
while count > 0:
temp.append(revision)
count = count - 1
self.revision_map = (self.revision_map[:start_line + adjust] +
temp + self.revision_map[start_line + adjust:])
adjust = adjust + skip
else:
raise RuntimeError, 'Error parsing diff commands'
last_revision = revision
return revision
re_includes = re.compile('\\#(\\s*)include(\\s*)"(.*?)"')
def link_includes(text, repos, path_parts, include_url):
match = re_includes.match(text)
if match:
incfile = match.group(3)
# check current directory and parent directory for file
for depth in (-1, -2):
include_path = path_parts[:depth] + [incfile]
try:
repos.rcsfile(include_path) # will throw if path doesn't exist
break
except vclib.ItemNotFound:
pass
else:
include_path = None
if include_path:
url = string.replace(include_url, '/WHERE/',
string.join(include_path, '/'))
return '#%sinclude%s"<a href="%s">%s</a>"' % \
(match.group(1), match.group(2), url, incfile)
return text
class BlameSource:
def __init__(self, repos, path_parts, diff_url, include_url, opt_rev = None):
# Parse the CVS file
parser = CVSParser()
revision = parser.parse_cvs_file(repos.rcsfile(path_parts, 1), opt_rev)
count = len(parser.revision_map)
lines = parser.extract_revision(revision)
if len(lines) != count:
raise RuntimeError, 'Internal consistency error'
# set up some state variables
self.repos = repos
self.path_parts = path_parts
self.diff_url = diff_url
self.include_url = include_url
self.revision = revision
self.lines = lines
self.num_lines = count
self.parser = parser
# keep track of where we are during an iteration
self.idx = -1
self.last = None
def __getitem__(self, idx):
if idx == self.idx:
return self.last
if idx >= self.num_lines:
raise IndexError("No more annotations")
if idx != self.idx + 1:
raise BlameSequencingError()
# Get the line, escape HTML, and (maybe) add include links.
rev = self.parser.revision_map[idx]
prev_rev = self.parser.prev_revision.get(rev)
line_number = idx + 1
author = self.parser.revision_author[rev]
diff_url = None
if prev_rev:
diff_url = '%sr1=%s&amp;r2=%s' % (self.diff_url, prev_rev, rev)
thisline = self.lines[idx]
thisline = cgi.escape(thisline)
if 1: #cfg.options.blame_link_includes:
thisline = link_includes(thisline, self.repos, self.path_parts,
self.include_url)
item = _item(text=thisline, line_number=line_number, rev=rev,
prev_rev=prev_rev, diff_url=diff_url, author=author)
self.last = item
self.idx = idx
return item
class BlameSequencingError(Exception):
pass
class _item:
def __init__(self, **kw):
vars(self).update(kw)
def make_html(root, rcs_path, opt_rev = None, sticky = None):
bs = BlameSource(root, rcs_path, opt_rev, sticky)
count = bs.num_lines
if count == 0:
count = 1
line_num_width = int(math.log10(count)) + 1
revision_width = 3
author_width = 5
line = 0
old_revision = 0
row_color = ''
inMark = 0
rev_count = 0
open_table_tag = '<table border="0" cellpadding="0" cellspacing="0" width="100%">'
startOfRow = '<tr><td colspan="3"%s><pre>'
endOfRow = '</td></tr>'
print open_table_tag + (startOfRow % '')
for line_data in bs:
revision = line_data.rev
thisline = line_data.text
line = line_data.line_number
author = line_data.author
prev_rev = line_data.prev_rev
diff_url = line_data.diff_url
output = ''
if old_revision != revision and line != 1:
if row_color == '':
row_color = ' bgcolor="#e7e7e7"'
else:
row_color = ''
if not inMark:
output = output + endOfRow + (startOfRow % row_color)
output = output + '<a name="%d">%*d</a>' % (line, line_num_width, line)
if old_revision != revision or rev_count > 20:
revision_width = max(revision_width, len(revision))
output = output + ' '
if diff_url:
output = output + '<a href="%s">' % diff_url
author_width = max(author_width, len(author))
output = output + ('%-*s ' % (author_width, author))
output = output + revision
if prev_rev:
output = output + '</a>'
output = output + (' ' * (revision_width - len(revision) + 1))
old_revision = revision
rev_count = 0
else:
output = output + ' ' + (' ' * (author_width + revision_width))
rev_count = rev_count + 1
output = output + thisline
# Close the highlighted section
#if (defined $mark_cmd and mark_cmd != 'begin'):
# chop($output)
# output = output + endOfRow + (startOfRow % row_color)
# inMark = 0
print output
print endOfRow + '</table>'
def main():
import sys
if len(sys.argv) != 3:
print 'USAGE: %s cvsroot rcs-file' % sys.argv[0]
sys.exit(1)
make_html(sys.argv[1], sys.argv[2])
if __name__ == '__main__':
main()