diff --git a/lib/blame.py b/lib/blame.py index 342ff169..a6a681ca 100644 --- a/lib/blame.py +++ b/lib/blame.py @@ -40,17 +40,10 @@ import re import time import math import cgi +import rcsparse -path_sep = os.path.normpath('/')[-1] - -class CVSParser: +class CVSParser(rcsparse.Sink): # Precompiled regular expressions - nonws_token = re.compile('^([^;@][^;\\s]*)\\s*') - semic_token = re.compile('^;\\s*') - rcsen_token = re.compile('^@([^@]*)') - undo_escape = re.compile('@@') - odd_at = re.compile('(([^@]|^)(@@)*)@([^@]|$)') - rcs_tree = re.compile('^\\d') trunk_rev = re.compile('^[0-9]+\\.[0-9]+$') last_branch = re.compile('(.*)\\.[0-9]+') is_branch = re.compile('(.*)\\.0\\.([0-9]+)') @@ -63,9 +56,6 @@ class CVSParser: self.Reset() def Reset(self): - self.line_buffer = '' - self.rcsfile = None - self.debug = 0 self.last_revision = {} self.prev_revision = {} self.revision_date = {} @@ -73,7 +63,6 @@ class CVSParser: self.revision_branches = {} self.next_delta = {} self.prev_delta = {} - self.feof = 0 self.tag_revision = {} self.revision_symbolic_name = {} self.timestamp = {} @@ -85,66 +74,6 @@ class CVSParser: self.lines_added = {} self.lines_removed = {} - # Get the next token from the RCS file - def get_token(self): - # Erase all-whitespace lines - while len(self.line_buffer) == 0: - self.line_buffer = self.rcsfile.readline() - if self.line_buffer == '': - raise RuntimeError, 'EOF' - self.line_buffer = string.lstrip(self.line_buffer) - - # A string of non-whitespace characters is a token - match = self.nonws_token.match(self.line_buffer) - if match: - self.line_buffer = self.nonws_token.sub('', self.line_buffer) - return match.group(1) - - # ...and so is a single semicolon - if self.semic_token.match(self.line_buffer): - self.line_buffer = self.semic_token.sub('', self.line_buffer) - return ';' - - # ...or an RCS-encoded string that starts with an @ character - match = self.rcsen_token.match(self.line_buffer) - self.line_buffer = self.rcsen_token.sub('', self.line_buffer) - token = match.group(1) - - # Detect odd @ character used to close RCS-encoded string - while string.find(self.line_buffer, '@') < 0 or not self.odd_at.search(self.line_buffer): - token = token + self.line_buffer - self.line_buffer = self.rcsfile.readline() - if self.line_buffer == '': - raise RuntimeError, 'EOF' - - # Retain the remainder of the line after the terminating @ character - i = self.odd_at.search(self.line_buffer).end(1) - token = token + self.line_buffer[:i] - self.line_buffer = self.line_buffer[i+1:] - - # Undo escape-coding of @ characters. - token = self.undo_escape.sub('@', token) - - # Digest any extra blank lines - while len(self.line_buffer) == 0 or self.line_buffer == '\n': - self.line_buffer = self.rcsfile.readline() - if self.line_buffer == '': - self.feof = 1 - break - - return token - - # Try to match the next token from the input buffer - def match_token(self, match): - token = self.get_token() - if token != match: - raise RuntimeError, ('Unexpected parsing error in RCS file.\n' + - 'Expected token: %s, but saw: %s' % (match, token)) - - # Push RCS token back into the input buffer. - def unget_token(self, token): - self.line_buffer = token + " " + self.line_buffer - # Map a tag to a numerical revision number. The tag can be a symbolic # branch tag, a symbolic revision tag, or an ordinary numerical # revision number. @@ -239,49 +168,25 @@ class CVSParser: self.lines_removed[revision] = self.lines_removed[revision] + lines_removed_now return text - def parse_rcs_admin(self): - while 1: - # Read initial token at beginning of line - token = self.get_token() + def set_head_revision(self, revision): + self.head_revision = revision - # We're done once we reach the description of the RCS tree - if self.rcs_tree.match(token): - self.unget_token(token) - return + def set_principal_branch(self, branch_name): + self.principal_branch = branch_name - # print "token:", token + def define_tag(self, name, revision): + # Create an associate array that maps from tag name to + # revision number and vice-versa. + self.tag_revision[name] = revision - if token == "head": - self.head_revision = self.get_token() - self.get_token() # Eat semicolon - elif token == "branch": - self.principal_branch = self.get_token() - self.get_token() # Eat semicolon - elif token == "symbols": - # Create an associate array that maps from tag name to - # revision number and vice-versa. - while 1: - tag = self.get_token() - if tag == ';': - break - (tag_name, tag_rev) = string.split(tag, ':') - self.tag_revision[tag_name] = tag_rev - self.revision_symbolic_name[tag_rev] = tag_name - elif token == "comment": - self.file_description = self.get_token() - self.get_token() # Eat semicolon + ### actually, this is a bit bogus... a rev can have multiple names + self.revision_symbolic_name[revision] = name - # Ignore all these other fields - We don't care about them. - elif token in ("locks", "strict", "expand", "access"): - while 1: - tag = self.get_token() - if tag == ';': - break - else: - pass - # warn("Unexpected RCS token: $token\n") + def set_comment(self, comment): + self.file_description = comment - raise RuntimeError, "Unexpected EOF"; + def set_description(self, description): + self.rcs_file_description = description # Construct dicts that represent the topology of the RCS tree # and other arrays that contain info about individual revisions. @@ -305,91 +210,47 @@ class CVSParser: # Also creates self.last_revision, keyed by a branch revision number, which # indicates the latest revision on a given branch, # e.g. self.last_revision{"1.2.8"} == 1.2.8.5 + def define_revision(self, revision, timestamp, author, state, + branches, next): + self.tag_revision[revision] = revision + branch = self.last_branch.match(revision).group(1) + self.last_revision[branch] = revision - def parse_rcs_tree(self): - while 1: - revision = self.get_token() + #self.revision_date[revision] = date + self.timestamp[revision] = timestamp - # End of RCS tree description ? - if revision == 'desc': - self.unget_token(revision) - return + # Pretty print the date string + ltime = time.localtime(self.timestamp[revision]) + formatted_date = time.strftime("%d %b %Y %H:%M", ltime) + self.revision_ctime[revision] = formatted_date + # Save age + self.revision_age[revision] = ((time.time() - self.timestamp[revision]) + / self.SECONDS_PER_DAY) + + # save author + self.revision_author[revision] = author + + # ignore the state + + # process the branch information + branch_text = '' + for branch in branches: + self.prev_revision[branch] = revision + self.next_delta[revision] = branch + self.prev_delta[branch] = revision + branch_text = branch_text + branch + '' + self.revision_branches[revision] = branch_text + + # process the "next revision" information + if next: + self.next_delta[revision] = next + self.prev_delta[next] = revision is_trunk_revision = self.trunk_rev.match(revision) is not None - - self.tag_revision[revision] = revision - branch = self.last_branch.match(revision).group(1) - self.last_revision[branch] = revision - - # Parse date - self.match_token('date') - date = self.get_token() - self.revision_date[revision] = date - self.match_token(';') - - # Convert date into timestamp - date_fields = string.split(date, '.') + ['0', '0', '0'] - date_fields = map(string.atoi, date_fields) - if date_fields[0] < 100: - date_fields[0] = date_fields[0] + 1900 - self.timestamp[revision] = time.mktime(date_fields) - - # Pretty print the date string - ltime = time.localtime(self.timestamp[revision]) - formatted_date = time.strftime("%d %b %Y %H:%M", ltime) - self.revision_ctime[revision] = formatted_date - - # Save age - self.revision_age[revision] = ( - (time.time() - self.timestamp[revision]) / self.SECONDS_PER_DAY) - - # Parse author - self.match_token('author') - author = self.get_token() - self.revision_author[revision] = author - self.match_token(';') - - # Parse state - self.match_token('state') - while self.get_token() != ';': - pass - - # Parse branches - self.match_token('branches') - branches = '' - while 1: - token = self.get_token() - if token == ';': - break - self.prev_revision[token] = revision - self.prev_delta[token] = revision - branches = branches + token + ' ' - self.revision_branches[revision] = branches - - # Parse revision of next delta in chain - self.match_token('next') - next = '' - token = self.get_token() - if token != ';': - next = token - self.get_token() # Eat semicolon - self.next_delta[revision] = next - self.prev_delta[next] = revision - if is_trunk_revision: - self.prev_revision[revision] = next - else: - self.prev_revision[next] = revision - - if self.debug >= 3: - print "
revision =", revision
-        print "date     = ", date
-        print "author   = ", author
-        print "branches = ", branches
-        print "next     = ", next + "
\n" - - def parse_rcs_description(self): - self.match_token('desc') - self.rcs_file_description = self.get_token() + if is_trunk_revision: + self.prev_revision[revision] = next + else: + self.prev_revision[next] = revision # Construct associative arrays containing info about individual revisions. # @@ -402,36 +263,9 @@ class CVSParser: # revision if this revision is on the trunk or # relative to its immediate predecessor if this # revision is on a branch. - def parse_rcs_deltatext(self): - while not self.feof: - revision = self.get_token() - if self.debug >= 3: - print "Reading delta for revision:", revision - self.match_token('log') - self.revision_log[revision] = self.get_token() - self.match_token('text') - self.revision_deltatext[revision] = self.get_token() - - def parse_rcs_file(self): - if self.debug >= 2: - print "Reading RCS admin..." - self.parse_rcs_admin() - if self.debug >= 2: - print "Reading RCS revision tree topology..." - self.parse_rcs_tree() - - if self.debug >= 3: - print "
Keys:\n"
-      for i in self.tag_revision.keys():
-        print "yoyuo %s: %s" % (i, self.tag_revision[i])
-      print "
" - - self.parse_rcs_description() - if self.debug >= 2: - print "Reading RCS revision deltas..." - self.parse_rcs_deltatext() - if self.debug >= 2: - print "Done reading RCS file..." + def set_revision_info(self, revision, log, text): + self.revision_log[revision] = log + self.revision_deltatext[revision] = text def parse_cvs_file(self, rcs_pathname, opt_rev = None, opt_m_timestamp = None): # Args in: opt_rev - requested revision @@ -442,13 +276,13 @@ class CVSParser: # CheckHidden(rcs_pathname); try: - self.rcsfile = open(rcs_pathname, 'r') + rcsfile = open(rcs_pathname, 'r') except: raise RuntimeError, ('error: %s appeared to be under CVS control, ' + 'but the RCS file is inaccessible.') % rcs_pathname - self.parse_rcs_file() - self.rcsfile.close() + rcsparse.Parser().parse(rcsfile, self) + rcsfile.close() if opt_rev in [None, '', 'HEAD']: # Explicitly specified topmost revision in tree @@ -599,7 +433,8 @@ def link_includes(text, root, rcs_path): incfile = match.group(3) for rel_path in ('', 'Attic', '..'): trial_root = os.path.join(rcs_path, rel_path) - file = os.path.normpath('%s%s%s%s%s,v' % (root, path_sep, trial_root, path_sep, incfile)) + file = os.path.join(root, trial_root) + file = os.path.normpath(os.path.join(file, incfile + ',v')) if os.access(file, os.F_OK): return '#%sinclude%s"%s"' % \ (match.group(1), match.group(2), @@ -607,7 +442,7 @@ def link_includes(text, root, rcs_path): return text def make_html(root, rcs_path, opt_rev = None, sticky = None): - filename = root + path_sep + rcs_path + filename = os.path.join(root, rcs_path) parser = CVSParser() revision = parser.parse_cvs_file(filename, opt_rev) count = len(parser.revision_map) diff --git a/lib/rcsparse.py b/lib/rcsparse.py new file mode 100644 index 00000000..1c7b6b68 --- /dev/null +++ b/lib/rcsparse.py @@ -0,0 +1,293 @@ +# +# Copyright (C) 2000 The ViewCVS Group. All Rights Reserved. +# Copyright (C) 2000 Curt Hagenlocher +# +# By using this file, you agree to the terms and conditions set forth in +# the LICENSE.html file which can be found at the top level of the ViewCVS +# distribution or at http://www.lyra.org/viewcvs/license-1.html. +# +# Contact information: +# Greg Stein, PO Box 760, Palo Alto, CA, 94302 +# gstein@lyra.org, http://www.lyra.org/viewcvs/ +# +# ----------------------------------------------------------------------- +# +# This software is being maintained as part of the ViewCVS project. +# Information is available at: +# http://viewcvs.sourceforge.net/ +# +# This file was originally based on the cvsblame.pl portion of the Bonsai +# CVS tool, developed by Steve Lamm for Netscape Communications Corporation. +# More information about Bonsai can be found at +# http://www.mozilla.org/bonsai.html +# +# cvsblame.pl, in turn, was based on Scott Furman's cvsblame script +# +# ----------------------------------------------------------------------- + +import re +import string +import time + + +class Parser: + # Precompiled regular expressions + nonws_token = re.compile('^([^;@][^;\\s]*)\\s*') + semic_token = re.compile('^;\\s*') + rcsen_token = re.compile('^@([^@]*)') + undo_escape = re.compile('@@') + odd_at = re.compile('(([^@]|^)(@@)*)@([^@]|$)') + rcs_tree = re.compile('^\\d') + + # Get the next token from the RCS file + def get_token(self): + # Erase all-whitespace lines + while len(self.line_buffer) == 0: + self.line_buffer = self.rcsfile.readline() + if self.line_buffer == '': + raise RuntimeError, 'EOF' + self.line_buffer = string.lstrip(self.line_buffer) + + # A string of non-whitespace characters is a token + match = self.nonws_token.match(self.line_buffer) + if match: + self.line_buffer = self.nonws_token.sub('', self.line_buffer) + return match.group(1) + + # ...and so is a single semicolon + if self.semic_token.match(self.line_buffer): + self.line_buffer = self.semic_token.sub('', self.line_buffer) + return ';' + + # ...or an RCS-encoded string that starts with an @ character + match = self.rcsen_token.match(self.line_buffer) + self.line_buffer = self.rcsen_token.sub('', self.line_buffer) + token = match.group(1) + + # Detect odd @ character used to close RCS-encoded string + while string.find(self.line_buffer, '@') < 0 or not self.odd_at.search(self.line_buffer): + token = token + self.line_buffer + self.line_buffer = self.rcsfile.readline() + if self.line_buffer == '': + raise RuntimeError, 'EOF' + + # Retain the remainder of the line after the terminating @ character + i = self.odd_at.search(self.line_buffer).end(1) + token = token + self.line_buffer[:i] + self.line_buffer = self.line_buffer[i+1:] + + # Undo escape-coding of @ characters. + token = self.undo_escape.sub('@', token) + + # Digest any extra blank lines + while len(self.line_buffer) == 0 or self.line_buffer == '\n': + self.line_buffer = self.rcsfile.readline() + if self.line_buffer == '': + self.feof = 1 + break + + return token + + # Try to match the next token from the input buffer + def match_token(self, match): + token = self.get_token() + if token != match: + raise RuntimeError, ('Unexpected parsing error in RCS file.\n' + + 'Expected token: %s, but saw: %s' % (match, token)) + + # Push RCS token back into the input buffer. + def unget_token(self, token): + self.line_buffer = token + " " + self.line_buffer + + def parse_rcs_admin(self): + while 1: + # Read initial token at beginning of line + token = self.get_token() + + # We're done once we reach the description of the RCS tree + if self.rcs_tree.match(token): + self.unget_token(token) + return + + # print "token:", token + + if token == "head": + self.sink.set_head_revision(self.get_token()) + self.match_token(';') + elif token == "branch": + self.sink.set_principal_branch(self.get_token()) + self.match_token(';') + elif token == "symbols": + while 1: + tag = self.get_token() + if tag == ';': + break + (tag_name, tag_rev) = string.split(tag, ':') + self.sink.define_tag(tag_name, tag_rev) + elif token == "comment": + self.sink.set_comment(self.get_token()) + self.match_token(';') + + # Ignore all these other fields - We don't care about them. + elif token in ("locks", "strict", "expand", "access"): + while 1: + tag = self.get_token() + if tag == ';': + break + else: + pass + # warn("Unexpected RCS token: $token\n") + + raise RuntimeError, "Unexpected EOF"; + + def parse_rcs_tree(self): + while 1: + revision = self.get_token() + + # End of RCS tree description ? + if revision == 'desc': + self.unget_token(revision) + return + + # Parse date + self.match_token('date') + date = self.get_token() + self.match_token(';') + + # Convert date into timestamp + date_fields = string.split(date, '.') + ['0', '0', '0'] + date_fields = map(string.atoi, date_fields) + if date_fields[0] < 100: + date_fields[0] = date_fields[0] + 1900 + timestamp = time.mktime(date_fields) + + # Parse author + self.match_token('author') + author = self.get_token() + self.match_token(';') + + # Parse state + self.match_token('state') + state = '' + while 1: + token = self.get_token() + if token == ';': + break + state = state + token + ' ' + state = state[:-1] # toss the trailing space + + # Parse branches + self.match_token('branches') + branches = [ ] + while 1: + token = self.get_token() + if token == ';': + break + branches.append(token) + + # Parse revision of next delta in chain + self.match_token('next') + next = self.get_token() + if next == ';': + next = None + else: + self.match_token(';') + + # there are some files with extra tags in them. for example: + # owner 640; + # group 15; + # permissions 644; + # hardlinks @configure.in@; + # we just want to skip over these + while 1: + token = self.get_token() + if token == 'desc' or self.rcs_tree.match(token): + self.unget_token(token) + break + # consume everything up to the semicolon + while self.get_token() != ';': + pass + + self.sink.define_revision(revision, timestamp, author, state, branches, + next) + + def parse_rcs_description(self): + self.match_token('desc') + self.sink.set_description(self.get_token()) + + def parse_rcs_deltatext(self): + while not self.feof: + revision = self.get_token() + self.match_token('log') + log = self.get_token() + self.match_token('text') + text = self.get_token() + self.sink.set_revision_info(revision, log, text) + + def parse(self, file, sink): + self.rcsfile = file + self.sink = sink + self.line_buffer = '' + self.feof = 0 + + self.parse_rcs_admin() + self.parse_rcs_tree() + + # many sinks want to know when the tree has been completed so they can + # do some work to prep for the arrival of the deltatext + self.sink.tree_completed() + + self.parse_rcs_description() + self.parse_rcs_deltatext() + + self.rcsfile = self.sink = None + + +class Sink: + def set_head_revision(self, revision): + pass + def set_principal_branch(self, branch_name): + pass + def define_tag(self, name, revision): + pass + def set_comment(self, comment): + pass + def set_description(self, description): + pass + def define_revision(self, revision, timestamp, author, state, + branches, next): + pass + def set_revision_info(self, revision, log, text): + pass + def tree_completed(self): + pass + +class DebugSink: + def set_head_revision(self, revision): + print 'head:', revision + + def set_principal_branch(self, branch_name): + print 'branch:', branch_name + + def define_tag(self, name, revision): + print 'tag:', name, '=', revision + + def set_comment(self, comment): + print 'comment:', comment + + def set_description(self, description): + print 'description:', description + + def define_revision(self, revision, timestamp, author, state, + branches, next): + print 'revision:', revision + print ' timestamp:', timestamp + print ' author:', author + print ' state:', state + print ' branches:', branches + print ' next:', next + + def set_revision_info(self, revision, log, text): + print 'revision:', revision + print ' log:', log + print ' text:', text[:100], '...'