viewvc-4intranet/lib/vclib/ccvs/blame.py

#!/usr/bin/env python
# -*-python-*-
#
# Copyright (C) 1999-2013 The ViewCVS Group. All Rights Reserved.
# Copyright (C) 2000 Curt Hagenlocher <curt@hagenlocher.org>
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewVC
# distribution or at http://viewvc.org/license-1.html.
#
# For more information, visit http://viewvc.org/
#
# -----------------------------------------------------------------------
#
# blame.py: Annotate each line of a CVS file with its author,
#           revision #, date, etc.
#
# -----------------------------------------------------------------------
#
# This file is based on the cvsblame.pl portion of the Bonsai CVS tool,
# developed by Steve Lamm for Netscape Communications Corporation.  More
# information about Bonsai can be found at
#    http://www.mozilla.org/bonsai.html
#
# cvsblame.pl, in turn, was based on Scott Furman's cvsblame script
#
# -----------------------------------------------------------------------

import re
import time
import math
import rcsparse
import vclib

class CVSParser(rcsparse.Sink):
  # Precompiled regular expressions
  trunk_rev   = re.compile('^[0-9]+\\.[0-9]+$')
  last_branch = re.compile('(.*)\\.[0-9]+')
  is_branch   = re.compile('^(.*)\\.0\\.([0-9]+)$')
  d_command   = re.compile('^d(\d+)\\s(\\d+)')
  a_command   = re.compile('^a(\d+)\\s(\\d+)')

  SECONDS_PER_DAY = 86400

  def __init__(self):
    self.Reset()

  def Reset(self):
    self.last_revision = {}
    self.prev_revision = {}
    self.revision_date = {}
    self.revision_author = {}
    self.revision_branches = {}
    self.next_delta = {}
    self.prev_delta = {}
    self.tag_revision = {}
    self.timestamp = {}
    self.revision_ctime = {}
    self.revision_age = {}
    self.revision_log = {}
    self.revision_deltatext = {}
    self.revision_map = []    # map line numbers to revisions
    self.lines_added  = {}
    self.lines_removed = {}

  # Map a tag to a numerical revision number.  The tag can be a symbolic
  # branch tag, a symbolic revision tag, or an ordinary numerical
  # revision number.
  def map_tag_to_revision(self, tag_or_revision):
    try:
      revision = self.tag_revision[tag_or_revision]
      match = self.is_branch.match(revision)
      if match:
        branch = match.group(1) + '.' + match.group(2)
        if self.last_revision.get(branch):
          return self.last_revision[branch]
        else:
          return match.group(1)
      else:
        return revision
    except:
      return ''

  # Construct an ordered list of ancestor revisions to the given
  # revision, starting with the immediate ancestor and going back
  # to the primordial revision (1.1).
  #
  # Note: The generated path does not traverse the tree the same way
  #       that the individual revision deltas do.  In particular,
  #       the path traverses the tree "backwards" on branches.
  def ancestor_revisions(self, revision):
    ancestors = []
    revision = self.prev_revision.get(revision)
    while revision:
      ancestors.append(revision)
      revision = self.prev_revision.get(revision)

    return ancestors

  # Split deltatext specified by rev to each line.
  def deltatext_split(self, rev):
    lines = self.revision_deltatext[rev].split('\n')
    if lines[-1] == '':
      del lines[-1]
    return lines

  # Extract the given revision from the digested RCS file.
  # (Essentially the equivalent of cvs up -rXXX)
  def extract_revision(self, revision):
    path = []
    add_lines_remaining = 0
    start_line = 0
    count = 0
    while revision:
      path.append(revision)
      revision = self.prev_delta.get(revision)
    path.reverse()
    path = path[1:]  # Get rid of head revision

    text = self.deltatext_split(self.head_revision)

    # Iterate, applying deltas to previous revision
    for revision in path:
      adjust = 0
      diffs = self.deltatext_split(revision)
      self.lines_added[revision]   = 0
      self.lines_removed[revision] = 0
      lines_added_now = 0
      lines_removed_now = 0

      for command in diffs:
        dmatch = self.d_command.match(command)
        amatch = self.a_command.match(command)
        if add_lines_remaining > 0:
          # Insertion lines from a prior "a" command
          text.insert(start_line + adjust, command)
          add_lines_remaining = add_lines_remaining - 1
          adjust = adjust + 1
        elif dmatch:
          # "d" - Delete command
          start_line = int(dmatch.group(1))
          count      = int(dmatch.group(2))
          begin = start_line + adjust - 1
          del text[begin:begin + count]
          adjust = adjust - count
          lines_removed_now = lines_removed_now + count
        elif amatch:
          # "a" - Add command
          start_line = int(amatch.group(1))
          count      = int(amatch.group(2))
          add_lines_remaining = count
          lines_added_now = lines_added_now + count
        else:
          raise RuntimeError, 'Error parsing diff commands'

      self.lines_added[revision]   = self.lines_added[revision]   + lines_added_now
      self.lines_removed[revision] = self.lines_removed[revision] + lines_removed_now
    return text

  def set_head_revision(self, revision):
    self.head_revision = revision

  def set_principal_branch(self, branch_name):
    self.principal_branch = branch_name

  def define_tag(self, name, revision):
    # Create an associate array that maps from tag name to
    # revision number and vice-versa.
    self.tag_revision[name] = revision

  def set_comment(self, comment):
    self.file_description = comment

  def set_description(self, description):
    self.rcs_file_description = description

  # Construct dicts that represent the topology of the RCS tree
  # and other arrays that contain info about individual revisions.
  #
  # The following dicts are created, keyed by revision number:
  #   self.revision_date     -- e.g. "96.02.23.00.21.52"
  #   self.timestamp         -- seconds since 12:00 AM, Jan 1, 1970 GMT
  #   self.revision_author   -- e.g. "tom"
  #   self.revision_branches -- descendant branch revisions, separated by spaces,
  #                             e.g. "1.21.4.1 1.21.2.6.1"
  #   self.prev_revision     -- revision number of previous *ancestor* in RCS tree.
  #                             Traversal of this array occurs in the direction
  #                             of the primordial (1.1) revision.
  #   self.prev_delta        -- revision number of previous revision which forms
  #                             the basis for the edit commands in this revision.
  #                             This causes the tree to be traversed towards the
  #                             trunk when on a branch, and towards the latest trunk
  #                             revision when on the trunk.
  #   self.next_delta        -- revision number of next "delta".  Inverts prev_delta.
  #
  # Also creates self.last_revision, keyed by a branch revision number, which
  # indicates the latest revision on a given branch,
  #   e.g. self.last_revision{"1.2.8"} == 1.2.8.5
  def define_revision(self, revision, timestamp, author, state,
                      branches, next):
    self.tag_revision[revision] = revision
    branch = self.last_branch.match(revision).group(1)
    self.last_revision[branch] = revision

    #self.revision_date[revision] = date
    self.timestamp[revision] = timestamp

    # Pretty print the date string
    ltime = time.localtime(self.timestamp[revision])
    formatted_date = time.strftime("%d %b %Y %H:%M", ltime)
    self.revision_ctime[revision] = formatted_date

    # Save age
    self.revision_age[revision] = ((time.time() - self.timestamp[revision])
                                   / self.SECONDS_PER_DAY)

    # save author
    self.revision_author[revision] = author

    # ignore the state

    # process the branch information
    branch_text = ''
    for branch in branches:
      self.prev_revision[branch] = revision
      self.next_delta[revision] = branch
      self.prev_delta[branch] = revision
      branch_text = branch_text + branch + ''
    self.revision_branches[revision] = branch_text

    # process the "next revision" information
    if next:
      self.next_delta[revision] = next
      self.prev_delta[next] = revision
      is_trunk_revision = self.trunk_rev.match(revision) is not None
      if is_trunk_revision:
        self.prev_revision[revision] = next
      else:
        self.prev_revision[next] = revision

  # Construct associative arrays containing info about individual revisions.
  #
  # The following associative arrays are created, keyed by revision number:
  #   revision_log        -- log message
  #   revision_deltatext  -- Either the complete text of the revision,
  #                          in the case of the head revision, or the
  #                          encoded delta between this revision and another.
  #                          The delta is either with respect to the successor
  #                          revision if this revision is on the trunk or
  #                          relative to its immediate predecessor if this
  #                          revision is on a branch.
  def set_revision_info(self, revision, log, text):
    self.revision_log[revision] = log
    self.revision_deltatext[revision] = text

  def parse_cvs_file(self, rcs_pathname, opt_rev = None, opt_m_timestamp = None):
    # Args in:  opt_rev - requested revision
    #           opt_m - time since modified
    # Args out: revision_map
    #           timestamp
    #           revision_deltatext

    # CheckHidden(rcs_pathname)
    try:
      rcsfile = open(rcs_pathname, 'rb')
    except:
      raise RuntimeError, ('error: %s appeared to be under CVS control, ' +
              'but the RCS file is inaccessible.') % rcs_pathname

    rcsparse.parse(rcsfile, self)
    rcsfile.close()

    if opt_rev in [None, '', 'HEAD']:
      # Explicitly specified topmost revision in tree
      revision = self.head_revision
    else:
      # Symbolic tag or specific revision number specified.
      revision = self.map_tag_to_revision(opt_rev)
      if revision == '':
        raise RuntimeError, 'error: -r: No such revision: ' + opt_rev

    # The primordial revision is not always 1.1!  Go find it.
    primordial = revision
    while self.prev_revision.get(primordial):
      primordial = self.prev_revision[primordial]

    # Don't display file at all, if -m option is specified and no
    # changes have been made in the specified file.
    if opt_m_timestamp and self.timestamp[revision] < opt_m_timestamp:
      return ''

    # Figure out how many lines were in the primordial, i.e. version 1.1,
    # check-in by moving backward in time from the head revision to the
    # first revision.
    line_count = 0
    if self.revision_deltatext.get(self.head_revision):
      tmp_array = self.deltatext_split(self.head_revision)
      line_count = len(tmp_array)

    skip = 0

    rev = self.prev_revision.get(self.head_revision)
    while rev:
      diffs = self.deltatext_split(rev)
      for command in diffs:
        dmatch = self.d_command.match(command)
        amatch = self.a_command.match(command)
        if skip > 0:
          # Skip insertion lines from a prior "a" command
          skip = skip - 1
        elif dmatch:
          # "d" - Delete command
          start_line = int(dmatch.group(1))
          count      = int(dmatch.group(2))
          line_count = line_count - count
        elif amatch:
          # "a" - Add command
          start_line = int(amatch.group(1))
          count      = int(amatch.group(2))
          skip       = count
          line_count = line_count + count
        else:
          raise RuntimeError, 'error: illegal RCS file'

      rev = self.prev_revision.get(rev)

    # Now, play the delta edit commands *backwards* from the primordial
    # revision forward, but rather than applying the deltas to the text of
    # each revision, apply the changes to an array of revision numbers.
    # This creates a "revision map" -- an array where each element
    # represents a line of text in the given revision but contains only
    # the revision number in which the line was introduced rather than
    # the line text itself.
    #
    # Note: These are backward deltas for revisions on the trunk and
    # forward deltas for branch revisions.

    # Create initial revision map for primordial version.
    self.revision_map = [primordial] * line_count

    ancestors = [revision, ] + self.ancestor_revisions(revision)
    ancestors = ancestors[:-1]  # Remove "1.1"
    last_revision = primordial
    ancestors.reverse()
    for revision in ancestors:
      is_trunk_revision = self.trunk_rev.match(revision) is not None

      if is_trunk_revision:
        diffs = self.deltatext_split(last_revision)

        # Revisions on the trunk specify deltas that transform a
        # revision into an earlier revision, so invert the translation
        # of the 'diff' commands.
        for command in diffs:
          if skip > 0:
            skip = skip - 1
          else:
            dmatch = self.d_command.match(command)
            amatch = self.a_command.match(command)
            if dmatch:
              start_line = int(dmatch.group(1))
              count      = int(dmatch.group(2))
              temp = []
              while count > 0:
                temp.append(revision)
                count = count - 1
              self.revision_map = (self.revision_map[:start_line - 1] +
                      temp + self.revision_map[start_line - 1:])
            elif amatch:
              start_line = int(amatch.group(1))
              count      = int(amatch.group(2))
              del self.revision_map[start_line:start_line + count]
              skip = count
            else:
              raise RuntimeError, 'Error parsing diff commands'

      else:
        # Revisions on a branch are arranged backwards from those on
        # the trunk.  They specify deltas that transform a revision
        # into a later revision.
        adjust = 0
        diffs = self.deltatext_split(revision)
        for command in diffs:
          if skip > 0:
            skip = skip - 1
          else:
            dmatch = self.d_command.match(command)
            amatch = self.a_command.match(command)
            if dmatch:
              start_line = int(dmatch.group(1))
              count      = int(dmatch.group(2))
              adj_begin  = start_line + adjust - 1
              adj_end    = start_line + adjust - 1 + count
              del self.revision_map[adj_begin:adj_end]
              adjust = adjust - count
            elif amatch:
              start_line = int(amatch.group(1))
              count      = int(amatch.group(2))
              skip = count
              temp = []
              while count > 0:
                temp.append(revision)
                count = count - 1
              self.revision_map = (self.revision_map[:start_line + adjust] +
                      temp + self.revision_map[start_line + adjust:])
              adjust = adjust + skip
            else:
              raise RuntimeError, 'Error parsing diff commands'

      last_revision = revision

    return revision


class BlameSource:
  def __init__(self, rcs_file, opt_rev=None, include_text=False):
    # Parse the CVS file
    parser = CVSParser()
    revision = parser.parse_cvs_file(rcs_file, opt_rev)
    count = len(parser.revision_map)
    lines = parser.extract_revision(revision)
    if len(lines) != count:
      raise RuntimeError, 'Internal consistency error'

    # set up some state variables
    self.revision = revision
    self.lines = lines
    self.num_lines = count
    self.parser = parser
    self.include_text = include_text

    # keep track of where we are during an iteration
    self.idx = -1
    self.last = None

  def __getitem__(self, idx):
    if idx == self.idx:
      return self.last
    if idx >= self.num_lines:
      raise IndexError("No more annotations")
    if idx != self.idx + 1:
      raise BlameSequencingError()

    # Get the line and metadata for it.
    rev = self.parser.revision_map[idx]
    prev_rev = self.parser.prev_revision.get(rev)
    line_number = idx + 1
    author = self.parser.revision_author[rev]
    thisline = self.lines[idx]
    if not self.include_text:
      thisline = None
    ### TODO:  Put a real date in here.
    item = vclib.Annotation(thisline, line_number, rev, prev_rev, author, None)
    self.last = item
    self.idx = idx
    return item


class BlameSequencingError(Exception):
  pass