Reintegrate the 'issue-495-dev' branch into the trunk.

This reintroduces the issue #495 feature ("Syntax highlight/colorize scripts without extensions") while building much more fault tolerance into the system.

git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@2699 8cb11bc2-c004-0410-86c3-e597b4017df7
2011-12-06 14:22:48 +00:00 · 2011-12-06 14:22:48 +00:00 · 06a3feec90
parent b42a4a0915 a03cd82de5
commit 06a3feec90
8 changed files with 156 additions and 107 deletions
--- a/lib/blame.py
+++ b/lib/blame.py
@ -75,7 +75,8 @@ class HTMLBlameSource:
    self.path_parts = path_parts
    self.diff_url = diff_url
    self.include_url = include_url
-    self.annotation, self.revision = self.repos.annotate(path_parts, opt_rev)
+    self.annotation, self.revision = self.repos.annotate(path_parts, opt_rev,
+                                                         True)

  def __getitem__(self, idx):
    item = self.annotation.__getitem__(idx)
--- a/lib/vclib/__init__.py
+++ b/lib/vclib/__init__.py
@ -169,12 +169,19 @@ class Repository:
    Return value is a python file object
    """

-  def annotate(self, path_parts, rev):
-    """Return a list of annotate file content lines and a revision.
+  def annotate(self, path_parts, rev, include_text=False):
+    """Return a list of Annotation objects, sorted by their
+    "line_number" components, which describe the lines of the given
+    version of a file.

-    The result is a list of Annotation objects, sorted by their
-    line_number components.
-    """
+    The file path is specified as a list of components, relative to
+    the root of the repository. e.g. ["subdir1", "subdir2", "filename"]
+
+    rev is the revision of the item to return information about.
+    
+    If include_text is true, populate the Annotation objects' "text"
+    members with the corresponding line of file content; otherwise,
+    leave that member set to None."""

  def revinfo(self, rev):
    """Return information about a global revision
--- a/lib/vclib/ccvs/bincvs.py
+++ b/lib/vclib/ccvs/bincvs.py
@ -332,12 +332,12 @@ class BinCVSRepository(BaseCVSRepository):
      args = rcs_args
    return popen.popen(cmd, args, mode, capture_err)

-  def annotate(self, path_parts, rev=None):
+  def annotate(self, path_parts, rev=None, include_text=False):
    if self.itemtype(path_parts, rev) != vclib.FILE:  # does auth-check
      raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts)))
                        
    from vclib.ccvs import blame
-    source = blame.BlameSource(self.rcsfile(path_parts, 1), rev)
+    source = blame.BlameSource(self.rcsfile(path_parts, 1), rev, include_text)
    return source, source.revision

  def revinfo(self, rev):
--- a/lib/vclib/ccvs/blame.py
+++ b/lib/vclib/ccvs/blame.py
@ -413,7 +413,7 @@ class CVSParser(rcsparse.Sink):


 class BlameSource:
-  def __init__(self, rcs_file, opt_rev=None):
+  def __init__(self, rcs_file, opt_rev=None, include_text=False):
    # Parse the CVS file
    parser = CVSParser()
    revision = parser.parse_cvs_file(rcs_file, opt_rev)
@ -427,6 +427,7 @@ class BlameSource:
    self.lines = lines
    self.num_lines = count
    self.parser = parser
+    self.include_text = include_text

    # keep track of where we are during an iteration
    self.idx = -1
@ -446,6 +447,8 @@ class BlameSource:
    line_number = idx + 1
    author = self.parser.revision_author[rev]
    thisline = self.lines[idx]
+    if not self.include_text:
+      thisline = None
    ### TODO:  Put a real date in here.
    item = vclib.Annotation(thisline, line_number, rev, prev_rev, author, None)
    self.last = item
--- a/lib/vclib/ccvs/ccvs.py
+++ b/lib/vclib/ccvs/ccvs.py
@ -139,10 +139,10 @@ class CCVSRepository(BaseCVSRepository):
    return vclib._diff_fp(temp1, temp2, info1, info2,
                          self.utilities.diff or 'diff', diff_args)

-  def annotate(self, path_parts, rev=None):
+  def annotate(self, path_parts, rev=None, include_text=False):
    if self.itemtype(path_parts, rev) != vclib.FILE:  # does auth-check
      raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts)))
-    source = blame.BlameSource(self.rcsfile(path_parts, 1), rev)
+    source = blame.BlameSource(self.rcsfile(path_parts, 1), rev, include_text)
    return source, source.revision

  def revinfo(self, rev):
--- a/lib/vclib/svn/svn_ra.py
+++ b/lib/vclib/svn/svn_ra.py
@ -337,7 +337,7 @@ class RemoteSubversionRepository(vclib.Repository):
                                        _rev2optrev(rev), 0, self.ctx)
    return pairs and pairs[0][1] or {}
  
-  def annotate(self, path_parts, rev):
+  def annotate(self, path_parts, rev, include_text=False):
    path = self._getpath(path_parts)
    if self.itemtype(path_parts, rev) != vclib.FILE:  # does auth-check
      raise vclib.Error("Path '%s' is not a file." % path)
@ -351,12 +351,13 @@ class RemoteSubversionRepository(vclib.Repository):
      prev_rev = None
      if revision > 1:
        prev_rev = revision - 1
-      blame_data.append(vclib.Annotation(line, line_no+1, revision, prev_rev,
+      if not include_text:
+        line = None
+      blame_data.append(vclib.Annotation(line, line_no + 1, revision, prev_rev,
                                         author, None))
      
    client.svn_client_blame(url, _rev2optrev(1), _rev2optrev(rev),
                            _blame_cb, self.ctx)
-
    return blame_data, rev

  def revinfo(self, rev):
--- a/lib/vclib/svn/svn_repos.py
+++ b/lib/vclib/svn/svn_repos.py
@ -281,10 +281,11 @@ class FileContentsPipe:


 class BlameSource:
-  def __init__(self, local_url, rev, first_rev, config_dir):
+  def __init__(self, local_url, rev, first_rev, include_text, config_dir):
    self.idx = -1
    self.first_rev = first_rev
    self.blame_data = []
+    self.include_text = include_text

    ctx = client.svn_client_create_context()
    core.svn_config_ensure(config_dir)
@ -305,6 +306,8 @@ class BlameSource:
    prev_rev = None
    if rev > self.first_rev:
      prev_rev = rev - 1
+    if not self.include_text:
+      text = None
    self.blame_data.append(vclib.Annotation(text, line_no + 1, rev,
                                            prev_rev, author, None))

@ -503,7 +506,7 @@ class LocalSubversionRepository(vclib.Repository):
    fsroot = self._getroot(rev)
    return fs.node_proplist(fsroot, path)
  
-  def annotate(self, path_parts, rev):
+  def annotate(self, path_parts, rev, include_text=False):
    path = self._getpath(path_parts)
    path_type = self.itemtype(path_parts, rev)  # does auth-check
    if path_type != vclib.FILE:
@ -514,8 +517,8 @@ class LocalSubversionRepository(vclib.Repository):
                                {'svn_cross_copies': 1})
    youngest_rev, youngest_path = history[0]
    oldest_rev, oldest_path = history[-1]
-    source = BlameSource(_rootpath2url(self.rootpath, path),
-                         youngest_rev, oldest_rev, self.config_dir)
+    source = BlameSource(_rootpath2url(self.rootpath, path), youngest_rev,
+                         oldest_rev, include_text, self.config_dir)
    return source, youngest_rev

  def revinfo(self, rev):
--- a/lib/viewvc.py
+++ b/lib/viewvc.py
@ -54,6 +54,17 @@ try:
 except (SyntaxError, ImportError):
  idiff = None

+try:
+  from pygments import highlight
+  from pygments.formatters import HtmlFormatter
+  from pygments.lexers import ClassNotFound, \
+                              get_lexer_by_name, \
+                              get_lexer_for_mimetype, \
+                              get_lexer_for_filename, \
+                              guess_lexer
+except (SyntaxError, ImportError):
+  highlight = None
+
 debug.t_end('imports')

 #########################################################################
@ -1553,96 +1564,82 @@ def markup_escaped_urls(s):
    return "<a href=\"%s\">%s</a>" % (unescaped_url, url)
  return re.sub(_re_rewrite_escaped_url, _url_repl, s)

-def markup_stream_pygments(request, cfg, blame_data, fp, filename,
-                           mime_type, encoding):
+
+def markup_stream(request, cfg, blame_data, file_lines, filename,
+                  mime_type, encoding, colorize):
+  """Return the contents of a versioned file as a list of
+  vclib.Annotation objects, each representing one line of the file's
+  contents.  Use BLAME_DATA as the annotation information for the file
+  if provided.  Use FILE_LINES as the lines of file content text
+  themselves.  MIME_TYPE is the MIME content type of the file;
+  ENCODING is its character encoding.  If COLORIZE is true, attempt to
+  apply syntax coloration to the file contents, and use the
+  HTML-marked-up results as the text in the returned vclib.Annotation
+  objects."""
+  
+  # Nothing to mark up?  So be it.
+  if not file_lines:
+    return []
+
  # Determine if we should use Pygments to highlight our output.
  # Reasons not to include a) being told not to by the configuration,
  # b) not being able to import the Pygments modules, and c) Pygments
  # not having a lexer for our file's format.
-  blame_source = []
-  if blame_data:
-    for i in blame_data:
-      i.text = sapi.escape(i.text)
-      i.diff_href = None
-      if i.prev_rev:
-        i.diff_href = request.get_url(view_func=view_diff,
-                                      params={'r1': i.prev_rev,
-                                              'r2': i.rev},
-                                      escape=1, partial=1)
-      blame_source.append(i)
-    blame_data = blame_source
  pygments_lexer = None
-
-  # If syntax coloration is enabled, we'll try to get our Pygments on.
-  if cfg.options.enable_syntax_coloration:
-    try:
-      from pygments import highlight
-      from pygments.formatters import HtmlFormatter
-      from pygments.lexers import ClassNotFound, \
-                                  get_lexer_by_name, \
-                                  get_lexer_for_mimetype, \
-                                  get_lexer_for_filename
-      if not encoding:
-        encoding = 'guess'
-        if cfg.options.detect_encoding:
-          try:
-            import chardet
-            encoding = 'chardet'
-          except (SyntaxError, ImportError):
-            pass
-
-      # First, see if there's a Pygments lexer associated with MIME_TYPE.
-      if mime_type:
+  if colorize:
+    if not encoding:
+      encoding = 'guess'
+      if cfg.options.detect_encoding:
        try:
-          pygments_lexer = get_lexer_for_mimetype(mime_type,
-                                                  encoding=encoding,
-                                                  tabsize=cfg.options.tabsize,
-                                                  stripnl=False)
-        except ClassNotFound:
-          pygments_lexer = None
+          import chardet
+          encoding = 'chardet'
+        except (SyntaxError, ImportError):
+          pass

-      # If we've no lexer thus far, try to find one based on the FILENAME.
-      if not pygments_lexer:
-        try:
-          pygments_lexer = get_lexer_for_filename(filename,
-                                                  encoding=encoding,
-                                                  tabsize=cfg.options.tabsize,
-                                                  stripnl=False)
-        except ClassNotFound:
-          pygments_lexer = None
+    # First, see if there's a Pygments lexer associated with MIME_TYPE.
+    if mime_type:
+      try:
+        pygments_lexer = get_lexer_for_mimetype(mime_type,
+                                                encoding=encoding,
+                                                tabsize=cfg.options.tabsize,
+                                                stripnl=False)
+      except ClassNotFound:
+        pygments_lexer = None
+
+    # If we've no lexer thus far, try to find one based on the FILENAME.
+    if not pygments_lexer:
+      try:
+        pygments_lexer = get_lexer_for_filename(filename,
+                                                encoding=encoding,
+                                                tabsize=cfg.options.tabsize,
+                                                stripnl=False)
+      except ClassNotFound:
+        pygments_lexer = None
+
+    # Still no lexer?  If we've reason to believe this is a text
+    # file, try to guess the lexer based on the file's content.
+    if not pygments_lexer and is_text(mime_type) and file_lines:
+      try:
+        pygments_lexer = guess_lexer(file_lines[0])
+      except ClassNotFound:
+        pygments_lexer = None
        
-    except ImportError:
-      pass
-
-  # If we aren't going to be highlighting anything, just return the
-  # BLAME_SOURCE.  If there's no blame_source, we'll generate a fake
-  # one from the file contents we fetch with PATH and REV.
+  # If we aren't highlighting, just return an amalgamation of the
+  # BLAME_DATA (if any) and the FILE_LINES.
  if not pygments_lexer:
-    if blame_source:
-      class BlameSourceTabsizeWrapper:
-        def __init__(self, blame_source, tabsize):
-          self.blame_source = blame_source
-          self.tabsize = cfg.options.tabsize
-        def __getitem__(self, idx):
-          item = self.blame_source.__getitem__(idx)
-          item.text = item.text.expandtabs(self.tabsize)
-          item.text = markup_escaped_urls(item.text)
-          return item
-      return BlameSourceTabsizeWrapper(blame_source, cfg.options.tabsize)
-    else:
-      lines = []
-      line_no = 0
-      while 1:
-        line = fp.readline()
-        if not line:
-          break
-        line_no = line_no + 1
-        line = sapi.escape(line.expandtabs(cfg.options.tabsize))
-        line = markup_escaped_urls(line)
-        item = vclib.Annotation(line, line_no, None, None, None, None)
-        item.diff_href = None
-        lines.append(item)
-      return lines
+    lines = []
+    for i in range(len(file_lines)):
+      line = file_lines[i]
+      line = sapi.escape(line.expandtabs(cfg.options.tabsize))
+      line = markup_escaped_urls(line)
+      if blame_data:
+        blame_item = blame_data[i]
+        blame_item.text = line
+      else:
+        blame_item = vclib.Annotation(line, i + 1, None, None, None, None)
+        blame_item.diff_href = None
+      lines.append(blame_item)
+    return lines

  # If we get here, we're highlighting something.
  class PygmentsSink:
@ -1665,8 +1662,9 @@ def markup_stream_pygments(request, cfg, blame_data, fp, filename,
        item.diff_href = None
        self.blame_data.append(item)
      self.line_no = self.line_no + 1
-  ps = PygmentsSink(blame_source)
-  highlight(fp.read(), pygments_lexer,
+
+  ps = PygmentsSink(blame_data)
+  highlight(''.join(file_lines), pygments_lexer,
            HtmlFormatter(nowrap=True,
                          classprefix="pygments-",
                          encoding='utf-8'), ps)
@ -1772,27 +1770,63 @@ def markup_or_annotate(request, is_annotate):

  # Not a viewable image.
  else:
-    blame_source = None
+    blame_data = None
+
+    # If this was an annotation request, try to annotate this file.
+    # If something goes wrong, that's okay -- we'll gracefully revert
+    # to a plain markup display.
    if is_annotate:
-      # Try to annotate this file, but don't croak if we fail.
      try:
-        blame_source, revision = request.repos.annotate(path, rev)
-        annotation = 'annotated'
+        blame_source, revision = request.repos.annotate(path, rev, False)
        if check_freshness(request, None, revision, weak=1):
          return
+        # Create BLAME_DATA list from BLAME_SOURCE, adding diff_href
+        # items to each relevant "line".
+        blame_data = []
+        for item in blame_source:
+          item.diff_href = None
+          if item.prev_rev:
+            item.diff_href = request.get_url(view_func=view_diff,
+                                             params={'r1': item.prev_rev,
+                                                     'r2': item.rev},
+                                             escape=1, partial=1)
+          blame_data.append(item)
+        annotation = 'annotated'
      except vclib.NonTextualFileContents:
        annotation = 'binary'
      except:
        annotation = 'error'

+    # Grab the file contents.
    fp, revision = request.repos.openfile(path, rev, {'cvs_oldkeywords' : 1})
    if check_freshness(request, None, revision, weak=1):
      fp.close()
      return
-    lines = markup_stream_pygments(request, cfg, blame_source, fp,
-                                   path[-1], mime_type, encoding)
+    file_lines = fp.readlines()
    fp.close()

+    # Do we have a differing number of file content lines and
+    # annotation items?  That's no good.  Call it an error and don't
+    # bother attempting the annotation display.
+    if blame_data and (len(file_lines) != len(blame_data)):
+      annotation = 'error'
+      blame_data = None
+
+    # Try to markup the file contents/annotation.  If we get an error
+    # and we were colorizing the stream, try once more without the
+    # colorization enabled.
+    colorize = cfg.options.enable_syntax_coloration and highlight
+    try:
+      lines = markup_stream(request, cfg, blame_data, file_lines,
+                            path[-1], mime_type, encoding, colorize)
+    except:
+      if colorize:
+        lines = markup_stream(request, cfg, blame_data, file_lines,
+                              path[-1], mime_type, encoding, False)
+      else:
+        raise debug.ViewVCException('Error displaying file contents',
+                                    '500 Internal Server Error')
+
  data = common_template_data(request, revision, mime_type)
  data.merge(ezt.TemplateData({
    'mime_type' : mime_type,