Some improvements to the py2html integration. Thanks (I think...) to
David Martínez Moreno (Ender) for sending me down this path.

* lib/py2html.py
  Update to version 0.8. Patch by David Martínez Moreno <ender@debian.org>.

* lib/viewcvs.py
  (markup_stream_python): Call PrettyPrint.set_mode_rawhtml_color().

* lib/PyFontify.py
  Rework this file to use spaces instead of tabs. Use the 're' module
  instead of 'regex'. Fix a bug which caused "raise" to not be noticed as
  a keyword (though "rais" would be). Add "assert" and "exec" to the
  keywords list.

* templates/docroot/styles.css
  Reformat the highlight styles, and make the .kwd style have a little
  color. Also, add the styles that py2html uses.

git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@1282 8cb11bc2-c004-0410-86c3-e597b4017df7
2006-03-13 19:56:31 +00:00 · 2006-03-13 19:56:31 +00:00 · 34d32b394e
parent 342ec95573
commit 34d32b394e
4 changed files with 478 additions and 400 deletions
--- a/lib/PyFontify.py
+++ b/lib/PyFontify.py
@ -1,16 +1,27 @@
 """Module to analyze Python source code; for syntax coloring tools.

 Interface:
+
    tags = fontify(pytext, searchfrom, searchto)

-The 'pytext' argument is a string containing Python source code.
-The (optional) arguments 'searchfrom' and 'searchto' may contain a slice in pytext. 
+The PYTEXT argument is a string containing Python source code.  The
+(optional) arguments SEARCHFROM and SEARCHTO may contain a slice in
+PYTEXT.
+
 The returned value is a list of tuples, formatted like this:
-	[('keyword', 0, 6, None), ('keyword', 11, 17, None), ('comment', 23, 53, None), etc. ]
+
+    [('keyword', 0, 6, None),
+     ('keyword', 11, 17, None),
+     ('comment', 23, 53, None),
+     ...
+    ]
+    
 The tuple contents are always like this:
+
    (tag, startindex, endindex, sublist)
-tag is one of 'keyword', 'string', 'comment' or 'identifier'
-sublist is not used, hence always None. 
+    
+TAG is one of 'keyword', 'string', 'comment' or 'identifier'
+SUBLIST is not used, hence always None.
 """

 # Based on FontText.py by Mitchell S. Chapman,
@ -29,79 +40,73 @@ __version__ = "0.3.1"

 import string, re

+
+# This list of keywords is taken from ref/node13.html of the
+# Python 1.3 HTML documentation. ("access" is intentionally omitted.)
+
+keywordsList = ["and", "assert", "break", "class", "continue", "def",
+                "del", "elif", "else", "except", "exec", "finally",
+                "for", "from", "global", "if", "import", "in", "is",
+                "lambda", "not", "or", "pass", "print", "raise",
+                "return", "try", "while",
+                ]
+
 # First a little helper, since I don't like to repeat things. (Tismer speaking)
 def replace(where, what, with):
    return string.join(string.split(where, what), with)

-# This list of keywords is taken from ref/node13.html of the
-# Python 1.3 HTML documentation. ("access" is intentionally omitted.)
-keywordsList = [
-	"del", "from", "lambda", "return",
-	"and", "elif", "global", "not", "try",
-	"break", "else", "if", "or", "while",
-	"class", "except", "import", "pass",
-	"continue", "finally", "in", "print",
-	"def", "for", "is", "raise"]
-
-# Build up a regular expression which will match anything
-# interesting, including multi-line triple-quoted strings.
+# A regexp for matching Python comments.
 commentPat = "#.*"

-pat = "q[^\q\n]*\(\\\\[\000-\377][^\q\n]*\)*q"
-quotePat = replace(pat, "q", "'") + "\|" + replace(pat, 'q', '"')
+# A regexp for matching simple quoted strings.
+pat = "q[^q\\n]*(\\[\000-\377][^q\\n]*)*q"
+quotePat = replace(pat, "q", "'") + "|" + replace(pat, 'q', '"')

-# Way to go, Tim!
+# A regexp for matching multi-line triple-quoted strings.  (Way to go, Tim!)
 pat = """
    qqq
-	[^\\q]*
-	\(
-		\(	\\\\[\000-\377]
-		\|	q
-			\(	\\\\[\000-\377]
-			\|	[^\\q]
-			\|	q
-				\(	\\\\[\000-\377]
-				\|	[^\\q]
-				\)
-			\)
-		\)
-		[^\\q]*
-	\)*
+    [^q]*
+    (
+        (    \\[\000-\377]
+        |    q
+            (    \\[\000-\377]
+            |    [^q]
+            |    q
+                (    \\[\000-\377]
+                |    [^q]
+                )
+            )
+        )
+        [^q]*
+    )*
    qqq
 """
 pat = string.join(string.split(pat), '')   # get rid of whitespace
-tripleQuotePat = replace(pat, "q", "'") + "\|" + replace(pat, 'q', '"')
+tripleQuotePat = replace(pat, "q", "'") + "|" + replace(pat, 'q', '"')

-# Build up a regular expression which matches all and only
-# Python keywords. This will let us skip the uninteresting
-# identifier references.
-# nonKeyPat identifies characters which may legally precede
-# a keyword pattern.
-nonKeyPat = "\(^\|[^a-zA-Z0-9_.\"']\)"
+# A regexp which matches all and only Python keywords. This will let
+# us skip the uninteresting identifier references.
+nonKeyPat = "(^|[^a-zA-Z0-9_.\"'])"   # legal keyword-preceding characters
+keyPat = nonKeyPat + "(" + string.join(keywordsList, "|") + ")" + nonKeyPat

-keyPat = nonKeyPat + "\("
-for keyword in keywordsList:
-	keyPat = keyPat + keyword + "\|"
-keyPat = keyPat[:-2] + "\)" + nonKeyPat
+# Our final syntax-matching regexp is the concatenation of the regexps we
+# constructed above.
+syntaxPat = keyPat + \
+            "|" + commentPat + \
+            "|" + tripleQuotePat + \
+            "|" + quotePat
+syntaxRE = re.compile(syntaxPat)

-matchPat = keyPat + "\|" + commentPat + "\|" + tripleQuotePat + "\|" + quotePat
-matchRE = re.compile(matchPat)
-
-idKeyPat = "[ \t]*[A-Za-z_][A-Za-z_0-9.]*"	# Ident w. leading whitespace.
+# Finally, we construct a regexp for matching identifiers (with
+# optional leading whitespace).
+idKeyPat = "[ \t]*[A-Za-z_][A-Za-z_0-9.]*"
 idRE = re.compile(idKeyPat)


 def fontify(pytext, searchfrom=0, searchto=None):
    if searchto is None:
        searchto = len(pytext)
-	# Cache a few attributes for quicker reference.
-	search = matchRE.search
-	group = matchRE.group
-	idSearch = idRE.search
-	idGroup = idRE.group
-	
    tags = []
-	tags_append = tags.append
    commentTag = 'comment'
    stringTag = 'string'
    keywordTag = 'keyword'
@ -110,36 +115,48 @@ def fontify(pytext, searchfrom = 0, searchto = None):
    start = 0
    end = searchfrom
    while 1:
-		start = search(pytext, end)
-		if start < 0 or start >= searchto:
-			break	# EXIT LOOP
-		match = group(0)
+        # Look for some syntax token we're interested in.  If we find
+        # nothing, we're done.
+        matchobj = syntaxRE.search(pytext, end)
+        if not matchobj:
+            break
+
+        # If we found something outside our search area, it doesn't
+        # count (and we're done).
+        start = matchobj.start()
+        if start >= searchto:
+            break
+
+        match = matchobj.group(0)
        end = start + len(match)
        c = match[0]
-		if c not in "#'\"":
-			# Must have matched a keyword.
-			if start <> searchfrom:
+        if c == '#':
+            # We matched a comment.
+            tags.append((commentTag, start, end, None))
+        elif c == '"' or c == '\'':
+            # We matched a string.
+            tags.append((stringTag, start, end, None))
+        else:
+            # We matched a keyword.
+            if start != searchfrom:
                # there's still a redundant char before and after it, strip!
                match = match[1:-1]
                start = start + 1
            else:
-				# this is the first keyword in the text.
+                # This is the first keyword in the text.
                # Only a space at the end.
                match = match[:-1]
            end = end - 1
-			tags_append((keywordTag, start, end, None))
+            tags.append((keywordTag, start, end, None))
            # If this was a defining keyword, look ahead to the
            # following identifier.
            if match in ["def", "class"]:
-				start = idSearch(pytext, end)
-				if start == end:
-					match = idGroup(0)
-					end = start + len(match)
-					tags_append((identifierTag, start, end, None))
-		elif c == "#":
-			tags_append((commentTag, start, end, None))
-		else:
-			tags_append((stringTag, start, end, None))
+                matchobj = idRE.search(pytext, end)
+                if matchobj:
+                    start = matchobj.start()
+                    if start == end and start < searchto:
+                        end = start + len(matchobj.group(0))
+                        tags.append((identifierTag, start, end, None))
    return tags


@ -150,3 +167,7 @@ def test(path):
    tags = fontify(text)
    for tag, start, end, sublist in tags:
        print tag, `text[start:end]`
+
+if __name__ == "__main__":
+    import sys
+    test(sys.argv[0])
--- a/lib/py2html.py
+++ b/lib/py2html.py
@ -1,6 +1,6 @@
-#!/usr/local/bin/python -u
+#!/usr/bin/python -u

-""" Python Highlighter for HTML                          Version: 0.5
+""" Python Highlighter                                    Version: 0.8

    py2html.py [options] files...

@ -10,8 +10,8 @@
     -stdout        read from files, write to stdout
     -files         read from files, write to filename+'.html' (default)
     -format:
-       html         output HTML page (default)
-       rawhtml      output pure HTML (without headers, titles, etc.)
+       html         output XHTML page (default)
+       rawhtml      output pure XHTML (without headers, titles, etc.)
     -mode:
       color        output in color (default)
       mono         output b/w (for printing)
@ -25,7 +25,7 @@
     -v             verbose

    Takes the input, assuming it is Python code and formats it into
-    colored HTML. When called without parameters the script tries to
+    colored XHTML. When called without parameters the script tries to
    work in CGI mode. It looks for a field 'script=URL' and tries to
    use that URL as input file. If it can't find this field, the path
    info (the part of the URL following the CGI script name) is
@ -34,46 +34,42 @@
 
    * Uses Just van Rossum's PyFontify version 0.3 to tag Python scripts.
      You can get it via his homepage on starship:
-        URL: http://starship.skyport.net/crew/just
+        URL: http://starship.python.net/crew/just
 """
 __comments__ = """

    The following snippet is a small shell script I use for viewing
-    Python scripts per less on Unix:
+    Python scripts via less on Unix:
+
+pyless:
 #!/bin/sh
 # Browse pretty printed Python code using ANSI codes for highlighting
-py2html -stdout -format:ansi -mode:mono $* | less -r
+py2html -stdout -format:ansi -mode:color $* | less -r

    History:

+    0.8: Added patch by Patrick Lynch to have py2html.py use style
+         sheets for markup
+    0.7: Added patch by Ville Skyttä to make py2html.py output
+         valid XHTML.
+    0.6: Fixed a bug in .escape_html(); thanks to Vespe Savikko for
+         finding this one.
    0.5: Added a few suggestions by Kevin Ng to make the CGI version
         a little more robust.

 """
-__copyright__ = """
-----------------------------------------------------------------------------
-(c) Copyright by Marc-Andre Lemburg, 1998 (mailto:mal@lemburg.com)
-
-    Permission to use, copy, modify, and distribute this software and its
-    documentation for any purpose and without fee or royalty is hereby granted,
-    provided that the above copyright notice appear in all copies and that
-    both that copyright notice and this permission notice appear in
-    supporting documentation or portions thereof, including modifications,
-    that you make.
-
-    THE AUTHOR MARC-ANDRE LEMBURG DISCLAIMS ALL WARRANTIES WITH REGARD TO
-    THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
-    FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL,
-    INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
-    FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
-    NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
-    WITH THE USE OR PERFORMANCE OF THIS SOFTWARE !
+__copyright__ = """\
+    Copyright (c) 1998-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
+    Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com
+    Distributed under the terms and conditions of the eGenix.com Public
+    License. See http://www.egenix.com/files/python/mxLicense.html for
+    details, or contact the author. All Rights Reserved.\
 """

-__version__ = '0.5'
+__version__ = '0.8'

 __cgifooter__ = ('\n<pre># code highlighted using <a href='
-		 '"http://starship.skyport.net/~lemburg/">py2html.py</a> '
+                 '"http://www.lemburg.com/files/python/">py2html.py</a> '
                 'version %s</pre>\n' % __version__)

 import sys,string,re
@ -86,7 +82,11 @@ sys.path.append('.')
 # URL of the input form the user is redirected to in case no script=xxx
 # form field is given. The URL *must* be absolute. Leave blank to
 # have the script issue an error instead.
-INPUT_FORM = 'http://starship.skyport.net/~lemburg/SoftwareDescriptions.html#py2html.py'
+INPUT_FORM = 'http://www.lemburg.com/files/python/SoftwareDescriptions.html#py2html.py'
+
+# HTML DOCTYPE and XML namespace
+HTML_DOCTYPE = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
+HTML_XMLNS = ' xmlns="http://www.w3.org/1999/xhtml"'

 ### Helpers

@ -116,7 +116,7 @@ class PrettyPrint:
          # --------------------------
          # rawhtml     |    x     x   (HTML without headers, etc.)
          # html        |    x     x   (a HTML page with HEAD&BODY:)
-	  # ansi        |          x   (with Ansi-escape sequences)
+          # ansi        |    x     x   (with Ansi-escape sequences)

        * interfaces:

@ -141,6 +141,7 @@ class PrettyPrint:
    # misc settings
    title = ''
    bgcolor = '#FFFFFF'
+    css = ''
    header = ''
    footer = ''
    replace_URLs = 0
@ -150,18 +151,18 @@ class PrettyPrint:
    def __init__(self,tagfct=None,format='html',mode='color'):

        self.tag = tagfct
-	self.set_mode = getattr(self,'set_mode_'+format+'_'+mode)
-	self.filter = getattr(self,'filter_'+format)
-	self.set_mode()
+        self.set_mode = getattr(self,'set_mode_%s_%s' % (format, mode))
+        self.filter = getattr(self,'filter_%s' % format)

    def file_filter(self,infile,outfile):

+        self.set_mode()
        text = fileio(infile,'r')
        if type(infile) == type('') and self.title == '':
            self.title = infile
        fileio(outfile,'w',self.filter(text))

-    ### set pre- and postfixes for formats & modes
+    ### Set pre- and postfixes for formats & modes
    #
    # These methods must set self.formats to a dictionary having
    # an entry for every tag returned by the tagging function.
@ -175,30 +176,54 @@ class PrettyPrint:

    def set_mode_html_color(self):

+        self.css = """
+        <STYLE TYPE="text/css">
+          <!--
+            body{ background: %s; }
+            .PY_KEYWORD{ color: #0000C0; font-weight: bold; }
+            .PY_COMMENT{ color: #000080; }
+            .PY_PARAMETER{ color: #C00000; }
+            .PY_IDENTIFIER{ color: #C00000; font-weight: bold; }
+            .PY_STRING{ color: #008000; }
+          -->
+        </STYLE> """ % self.bgcolor
+
        self.formats = {
            'all':('<pre>','</pre>'),
-	    'comment':('<span style="color:#1111CC">','</span>'),
-	    'keyword':('<span style="color:#3333CC"><strong>','</strong></span>'),
-	    'parameter':('<span style="color:#000066>','</span>'),
+            'comment':('<span class="PY_COMMENT">','</span>'),
+            'keyword':('<span class="PY_KEYWORD">','</span>'),
+            'parameter':('<span class="PY_PARAMETER">','</span>'),
            'identifier':( lambda x,strip=string.strip:
-                           '<a name="%s"><span style="color:#CC0000"><strong>' % (strip(x)),
-	                   '</strong></span></a>'),
-	    'string':('<span style="color:#115511">','</span>')
+                           '<a name="%s"><span class="PY_IDENTIFIER">' % (strip(x)),
+                           '</span></a>'),
+            'string':('<span class="PY_STRING">','</span>')
            }

    set_mode_rawhtml_color = set_mode_html_color

    def set_mode_html_mono(self):

+        self.css = """
+        <STYLE TYPE="text/css">
+          <!--
+            body{ background-color: %s }
+            .PY_KEYWORD{ text-decoration: underline }
+            .PY_COMMENT{ }
+            .PY_PARAMETER{ }
+            .PY_IDENTIFIER{ font-weight: bold}
+            .PY_STRING{ font-style: italic}
+          -->
+        </STYLE> """ % self.bgcolor
+
        self.formats = {
            'all':('<pre>','</pre>'),
-	    'comment':('',''),
-	    'keyword':( '<span style="text-decoration:underline">','</span>'),
-	    'parameter':('',''),
+            'comment':('<span class="PY_COMMENT">','</span>'),
+            'keyword':( '<span class="PY_KEYWORD">','</span>'),
+            'parameter':('<span class="PY_PARAMETER">','</span>'),
            'identifier':( lambda x,strip=string.strip:
-                           '<a name="%s"><strong>' % (strip(x)),
-		           '</strong>'),
-	    'string':('','')
+                           '<a name="%s"><span class="PY_IDENTIFIER">' % (strip(x)),
+                           '</span></a>'),
+            'string':('<span class="PY_STRING">','</span>')
            }

    set_mode_rawhtml_mono = set_mode_html_mono
@ -214,11 +239,22 @@ class PrettyPrint:
            'string':('','')
            }

-    ### filter for Python scripts given as string
+    def set_mode_ansi_color(self):
+
+        self.formats = {
+            'all':('',''),
+            'comment':('\033[34;2m','\033[m'),
+            'keyword':('\033[1;34m','\033[m'),
+            'parameter':('',''),
+            'identifier':('\033[1;31m','\033[m'),
+            'string':('\033[32;2m','\033[m')
+            }
+
+    ### Filters for Python scripts given as string

    def escape_html(self,text):

-	t = (('<','&lt;'),('>','&gt;'))
+        t = (('&','&amp;'),('<','&lt;'),('>','&gt;'))
        for x,y in t:
            text = string.join(string.split(text,x),y)
        return text
@ -229,12 +265,26 @@ class PrettyPrint:
        if self.replace_URLs:
            output = re.sub('URL:([ \t]+)([^ \n\r<]+)',
                            'URL:\\1<a href="\\2">\\2</a>',output)
-	html = """<html><head><title>%s</title></head>
-		  <body style="background-color:%s">
-		  <!--header-->%s
-		  <!--script-->%s
-		  <!--footer-->%s
-		  </body>\n"""%(self.title,self.bgcolor,self.header,output,self.footer)
+        html = """%s<html%s>
+                  <head>
+                  <title>%s</title>
+                  <!--css-->
+                  %s
+                  </head>
+                  <body>
+                  <!--header-->
+                  %s
+                  <!--script-->
+                  %s
+                  <!--footer-->
+                  %s
+                  </body></html>\n"""%(HTML_DOCTYPE,
+                                       HTML_XMLNS,
+                                       self.title,
+                                       self.css,
+                                       self.header,
+                                       output,
+                                       self.footer)
        return html

    def filter_rawhtml(self,text):
@ -250,7 +300,7 @@ class PrettyPrint:
        output = self.fontify(text)
        return self.header + output + self.footer

-    ### fontify engine
+    ### Fontify engine

    def fontify(self,pytext):

@ -277,7 +327,9 @@ class PrettyPrint:
        return string.join(l,'')

 def addsplits(splits,text,formats,taglist):
-    # helper for fontify()
+
+    """ Helper for .fontify()
+    """
    for id,left,right,sublist in taglist:
        try:
            pre,post = formats[id]
@ -297,12 +349,12 @@ def addsplits(splits,text,formats,taglist):
 def write_html_error(titel,text):

    print """\
-html><head><title>%s</title></head>
+%s<html%s><head><title>%s</title></head>
 <body>
 <h2>%s</h2>
 %s
 </body></html>
-""" % (titel,titel,text)
+""" % (HTML_DOCTYPE,HTML_XMLNS,titel,titel,text)

 def redirect_to(url):

@ -310,13 +362,13 @@ def redirect_to(url):
    sys.stdout.write('Status: 302\r\n')
    sys.stdout.write('Location: %s\r\n\r\n' % url)
    print """
-<html><head>
+%s<html%s><head>
 <title>302 Moved Temporarily</title>
 </head><body>
 <h1>302 Moved Temporarily</h1>
-<p>The document has moved to <a href="%s">%s</a>.</p>
+The document has moved to <a href="%s">%s</a>.<p></p>
 </body></html>
-""" % (url,url)
+""" % (HTML_DOCTYPE,HTML_XMLNS,url,url)

 def main(cmdline):

@ -337,18 +389,18 @@ def main(cmdline):
            break
    files = cmdline[len(options)+1:]

-    # create converting object	
+    ### create converting object

    # load fontifier
    if '-marcs' in options:
-	# use mxTextTool's tagging engine
-	from mxTextTools import tag
-	from mxTextTools.Examples.Python import python_script
+        # use mxTextTool's tagging engine as fontifier
+        from mx.TextTools import tag
+        from mx.TextTools.Examples.Python import python_script
        tagfct = lambda text,tag=tag,pytable=python_script: \
                 tag(text,pytable)[1]
        print "Py2HTML: using Marc's tagging engine"
    else:
-	# load Just's
+        # load Just's fontifier
        try:
            import PyFontify
            if PyFontify.__version__ < '0.3': raise ValueError
@ -358,7 +410,7 @@ def main(cmdline):
    Sorry, but this script needs the PyFontify.py module version 0.3;
    You can download it from Just's homepage at

-       URL: http://starship.skyport.net/crew/just
+       URL: http://starship.python.net/crew/just
 """
            sys.exit()

@ -378,7 +430,7 @@ def main(cmdline):
    c = PrettyPrint(tagfct,format,mode)
    convert = c.file_filter

-    # start working
+    ### start working

    if '-title' in options:
        c.title = optvalues['-title']
--- a/lib/viewcvs.py
+++ b/lib/viewcvs.py
@ -1282,6 +1282,7 @@ def markup_stream_python(fp, cfg):
  ### mailtos as well as we do.
  html = cgi.escape(fp.read())
  pp = py2html.PrettyPrint(PyFontify.fontify, "rawhtml", "color")
+  pp.set_mode_rawhtml_color()
  html = pp.fontify(html)
  html = re.sub(_re_rewrite_url, r'<a href="\1">\1</a>', html)
  html = re.sub(_re_rewrite_email, r'<a href="mailto:\1">\1</a>', html)
--- a/templates/docroot/styles.css
+++ b/templates/docroot/styles.css
@ -67,7 +67,6 @@ form { margin: 0; }
  background-color: #eeeeee;
 }

-
 /*** Highlight Markup Styles ***/
 #vc_markup .num  { color: #000000; }
 #vc_markup .esc  { color: #bd8d8b; }
@ -81,8 +80,13 @@ form { margin: 0; }
 #vc_markup .kwa  { color: #9c20ee; font-weight: bold; }
 #vc_markup .kwb  { color: #208920; }
 #vc_markup .kwc  { color: #0000ff; }
-#vc_markup .kwd  { color:#000000; }
+#vc_markup .kwd  { color: #404040; }

+/*** Py2html Markup Styles  ***/
+#vc_markup .PY_STRING     { color: #bd8d8b; }
+#vc_markup .PY_COMMENT    { color: #ac2020; font-style: italic; }
+#vc_markup .PY_KEYWORD    { color: #9c20ee; font-weight: bold; }
+#vc_markup .PY_IDENTIFIER { color: #404040; }

 /*** Line numbers outputted by highlight colorizer ***/
 .line {