Some improvements to the py2html integration. Thanks (I think...) to

David Martínez Moreno (Ender) for sending me down this path.

* lib/py2html.py
  Update to version 0.8.  Patch by David Martínez Moreno <ender@debian.org>.

* lib/viewcvs.py
  (markup_stream_python):  Call PrettyPrint.set_mode_rawhtml_color().

* lib/PyFontify.py
  Rework this file to use spaces instead of tabs.  Use the 're' module
  instead of 'regex'.  Fix a bug which caused "raise" to not be
  noticed as a keyword (though "rais" would be).  Add "assert" and
  "exec" to the keywords list.

* templates/docroot/styles.css
  Reformat the highlight styles, and made the .kwd style have a little color.
  Also, add the styles that py2html uses.


git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@1282 8cb11bc2-c004-0410-86c3-e597b4017df7
remotes/tags/1.0.0-rc1
cmpilato 2006-03-13 19:56:31 +00:00
parent 342ec95573
commit 34d32b394e
4 changed files with 478 additions and 400 deletions

View File

@ -1,16 +1,27 @@
"""Module to analyze Python source code; for syntax coloring tools.
Interface:
tags = fontify(pytext, searchfrom, searchto)
The 'pytext' argument is a string containing Python source code.
The (optional) arguments 'searchfrom' and 'searchto' may contain a slice in pytext.
The PYTEXT argument is a string containing Python source code. The
(optional) arguments SEARCHFROM and SEARCHTO may contain a slice in
PYTEXT.
The returned value is a list of tuples, formatted like this:
[('keyword', 0, 6, None), ('keyword', 11, 17, None), ('comment', 23, 53, None), etc. ]
[('keyword', 0, 6, None),
('keyword', 11, 17, None),
('comment', 23, 53, None),
...
]
The tuple contents are always like this:
(tag, startindex, endindex, sublist)
tag is one of 'keyword', 'string', 'comment' or 'identifier'
sublist is not used, hence always None.
TAG is one of 'keyword', 'string', 'comment' or 'identifier'
SUBLIST is not used, hence always None.
"""
# Based on FontText.py by Mitchell S. Chapman,
@ -29,79 +40,73 @@ __version__ = "0.3.1"
import string, re
# This list of keywords is taken from ref/node13.html of the
# Python 1.3 HTML documentation. ("access" is intentionally omitted.)
keywordsList = ["and", "assert", "break", "class", "continue", "def",
"del", "elif", "else", "except", "exec", "finally",
"for", "from", "global", "if", "import", "in", "is",
"lambda", "not", "or", "pass", "print", "raise",
"return", "try", "while",
]
# First a little helper, since I don't like to repeat things. (Tismer speaking)
def replace(where, what, with):
return string.join(string.split(where, what), with)
# This list of keywords is taken from ref/node13.html of the
# Python 1.3 HTML documentation. ("access" is intentionally omitted.)
keywordsList = [
"del", "from", "lambda", "return",
"and", "elif", "global", "not", "try",
"break", "else", "if", "or", "while",
"class", "except", "import", "pass",
"continue", "finally", "in", "print",
"def", "for", "is", "raise"]
# Build up a regular expression which will match anything
# interesting, including multi-line triple-quoted strings.
# A regexp for matching Python comments.
commentPat = "#.*"
pat = "q[^\q\n]*\(\\\\[\000-\377][^\q\n]*\)*q"
quotePat = replace(pat, "q", "'") + "\|" + replace(pat, 'q', '"')
# A regexp for matching simple quoted strings.
pat = "q[^q\\n]*(\\[\000-\377][^q\\n]*)*q"
quotePat = replace(pat, "q", "'") + "|" + replace(pat, 'q', '"')
# Way to go, Tim!
# A regexp for matching multi-line tripled-quoted strings. (Way to go, Tim!)
pat = """
qqq
[^\\q]*
\(
\( \\\\[\000-\377]
\| q
\( \\\\[\000-\377]
\| [^\\q]
\| q
\( \\\\[\000-\377]
\| [^\\q]
\)
\)
\)
[^\\q]*
\)*
[^q]*
(
( \\[\000-\377]
| q
( \\[\000-\377]
| [^q]
| q
( \\[\000-\377]
| [^q]
)
)
)
[^q]*
)*
qqq
"""
pat = string.join(string.split(pat), '') # get rid of whitespace
tripleQuotePat = replace(pat, "q", "'") + "\|" + replace(pat, 'q', '"')
tripleQuotePat = replace(pat, "q", "'") + "|" + replace(pat, 'q', '"')
# Build up a regular expression which matches all and only
# Python keywords. This will let us skip the uninteresting
# identifier references.
# nonKeyPat identifies characters which may legally precede
# a keyword pattern.
nonKeyPat = "\(^\|[^a-zA-Z0-9_.\"']\)"
# A regexp which matches all and only Python keywords. This will let
# us skip the uninteresting identifier references.
nonKeyPat = "(^|[^a-zA-Z0-9_.\"'])" # legal keyword-preceding characters
keyPat = nonKeyPat + "(" + string.join(keywordsList, "|") + ")" + nonKeyPat
keyPat = nonKeyPat + "\("
for keyword in keywordsList:
keyPat = keyPat + keyword + "\|"
keyPat = keyPat[:-2] + "\)" + nonKeyPat
# Our final syntax-matching regexp is the concatation of the regexp's we
# constructed above.
syntaxPat = keyPat + \
"|" + commentPat + \
"|" + tripleQuotePat + \
"|" + quotePat
syntaxRE = re.compile(syntaxPat)
matchPat = keyPat + "\|" + commentPat + "\|" + tripleQuotePat + "\|" + quotePat
matchRE = re.compile(matchPat)
idKeyPat = "[ \t]*[A-Za-z_][A-Za-z_0-9.]*" # Ident w. leading whitespace.
# Finally, we construct a regexp for matching indentifiers (with
# optional leading whitespace).
idKeyPat = "[ \t]*[A-Za-z_][A-Za-z_0-9.]*"
idRE = re.compile(idKeyPat)
def fontify(pytext, searchfrom=0, searchto=None):
if searchto is None:
searchto = len(pytext)
# Cache a few attributes for quicker reference.
search = matchRE.search
group = matchRE.group
idSearch = idRE.search
idGroup = idRE.group
tags = []
tags_append = tags.append
commentTag = 'comment'
stringTag = 'string'
keywordTag = 'keyword'
@ -110,36 +115,48 @@ def fontify(pytext, searchfrom = 0, searchto = None):
start = 0
end = searchfrom
while 1:
start = search(pytext, end)
if start < 0 or start >= searchto:
break # EXIT LOOP
match = group(0)
# Look for some syntax token we're interested in. If find
# nothing, we're done.
matchobj = syntaxRE.search(pytext, end)
if not matchobj:
break
# If we found something outside our search area, it doesn't
# count (and we're done).
start = matchobj.start()
if start >= searchto:
break
match = matchobj.group(0)
end = start + len(match)
c = match[0]
if c not in "#'\"":
# Must have matched a keyword.
if start <> searchfrom:
if c == '#':
# We matched a comment.
tags.append((commentTag, start, end, None))
elif c == '"' or c == '\'':
# We matched a string.
tags.append((stringTag, start, end, None))
else:
# We matched a keyword.
if start != searchfrom:
# there's still a redundant char before and after it, strip!
match = match[1:-1]
start = start + 1
else:
# this is the first keyword in the text.
# This is the first keyword in the text.
# Only a space at the end.
match = match[:-1]
end = end - 1
tags_append((keywordTag, start, end, None))
tags.append((keywordTag, start, end, None))
# If this was a defining keyword, look ahead to the
# following identifier.
if match in ["def", "class"]:
start = idSearch(pytext, end)
if start == end:
match = idGroup(0)
end = start + len(match)
tags_append((identifierTag, start, end, None))
elif c == "#":
tags_append((commentTag, start, end, None))
else:
tags_append((stringTag, start, end, None))
matchobj = idRE.search(pytext, end)
if matchobj:
start = matchobj.start()
if start == end and start < searchto:
end = start + len(matchobj.group(0))
tags.append((identifierTag, start, end, None))
return tags
@ -150,3 +167,7 @@ def test(path):
tags = fontify(text)
for tag, start, end, sublist in tags:
print tag, `text[start:end]`
if __name__ == "__main__":
import sys
test(sys.argv[0])

View File

@ -1,6 +1,6 @@
#!/usr/local/bin/python -u
#!/usr/bin/python -u
""" Python Highlighter for HTML Version: 0.5
""" Python Highlighter Version: 0.8
py2html.py [options] files...
@ -10,8 +10,8 @@
-stdout read from files, write to stdout
-files read from files, write to filename+'.html' (default)
-format:
html output HTML page (default)
rawhtml output pure HTML (without headers, titles, etc.)
html output XHTML page (default)
rawhtml output pure XHTML (without headers, titles, etc.)
-mode:
color output in color (default)
mono output b/w (for printing)
@ -25,7 +25,7 @@
-v verbose
Takes the input, assuming it is Python code and formats it into
colored HTML. When called without parameters the script tries to
colored XHTML. When called without parameters the script tries to
work in CGI mode. It looks for a field 'script=URL' and tries to
use that URL as input file. If it can't find this field, the path
info (the part of the URL following the CGI script name) is
@ -34,46 +34,42 @@
* Uses Just van Rossum's PyFontify version 0.3 to tag Python scripts.
You can get it via his homepage on starship:
URL: http://starship.skyport.net/crew/just
URL: http://starship.python.net/crew/just
"""
__comments__ = """
The following snippet is a small shell script I use for viewing
Python scripts per less on Unix:
Python scripts via less on Unix:
pyless:
#!/bin/sh
# Browse pretty printed Python code using ANSI codes for highlighting
py2html -stdout -format:ansi -mode:mono $* | less -r
py2html -stdout -format:ansi -mode:color $* | less -r
History:
0.8: Added patch by Patrick Lynch to have py2html.py use style
sheets for markup
0.7: Added patch by Ville Skyttä to make py2html.py output
valid XHTML.
0.6: Fixed a bug in .escape_html(); thanks to Vespe Savikko for
finding this one.
0.5: Added a few suggestions by Kevin Ng to make the CGI version
a little more robust.
"""
__copyright__ = """
-----------------------------------------------------------------------------
(c) Copyright by Marc-Andre Lemburg, 1998 (mailto:mal@lemburg.com)
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee or royalty is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation or portions thereof, including modifications,
that you make.
THE AUTHOR MARC-ANDRE LEMBURG DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL,
INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
WITH THE USE OR PERFORMANCE OF THIS SOFTWARE !
__copyright__ = """\
Copyright (c) 1998-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
Copyright (c) 2000-2002, eGenix.com Software GmbH; mailto:info@egenix.com
Distributed under the terms and conditions of the eGenix.com Public
License. See http://www.egenix.com/files/python/mxLicense.html for
details, or contact the author. All Rights Reserved.\
"""
__version__ = '0.5'
__version__ = '0.8'
__cgifooter__ = ('\n<pre># code highlighted using <a href='
'"http://starship.skyport.net/~lemburg/">py2html.py</a> '
'"http://www.lemburg.com/files/python/">py2html.py</a> '
'version %s</pre>\n' % __version__)
import sys,string,re
@ -86,7 +82,11 @@ sys.path.append('.')
# URL of the input form the user is redirected to in case no script=xxx
# form field is given. The URL *must* be absolute. Leave blank to
# have the script issue an error instead.
INPUT_FORM = 'http://starship.skyport.net/~lemburg/SoftwareDescriptions.html#py2html.py'
INPUT_FORM = 'http://www.lemburg.com/files/python/SoftwareDescriptions.html#py2html.py'
# HTML DOCTYPE and XML namespace
HTML_DOCTYPE = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
HTML_XMLNS = ' xmlns="http://www.w3.org/1999/xhtml"'
### Helpers
@ -116,7 +116,7 @@ class PrettyPrint:
# --------------------------
# rawhtml | x x (HTML without headers, etc.)
# html | x x (a HTML page with HEAD&BODY:)
# ansi | x (with Ansi-escape sequences)
# ansi | x x (with Ansi-escape sequences)
* interfaces:
@ -141,6 +141,7 @@ class PrettyPrint:
# misc settings
title = ''
bgcolor = '#FFFFFF'
css = ''
header = ''
footer = ''
replace_URLs = 0
@ -150,18 +151,18 @@ class PrettyPrint:
def __init__(self,tagfct=None,format='html',mode='color'):
self.tag = tagfct
self.set_mode = getattr(self,'set_mode_'+format+'_'+mode)
self.filter = getattr(self,'filter_'+format)
self.set_mode()
self.set_mode = getattr(self,'set_mode_%s_%s' % (format, mode))
self.filter = getattr(self,'filter_%s' % format)
def file_filter(self,infile,outfile):
self.set_mode()
text = fileio(infile,'r')
if type(infile) == type('') and self.title == '':
self.title = infile
fileio(outfile,'w',self.filter(text))
### set pre- and postfixes for formats & modes
### Set pre- and postfixes for formats & modes
#
# These methods must set self.formats to a dictionary having
# an entry for every tag returned by the tagging function.
@ -175,30 +176,54 @@ class PrettyPrint:
def set_mode_html_color(self):
self.css = """
<STYLE TYPE="text/css">
<!--
body{ background: %s; }
.PY_KEYWORD{ color: #0000C0; font-weight: bold; }
.PY_COMMENT{ color: #000080; }
.PY_PARAMETER{ color: #C00000; }
.PY_IDENTIFIER{ color: #C00000; font-weight: bold; }
.PY_STRING{ color: #008000; }
-->
</STYLE> """ % self.bgcolor
self.formats = {
'all':('<pre>','</pre>'),
'comment':('<span style="color:#1111CC">','</span>'),
'keyword':('<span style="color:#3333CC"><strong>','</strong></span>'),
'parameter':('<span style="color:#000066>','</span>'),
'comment':('<span class="PY_COMMENT">','</span>'),
'keyword':('<span class="PY_KEYWORD">','</span>'),
'parameter':('<span class="PY_PARAMETER">','</span>'),
'identifier':( lambda x,strip=string.strip:
'<a name="%s"><span style="color:#CC0000"><strong>' % (strip(x)),
'</strong></span></a>'),
'string':('<span style="color:#115511">','</span>')
'<a name="%s"><span class="PY_IDENTIFIER">' % (strip(x)),
'</span></a>'),
'string':('<span class="PY_STRING">','</span>')
}
set_mode_rawhtml_color = set_mode_html_color
def set_mode_html_mono(self):
self.css = """
<STYLE TYPE="text/css">
<!--
body{ background-color: %s }
.PY_KEYWORD{ text-decoration: underline }
.PY_COMMENT{ }
.PY_PARAMETER{ }
.PY_IDENTIFIER{ font-weight: bold}
.PY_STRING{ font-style: italic}
-->
</STYLE> """ % self.bgcolor
self.formats = {
'all':('<pre>','</pre>'),
'comment':('',''),
'keyword':( '<span style="text-decoration:underline">','</span>'),
'parameter':('',''),
'comment':('<span class="PY_COMMENT">','</span>'),
'keyword':( '<span class="PY_KEYWORD">','</span>'),
'parameter':('<span class="PY_PARAMETER">','</span>'),
'identifier':( lambda x,strip=string.strip:
'<a name="%s"><strong>' % (strip(x)),
'</strong>'),
'string':('','')
'<a name="%s"><span class="PY_IDENTIFIER">' % (strip(x)),
'</span></a>'),
'string':('<span class="PY_STRING">','</span>')
}
set_mode_rawhtml_mono = set_mode_html_mono
@ -214,11 +239,22 @@ class PrettyPrint:
'string':('','')
}
### filter for Python scripts given as string
def set_mode_ansi_color(self):
self.formats = {
'all':('',''),
'comment':('\033[34;2m','\033[m'),
'keyword':('\033[1;34m','\033[m'),
'parameter':('',''),
'identifier':('\033[1;31m','\033[m'),
'string':('\033[32;2m','\033[m')
}
### Filters for Python scripts given as string
def escape_html(self,text):
t = (('<','&lt;'),('>','&gt;'))
t = (('&','&amp;'),('<','&lt;'),('>','&gt;'))
for x,y in t:
text = string.join(string.split(text,x),y)
return text
@ -229,12 +265,26 @@ class PrettyPrint:
if self.replace_URLs:
output = re.sub('URL:([ \t]+)([^ \n\r<]+)',
'URL:\\1<a href="\\2">\\2</a>',output)
html = """<html><head><title>%s</title></head>
<body style="background-color:%s">
<!--header-->%s
<!--script-->%s
<!--footer-->%s
</body>\n"""%(self.title,self.bgcolor,self.header,output,self.footer)
html = """%s<html%s>
<head>
<title>%s</title>
<!--css-->
%s
</head>
<body>
<!--header-->
%s
<!--script-->
%s
<!--footer-->
%s
</body></html>\n"""%(HTML_DOCTYPE,
HTML_XMLNS,
self.title,
self.css,
self.header,
output,
self.footer)
return html
def filter_rawhtml(self,text):
@ -250,7 +300,7 @@ class PrettyPrint:
output = self.fontify(text)
return self.header + output + self.footer
### fontify engine
### Fontify engine
def fontify(self,pytext):
@ -277,7 +327,9 @@ class PrettyPrint:
return string.join(l,'')
def addsplits(splits,text,formats,taglist):
# helper for fontify()
""" Helper for .fontify()
"""
for id,left,right,sublist in taglist:
try:
pre,post = formats[id]
@ -297,12 +349,12 @@ def addsplits(splits,text,formats,taglist):
def write_html_error(titel,text):
print """\
html><head><title>%s</title></head>
%s<html%s><head><title>%s</title></head>
<body>
<h2>%s</h2>
%s
</body></html>
""" % (titel,titel,text)
""" % (HTML_DOCTYPE,HTML_XMLNS,titel,titel,text)
def redirect_to(url):
@ -310,13 +362,13 @@ def redirect_to(url):
sys.stdout.write('Status: 302\r\n')
sys.stdout.write('Location: %s\r\n\r\n' % url)
print """
<html><head>
%s<html%s><head>
<title>302 Moved Temporarily</title>
</head><body>
<h1>302 Moved Temporarily</h1>
<p>The document has moved to <a href="%s">%s</a>.</p>
The document has moved to <a href="%s">%s</a>.<p></p>
</body></html>
""" % (url,url)
""" % (HTML_DOCTYPE,HTML_XMLNS,url,url)
def main(cmdline):
@ -337,18 +389,18 @@ def main(cmdline):
break
files = cmdline[len(options)+1:]
# create converting object
### create converting object
# load fontifier
if '-marcs' in options:
# use mxTextTool's tagging engine
from mxTextTools import tag
from mxTextTools.Examples.Python import python_script
# use mxTextTool's tagging engine as fontifier
from mx.TextTools import tag
from mx.TextTools.Examples.Python import python_script
tagfct = lambda text,tag=tag,pytable=python_script: \
tag(text,pytable)[1]
print "Py2HTML: using Marc's tagging engine"
else:
# load Just's
# load Just's fontifier
try:
import PyFontify
if PyFontify.__version__ < '0.3': raise ValueError
@ -358,7 +410,7 @@ def main(cmdline):
Sorry, but this script needs the PyFontify.py module version 0.3;
You can download it from Just's homepage at
URL: http://starship.skyport.net/crew/just
URL: http://starship.python.net/crew/just
"""
sys.exit()
@ -378,7 +430,7 @@ def main(cmdline):
c = PrettyPrint(tagfct,format,mode)
convert = c.file_filter
# start working
### start working
if '-title' in options:
c.title = optvalues['-title']

View File

@ -1282,6 +1282,7 @@ def markup_stream_python(fp, cfg):
### mailtos as well as we do.
html = cgi.escape(fp.read())
pp = py2html.PrettyPrint(PyFontify.fontify, "rawhtml", "color")
pp.set_mode_rawhtml_color()
html = pp.fontify(html)
html = re.sub(_re_rewrite_url, r'<a href="\1">\1</a>', html)
html = re.sub(_re_rewrite_email, r'<a href="mailto:\1">\1</a>', html)

View File

@ -67,7 +67,6 @@ form { margin: 0; }
background-color: #eeeeee;
}
/*** Highlight Markup Styles ***/
#vc_markup .num { color: #000000; }
#vc_markup .esc { color: #bd8d8b; }
@ -81,8 +80,13 @@ form { margin: 0; }
#vc_markup .kwa { color: #9c20ee; font-weight: bold; }
#vc_markup .kwb { color: #208920; }
#vc_markup .kwc { color: #0000ff; }
#vc_markup .kwd { color:#000000; }
#vc_markup .kwd { color: #404040; }
/*** Py2html Markup Styles ***/
#vc_markup .PY_STRING { color: #bd8d8b; }
#vc_markup .PY_COMMENT { color: #ac2020; font-style: italic; }
#vc_markup .PY_KEYWORD { color: #9c20ee; font-weight: bold; }
#vc_markup .PY_IDENTIFIER { color: #404040; }
/*** Line numbers outputted by highlight colorizer ***/
.line {