Bug 82651 - Tika&Sphinx&chardet content indexing (done!)

git-svn-id: svn://svn.office.custis.ru/3rdparty/viewvc.org/trunk@1388 6955db30-a419-402b-8a0d-67ecbb4d7f56
remotes/github/custis
vfilippov 2011-09-27 16:13:53 +00:00 committed by Vitaliy Filippov
parent 83c7e6fe49
commit e363cf19b1
10 changed files with 761 additions and 332 deletions

View File

@ -44,6 +44,7 @@ CREATE TABLE branches (
DROP TABLE IF EXISTS checkins; DROP TABLE IF EXISTS checkins;
CREATE TABLE checkins ( CREATE TABLE checkins (
id int NOT NULL AUTO_INCREMENT PRIMARY KEY,
type enum('Change','Add','Remove'), type enum('Change','Add','Remove'),
ci_when datetime DEFAULT '0000-00-00 00:00:00' NOT NULL, ci_when datetime DEFAULT '0000-00-00 00:00:00' NOT NULL,
whoid mediumint(9) DEFAULT '0' NOT NULL, whoid mediumint(9) DEFAULT '0' NOT NULL,
@ -57,7 +58,7 @@ CREATE TABLE checkins (
removedlines int(11) DEFAULT '0' NOT NULL, removedlines int(11) DEFAULT '0' NOT NULL,
descid mediumint(9), descid mediumint(9),
UNIQUE repositoryid (repositoryid,dirid,fileid,revision), UNIQUE repositoryid (repositoryid,dirid,fileid,revision),
KEY repository_when (repositoryid,ci_when), KEY repositoryid_when (repositoryid,ci_when),
KEY ci_when (ci_when), KEY ci_when (ci_when),
KEY whoid (whoid,ci_when), KEY whoid (whoid,ci_when),
KEY dirid (dirid), KEY dirid (dirid),
@ -138,6 +139,7 @@ CREATE TABLE branches (
DROP TABLE IF EXISTS commits; DROP TABLE IF EXISTS commits;
CREATE TABLE commits ( CREATE TABLE commits (
id int NOT NULL AUTO_INCREMENT PRIMARY KEY,
type enum('Change','Add','Remove'), type enum('Change','Add','Remove'),
ci_when datetime DEFAULT '0000-00-00 00:00:00' NOT NULL, ci_when datetime DEFAULT '0000-00-00 00:00:00' NOT NULL,
whoid mediumint(9) DEFAULT '0' NOT NULL, whoid mediumint(9) DEFAULT '0' NOT NULL,
@ -151,9 +153,9 @@ CREATE TABLE commits (
removedlines int(11) DEFAULT '0' NOT NULL, removedlines int(11) DEFAULT '0' NOT NULL,
descid mediumint(9), descid mediumint(9),
UNIQUE repositoryid (repositoryid,dirid,fileid,revision), UNIQUE repositoryid (repositoryid,dirid,fileid,revision),
KEY repositoryid_when (repositoryid,ci_when),
KEY ci_when (ci_when), KEY ci_when (ci_when),
KEY whoid (whoid), KEY whoid (whoid,ci_when),
KEY repositoryid_2 (repositoryid),
KEY dirid (dirid), KEY dirid (dirid),
KEY fileid (fileid), KEY fileid (fileid),
KEY branchid (branchid), KEY branchid (branchid),
@ -253,7 +255,7 @@ Options:
[Default: ViewVC] [Default: ViewVC]
--help Show this usage message. --help Show this usage message.
--hostname=ARG Use ARG as the hostname for the MySQL connection. --hostname=ARG Use ARG as the hostname for the MySQL connection.
[Default: localhost] [Default: localhost]
@ -264,7 +266,7 @@ Options:
--version=ARG Create the database using the schema employed by --version=ARG Create the database using the schema employed by
version ARG of ViewVC. Valid values are: version ARG of ViewVC. Valid values are:
[ "1.0" ] [ "1.0" ]
""" % (os.path.basename(sys.argv[0]))) """ % (os.path.basename(sys.argv[0])))
if errmsg is not None: if errmsg is not None:
stream.write("[ERROR] %s.\n" % (errmsg)) stream.write("[ERROR] %s.\n" % (errmsg))

View File

@ -58,7 +58,11 @@ else:
import os import os
import string import string
import socket
import select
import re import re
import mimetypes
import time
import svn.core import svn.core
import svn.repos import svn.repos
@ -68,14 +72,20 @@ import svn.delta
import cvsdb import cvsdb
import viewvc import viewvc
import vclib import vclib
from viewvcmagic import ContentMagic
class SvnRepo: class SvnRepo:
"""Class used to manage a connection to a SVN repository.""" """Class used to manage a connection to a SVN repository."""
def __init__(self, path): def __init__(self, path, index_content = None, tika_client = None, guesser = None,
svn_ignore_mimetype = False):
self.path = path self.path = path
self.repo = svn.repos.svn_repos_open(path) self.repo = svn.repos.svn_repos_open(path)
self.fs = svn.repos.svn_repos_fs(self.repo) self.fs = svn.repos.svn_repos_fs(self.repo)
self.rev_max = svn.fs.youngest_rev(self.fs) self.rev_max = svn.fs.youngest_rev(self.fs)
self.index_content = index_content
self.tika_client = tika_client
self.guesser = guesser
self.svn_ignore_mimetype = svn_ignore_mimetype
def __getitem__(self, rev): def __getitem__(self, rev):
if rev is None: if rev is None:
rev = self.rev_max rev = self.rev_max
@ -128,6 +138,74 @@ def _get_diff_counts(diff_fp):
line = diff_fp.readline() line = diff_fp.readline()
return plus, minus return plus, minus
class TikaClient:
    """Client for an Apache Tika application running in TCP server mode.

    The protocol is simple: connect, write the whole document, half-close
    the sending side, then read extracted plain text until EOF.
    """

    def __init__(self, tika_server, mime_types):
        """Create a Tika client.

        tika_server -- Tika server address in 'host:port' format.
        mime_types  -- whitespace-separated MIME type patterns which Tika
                       can handle; '*' is allowed as a wildcard.

        Raises an Exception when tika_server is not in 'host:port' format.
        """
        self.tika_server = tika_server
        self.mime_types = mime_types
        # Split address into a (host, port) tuple
        self.addr = tika_server.split(':')
        if len(self.addr) != 2:
            raise Exception('tika_server value is incorrect: \''+tika_server+'\', please use \'host:port\' format')
        self.addr = (self.addr[0], int(self.addr[1]))
        # Build one anchored regexp matching any of the MIME patterns;
        # '*' in a pattern is translated to '.*'
        m = re.split(r'\s+', mime_types.strip())
        self.mime_regexp = re.compile('|'.join('^'+re.escape(i).replace('\\*', '.*')+'$' for i in m))

    def get_text(self, filename, mime_type, log_filename):
        """Extract text content from file using Tika running in server mode.

        filename     -- path of the (temporary) file to send to Tika.
        mime_type    -- MIME type of the file, checked against mime_types.
        log_filename -- human-readable name used only in log messages.

        Returns the extracted text, or '' when the MIME type is not
        handled, the file is empty, or any error occurs. Errors are
        reported on stdout and never propagated to the caller.
        """
        if not self.mime_regexp.match(mime_type):
            # Tika can't handle this MIME type, return nothing
            return ''
        fd = None
        s = None
        text = ''
        fsize = 0
        try:
            # Read the original file completely into memory
            fd = open(filename, 'rb')
            data = fd.read()
            fsize = len(data)
            if not fsize:
                return ''
            # Connect to Tika
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.connect(self.addr)
            s.setblocking(0)
            sockfd = s.fileno()
            # Tika is somewhat delicate about network IO, so:
            # read and write using the poll(2) system call
            p = select.poll()
            p.register(sockfd)
            while 1:
                fds = p.poll()
                if not fds:
                    break
                (pollfd, event) = fds[0]
                if event & select.POLLIN:
                    # An I/O error or empty data means EOF...
                    try:
                        part = os.read(sockfd, 65536)
                    except (OSError, IOError):
                        break
                    if not part:
                        break
                    text += part
                if event & select.POLLOUT:
                    if not len(data):
                        # Everything sent: shutdown the sending side so Tika
                        # sees EOF, and stop watching for POLLOUT
                        s.shutdown(socket.SHUT_WR)
                        p.modify(sockfd, select.POLLIN)
                    else:
                        # Write and consume some data
                        l = os.write(sockfd, data)
                        data = data[l:]
            if len(text) == 0:
                raise Exception('Empty response from Tika server')
            print("Extracted %d bytes from %s (%s) of size %d" % (len(text), log_filename, mime_type, fsize))
        except Exception as e:
            # Indexing must never break the commit scan: log and return ''
            print("Error extracting text from %s (%s) of size %d: %s" % (log_filename, mime_type, fsize, str(e)))
        finally:
            if fd:
                fd.close()
            if s:
                s.close()
        return text
class SvnRev: class SvnRev:
"""Class used to hold information about a particular revision of """Class used to hold information about a particular revision of
@ -151,7 +229,7 @@ class SvnRev:
# get a root for the current revisions # get a root for the current revisions
fsroot = self._get_root_for_rev(rev) fsroot = self._get_root_for_rev(rev)
# find changes in the revision # find changes in the revision
editor = svn.repos.RevisionChangeCollector(repo.fs, rev) editor = svn.repos.RevisionChangeCollector(repo.fs, rev)
e_ptr, e_baton = svn.delta.make_editor(editor) e_ptr, e_baton = svn.delta.make_editor(editor)
@ -168,7 +246,7 @@ class SvnRev:
base_root = None base_root = None
if change.base_path: if change.base_path:
base_root = self._get_root_for_rev(change.base_rev) base_root = self._get_root_for_rev(change.base_rev)
if not change.path: if not change.path:
action = 'remove' action = 'remove'
elif change.added: elif change.added:
@ -184,19 +262,53 @@ class SvnRev:
['-b', '-B']) ['-b', '-B'])
diff_fp = diffobj.get_pipe() diff_fp = diffobj.get_pipe()
plus, minus = _get_diff_counts(diff_fp) plus, minus = _get_diff_counts(diff_fp)
# TODO Indexing file contents
# For binary files: svn.fs.contents_changed(root1, path1, root2, path2)
# Temp file with contents is at: diffobj.tempfile2
# Apache Tika server may even be at another host!
# CustIS Bug 50473: a workaround for svnlib behaviour in file movements (FILE1 -> FILE2 + FILE1 -> null) # CustIS Bug 50473: a workaround for svnlib behaviour in file movements (FILE1 -> FILE2 + FILE1 -> null)
if change.base_path: if change.base_path:
if not change.path and changes_hash.get(change.base_path, '') != '': if not change.path and change.base_path in changes_hash:
minus = 0 minus = 0
elif change.path: elif change.path:
changes_hash[change.base_path] = change.path changes_hash[change.base_path] = change.path
self.changes.append((path, action, plus, minus)) content = ''
mime = ''
# need to check if binary file's content changed when copying,
# if not, don't extract it, just get it from previous revision later
if repo.index_content and change.path and (not change.base_path
or svn.fs.contents_changed(
base_root and base_root or None,
base_root and change.base_path or None,
fsroot, change.path
)):
props = svn.fs.node_proplist(fsroot, change.path)
if not repo.svn_ignore_mimetype:
mime = props.get('svn:mime-type', None)
else:
mime = None
mime = repo.guesser.guess_mime(
mime,
os.path.basename(change.path),
diffobj.tempfile2
)
# Read and guess charset by ourselves for text files
if mime.startswith('text/') or (mime.startswith('application/') and mime.endswith('xml')):
try:
fd = open(diffobj.tempfile2, 'rb')
content = fd.read()
fd.close()
except: pass
# Guess charset
if content:
content, charset = repo.guesser.guess_charset(content)
if charset:
content = content.encode('utf-8')
print 'Guessed %s for %s' % (charset, change.path)
else:
print 'Failed to guess charset for %s, not indexing' % (change.path, )
# Try to extract content using Tika from binary documents
elif repo.tika_client:
content = repo.tika_client.get_text(diffobj.tempfile2, mime, change.path)
self.changes.append((path, action, plus, minus, content, mime))
def _get_root_for_rev(self, rev): def _get_root_for_rev(self, rev):
"""Fetch a revision root from a cache of such, or a fresh root """Fetch a revision root from a cache of such, or a fresh root
@ -217,7 +329,7 @@ def handle_revision(db, command, repo, rev, verbose, force=0):
if verbose: print "skipped (no changes)." if verbose: print "skipped (no changes)."
return return
for (path, action, plus, minus) in revision.changes: for (path, action, plus, minus, content, mime) in revision.changes:
directory, file = os.path.split(path) directory, file = os.path.split(path)
commit = cvsdb.CreateCommit() commit = cvsdb.CreateCommit()
commit.SetRepository(repo.path) commit.SetRepository(repo.path)
@ -230,6 +342,8 @@ def handle_revision(db, command, repo, rev, verbose, force=0):
commit.SetPlusCount(plus) commit.SetPlusCount(plus)
commit.SetMinusCount(minus) commit.SetMinusCount(minus)
commit.SetBranch(None) commit.SetBranch(None)
commit.SetContent(content)
commit.SetMimeType(mime)
if action == 'add': if action == 'add':
commit.SetTypeAdd() commit.SetTypeAdd()
@ -268,7 +382,16 @@ def main(command, repository, revs=[], verbose=0, force=0):
sys.stderr.write("ERROR: " + str(e) + "\n") sys.stderr.write("ERROR: " + str(e) + "\n")
sys.exit(1) sys.exit(1)
repo = SvnRepo(repository) tika_client = None
if cfg.utilities.tika_server:
tika_client = TikaClient(cfg.utilities.tika_server, cfg.utilities.tika_mime_types)
repo = SvnRepo(
path = repository,
index_content = cfg.cvsdb.index_content,
tika_client = tika_client,
guesser = cfg.guesser(),
svn_ignore_mimetype = cfg.options.svn_ignore_mimetype,
)
if command == 'rebuild' or (command == 'update' and not revs): if command == 'rebuild' or (command == 'update' and not revs):
for rev in range(repo.rev_max+1): for rev in range(repo.rev_max+1):
handle_revision(db, command, repo, rev, verbose) handle_revision(db, command, repo, rev, verbose)
@ -312,7 +435,7 @@ Usage: 1. %s [-v] rebuild REPOS-PATH
the database. If a range is specified, the revisions will be the database. If a range is specified, the revisions will be
processed in ascending order, and you may specify "HEAD" to processed in ascending order, and you may specify "HEAD" to
indicate "the youngest revision currently in the repository". indicate "the youngest revision currently in the repository".
3. Purge information specific to the repository located at REPOS-PATH 3. Purge information specific to the repository located at REPOS-PATH
from the database. from the database.
@ -337,7 +460,7 @@ if __name__ == '__main__':
del args[index] del args[index]
except ValueError: except ValueError:
pass pass
if len(args) < 3: if len(args) < 3:
usage() usage()

View File

@ -1,6 +1,6 @@
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------
# #
# Configuration file for ViewVC # Configuration file for ViewVC (4IntraNet patched version)
# #
# Information on ViewVC is located at the following web site: # Information on ViewVC is located at the following web site:
# http://viewvc.org/ # http://viewvc.org/
@ -9,7 +9,7 @@
# THE FORMAT OF THIS CONFIGURATION FILE # THE FORMAT OF THIS CONFIGURATION FILE
# #
# This file is delineated by sections, specified in [brackets]. Within # This file is delineated by sections, specified in [brackets]. Within
# each section, are a number of configuration settings. These settings # each section, are a number of configuration settings. These settings
# take the form of: name = value. Values may be continued on the # take the form of: name = value. Values may be continued on the
# following line by indenting the continued line. # following line by indenting the continued line.
@ -17,14 +17,14 @@
# WARNING: Indentation *always* means continuation. Name=value lines # WARNING: Indentation *always* means continuation. Name=value lines
# should always start in column zero. # should always start in column zero.
# #
# Comments should always start in column zero, and are identified # Comments should always start in column zero, and are identified
# with "#". # with "#".
# #
# Certain configuration settings may have multiple values. These should # Certain configuration settings may have multiple values. These should
# be separated by a comma. The settings where this is allowed are noted # be separated by a comma. The settings where this is allowed are noted
# below. Any other setting that requires special syntax is noted at that # below. Any other setting that requires special syntax is noted at that
# setting. # setting.
# #
# #
# SOME TERMINOLOGY USED HEREIN # SOME TERMINOLOGY USED HEREIN
# #
@ -50,10 +50,10 @@
# recommend you pay attention to. Of course, don't try to change the # recommend you pay attention to. Of course, don't try to change the
# options here -- do so in the relevant section of the configuration # options here -- do so in the relevant section of the configuration
# file below. # file below.
# #
# For correct operation, you will probably need to change the following # For correct operation, you will probably need to change the following
# configuration variables: # configuration variables:
# #
# cvs_roots (for CVS) # cvs_roots (for CVS)
# svn_roots (for Subversion) # svn_roots (for Subversion)
# root_parents (for CVS or Subversion) # root_parents (for CVS or Subversion)
@ -62,18 +62,18 @@
# rcs_dir # rcs_dir
# mime_types_file # mime_types_file
# the many options in the [utilities] section # the many options in the [utilities] section
# #
# It is usually desirable to change the following variables: # It is usually desirable to change the following variables:
# #
# address # address
# forbidden # forbidden
# #
# To optimize delivery of ViewVC static files: # To optimize delivery of ViewVC static files:
# #
# docroot # docroot
# #
# To customize the display of ViewVC for your site: # To customize the display of ViewVC for your site:
# #
# template_dir # template_dir
# the [templates] override section # the [templates] override section
# #
@ -139,7 +139,7 @@ default_root = cvs
# provided only as a convenience for ViewVC installations which are # provided only as a convenience for ViewVC installations which are
# using the default template set, where the value of this option will # using the default template set, where the value of this option will
# be displayed in the footer of every ViewVC page.) # be displayed in the footer of every ViewVC page.)
address = address =
# #
# This option provides a mechanism for custom key/value pairs to be # This option provides a mechanism for custom key/value pairs to be
@ -244,21 +244,47 @@ cvsnt =
# See also bin/cvsnt-rcsfile-inetd.pl # See also bin/cvsnt-rcsfile-inetd.pl
#rcsfile_socket = 'host:port' #rcsfile_socket = host:port
# Example: rcsfile_socket = '127.0.0.1:8071' # Example: rcsfile_socket = 127.0.0.1:8071
# Subversion command-line client, used for viewing Subversion repositories # Subversion command-line client, used for viewing Subversion repositories
svn = svn =
# svn = /usr/bin/svn # svn = /usr/bin/svn
# GNU diff, used for showing file version differences # GNU diff, used for showing file version differences
diff = diff =
# diff = /usr/bin/diff # diff = /usr/bin/diff
# CvsGraph, a graphical CVS version graph generator (see options.use_cvsgraph) # CvsGraph, a graphical CVS version graph generator (see options.use_cvsgraph)
cvsgraph = cvsgraph =
# cvsgraph = /usr/local/bin/cvsgraph # cvsgraph = /usr/local/bin/cvsgraph
# Apache Tika TCP server host and port, used to extract text from binary documents
# Note that as of 2011-09-12, Tika 0.9 has a bug which leads to hangups on processing
# MS Word documents in server mode. So you must use the fixed version, downloaded from: # MS Word documents in server mode. So you must use the fixed version, downloaded from:
# http://wiki.4intra.net/public/tika-app-0.9-fix-TIKA709.jar
# (mirror) http://code.google.com/p/mediawiki4intranet/downloads/detail?name=tika-app-0.9-fix-TIKA709.jar
# Or apply the patch by yourself and rebuild Tika from source, see patch here:
# https://issues.apache.org/jira/browse/TIKA-709
# Tika server should be started with command 'java -jar tika-app-0.9.jar -p PORT -t -eutf-8'
#tika_server = host:port
# Example: tika_server = 127.0.0.1:8072
# This lists MIME types that can be processed by Tika
# You may change it if your Tika is newer than 0.9 and supports more formats
# (note) *+xml examples: xhtml+xml, rss+xml, atom+xml, docbook+xml, rdf+xml
tika_mime_types =
text/*
application/*+xml
application/xml
application/vnd.oasis.opendocument.*
application/vnd.openxmlformats
application/vnd.ms-*
application/msaccess
application/msword
application/pdf
application/rtf
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------
[options] [options]
@ -358,7 +384,7 @@ svn_ignore_mimetype = 0
# directory ViewVC should consult for various things, including cached # directory ViewVC should consult for various things, including cached
# remote authentication credentials. If unset, Subversion will use # remote authentication credentials. If unset, Subversion will use
# the default location(s) ($HOME/.subversion, etc.) # the default location(s) ($HOME/.subversion, etc.)
svn_config_dir = svn_config_dir =
# use the rcsparse Python module to retrieve CVS repository # use the rcsparse Python module to retrieve CVS repository
# information instead of invoking rcs utilities [EXPERIMENTAL] # information instead of invoking rcs utilities [EXPERIMENTAL]
@ -494,12 +520,18 @@ short_log_len = 80
# should we colorize known file content syntaxes? (requires Pygments module) # should we colorize known file content syntaxes? (requires Pygments module)
enable_syntax_coloration = 1 enable_syntax_coloration = 1
# detect_encoding: Should we attempt to detect versioned file
# character encodings? [Requires 'chardet' module]
# Used in file list, file content display and indexing
# See also options.encodings for naive guessing.
detect_encoding = 1
# Use CvsGraph. See http://www.akhphd.au.dk/~bertho/cvsgraph/ for # Use CvsGraph. See http://www.akhphd.au.dk/~bertho/cvsgraph/ for
# documentation and download. # documentation and download.
use_cvsgraph = 0 use_cvsgraph = 0
#use_cvsgraph = 1 #use_cvsgraph = 1
# Location of the customized cvsgraph configuration file. # Location of the customized cvsgraph configuration file.
cvsgraph_conf = cvsgraph.conf cvsgraph_conf = cvsgraph.conf
# #
@ -544,6 +576,17 @@ use_pagesize = 0
# Set to 0 to disable the limit. # Set to 0 to disable the limit.
limit_changes = 100 limit_changes = 100
# You can also use primitive charset guessing instead of chardet (options.detect_encoding)
# Just set this to the list of possible charsets in your repository.
# ViewVC will simply try to decode content using each of them, and pick
# the first which succeeds. UTF-8 is always tried automatically.
#encodings = cp1251:iso-8859-1
# Sadly this is also required - for back-links from query results to files
# in CVS, because it doesn't recode file names to UTF-8 as Subversion does.
# Just set to cp1251 if you work with your CVS from Windowz.
#cvs_ondisk_charset = cp1251
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------
[templates] [templates]
@ -554,7 +597,7 @@ limit_changes = 100
# use a different template for a particular view, simply uncomment the # use a different template for a particular view, simply uncomment the
# appropriate option below and specify the currect location of the EZT # appropriate option below and specify the currect location of the EZT
# template file you wish to use for that view. # template file you wish to use for that view.
# #
# Templates are specified relative to the configured template # Templates are specified relative to the configured template
# directory (see the "template_dir" option), but absolute paths may # directory (see the "template_dir" option), but absolute paths may
# also be used as well. # also be used as well.
@ -569,13 +612,13 @@ limit_changes = 100
#diff = diff.ezt #diff = diff.ezt
#directory = directory.ezt #directory = directory.ezt
### an alternative directory view ### an alternative directory view
#directory = dir_new.ezt #directory = dir_new.ezt
#error = error.ezt #error = error.ezt
#file = file.ezt #file = file.ezt
#graph = graph.ezt #graph = graph.ezt
#log = log.ezt #log = log.ezt
### a table-based alternative log view ### a table-based alternative log view
#log = log_table.ezt #log = log_table.ezt
#query = query.ezt #query = query.ezt
#query_form = query_form.ezt #query_form = query_form.ezt
#query_results = query_results.ezt #query_results = query_results.ezt
@ -588,22 +631,51 @@ limit_changes = 100
# Set to 1 to enable the database integration feature, 0 otherwise. # Set to 1 to enable the database integration feature, 0 otherwise.
enabled = 0 enabled = 0
# Database hostname and port. # Set to 1 to enable indexing of file contents using Sphinx and Tika
index_content = 0
# Database hostname, port, and socket
#host = localhost #host = localhost
#port = 3306 #port = 3306
# On Debian Linux, enable this:
#socket = /var/run/mysqld/mysqld.sock
# ViewVC database name. # ViewVC database name.
#database_name = ViewVC #database_name = ViewVC
# Username and password of user with read/write privileges to the ViewVC # Username and password of user with read/write privileges to the ViewVC
# database. # database.
#user = #user =
#passwd = #passwd =
# Username and password of user with read privileges to the ViewVC # Username and password of user with read privileges to the ViewVC
# database. # database.
#readonly_user = #readonly_user =
#readonly_passwd = #readonly_passwd =
# ViewVC can use Sphinx (http://sphinxsearch.com) full-text search engine
# to index file contents with full history and then search over them.
# Also, Apache Tika console application can be used in TCP server mode to
# add support for indexing binary documents (MS Word, PDF, etc.).
# See tika_server in [utilities].
# Requires Sphinx >= 0.9.9 with a real-time updatable SphinxQL index.
# Index must be created in sphinx.conf by hand and have the following fields:
# rt_field = content
# rt_attr_string = content
# rt_attr_string = mimetype
# rt_attr_timestamp = ci_when
# rt_attr_uint = whoid
# rt_attr_uint = repositoryid
# rt_attr_uint = dirid
# rt_attr_uint = fileid
# rt_attr_uint = revision
# rt_attr_uint = branchid
# Sphinx connection parameters:
#sphinx_host =
#sphinx_port =
#sphinx_socket = /var/run/sphinxql.sock
#sphinx_index = viewvc
# Limit the number of rows returned by a given query to this number. # Limit the number of rows returned by a given query to this number.
#row_limit = 1000 #row_limit = 1000
@ -616,7 +688,7 @@ enabled = 0
# Check if the repository is found in the database before showing # Check if the repository is found in the database before showing
# the query link and RSS feeds. Set to 1 to enable check. # the query link and RSS feeds. Set to 1 to enable check.
# #
# WARNING: Enabling this check adds the cost of a database connection # WARNING: Enabling this check adds the cost of a database connection
# and query to most ViewVC requests. If all your roots are represented # and query to most ViewVC requests. If all your roots are represented
# in the commits database, or if you don't care about the creation of # in the commits database, or if you don't care about the creation of
@ -640,7 +712,7 @@ enabled = 0
# #
# ViewVC allows you to customize its configuration options for # ViewVC allows you to customize its configuration options for
# individual virtual hosts. You might, for example, wish to expose # individual virtual hosts. You might, for example, wish to expose
# all of your Subversion repositories at http://svn.yourdomain.com/viewvc/ # all of your Subversion repositories at http://svn.yourdomain.com/viewvc/
# and all your CVS ones at http://cvs.yourdomain.com/viewvc/, with no # and all your CVS ones at http://cvs.yourdomain.com/viewvc/, with no
# cross-exposure. Using ViewVC's virtual host (vhost) configuration # cross-exposure. Using ViewVC's virtual host (vhost) configuration
# support, you can do this. Simply create two vhost configurations # support, you can do this. Simply create two vhost configurations
@ -671,7 +743,7 @@ enabled = 0
# gui = guiproject.yourdomain.* # gui = guiproject.yourdomain.*
# #
# [vhost-libs/general] # [vhost-libs/general]
# cvs_roots = # cvs_roots =
# svn_roots = svnroot: /var/svn/libs-repos # svn_roots = svnroot: /var/svn/libs-repos
# default_root = svnroot # default_root = svnroot
# #
@ -680,7 +752,7 @@ enabled = 0
# #
# [vhost-gui/general] # [vhost-gui/general]
# cvs_roots = cvsroot: /var/cvs/guiproject # cvs_roots = cvsroot: /var/cvs/guiproject
# svn_roots = # svn_roots =
# default_root = cvsroot # default_root = cvsroot
# #
@ -697,7 +769,7 @@ enabled = 0
# #
# Here is an example showing how to enable Subversion authz-based # Here is an example showing how to enable Subversion authz-based
# authorization for only the single root named "svnroot": # authorization for only the single root named "svnroot":
# #
# [root-svnroot/options] # [root-svnroot/options]
# authorizer = svnauthz # authorizer = svnauthz
# #
@ -726,7 +798,7 @@ enabled = 0
# #
# Tests are case-sensitive. # Tests are case-sensitive.
# #
# NOTE: Again, this is for the hiding of modules within repositories, *not* # NOTE: Again, this is for the hiding of modules within repositories, *not*
# for the hiding of repositories (roots) themselves. # for the hiding of repositories (roots) themselves.
# #
# Some examples: # Some examples:
@ -749,7 +821,7 @@ enabled = 0
# Allow "xml", forbid other modules starting with "x", and allow the rest: # Allow "xml", forbid other modules starting with "x", and allow the rest:
# forbidden = !xml, x*, !* # forbidden = !xml, x*, !*
# #
forbidden = forbidden =
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------
[authz-forbiddenre] [authz-forbiddenre]
@ -792,7 +864,7 @@ forbidden =
# Only allow visibility of HTML files and the directories that hold them: # Only allow visibility of HTML files and the directories that hold them:
# forbiddenre = !^([^/]+|.*(/|\.html))$ # forbiddenre = !^([^/]+|.*(/|\.html))$
# #
forbiddenre = forbiddenre =
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------
[authz-svnauthz] [authz-svnauthz]

View File

@ -24,6 +24,7 @@ import vclib.ccvs
import vclib.svn import vclib.svn
import cvsdb import cvsdb
import viewvc import viewvc
from viewvcmagic import ContentMagic
######################################################################### #########################################################################
# #
@ -47,6 +48,7 @@ class Config:
'root_parents', 'allowed_views', 'mime_types_files') 'root_parents', 'allowed_views', 'mime_types_files')
def __init__(self): def __init__(self):
self.__guesser = None
for section in self._sections: for section in self._sections:
setattr(self, section, _sub_config()) setattr(self, section, _sub_config())
@ -66,7 +68,6 @@ class Config:
if rootname: if rootname:
self._process_root_options(self.parser, rootname) self._process_root_options(self.parser, rootname)
self.expand_root_parents() self.expand_root_parents()
cvsdb.setencs(self.options.encodings.split(':'))
r = {} r = {}
for i in self.rewritehtml.__dict__.keys(): for i in self.rewritehtml.__dict__.keys():
if i[-8:] == '.replace': if i[-8:] == '.replace':
@ -201,7 +202,7 @@ class Config:
pass pass
else: else:
raise IllegalOverrideSection('root', section) raise IllegalOverrideSection('root', section)
def overlay_root_options(self, rootname): def overlay_root_options(self, rootname):
"Overly per-root options atop the existing option set." "Overly per-root options atop the existing option set."
if not self.conf_path: if not self.conf_path:
@ -217,7 +218,7 @@ class Config:
for option in parser.options(section): for option in parser.options(section):
d[option] = parser.get(section, option) d[option] = parser.get(section, option)
return d.items() return d.items()
def get_authorizer_params(self, authorizer, rootname=None): def get_authorizer_params(self, authorizer, rootname=None):
if not self.conf_path: if not self.conf_path:
return {} return {}
@ -236,7 +237,12 @@ class Config:
params[key] = value params[key] = value
params['__config'] = self params['__config'] = self
return params return params
def guesser(self):
if not self.__guesser:
self.__guesser = ContentMagic(self.options.encodings)
return self.__guesser
def set_defaults(self): def set_defaults(self):
"Set some default values in the configuration." "Set some default values in the configuration."
@ -258,6 +264,8 @@ class Config:
self.utilities.svn = '' self.utilities.svn = ''
self.utilities.diff = '' self.utilities.diff = ''
self.utilities.cvsgraph = '' self.utilities.cvsgraph = ''
self.utilities.tika_server = ''
self.utilities.tika_mime_types = ''
self.options.root_as_url_component = 1 self.options.root_as_url_component = 1
self.options.checkout_magic = 0 self.options.checkout_magic = 0
@ -302,7 +310,7 @@ class Config:
self.options.limit_changes = 100 self.options.limit_changes = 100
self.options.cvs_ondisk_charset = 'cp1251' self.options.cvs_ondisk_charset = 'cp1251'
self.options.binary_mime_re = '^(?!text/|.*\Wxml)' self.options.binary_mime_re = '^(?!text/|.*\Wxml)'
self.options.encodings = 'utf-8:cp1251:iso-8859-1' self.options.encodings = 'cp1251:iso-8859-1'
self.templates.diff = None self.templates.diff = None
self.templates.directory = None self.templates.directory = None
@ -316,6 +324,7 @@ class Config:
self.templates.roots = None self.templates.roots = None
self.cvsdb.enabled = 0 self.cvsdb.enabled = 0
self.cvsdb.index_content = 0
self.cvsdb.host = '' self.cvsdb.host = ''
self.cvsdb.port = 3306 self.cvsdb.port = 3306
self.cvsdb.socket = '' self.cvsdb.socket = ''
@ -323,12 +332,17 @@ class Config:
self.cvsdb.user = '' self.cvsdb.user = ''
self.cvsdb.passwd = '' self.cvsdb.passwd = ''
self.cvsdb.readonly_user = '' self.cvsdb.readonly_user = ''
self.cvsdb.readonly_passwd = '' self.cvsdb.readonly_passwd = ''
self.cvsdb.row_limit = 1000 self.cvsdb.row_limit = 1000
self.cvsdb.rss_row_limit = 100 self.cvsdb.rss_row_limit = 100
self.cvsdb.check_database_for_root = 0 self.cvsdb.check_database_for_root = 0
self.cvsdb.fulltext_min_relevance = 0.2 self.cvsdb.fulltext_min_relevance = 0.2
self.cvsdb.sphinx_host = ''
self.cvsdb.sphinx_port = 3307
self.cvsdb.sphinx_socket = ''
self.cvsdb.sphinx_index = ''
def _startswith(somestr, substr): def _startswith(somestr, substr):
return somestr[:len(substr)] == substr return somestr[:len(substr)] == substr

View File

@ -15,6 +15,7 @@ import sys
import string import string
import time import time
import re import re
import cgi
import vclib import vclib
import dbi import dbi
@ -36,22 +37,12 @@ error = "cvsdb error"
## defined to actually be complete; it should run well off of any DBI 2.0 ## defined to actually be complete; it should run well off of any DBI 2.0
## complient database interface ## complient database interface
encs = [ "utf-8", "cp1251", "iso-8859-1" ]
def utf8string(value):
for e in encs:
try:
value = value.decode(e)
break
except: pass
return value.encode("utf-8")
def setencs(e):
global encs
encs = e
class CheckinDatabase: class CheckinDatabase:
def __init__(self, host, port, socket, user, passwd, database, row_limit, min_relevance, authorizer = None): def __init__(self, host, port, socket, user, passwd, database, row_limit, min_relevance, cfg,
authorizer = None, index_content = 0, sphinx_host = None, sphinx_port = None,
sphinx_socket = None, sphinx_index = None):
self.cfg = cfg
self._host = host self._host = host
self._port = port self._port = port
self._socket = socket self._socket = socket
@ -63,11 +54,21 @@ class CheckinDatabase:
self._min_relevance = min_relevance self._min_relevance = min_relevance
self.authorizer = authorizer self.authorizer = authorizer
# Sphinx settings
self.index_content = index_content
self.sphinx_host = sphinx_host
self.sphinx_port = sphinx_port
self.sphinx_socket = sphinx_socket
self.sphinx_index = sphinx_index
## database lookup caches ## database lookup caches
self._get_cache = {} self._get_cache = {}
self._get_id_cache = {} self._get_id_cache = {}
self._desc_id_cache = {} self._desc_id_cache = {}
# Sphinx connection None by default
self.sphinx = None
def Connect(self): def Connect(self):
self.db = dbi.connect( self.db = dbi.connect(
self._host, self._port, self._socket, self._user, self._passwd, self._database) self._host, self._port, self._socket, self._user, self._passwd, self._database)
@ -83,12 +84,17 @@ class CheckinDatabase:
else: else:
self._version = 0 self._version = 0
if self._version > CURRENT_SCHEMA_VERSION: if self._version > CURRENT_SCHEMA_VERSION:
raise DatabaseVersionError("Database version %d is newer than the " raise DatabaseVersionError("Database version %d is newer than the "
"last version supported by this " "last version supported by this "
"software." % (self._version)) "software." % (self._version))
if self.index_content:
self.sphinx = dbi.connect(self.sphinx_host, self.sphinx_port, self.sphinx_socket, '', '', '')
def utf8(self, value):
return self.cfg.guesser().utf8(value)
def sql_get_id(self, table, column, value, auto_set): def sql_get_id(self, table, column, value, auto_set):
value = utf8string(value) value = self.utf8(value)
sql = "SELECT id FROM %s WHERE %s=%%s" % (table, column) sql = "SELECT id FROM %s WHERE %s=%%s" % (table, column)
sql_args = (value, ) sql_args = (value, )
@ -172,7 +178,7 @@ class CheckinDatabase:
temp2[id] = value temp2[id] = value
return value return value
def get_list(self, table, field_index): def get_list(self, table, field_index):
sql = "SELECT * FROM %s" % (table) sql = "SELECT * FROM %s" % (table)
cursor = self.db.cursor() cursor = self.db.cursor()
@ -198,7 +204,7 @@ class CheckinDatabase:
break break
list.append(row[0]) list.append(row[0])
return list return list
def GetMetadataValue(self, name): def GetMetadataValue(self, name):
sql = "SELECT value FROM metadata WHERE name=%s" sql = "SELECT value FROM metadata WHERE name=%s"
sql_args = (name) sql_args = (name)
@ -209,7 +215,7 @@ class CheckinDatabase:
except TypeError: except TypeError:
return None return None
return value return value
def SetMetadataValue(self, name, value): def SetMetadataValue(self, name, value):
assert(self._version > 0) assert(self._version > 0)
sql = "REPLACE INTO metadata (name, value) VALUES (%s, %s)" sql = "REPLACE INTO metadata (name, value) VALUES (%s, %s)"
@ -222,7 +228,7 @@ class CheckinDatabase:
"\tname = %s\n" "\tname = %s\n"
"\tvalue = %s\n" "\tvalue = %s\n"
% (str(e), name, value)) % (str(e), name, value))
def GetBranchID(self, branch, auto_set = 1): def GetBranchID(self, branch, auto_set = 1):
return self.get_id("branches", "branch", branch, auto_set) return self.get_id("branches", "branch", branch, auto_set)
@ -240,13 +246,13 @@ class CheckinDatabase:
def GetFile(self, id): def GetFile(self, id):
return self.get("files", "file", id) return self.get("files", "file", id)
def GetAuthorID(self, author, auto_set = 1): def GetAuthorID(self, author, auto_set = 1):
return self.get_id("people", "who", author, auto_set) return self.get_id("people", "who", author, auto_set)
def GetAuthor(self, id): def GetAuthor(self, id):
return self.get("people", "who", id) return self.get("people", "who", id)
def GetRepositoryID(self, repository, auto_set = 1): def GetRepositoryID(self, repository, auto_set = 1):
return self.get_id("repositories", "repository", repository, auto_set) return self.get_id("repositories", "repository", repository, auto_set)
@ -257,7 +263,7 @@ class CheckinDatabase:
return self.get_list("repositories", repository) return self.get_list("repositories", repository)
def SQLGetDescriptionID(self, description, auto_set = 1): def SQLGetDescriptionID(self, description, auto_set = 1):
description = utf8string(description) description = self.utf8(description)
## lame string hash, blame Netscape -JMP ## lame string hash, blame Netscape -JMP
hash = len(description) hash = len(description)
@ -330,7 +336,7 @@ class CheckinDatabase:
ci_when = cursor.fetchone()[0] ci_when = cursor.fetchone()[0]
except TypeError: except TypeError:
return None return None
return dbi.TicksFromDateTime(ci_when) return dbi.TicksFromDateTime(ci_when)
def AddCommitList(self, commit_list): def AddCommitList(self, commit_list):
@ -338,48 +344,55 @@ class CheckinDatabase:
self.AddCommit(commit) self.AddCommit(commit)
def AddCommit(self, commit): def AddCommit(self, commit):
ci_when = dbi.DateTimeFromTicks(commit.GetTime() or 0.0) props = {
ci_type = commit.GetTypeString() 'type' : commit.GetTypeString(),
who_id = self.GetAuthorID(commit.GetAuthor()) 'ci_when' : dbi.DateTimeFromTicks(commit.GetTime() or 0.0),
repository_id = self.GetRepositoryID(commit.GetRepository()) 'whoid' : self.GetAuthorID(commit.GetAuthor()),
directory_id = self.GetDirectoryID(commit.GetDirectory()) 'repositoryid' : self.GetRepositoryID(commit.GetRepository()),
file_id = self.GetFileID(commit.GetFile()) 'dirid' : self.GetDirectoryID(commit.GetDirectory()),
revision = commit.GetRevision() 'fileid' : self.GetFileID(commit.GetFile()),
sticky_tag = "NULL" 'revision' : commit.GetRevision(),
branch_id = self.GetBranchID(commit.GetBranch()) 'branchid' : self.GetBranchID(commit.GetBranch()),
plus_count = commit.GetPlusCount() or '0' 'addedlines' : commit.GetPlusCount() or '0',
minus_count = commit.GetMinusCount() or '0' 'removedlines' : commit.GetMinusCount() or '0',
description_id = self.GetDescriptionID(commit.GetDescription()) 'descid' : self.GetDescriptionID(commit.GetDescription()),
}
commits_table = self._version >= 1 and 'commits' or 'checkins' commits_table = self._version >= 1 and 'commits' or 'checkins'
sql = "REPLACE INTO %s" % (commits_table)
sql = sql + \
" (type,ci_when,whoid,repositoryid,dirid,fileid,revision,"\
" stickytag,branchid,addedlines,removedlines,descid)"\
"VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
sql_args = (ci_type, ci_when, who_id, repository_id,
directory_id, file_id, revision, sticky_tag, branch_id,
plus_count, minus_count, description_id)
cursor = self.db.cursor() cursor = self.db.cursor()
try: try:
cursor.execute(sql, sql_args) # MySQL-specific INSERT-or-UPDATE with ID retrieval
cursor.execute(
'INSERT INTO '+commits_table+'('+','.join(i for i in props)+') VALUES ('+
', '.join('%s' for i in props)+') ON DUPLICATE KEY UPDATE id=LAST_INSERT_ID(id), '+
', '.join(i+'=VALUES('+i+')' for i in props),
tuple(props[i] for i in props)
)
commit_id = cursor.lastrowid
if self.index_content:
sphcur = self.sphinx.cursor()
content = commit.GetContent()
props['ci_when'] = str(int(commit.GetTime() or 0))
if len(content):
props['content'] = content
# Now, stored MIME type is only needed while searching
# It is guessed again when the file is displayed
props['mimetype'] = commit.GetMimeType()
props['id'] = str(commit_id)
del props['addedlines']
del props['removedlines']
del props['descid']
del props['type']
sphcur.execute(
'INSERT INTO '+self.sphinx_index+'('+','.join(i for i in props)+') VALUES ('+
','.join('%s' for i in props)+')',
tuple(props[i] for i in props)
)
except Exception, e: except Exception, e:
raise Exception("Error adding commit: '%s'\n" print ("Error adding commit: '"+str(e)+"'\nValues were:\n"+
"Values were:\n" "\n".join(i+'='+str(props[i]) for i in props))
"\ttype = %s\n" raise
"\tci_when = %s\n"
"\twhoid = %s\n"
"\trepositoryid = %s\n"
"\tdirid = %s\n"
"\tfileid = %s\n"
"\trevision = %s\n"
"\tstickytag = %s\n"
"\tbranchid = %s\n"
"\taddedlines = %s\n"
"\tremovedlines = %s\n"
"\tdescid = %s\n"
% ((str(e), ) + sql_args))
def SQLQueryListString(self, field, query_entry_list): def SQLQueryListString(self, field, query_entry_list):
sqlList = [] sqlList = []
@ -414,6 +427,67 @@ class CheckinDatabase:
return "(%s)" % (string.join(sqlList, " OR ")) return "(%s)" % (string.join(sqlList, " OR "))
def query_ids(self, in_field, table, id_field, name_field, lst):
if not len(lst):
return None
cond = self.SQLQueryListString(name_field, lst)
cursor = self.db.cursor()
cursor.execute('SELECT %s FROM %s WHERE %s' % (id_field, table, cond))
ids = list(str(row[0]) for row in cursor)
if not len(ids):
return None
return "%s IN (%s)" % (in_field, ','.join(ids))
def CreateSphinxQueryString(self, query):
condList = [
'MATCH(%s)' % (self.db.literal(query.content_query), ),
self.query_ids('repositoryid', 'repositories', 'id', 'repository', query.repository_list),
self.query_ids('branchid', 'branches', 'id', 'branch', query.branch_list),
self.query_ids('dirid', 'dirs', 'id', 'dir', query.directory_list),
self.query_ids('fileid', 'files', 'id', 'file', query.file_list),
self.query_ids('authorid', 'people', 'id', 'who', query.author_list),
self.query_ids('descid', 'descs', 'id', 'description', query.comment_list),
]
if len(query.revision_list):
condList.append("revision IN ("+','.join(self.db.literal(s) for s in query.revision_list)+")")
if query.from_date:
condList.append('ci_when>='+str(dbi.TicksFromDateTime(query.from_date)))
if query.to_date:
condList.append('ci_when<='+str(dbi.TicksFromDateTime(query.to_date)))
if query.sort == 'date':
order_by = 'ORDER BY `ci_when` DESC, `relevance` DESC'
elif query.sort == 'date_rev':
order_by = 'ORDER BY `ci_when` ASC, `relevance` DESC'
else: # /* if query.sort == 'relevance' */
order_by = 'ORDER BY `relevance` DESC'
conditions = string.join((i for i in condList if i), " AND ")
conditions = conditions and "WHERE %s" % conditions
## limit the number of rows requested or we could really slam
## a server with a large database
limit = ""
if query.limit:
limit = "LIMIT %s" % (str(query.limit))
elif self._row_limit:
limit = "LIMIT %s" % (str(self._row_limit))
fields = "id `id`, WEIGHT() `relevance`, `content`, `mimetype`"
return "SELECT %s FROM %s %s %s %s" % (fields, self.sphinx_index, conditions, order_by, limit)
# Get commits by their IDs
def CreateIdQueryString(self, ids):
commits_table = self._version >= 1 and 'commits' or 'checkins'
return (
'SELECT %s.*, repositories.repository AS repository_name, dirs.dir AS dir_name, files.file AS file_name'
' FROM %s, repositories, dirs, files'
' WHERE %s.id IN (%s) AND repositoryid=repositories.id'
' AND dirid=dirs.id AND fileid=files.id' % (commits_table, commits_table, commits_table, ','.join(ids))
)
def CreateSQLQueryString(self, query): def CreateSQLQueryString(self, query):
commits_table = self._version >= 1 and 'commits' or 'checkins' commits_table = self._version >= 1 and 'commits' or 'checkins'
fields = [ fields = [
@ -427,7 +501,7 @@ class CheckinDatabase:
("dirs", "(%s.dirid=dirs.id)" % (commits_table)), ("dirs", "(%s.dirid=dirs.id)" % (commits_table)),
("files", "(%s.fileid=files.id)" % (commits_table))] ("files", "(%s.fileid=files.id)" % (commits_table))]
condList = [] condList = []
if len(query.text_query): if len(query.text_query):
tableList.append(("descs", "(descs.id=%s.descid)" % (commits_table))) tableList.append(("descs", "(descs.id=%s.descid)" % (commits_table)))
temp = "MATCH (descs.description) AGAINST (%s" % (self.db.literal(query.text_query)) temp = "MATCH (descs.description) AGAINST (%s" % (self.db.literal(query.text_query))
@ -435,6 +509,7 @@ class CheckinDatabase:
fields.append("%s) AS relevance" % temp) fields.append("%s) AS relevance" % temp)
else: else:
fields.append("'' AS relevance") fields.append("'' AS relevance")
fields.append("'' AS snippet")
if len(query.repository_list): if len(query.repository_list):
temp = self.SQLQueryListString("repositories.repository", temp = self.SQLQueryListString("repositories.repository",
@ -478,16 +553,18 @@ class CheckinDatabase:
temp = "(%s.ci_when<=\"%s\")" % (commits_table, str(query.to_date)) temp = "(%s.ci_when<=\"%s\")" % (commits_table, str(query.to_date))
condList.append(temp) condList.append(temp)
if query.sort == "date": if query.sort == "relevance" and len(query.text_query):
order_by = "ORDER BY %s.ci_when DESC,descid,%s.repositoryid" % (commits_table, commits_table) order_by = "ORDER BY relevance DESC,%s.ci_when DESC,descid,%s.repositoryid" % (commits_table, commits_table)
elif query.sort == "date_rev":
order_by = "ORDER BY %s.ci_when ASC,descid,%s.repositoryid" % (commits_table, commits_table)
elif query.sort == "author": elif query.sort == "author":
tableList.append(("people", "(%s.whoid=people.id)" % (commits_table))) tableList.append(("people", "(%s.whoid=people.id)" % (commits_table)))
order_by = "ORDER BY people.who,descid,%s.repositoryid" % (commits_table) order_by = "ORDER BY people.who,descid,%s.repositoryid" % (commits_table)
elif query.sort == "file": elif query.sort == "file":
tableList.append(("files", "(%s.fileid=files.id)" % (commits_table))) tableList.append(("files", "(%s.fileid=files.id)" % (commits_table)))
order_by = "ORDER BY files.file,descid,%s.repositoryid" % (commits_table) order_by = "ORDER BY files.file,descid,%s.repositoryid" % (commits_table)
elif query.sort == "relevance" and len(query.text_query): else: # /* if query.sort == "date": */
order_by = "ORDER BY relevance DESC,%s.ci_when DESC,descid,%s.repositoryid" % (commits_table, commits_table) order_by = "ORDER BY %s.ci_when DESC,descid,%s.repositoryid" % (commits_table, commits_table)
## exclude duplicates from the table list, and split out join ## exclude duplicates from the table list, and split out join
## conditions from table names. In future, the join conditions ## conditions from table names. In future, the join conditions
@ -517,7 +594,7 @@ class CheckinDatabase:
fields, tables, conditions, order_by, limit) fields, tables, conditions, order_by, limit)
return sql return sql
def check_commit_access(self, repos, dir, file, rev): def check_commit_access(self, repos, dir, file, rev):
if self.authorizer: if self.authorizer:
rootname = repos.split('/') rootname = repos.split('/')
@ -528,19 +605,60 @@ class CheckinDatabase:
return True return True
def RunQuery(self, query): def RunQuery(self, query):
sql = self.CreateSQLQueryString(query) if len(query.content_query) and self.sphinx:
cursor = self.db.cursor() # Use Sphinx to search on document content
cursor.execute(sql) sql = self.CreateSphinxQueryString(query)
cursor = self.sphinx.cursor()
cursor.execute(sql)
sphinx_rows = list((str(docid), rel, content, mimetype) for docid, rel, content, mimetype in cursor)
if len(sphinx_rows):
# Fetch snippets
snippet_options = {
'around': 15,
'limit': 200,
'before_match': '<span style="color:red">',
'after_match': '</span>',
'chunk_separator': ' ... ',
}
preformatted_mime = 'text/(?!html|xml).*'
snippets = {}
bm_html = cgi.escape(snippet_options['before_match'])
am_html = cgi.escape(snippet_options['after_match'])
for docid, rel, content, mimetype in sphinx_rows:
cursor.execute(
'CALL SNIPPETS(%s, %s, %s'+''.join(', %s AS '+i for i in snippet_options)+')',
(content, self.sphinx_index, query.content_query) + tuple(snippet_options.values())
)
s, = cursor.fetchone()
s = cgi.escape(s)
if re.match(preformatted_mime, mimetype):
s = s.replace('\n', '<br />')
s = s.replace(bm_html, snippet_options['before_match'])
s = s.replace(am_html, snippet_options['after_match'])
snippets[docid] = s
# Fetch all fields from MySQL
sql = self.CreateIdQueryString((docid for (docid, _, _, _) in sphinx_rows))
cursor = self.db.cursor()
cursor.execute(sql)
byid = {}
for row in cursor:
byid[str(row[0])] = row
rows = list(byid[docid] + (rel, snippets[docid]) for (docid, rel, _, _) in sphinx_rows if docid in byid)
else:
rows = []
else:
# Use regular queries when document content is not searched
sql = self.CreateSQLQueryString(query)
cursor = self.db.cursor()
cursor.execute(sql)
rows = list(cursor)
while 1: # Convert rows to commit objects
row = cursor.fetchone() for row in rows:
if not row: (dbId, dbType, dbCI_When, dbAuthorID, dbRepositoryID, dbDirID,
break
(dbType, dbCI_When, dbAuthorID, dbRepositoryID, dbDirID,
dbFileID, dbRevision, dbStickyTag, dbBranchID, dbAddedLines, dbFileID, dbRevision, dbStickyTag, dbBranchID, dbAddedLines,
dbRemovedLines, dbDescID, dbRepositoryName, dbDirName, dbRemovedLines, dbDescID, dbRepositoryName, dbDirName,
dbFileName, dbRelevance) = row dbFileName, dbRelevance, dbSnippet) = row
if not self.check_commit_access(dbRepositoryName, dbDirName, dbFileName, dbRevision): if not self.check_commit_access(dbRepositoryName, dbDirName, dbFileName, dbRevision):
continue continue
@ -564,6 +682,7 @@ class CheckinDatabase:
commit.SetMinusCount(dbRemovedLines) commit.SetMinusCount(dbRemovedLines)
commit.SetDescriptionID(dbDescID) commit.SetDescriptionID(dbDescID)
commit.SetRelevance(dbRelevance) commit.SetRelevance(dbRelevance)
commit.SetSnippet(dbSnippet)
query.AddCommit(commit) query.AddCommit(commit)
@ -623,46 +742,21 @@ class CheckinDatabase:
raise UnknownRepositoryError("Unknown repository '%s'" raise UnknownRepositoryError("Unknown repository '%s'"
% (repository)) % (repository))
if (self._version >= 1): checkins_table = self._version >= 1 and 'commits' or 'checkins'
self.sql_delete('repositories', 'id', rep_id) self.sql_delete('repositories', 'id', rep_id)
self.sql_purge('commits', 'repositoryid', 'id', 'repositories') self.sql_purge(checkins_table, 'repositoryid', 'id', 'repositories')
self.sql_purge('files', 'id', 'fileid', 'commits') self.sql_purge('files', 'id', 'fileid', checkins_table)
self.sql_purge('dirs', 'id', 'dirid', 'commits') self.sql_purge('dirs', 'id', 'dirid', checkins_table)
self.sql_purge('branches', 'id', 'branchid', 'commits') self.sql_purge('branches', 'id', 'branchid', checkins_table)
self.sql_purge('descs', 'id', 'descid', 'commits') self.sql_purge('descs', 'id', 'descid', checkins_table)
self.sql_purge('people', 'id', 'whoid', 'commits') self.sql_purge('people', 'id', 'whoid', checkins_table)
else:
sql = "SELECT * FROM checkins WHERE repositoryid=%s"
sql_args = (rep_id, )
cursor = self.db.cursor()
cursor.execute(sql, sql_args)
checkins = []
while 1:
try:
(ci_type, ci_when, who_id, repository_id,
dir_id, file_id, revision, sticky_tag, branch_id,
plus_count, minus_count, description_id) = \
cursor.fetchone()
except TypeError:
break
checkins.append([file_id, dir_id, branch_id,
description_id, who_id])
#self.sql_delete('repositories', 'id', rep_id)
self.sql_delete('checkins', 'repositoryid', rep_id)
for checkin in checkins:
self.sql_delete('files', 'id', checkin[0], 'fileid')
self.sql_delete('dirs', 'id', checkin[1], 'dirid')
self.sql_delete('branches', 'id', checkin[2], 'branchid')
self.sql_delete('descs', 'id', checkin[3], 'descid')
self.sql_delete('people', 'id', checkin[4], 'whoid')
# Reset all internal id caches. We could be choosier here, # Reset all internal id caches. We could be choosier here,
# but let's just be as safe as possible. # but let's just be as safe as possible.
self._get_cache = {} self._get_cache = {}
self._get_id_cache = {} self._get_id_cache = {}
self._desc_id_cache = {} self._desc_id_cache = {}
class DatabaseVersionError(Exception): class DatabaseVersionError(Exception):
pass pass
@ -678,7 +772,7 @@ class Commit:
CHANGE = 0 CHANGE = 0
ADD = 1 ADD = 1
REMOVE = 2 REMOVE = 2
def __init__(self): def __init__(self):
self.__directory = '' self.__directory = ''
self.__file = '' self.__file = ''
@ -690,15 +784,20 @@ class Commit:
self.__minuscount = '' self.__minuscount = ''
self.__description = '' self.__description = ''
self.__relevance = '' self.__relevance = ''
self.__snippet = ''
self.__gmt_time = 0.0 self.__gmt_time = 0.0
self.__type = Commit.CHANGE self.__type = Commit.CHANGE
self.__content = ''
self.__mimetype = ''
self.__base_path = ''
self.__base_rev = ''
def SetRepository(self, repository): def SetRepository(self, repository):
self.__repository = repository self.__repository = repository
def GetRepository(self): def GetRepository(self):
return self.__repository return self.__repository
def SetDirectory(self, dir): def SetDirectory(self, dir):
self.__directory = dir self.__directory = dir
@ -710,7 +809,7 @@ class Commit:
def GetFile(self): def GetFile(self):
return self.__file return self.__file
def SetRevision(self, revision): def SetRevision(self, revision):
self.__revision = revision self.__revision = revision
@ -758,12 +857,19 @@ class Commit:
def GetDescription(self): def GetDescription(self):
return self.__description return self.__description
# Relevance and snippet are used when querying commit database
def SetRelevance(self, relevance): def SetRelevance(self, relevance):
self.__relevance = relevance self.__relevance = relevance
def GetRelevance(self): def GetRelevance(self):
return self.__relevance return self.__relevance
def SetSnippet(self, snippet):
self.__snippet = snippet
def GetSnippet(self):
return self.__snippet
def SetTypeChange(self): def SetTypeChange(self):
self.__type = Commit.CHANGE self.__type = Commit.CHANGE
@ -784,66 +890,80 @@ class Commit:
elif self.__type == Commit.REMOVE: elif self.__type == Commit.REMOVE:
return 'Remove' return 'Remove'
# File content (extracted text), optional, indexed with Sphinx
def SetContent(self, content):
self.__content = content
def GetContent(self):
return self.__content
# MIME type, optional, now only stored in Sphinx
def SetMimeType(self, mimetype):
self.__mimetype = mimetype
def GetMimeType(self):
return self.__mimetype
## LazyCommit overrides a few methods of Commit to only retrieve ## LazyCommit overrides a few methods of Commit to only retrieve
## it's properties as they are needed ## it's properties as they are needed
class LazyCommit(Commit): class LazyCommit(Commit):
def __init__(self, db): def __init__(self, db):
Commit.__init__(self) Commit.__init__(self)
self.__db = db self.__db = db
def SetFileID(self, dbFileID): def SetFileID(self, dbFileID):
self.__dbFileID = dbFileID self.__dbFileID = dbFileID
def GetFileID(self): def GetFileID(self):
return self.__dbFileID return self.__dbFileID
def GetFile(self): def GetFile(self):
return self.__db.GetFile(self.__dbFileID) return self.__db.GetFile(self.__dbFileID)
def SetDirectoryID(self, dbDirID): def SetDirectoryID(self, dbDirID):
self.__dbDirID = dbDirID self.__dbDirID = dbDirID
def GetDirectoryID(self): def GetDirectoryID(self):
return self.__dbDirID return self.__dbDirID
def GetDirectory(self): def GetDirectory(self):
return self.__db.GetDirectory(self.__dbDirID) return self.__db.GetDirectory(self.__dbDirID)
def SetRepositoryID(self, dbRepositoryID): def SetRepositoryID(self, dbRepositoryID):
self.__dbRepositoryID = dbRepositoryID self.__dbRepositoryID = dbRepositoryID
def GetRepositoryID(self): def GetRepositoryID(self):
return self.__dbRepositoryID return self.__dbRepositoryID
def GetRepository(self): def GetRepository(self):
return self.__db.GetRepository(self.__dbRepositoryID) return self.__db.GetRepository(self.__dbRepositoryID)
def SetAuthorID(self, dbAuthorID): def SetAuthorID(self, dbAuthorID):
self.__dbAuthorID = dbAuthorID self.__dbAuthorID = dbAuthorID
def GetAuthorID(self): def GetAuthorID(self):
return self.__dbAuthorID return self.__dbAuthorID
def GetAuthor(self): def GetAuthor(self):
return self.__db.GetAuthor(self.__dbAuthorID) return self.__db.GetAuthor(self.__dbAuthorID)
def SetBranchID(self, dbBranchID): def SetBranchID(self, dbBranchID):
self.__dbBranchID = dbBranchID self.__dbBranchID = dbBranchID
def GetBranchID(self): def GetBranchID(self):
return self.__dbBranchID return self.__dbBranchID
def GetBranch(self): def GetBranch(self):
return self.__db.GetBranch(self.__dbBranchID) return self.__db.GetBranch(self.__dbBranchID)
def SetDescriptionID(self, dbDescID): def SetDescriptionID(self, dbDescID):
self.__dbDescID = dbDescID self.__dbDescID = dbDescID
def GetDescriptionID(self): def GetDescriptionID(self):
return self.__dbDescID return self.__dbDescID
def GetDescription(self): def GetDescription(self):
return self.__db.GetDescription(self.__dbDescID) return self.__db.GetDescription(self.__dbDescID)
## QueryEntry holds data on one match-type in the SQL database ## QueryEntry holds data on one match-type in the SQL database
## match is: "exact", "like", or "regex" ## match is: "exact", "like", or "regex"
@ -858,8 +978,8 @@ class CheckinDatabaseQuery:
def __init__(self): def __init__(self):
## sorting ## sorting
self.sort = "date" self.sort = "date"
## repository to query ## repository, branch, etc to query
self.repository_list = [] self.repository_list = []
self.branch_list = [] self.branch_list = []
self.directory_list = [] self.directory_list = []
@ -867,7 +987,11 @@ class CheckinDatabaseQuery:
self.revision_list = [] self.revision_list = []
self.author_list = [] self.author_list = []
self.comment_list = [] self.comment_list = []
## text_query = Fulltext query on comments
## content_query = Fulltext query on content
self.text_query = "" self.text_query = ""
self.content_query = ""
## date range in DBI 2.0 timedate objects ## date range in DBI 2.0 timedate objects
self.from_date = None self.from_date = None
@ -886,6 +1010,9 @@ class CheckinDatabaseQuery:
def SetTextQuery(self, query): def SetTextQuery(self, query):
self.text_query = query self.text_query = query
def SetContentQuery(self, query):
self.content_query = query
def SetRepository(self, repository, match = "exact"): def SetRepository(self, repository, match = "exact"):
self.repository_list.append(QueryEntry(repository, match)) self.repository_list.append(QueryEntry(repository, match))
@ -921,7 +1048,7 @@ class CheckinDatabaseQuery:
def SetFromDateHoursAgo(self, hours_ago): def SetFromDateHoursAgo(self, hours_ago):
ticks = time.time() - (3600 * hours_ago) ticks = time.time() - (3600 * hours_ago)
self.from_date = dbi.DateTimeFromTicks(ticks) self.from_date = dbi.DateTimeFromTicks(ticks)
def SetFromDateDaysAgo(self, days_ago): def SetFromDateDaysAgo(self, days_ago):
ticks = time.time() - (86400 * days_ago) ticks = time.time() - (86400 * days_ago)
self.from_date = dbi.DateTimeFromTicks(ticks) self.from_date = dbi.DateTimeFromTicks(ticks)
@ -942,7 +1069,7 @@ class CheckinDatabaseQuery:
## ##
def CreateCommit(): def CreateCommit():
return Commit() return Commit()
def CreateCheckinQuery(): def CreateCheckinQuery():
return CheckinDatabaseQuery() return CheckinDatabaseQuery()
@ -953,9 +1080,23 @@ def ConnectDatabase(cfg, authorizer=None, readonly=0):
else: else:
user = cfg.cvsdb.user user = cfg.cvsdb.user
passwd = cfg.cvsdb.passwd passwd = cfg.cvsdb.passwd
db = CheckinDatabase(cfg.cvsdb.host, cfg.cvsdb.port, cfg.cvsdb.socket, user, passwd, db = CheckinDatabase(
cfg.cvsdb.database_name, cfg.cvsdb.row_limit, cfg.cvsdb.fulltext_min_relevance, host = cfg.cvsdb.host,
authorizer) port = cfg.cvsdb.port,
socket = cfg.cvsdb.socket,
user = user,
passwd = passwd,
database = cfg.cvsdb.database_name,
row_limit = cfg.cvsdb.row_limit,
min_relevance = cfg.cvsdb.fulltext_min_relevance,
authorizer = authorizer,
index_content = cfg.cvsdb.index_content,
sphinx_host = cfg.cvsdb.sphinx_host,
sphinx_port = int(cfg.cvsdb.sphinx_port),
sphinx_socket = cfg.cvsdb.sphinx_socket,
sphinx_index = cfg.cvsdb.sphinx_index,
cfg = cfg,
)
db.Connect() db.Connect()
return db return db

View File

@ -31,8 +31,8 @@ import popen
class BaseCVSRepository(vclib.Repository): class BaseCVSRepository(vclib.Repository):
def __init__(self, name, rootpath, authorizer, utilities): def __init__(self, name, rootpath, authorizer, utilities):
if not os.path.isdir(rootpath): if not os.path.isdir(rootpath):
raise vclib.ReposNotFound(name) raise vclib.ReposNotFound(name)
self.name = name self.name = name
self.rootpath = rootpath self.rootpath = rootpath
self.auth = authorizer self.auth = authorizer
@ -53,7 +53,7 @@ class BaseCVSRepository(vclib.Repository):
def authorizer(self): def authorizer(self):
return self.auth return self.auth
def itemtype(self, path_parts, rev): def itemtype(self, path_parts, rev):
basepath = self._getpath(path_parts) basepath = self._getpath(path_parts)
kind = None kind = None
@ -74,12 +74,12 @@ class BaseCVSRepository(vclib.Repository):
def itemprops(self, path_parts, rev): def itemprops(self, path_parts, rev):
self.itemtype(path_parts, rev) # does auth-check self.itemtype(path_parts, rev) # does auth-check
return {} # CVS doesn't support properties return {} # CVS doesn't support properties
def listdir(self, path_parts, rev, options): def listdir(self, path_parts, rev, options):
if self.itemtype(path_parts, rev) != vclib.DIR: # does auth-check if self.itemtype(path_parts, rev) != vclib.DIR: # does auth-check
raise vclib.Error("Path '%s' is not a directory." raise vclib.Error("Path '%s' is not a directory."
% (string.join(path_parts, "/"))) % (string.join(path_parts, "/")))
# Only RCS files (*,v) and subdirs are returned. # Only RCS files (*,v) and subdirs are returned.
data = [ ] data = [ ]
full_name = self._getpath(path_parts) full_name = self._getpath(path_parts)
@ -115,7 +115,7 @@ class BaseCVSRepository(vclib.Repository):
data.append(CVSDirEntry(name, kind, errors, 1)) data.append(CVSDirEntry(name, kind, errors, 1))
return data return data
def _getpath(self, path_parts): def _getpath(self, path_parts):
return apply(os.path.join, (self.rootpath,) + tuple(path_parts)) return apply(os.path.join, (self.rootpath,) + tuple(path_parts))
@ -177,7 +177,7 @@ class BinCVSRepository(BaseCVSRepository):
used_rlog = 0 used_rlog = 0
tip_rev = None # used only if we have to fallback to using rlog tip_rev = None # used only if we have to fallback to using rlog
fp = self.rcs_popen('co', (rev_flag, full_name), 'rb') fp = self.rcs_popen('co', (rev_flag, full_name), 'rb')
try: try:
filename, revision = _parse_co_header(fp) filename, revision = _parse_co_header(fp)
except COMissingRevision: except COMissingRevision:
@ -191,14 +191,14 @@ class BinCVSRepository(BaseCVSRepository):
used_rlog = 1 used_rlog = 1
if not tip_rev: if not tip_rev:
raise vclib.Error("Unable to find valid revision") raise vclib.Error("Unable to find valid revision")
fp = self.rcs_popen('co', ('-p' + tip_rev.string, full_name), 'rb') fp = self.rcs_popen('co', ('-p' + tip_rev.string, full_name), 'rb')
filename, revision = _parse_co_header(fp) filename, revision = _parse_co_header(fp)
if filename is None: if filename is None:
# CVSNT's co exits without any output if a dead revision is requested. # CVSNT's co exits without any output if a dead revision is requested.
# Bug at http://www.cvsnt.org/cgi-bin/bugzilla/show_bug.cgi?id=190 # Bug at http://www.cvsnt.org/cgi-bin/bugzilla/show_bug.cgi?id=190
# As a workaround, we invoke rlog to find the first non-dead revision # As a workaround, we invoke rlog to find the first non-dead revision
# that precedes it and check out that revision instead. Of course, # that precedes it and check out that revision instead. Of course,
# if we've already invoked rlog above, we just reuse its output. # if we've already invoked rlog above, we just reuse its output.
if not used_rlog: if not used_rlog:
tip_rev = self._get_tip_revision(full_name + ',v', rev) tip_rev = self._get_tip_revision(full_name + ',v', rev)
@ -207,7 +207,7 @@ class BinCVSRepository(BaseCVSRepository):
raise vclib.Error( raise vclib.Error(
'Could not find non-dead revision preceding "%s"' % rev) 'Could not find non-dead revision preceding "%s"' % rev)
fp = self.rcs_popen('co', ('-p' + tip_rev.undead.string, fp = self.rcs_popen('co', ('-p' + tip_rev.undead.string,
full_name), 'rb') full_name), 'rb')
filename, revision = _parse_co_header(fp) filename, revision = _parse_co_header(fp)
if filename is None: if filename is None:
@ -278,7 +278,7 @@ class BinCVSRepository(BaseCVSRepository):
if self.itemtype(path_parts, rev) != vclib.FILE: # does auth-check if self.itemtype(path_parts, rev) != vclib.FILE: # does auth-check
raise vclib.Error("Path '%s' is not a file." raise vclib.Error("Path '%s' is not a file."
% (string.join(path_parts, "/"))) % (string.join(path_parts, "/")))
# Invoke rlog # Invoke rlog
rcsfile = self.rcsfile(path_parts, 1) rcsfile = self.rcsfile(path_parts, 1)
if rev and options.get('cvs_pass_rev', 0): if rev and options.get('cvs_pass_rev', 0):
@ -341,7 +341,7 @@ class BinCVSRepository(BaseCVSRepository):
def revinfo(self, rev): def revinfo(self, rev):
raise vclib.UnsupportedFeature raise vclib.UnsupportedFeature
def rawdiff(self, path_parts1, rev1, path_parts2, rev2, type, options={}): def rawdiff(self, path_parts1, rev1, path_parts2, rev2, type, options={}):
"""see vclib.Repository.rawdiff docstring """see vclib.Repository.rawdiff docstring
@ -439,9 +439,9 @@ def _match_revs_tags(revlist, taglist):
example: if revision is 1.2.3.4, parent is 1.2 example: if revision is 1.2.3.4, parent is 1.2
"undead" "undead"
If the revision is dead, then this is a reference to the first If the revision is dead, then this is a reference to the first
previous revision which isn't dead, otherwise it's a reference previous revision which isn't dead, otherwise it's a reference
to itself. If all the previous revisions are dead it's None. to itself. If all the previous revisions are dead it's None.
"branch_number" "branch_number"
tuple representing branch number or empty tuple if on trunk tuple representing branch number or empty tuple if on trunk
@ -653,7 +653,7 @@ def _parse_co_header(fp):
pass pass
else: else:
break break
raise COMalformedOutput, "Unable to find revision in co output stream" raise COMalformedOutput, "Unable to find revision in co output stream"
# if your rlog doesn't use 77 '=' characters, then this must change # if your rlog doesn't use 77 '=' characters, then this must change
@ -674,7 +674,7 @@ _EOF_ERROR = 'error message found' # rlog issued an error
# ^rlog\: (.*)(?:\:\d+)?\: (.*)$ # ^rlog\: (.*)(?:\:\d+)?\: (.*)$
# #
# But for some reason the windows version of rlog omits the "rlog: " prefix # But for some reason the windows version of rlog omits the "rlog: " prefix
# for the first error message when the standard error stream has been # for the first error message when the standard error stream has been
# redirected to a file or pipe. (the prefix is present in subsequent errors # redirected to a file or pipe. (the prefix is present in subsequent errors
# and when rlog is run from the console). So the expression below is more # and when rlog is run from the console). So the expression below is more
# complicated # complicated
@ -703,7 +703,7 @@ def _parse_log_header(fp):
Returns: filename, default branch, tag dictionary, lock dictionary, Returns: filename, default branch, tag dictionary, lock dictionary,
rlog error message, and eof flag rlog error message, and eof flag
""" """
filename = head = branch = msg = "" filename = head = branch = msg = ""
taginfo = { } # tag name => number taginfo = { } # tag name => number
lockinfo = { } # revision => locker lockinfo = { } # revision => locker
@ -732,7 +732,7 @@ def _parse_log_header(fp):
else: else:
# oops. this line isn't lock info. stop parsing tags. # oops. this line isn't lock info. stop parsing tags.
state = 0 state = 0
if state == 0: if state == 0:
if line[:9] == 'RCS file:': if line[:9] == 'RCS file:':
filename = line[10:-1] filename = line[10:-1]
@ -902,7 +902,7 @@ def _file_log(revs, taginfo, lockinfo, cur_branch, filter):
except ValueError: except ValueError:
view_tag = None view_tag = None
else: else:
tags.append(view_tag) tags.append(view_tag)
# Match up tags and revisions # Match up tags and revisions
_match_revs_tags(revs, tags) _match_revs_tags(revs, tags)
@ -910,13 +910,13 @@ def _file_log(revs, taginfo, lockinfo, cur_branch, filter):
# Match up lockinfo and revision # Match up lockinfo and revision
for rev in revs: for rev in revs:
rev.lockinfo = lockinfo.get(rev.string) rev.lockinfo = lockinfo.get(rev.string)
# Add artificial ViewVC tag HEAD, which acts like a non-branch tag pointing # Add artificial ViewVC tag HEAD, which acts like a non-branch tag pointing
# at the latest revision on the MAIN branch. The HEAD revision doesn't have # at the latest revision on the MAIN branch. The HEAD revision doesn't have
# anything to do with the "head" revision number specified in the RCS file # anything to do with the "head" revision number specified in the RCS file
# and in rlog output. HEAD refers to the revision that the CVS and RCS co # and in rlog output. HEAD refers to the revision that the CVS and RCS co
# commands will check out by default, whereas the "head" field just refers # commands will check out by default, whereas the "head" field just refers
# to the highest revision on the trunk. # to the highest revision on the trunk.
taginfo['HEAD'] = _add_tag('HEAD', taginfo['MAIN'].co_rev) taginfo['HEAD'] = _add_tag('HEAD', taginfo['MAIN'].co_rev)
# Determine what revisions to return # Determine what revisions to return
@ -954,7 +954,7 @@ def _file_log(revs, taginfo, lockinfo, cur_branch, filter):
_remove_tag(view_tag) _remove_tag(view_tag)
else: else:
filtered_revs = revs filtered_revs = revs
return filtered_revs return filtered_revs
def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs): def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
@ -1004,7 +1004,7 @@ def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
= _parse_log_header(rlog) = _parse_log_header(rlog)
if eof == _EOF_LOG: if eof == _EOF_LOG:
# the rlog output ended early. this can happen on errors that rlog # the rlog output ended early. this can happen on errors that rlog
# thinks are so serious that it stops parsing the current file and # thinks are so serious that it stops parsing the current file and
# refuses to parse any of the files that come after it. one of the # refuses to parse any of the files that come after it. one of the
# errors that triggers this obnoxious behavior looks like: # errors that triggers this obnoxious behavior looks like:
@ -1052,8 +1052,8 @@ def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
tag = None tag = None
# we don't care about the specific values -- just the keys and whether # we don't care about the specific values -- just the keys and whether
# the values point to branches or revisions. this the fastest way to # the values point to branches or revisions. this the fastest way to
# merge the set of keys and keep values that allow us to make the # merge the set of keys and keep values that allow us to make the
# distinction between branch tags and normal tags # distinction between branch tags and normal tags
alltags.update(taginfo) alltags.update(taginfo)
@ -1098,7 +1098,7 @@ def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
file.dead = 0 file.dead = 0
#file.errors.append("No revisions exist on %s" % (view_tag or "MAIN")) #file.errors.append("No revisions exist on %s" % (view_tag or "MAIN"))
file.absent = 1 file.absent = 1
# done with this file now, skip the rest of this file's revisions # done with this file now, skip the rest of this file's revisions
if not eof: if not eof:
_skip_file(rlog) _skip_file(rlog)
@ -1211,7 +1211,7 @@ def _newest_file(dirpath):
newest_time = 0 newest_time = 0
### FIXME: This sucker is leaking unauthorized paths! ### ### FIXME: This sucker is leaking unauthorized paths! ###
for subfile in os.listdir(dirpath): for subfile in os.listdir(dirpath):
### filter CVS locks? stale NFS handles? ### filter CVS locks? stale NFS handles?
if subfile[-2:] != ',v': if subfile[-2:] != ',v':

View File

@ -1,4 +1,3 @@
#
# Copyright (C) 1999-2009 The ViewCVS Group. All Rights Reserved. # Copyright (C) 1999-2009 The ViewCVS Group. All Rights Reserved.
# #
# By using this file, you agree to the terms and conditions set forth in # By using this file, you agree to the terms and conditions set forth in
@ -68,7 +67,6 @@ docroot_magic_path = '*docroot*'
viewcvs_mime_type = 'text/vnd.viewcvs-markup' viewcvs_mime_type = 'text/vnd.viewcvs-markup'
alt_mime_type = 'text/x-cvsweb-markup' alt_mime_type = 'text/x-cvsweb-markup'
view_roots_magic = '*viewroots*' view_roots_magic = '*viewroots*'
magic_buf_size = 4096
default_mime_type = 'application/octet-stream' default_mime_type = 'application/octet-stream'
# Put here the variables we need in order to hold our state - they # Put here the variables we need in order to hold our state - they
@ -121,9 +119,8 @@ class Request:
# check for an authenticated username # check for an authenticated username
self.username = server.getenv('REMOTE_USER') self.username = server.getenv('REMOTE_USER')
# construct MIME magic # repository object cache
self.ms = None self.all_repos = {}
self.ms_fail = 0
# if we allow compressed output, see if the client does too # if we allow compressed output, see if the client does too
self.gzip_compress_level = 0 self.gzip_compress_level = 0
@ -134,6 +131,9 @@ class Request:
string.split(http_accept_encoding, ","))): string.split(http_accept_encoding, ","))):
self.gzip_compress_level = 9 # make this configurable? self.gzip_compress_level = 9 # make this configurable?
def utf8(self, value):
  """Recode *value*, a string from a CVS repository whose charset is
  unknown, into UTF-8 via the configured ContentMagic charset guesser."""
  return self.cfg.guesser().utf8(value)
def create_repos(self, rootname): def create_repos(self, rootname):
if not rootname: if not rootname:
return None return None
@ -677,7 +677,7 @@ def _validate_mimetype(value):
return value in (viewcvs_mime_type, alt_mime_type, 'text/plain') return value in (viewcvs_mime_type, alt_mime_type, 'text/plain')
# obvious things here. note that we don't need uppercase for alpha. # obvious things here. note that we don't need uppercase for alpha.
_re_validate_alpha = re.compile('^[a-z]+$') _re_validate_alpha = re.compile('^[a-z_]+$')
_re_validate_number = re.compile('^[0-9]+$') _re_validate_number = re.compile('^[0-9]+$')
_re_validate_boolint = re.compile('^[01]$') _re_validate_boolint = re.compile('^[01]$')
@ -743,6 +743,7 @@ _legal_params = {
'who_match' : _re_validate_alpha, 'who_match' : _re_validate_alpha,
'comment' : None, 'comment' : None,
'comment_match' : _re_validate_alpha, 'comment_match' : _re_validate_alpha,
'search_content': None,
'querysort' : _re_validate_alpha, 'querysort' : _re_validate_alpha,
'date' : _re_validate_alpha, 'date' : _re_validate_alpha,
'hours' : _re_validate_number, 'hours' : _re_validate_number,
@ -988,7 +989,7 @@ def nav_path(request):
is_last = len(path_parts) == len(request.path_parts) is_last = len(path_parts) == len(request.path_parts)
if request.roottype == 'cvs': if request.roottype == 'cvs':
item = _item(name=cvsdb.utf8string(part), href=None) item = _item(name=request.utf8(part), href=None)
else: else:
item = _item(name=part, href=None) item = _item(name=part, href=None)
@ -1248,7 +1249,7 @@ def common_template_data(request, revision=None, mime_type=None):
cfg = request.cfg cfg = request.cfg
where = request.where where = request.where
if request.roottype == 'cvs': if request.roottype == 'cvs':
where = cvsdb.utf8string(where) where = request.utf8(where)
where = request.server.escape(where) where = request.server.escape(where)
# Initialize data dictionary members (sorted alphanumerically) # Initialize data dictionary members (sorted alphanumerically)
@ -1444,28 +1445,31 @@ def markup_stream_pygments(request, cfg, blame_data, fp, filename, mime_type):
get_lexer_for_mimetype, \ get_lexer_for_mimetype, \
get_lexer_for_filename get_lexer_for_filename
from pygments.lexers._mapping import LEXERS from pygments.lexers._mapping import LEXERS
# Hack for shell mime types:
LEXERS['BashLexer'] = ('pygments.lexers.other', 'Bash', ('bash', 'sh'), ('*.sh',), ('application/x-sh', 'application/x-shellscript', 'text/x-sh', 'text/x-shellscript')) LEXERS['BashLexer'] = ('pygments.lexers.other', 'Bash', ('bash', 'sh'), ('*.sh',), ('application/x-sh', 'application/x-shellscript', 'text/x-sh', 'text/x-shellscript'))
encoding = 'guess'
if cfg.options.detect_encoding:
try:
import chardet
encoding = 'chardet'
except (SyntaxError, ImportError):
pass
try: try:
lexer = get_lexer_for_mimetype(mime_type, lexer = get_lexer_for_mimetype(mime_type,
encoding=encoding, encoding='utf-8',
stripnl=False) stripnl=False)
except ClassNotFound: except ClassNotFound:
try: try:
lexer = get_lexer_for_filename(filename, lexer = get_lexer_for_filename(filename,
encoding=encoding, encoding='utf-8',
stripnl=False) stripnl=False)
except ClassNotFound: except ClassNotFound:
use_pygments = 0 use_pygments = 0
except ImportError: except ImportError:
use_pygments = 0 use_pygments = 0
# Detect encoding by calling chardet ourselves,
# to support it in non-highlighting mode
content = fp.read()
c, encoding = cfg.guesser().guess_charset(content)
if encoding:
content = c
else:
encoding = 'unknown'
# If we aren't going to be highlighting anything, just return the # If we aren't going to be highlighting anything, just return the
# BLAME_SOURCE. If there's no blame_source, we'll generate a fake # BLAME_SOURCE. If there's no blame_source, we'll generate a fake
# one from the file contents we fetch with PATH and REV. # one from the file contents we fetch with PATH and REV.
@ -1475,11 +1479,7 @@ def markup_stream_pygments(request, cfg, blame_data, fp, filename, mime_type):
else: else:
lines = [] lines = []
line_no = 0 line_no = 0
while 1: for line in content.split('\n'):
line = fp.readline()
if not line:
break
line = cvsdb.utf8string(line)
line_no = line_no + 1 line_no = line_no + 1
item = vclib.Annotation(cgi.escape(line), line_no, item = vclib.Annotation(cgi.escape(line), line_no,
None, None, None, None) None, None, None, None)
@ -1508,19 +1508,11 @@ def markup_stream_pygments(request, cfg, blame_data, fp, filename, mime_type):
self.blame_data.append(item) self.blame_data.append(item)
self.line_no = self.line_no + 1 self.line_no = self.line_no + 1
ps = PygmentsSink(blame_source) ps = PygmentsSink(blame_source)
fpd = fp.read() highlight(content, lexer,
try:
fpdat = unicode(fpd,'utf-8')
except:
try:
fpdat = unicode(fpd,'cp1251')
except:
fpdat = fpd
highlight(fpdat, lexer,
HtmlFormatter(nowrap=True, HtmlFormatter(nowrap=True,
classprefix='pygments-', classprefix='pygments-',
encoding='utf-8'), ps) encoding='utf-8'), ps)
return ps.blame_data return ps.blame_data, encoding
def make_time_string(date, cfg): def make_time_string(date, cfg):
"""Returns formatted date string in either local time or UTC. """Returns formatted date string in either local time or UTC.
@ -1594,6 +1586,7 @@ def calculate_mime_type(request, path_parts, rev):
return mime_type return mime_type
except: except:
pass pass
# FIXME rewrite to use viewvcmagic
return guess_mime(path_parts[-1]) return guess_mime(path_parts[-1])
def markup_or_annotate(request, is_annotate): def markup_or_annotate(request, is_annotate):
@ -1605,21 +1598,12 @@ def markup_or_annotate(request, is_annotate):
mime_type = calculate_mime_type(request, path, rev) mime_type = calculate_mime_type(request, path, rev)
if not mime_type or mime_type == default_mime_type: if not mime_type or mime_type == default_mime_type:
if request.ms is None and not request.ms_fail: try:
try: fp, revision = request.repos.openfile(path, rev)
import magic mime_type = request.cfg.guesser().guess_mime(None, None, fp)
request.ms = magic.open(magic.MAGIC_NONE | magic.MAGIC_MIME) fp.close()
request.ms.load() except:
except: raise
request.ms_fail = 1
if request.ms:
try:
fp, revision = request.repos.openfile(path, rev)
buffer = fp.read(magic_buf_size)
fp.close()
mime_type = request.ms.buffer(buffer)
except:
pass
# Is this a binary type? # Is this a binary type?
if is_binary(request.cfg, mime_type): if is_binary(request.cfg, mime_type):
@ -1657,9 +1641,10 @@ def markup_or_annotate(request, is_annotate):
if check_freshness(request, None, revision, weak=1): if check_freshness(request, None, revision, weak=1):
fp.close() fp.close()
return return
lines = markup_stream_pygments(request, cfg, blame_source, fp, lines, charset = markup_stream_pygments(request, cfg, blame_source, fp, path[-1], mime_type)
path[-1], mime_type)
fp.close() fp.close()
if mime_type.find(';') < 0:
mime_type = mime_type+'; charset='+charset
data = common_template_data(request, revision) data = common_template_data(request, revision)
data.merge(ezt.TemplateData({ data.merge(ezt.TemplateData({
@ -1910,7 +1895,7 @@ def view_directory(request):
row.short_log = format_log(file.log, cfg) row.short_log = format_log(file.log, cfg)
row.log = htmlify(file.log, cfg.options.mangle_email_addresses) row.log = htmlify(file.log, cfg.options.mangle_email_addresses)
row.lockinfo = file.lockinfo row.lockinfo = file.lockinfo
row.name = request.server.escape(cvsdb.utf8string(file.name)) row.name = request.server.escape(request.utf8(file.name))
row.anchor = row.name row.anchor = row.name
row.pathtype = (file.kind == vclib.FILE and 'file') or \ row.pathtype = (file.kind == vclib.FILE and 'file') or \
(file.kind == vclib.DIR and 'dir') (file.kind == vclib.DIR and 'dir')
@ -2285,7 +2270,7 @@ def view_log(request):
entry.ago = html_time(request, rev.date, 1) entry.ago = html_time(request, rev.date, 1)
entry.log = rev.log or "" entry.log = rev.log or ""
if cvs: if cvs:
entry.log = cvsdb.utf8string(entry.log) entry.log = request.utf8(entry.log)
entry.log = htmlify(entry.log, cfg.options.mangle_email_addresses) entry.log = htmlify(entry.log, cfg.options.mangle_email_addresses)
entry.size = rev.size entry.size = rev.size
entry.lockinfo = rev.lockinfo entry.lockinfo = rev.lockinfo
@ -2770,7 +2755,7 @@ class DiffSource:
self.save_line = None self.save_line = None
self.line_number = None self.line_number = None
self.prev_line_number = None self.prev_line_number = None
# keep track of where we are during an iteration # keep track of where we are during an iteration
self.idx = -1 self.idx = -1
self.last = None self.last = None
@ -2867,7 +2852,7 @@ class DiffSource:
diff_code = line[0] diff_code = line[0]
output = self._format_text(line[1:]) output = self._format_text(line[1:])
output = cvsdb.utf8string(output) output = self.cfg.guesser().utf8(output)
if diff_code == '+': if diff_code == '+':
if self.state == 'dump': if self.state == 'dump':
@ -3644,6 +3629,7 @@ def view_queryform(request):
'who_match' : request.query_dict.get('who_match', 'exact'), 'who_match' : request.query_dict.get('who_match', 'exact'),
'comment' : request.query_dict.get('comment', ''), 'comment' : request.query_dict.get('comment', ''),
'comment_match' : request.query_dict.get('comment_match', 'fulltext'), 'comment_match' : request.query_dict.get('comment_match', 'fulltext'),
'search_content' : request.query_dict.get('search_content', ''),
'querysort' : request.query_dict.get('querysort', 'date'), 'querysort' : request.query_dict.get('querysort', 'date'),
'date' : request.query_dict.get('date', 'hours'), 'date' : request.query_dict.get('date', 'hours'),
'hours' : request.query_dict.get('hours', '2'), 'hours' : request.query_dict.get('hours', '2'),
@ -3653,6 +3639,7 @@ def view_queryform(request):
'query_hidden_values' : query_hidden_values, 'query_hidden_values' : query_hidden_values,
'limit_changes' : limit_changes, 'limit_changes' : limit_changes,
'dir_href' : dir_href, 'dir_href' : dir_href,
'enable_search_content' : request.cfg.cvsdb.index_content,
})) }))
generate_page(request, "query_form", data) generate_page(request, "query_form", data)
@ -3791,7 +3778,8 @@ def build_commit(request, files, max_files, dir_strip, format):
plus_count = 0 plus_count = 0
minus_count = 0 minus_count = 0
found_unreadable = 0 found_unreadable = 0
all_repos = {} if not request.all_repos:
request.all_repos = {}
for f in files: for f in files:
dirname = f.GetDirectory() dirname = f.GetDirectory()
@ -3810,17 +3798,19 @@ def build_commit(request, files, max_files, dir_strip, format):
# Check path access (since the commits database logic bypasses the # Check path access (since the commits database logic bypasses the
# vclib layer and, thus, the vcauth stuff that layer uses). # vclib layer and, thus, the vcauth stuff that layer uses).
my_repos = all_repos.get(f.GetRepository(), '') my_repos = request.all_repos.get(f.GetRepository(), '')
if not my_repos: if not my_repos:
try: try:
my_repos = all_repos[f.GetRepository()] = request.create_repos(f.GetRepository()) my_repos = request.all_repos[f.GetRepository()] = request.create_repos(f.GetRepository())
except: except:
my_repos = None my_repos = None
if not my_repos: if not my_repos:
return None return None
if my_repos['roottype'] == 'cvs': if my_repos['roottype'] == 'cvs':
try: where = unicode(where,'utf-8') # we store UTF-8 in the DB
try: where = where.decode('utf-8')
except: pass except: pass
# FIXME maybe store "real" filesystem path in the DB instead of having such setting?
try: where = where.encode(cfg.options.cvs_ondisk_charset) try: where = where.encode(cfg.options.cvs_ondisk_charset)
except: pass except: pass
path_parts = _path_parts(where) path_parts = _path_parts(where)
@ -3907,24 +3897,27 @@ def build_commit(request, files, max_files, dir_strip, format):
if max_files and num_allowed > max_files: if max_files and num_allowed > max_files:
continue continue
commit_files.append(_item(date=commit_time, commit_files.append(_item(
dir=request.server.escape(dirname), date=commit_time,
file=request.server.escape(filename), dir=request.server.escape(dirname),
author=request.server.escape(f.GetAuthor()), file=request.server.escape(filename),
rev=rev, author=request.server.escape(f.GetAuthor()),
branch=f.GetBranch(), rev=rev,
plus=plus, branch=f.GetBranch(),
minus=minus, plus=plus,
type=change_type, minus=minus,
dir_href=dir_href, type=change_type,
log_href=log_href, snippet=f.GetSnippet(),
view_href=view_href, dir_href=dir_href,
download_href=download_href, log_href=log_href,
prefer_markup=prefer_markup, view_href=view_href,
diff_href=diff_href, download_href=download_href,
root=my_repos, prefer_markup=prefer_markup,
path=where, diff_href=diff_href,
path_prev=path_prev)) root=my_repos,
path=where,
path_prev=path_prev,
))
# No files survived authz checks? Let's just pretend this # No files survived authz checks? Let's just pretend this
# little commit didn't happen, shall we? # little commit didn't happen, shall we?
@ -4115,6 +4108,7 @@ def view_query(request):
who_match = request.query_dict.get('who_match', 'exact') who_match = request.query_dict.get('who_match', 'exact')
comment = request.query_dict.get('comment', '') comment = request.query_dict.get('comment', '')
comment_match = request.query_dict.get('comment_match', 'fulltext') comment_match = request.query_dict.get('comment_match', 'fulltext')
search_content = request.query_dict.get('search_content', '')
querysort = request.query_dict.get('querysort', 'date') querysort = request.query_dict.get('querysort', 'date')
date = request.query_dict.get('date', 'hours') date = request.query_dict.get('date', 'hours')
hours = request.query_dict.get('hours', '2') hours = request.query_dict.get('hours', '2')
@ -4126,7 +4120,7 @@ def view_query(request):
cfg.options.limit_changes)) cfg.options.limit_changes))
match_types = { 'exact':1, 'like':1, 'glob':1, 'regex':1, 'notregex':1 } match_types = { 'exact':1, 'like':1, 'glob':1, 'regex':1, 'notregex':1 }
sort_types = { 'date':1, 'author':1, 'file':1 } sort_types = { 'date':1, 'date_rev':1, 'author':1, 'file':1, 'relevance':1 }
date_types = { 'hours':1, 'day':1, 'week':1, 'month':1, date_types = { 'hours':1, 'day':1, 'week':1, 'month':1,
'all':1, 'explicit':1 } 'all':1, 'explicit':1 }
@ -4193,6 +4187,8 @@ def view_query(request):
query.SetComment(comment, comment_match) query.SetComment(comment, comment_match)
else: else:
query.SetTextQuery(comment) query.SetTextQuery(comment)
if search_content:
query.SetContentQuery(search_content)
query.SetSortMethod(querysort) query.SetSortMethod(querysort)
if date == 'hours': if date == 'hours':
query.SetFromDateHoursAgo(int(hours)) query.SetFromDateHoursAgo(int(hours))

70
lib/viewvcmagic.py Normal file
View File

@ -0,0 +1,70 @@
#!/usr/bin/python
#
# Charset and MIME type guessing helpers for ViewVC.

import mimetypes

# chardet (the universal character-set detector) gives far better charset
# guesses than blindly trying a fixed encoding list; use it when available.
# SyntaxError is caught too, mirroring how the rest of ViewVC probes chardet.
have_chardet = 0
try:
    import chardet
    have_chardet = 1
except (SyntaxError, ImportError):
    pass


class ContentMagic:
    """Guess MIME types and character sets of repository content."""

    def __init__(self, encodings):
        """encodings: colon-separated fallback charsets (e.g. 'utf-8:cp1251')
        tried in order when automatic detection fails."""
        self.encodings = encodings.split(':')
        self.mime_magic = None
        # Initialization errors are recorded, not raised: libmagic is an
        # optional improvement, and guess_mime() degrades gracefully.
        self.errors = []
        try:
            import magic
            self.mime_magic = magic.open(magic.MAGIC_MIME_TYPE)
            self.mime_magic.load()
        except Exception as e:
            self.errors.append(e)

    def guess_mime(self, mime, filename, tempfile):
        """Return the best MIME type guess.

        mime: a caller-supplied type, trusted unless empty or the useless
              'application/octet-stream'.
        filename: used for extension-based guessing via mimetypes.
        tempfile: a path (string) or readable file object examined with
                  libmagic as a last resort.
        """
        if mime == 'application/octet-stream':
            mime = ''
        if not mime and filename:
            mime = mimetypes.guess_type(filename)[0]
        if not mime and tempfile and self.mime_magic:
            if isinstance(tempfile, str):
                mime = self.mime_magic.file(tempfile)
            else:
                # Sniff only the head of the stream; 4096 bytes is plenty
                # for libmagic's signatures.
                mime = self.mime_magic.buffer(tempfile.read(4096))
        return mime

    def guess_charset(self, content):
        """Return (unicode_content, charset).

        charset is None (and content is returned unchanged, still encoded)
        when no candidate encoding could decode it.
        """
        charset = None
        if have_chardet:
            try:
                guess = chardet.detect(content)
                if guess and guess.get('encoding'):
                    charset = guess['encoding']
                    content = content.decode(charset)
            except Exception:
                # Bad guess or unknown codec name: fall through to the
                # configured encoding list below.
                charset = None
        else:
            # Without chardet, optimistically try UTF-8 first.
            try:
                content = content.decode('utf-8')
                charset = 'utf-8'
            except UnicodeError:
                charset = None
        if charset is None:
            # Primitive fallback: first configured encoding that decodes wins.
            for enc in self.encodings:
                try:
                    content = content.decode(enc)
                    charset = enc
                    break
                except (UnicodeError, LookupError):
                    charset = None
        return (content, charset)

    def utf8(self, content):
        """Guess content's charset and re-encode it as UTF-8; if no charset
        could be guessed, return the original bytes untouched."""
        (uni, charset) = self.guess_charset(content)
        if charset:
            return uni.encode('utf-8')
        return content

View File

@ -144,7 +144,7 @@ Browse Directory</a></p>
<tr> <tr>
<th style="text-align:right;vertical-align:top;">Comment:</th> <th style="text-align:right;vertical-align:top;">Comment:</th>
<td> <td>
<input type="text" name="comment" value="[comment]" /><br /> <input type="text" name="comment" value="[comment]" size="40" /><br />
<label for="comment_match_exact"> <label for="comment_match_exact">
<input type="radio" name="comment_match" id="comment_match_fulltext" <input type="radio" name="comment_match" id="comment_match_fulltext"
value="fulltext" [is comment_match "fulltext"]checked=""[end] /> value="fulltext" [is comment_match "fulltext"]checked=""[end] />
@ -172,13 +172,21 @@ Browse Directory</a></p>
</label> </label>
</td> </td>
</tr> </tr>
[if-any enable_search_content]
<tr>
<th style="text-align:right;vertical-align:top;">Search content:</th>
<td><input type="text" name="search_content" value="[search_content]" size="60" /></td>
</tr>
[end]
<tr> <tr>
<th style="text-align:right;vertical-align:top;">Sort By:</th> <th style="text-align:right;vertical-align:top;">Sort By:</th>
<td> <td>
<select name="querysort"> <select name="querysort">
<option value="date" [is querysort "date"]selected="selected"[end]>Date</option> <option value="date" [is querysort "date"]selected="selected"[end]>Date</option>
<option value="date_rev" [is querysort "date_rev"]selected="selected"[end]>Date (oldest first)</option>
<option value="author" [is querysort "author"]selected="selected"[end]>Author</option> <option value="author" [is querysort "author"]selected="selected"[end]>Author</option>
<option value="file" [is querysort "file"]selected="selected"[end]>File</option> <option value="file" [is querysort "file"]selected="selected"[end]>File</option>
<option value="relevance" [is querysort "relevance"]selected="selected"[end]>Relevance</option>
</select> </select>
</td> </td>
</tr> </tr>

View File

@ -46,15 +46,18 @@
<tr class="vc_row_[if-index commits even]even[else]odd[end]"> <tr class="vc_row_[if-index commits even]even[else]odd[end]">
<td style="vertical-align: top;"> <td style="vertical-align: top;">
[define rev_href][if-any commits.files.prefer_markup][commits.files.view_href][else][if-any commits.files.download_href][commits.files.download_href][end][end][end] [define rev_href][if-any commits.files.prefer_markup][commits.files.view_href][else][if-any commits.files.download_href][commits.files.download_href][end][end][end]
[if-any commits.files.rev][if-any rev_href]<a href="[rev_href]">[end][commits.files.rev][if-any rev_href]</a>[end][else]&nbsp;[end] [if-any commits.files.rev][if-any rev_href]<a href="[rev_href]">[end][commits.files.rev][if-any rev_href]</a>[end][else]&nbsp;[end]
</td> </td>
<td style="vertical-align: top;"> <td style="vertical-align: top;">
<a href="[commits.files.dir_href]">[commits.files.dir]/</a> <a href="[commits.files.dir_href]">[commits.files.dir]/</a>
<a href="[commits.files.log_href]">[commits.files.file]</a> <a href="[commits.files.log_href]">[commits.files.file]</a>
[if-any commits.files.snippet]
<div class="snippet">[commits.files.snippet]</div>
[end]
</td> </td>
[if-any show_branch] [if-any show_branch]
<td style="vertical-align: top;"> <td style="vertical-align: top;">
[if-any commits.files.branch][commits.files.branch][else]&nbsp;[end] [if-any commits.files.branch][commits.files.branch][else]&nbsp;[end]
</td> </td>
[end] [end]
<td style="vertical-align: top;"> <td style="vertical-align: top;">
@ -68,10 +71,10 @@
[is commits.files.type "Remove"]</del>[end] [is commits.files.type "Remove"]</del>[end]
</td> </td>
<td style="vertical-align: top;"> <td style="vertical-align: top;">
[if-any commits.files.date][commits.files.date][else]&nbsp;[end] [if-any commits.files.date][commits.files.date][else]&nbsp;[end]
</td> </td>
<td style="vertical-align: top;"> <td style="vertical-align: top;">
[if-any commits.files.author][commits.files.author][else]&nbsp;[end] [if-any commits.files.author][commits.files.author][else]&nbsp;[end]
</td> </td>
</tr> </tr>
[end] [end]