Bug 82651 - Tika&Sphinx&chardet content indexing (done!)
git-svn-id: svn://svn.office.custis.ru/3rdparty/viewvc.org/trunk@1388 6955db30-a419-402b-8a0d-67ecbb4d7f56remotes/github/custis
parent
83c7e6fe49
commit
e363cf19b1
|
@ -44,6 +44,7 @@ CREATE TABLE branches (
|
||||||
|
|
||||||
DROP TABLE IF EXISTS checkins;
|
DROP TABLE IF EXISTS checkins;
|
||||||
CREATE TABLE checkins (
|
CREATE TABLE checkins (
|
||||||
|
id int NOT NULL AUTO_INCREMENT PRIMARY KEY,
|
||||||
type enum('Change','Add','Remove'),
|
type enum('Change','Add','Remove'),
|
||||||
ci_when datetime DEFAULT '0000-00-00 00:00:00' NOT NULL,
|
ci_when datetime DEFAULT '0000-00-00 00:00:00' NOT NULL,
|
||||||
whoid mediumint(9) DEFAULT '0' NOT NULL,
|
whoid mediumint(9) DEFAULT '0' NOT NULL,
|
||||||
|
@ -57,7 +58,7 @@ CREATE TABLE checkins (
|
||||||
removedlines int(11) DEFAULT '0' NOT NULL,
|
removedlines int(11) DEFAULT '0' NOT NULL,
|
||||||
descid mediumint(9),
|
descid mediumint(9),
|
||||||
UNIQUE repositoryid (repositoryid,dirid,fileid,revision),
|
UNIQUE repositoryid (repositoryid,dirid,fileid,revision),
|
||||||
KEY repository_when (repositoryid,ci_when),
|
KEY repositoryid_when (repositoryid,ci_when),
|
||||||
KEY ci_when (ci_when),
|
KEY ci_when (ci_when),
|
||||||
KEY whoid (whoid,ci_when),
|
KEY whoid (whoid,ci_when),
|
||||||
KEY dirid (dirid),
|
KEY dirid (dirid),
|
||||||
|
@ -138,6 +139,7 @@ CREATE TABLE branches (
|
||||||
|
|
||||||
DROP TABLE IF EXISTS commits;
|
DROP TABLE IF EXISTS commits;
|
||||||
CREATE TABLE commits (
|
CREATE TABLE commits (
|
||||||
|
id int NOT NULL AUTO_INCREMENT PRIMARY KEY,
|
||||||
type enum('Change','Add','Remove'),
|
type enum('Change','Add','Remove'),
|
||||||
ci_when datetime DEFAULT '0000-00-00 00:00:00' NOT NULL,
|
ci_when datetime DEFAULT '0000-00-00 00:00:00' NOT NULL,
|
||||||
whoid mediumint(9) DEFAULT '0' NOT NULL,
|
whoid mediumint(9) DEFAULT '0' NOT NULL,
|
||||||
|
@ -151,9 +153,9 @@ CREATE TABLE commits (
|
||||||
removedlines int(11) DEFAULT '0' NOT NULL,
|
removedlines int(11) DEFAULT '0' NOT NULL,
|
||||||
descid mediumint(9),
|
descid mediumint(9),
|
||||||
UNIQUE repositoryid (repositoryid,dirid,fileid,revision),
|
UNIQUE repositoryid (repositoryid,dirid,fileid,revision),
|
||||||
|
KEY repositoryid_when (repositoryid,ci_when),
|
||||||
KEY ci_when (ci_when),
|
KEY ci_when (ci_when),
|
||||||
KEY whoid (whoid),
|
KEY whoid (whoid,ci_when),
|
||||||
KEY repositoryid_2 (repositoryid),
|
|
||||||
KEY dirid (dirid),
|
KEY dirid (dirid),
|
||||||
KEY fileid (fileid),
|
KEY fileid (fileid),
|
||||||
KEY branchid (branchid),
|
KEY branchid (branchid),
|
||||||
|
@ -253,7 +255,7 @@ Options:
|
||||||
[Default: ViewVC]
|
[Default: ViewVC]
|
||||||
|
|
||||||
--help Show this usage message.
|
--help Show this usage message.
|
||||||
|
|
||||||
--hostname=ARG Use ARG as the hostname for the MySQL connection.
|
--hostname=ARG Use ARG as the hostname for the MySQL connection.
|
||||||
[Default: localhost]
|
[Default: localhost]
|
||||||
|
|
||||||
|
@ -264,7 +266,7 @@ Options:
|
||||||
--version=ARG Create the database using the schema employed by
|
--version=ARG Create the database using the schema employed by
|
||||||
version ARG of ViewVC. Valid values are:
|
version ARG of ViewVC. Valid values are:
|
||||||
[ "1.0" ]
|
[ "1.0" ]
|
||||||
|
|
||||||
""" % (os.path.basename(sys.argv[0])))
|
""" % (os.path.basename(sys.argv[0])))
|
||||||
if errmsg is not None:
|
if errmsg is not None:
|
||||||
stream.write("[ERROR] %s.\n" % (errmsg))
|
stream.write("[ERROR] %s.\n" % (errmsg))
|
||||||
|
|
149
bin/svndbadmin
149
bin/svndbadmin
|
@ -58,7 +58,11 @@ else:
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import string
|
import string
|
||||||
|
import socket
|
||||||
|
import select
|
||||||
import re
|
import re
|
||||||
|
import mimetypes
|
||||||
|
import time
|
||||||
|
|
||||||
import svn.core
|
import svn.core
|
||||||
import svn.repos
|
import svn.repos
|
||||||
|
@ -68,14 +72,20 @@ import svn.delta
|
||||||
import cvsdb
|
import cvsdb
|
||||||
import viewvc
|
import viewvc
|
||||||
import vclib
|
import vclib
|
||||||
|
from viewvcmagic import ContentMagic
|
||||||
|
|
||||||
class SvnRepo:
|
class SvnRepo:
|
||||||
"""Class used to manage a connection to a SVN repository."""
|
"""Class used to manage a connection to a SVN repository."""
|
||||||
def __init__(self, path):
|
def __init__(self, path, index_content = None, tika_client = None, guesser = None,
|
||||||
|
svn_ignore_mimetype = False):
|
||||||
self.path = path
|
self.path = path
|
||||||
self.repo = svn.repos.svn_repos_open(path)
|
self.repo = svn.repos.svn_repos_open(path)
|
||||||
self.fs = svn.repos.svn_repos_fs(self.repo)
|
self.fs = svn.repos.svn_repos_fs(self.repo)
|
||||||
self.rev_max = svn.fs.youngest_rev(self.fs)
|
self.rev_max = svn.fs.youngest_rev(self.fs)
|
||||||
|
self.index_content = index_content
|
||||||
|
self.tika_client = tika_client
|
||||||
|
self.guesser = guesser
|
||||||
|
self.svn_ignore_mimetype = svn_ignore_mimetype
|
||||||
def __getitem__(self, rev):
|
def __getitem__(self, rev):
|
||||||
if rev is None:
|
if rev is None:
|
||||||
rev = self.rev_max
|
rev = self.rev_max
|
||||||
|
@ -128,6 +138,74 @@ def _get_diff_counts(diff_fp):
|
||||||
line = diff_fp.readline()
|
line = diff_fp.readline()
|
||||||
return plus, minus
|
return plus, minus
|
||||||
|
|
||||||
|
class TikaClient:
    """Client for an Apache Tika instance running in TCP server mode.

    The exchange implemented by get_text() is: connect to the configured
    host and port, send the raw file bytes, shut down the sending side of
    the socket, and read whatever the server sends back until EOF.
    """

    # Maximum number of bytes requested from the socket per read.
    READ_SIZE = 65536

    def __init__(self, tika_server, mime_types):
        """Create a Tika client.

        tika_server -- server address as a 'host:port' string.
        mime_types  -- whitespace-separated list of MIME types that should
                       be sent to Tika; '*' acts as a wildcard inside a
                       type (e.g. 'text/*', 'application/*+xml').

        Raises Exception if tika_server is not in 'host:port' form, and
        ValueError (from int()) if the port part is not a number.
        """
        self.tika_server = tika_server
        self.mime_types = mime_types
        # Split address
        parts = tika_server.split(':')
        if len(parts) != 2:
            raise Exception('tika_server value is incorrect: \''+tika_server+'\', please use \'host:port\' format')
        self.addr = (parts[0], int(parts[1]))
        # Build regexp for MIME types: every pattern is escaped literally,
        # then the escaped '*' is turned back into a '.*' wildcard, and the
        # anchored alternatives are OR-ed together.
        patterns = re.split(r'\s+', mime_types.strip())
        self.mime_regexp = re.compile('|'.join(
            '^' + re.escape(p).replace('\\*', '.*') + '$' for p in patterns))

    def get_text(self, filename, mime_type, log_filename):
        """Extract text content from a file using the Tika server.

        filename     -- path of the file whose bytes are sent to Tika.
        mime_type    -- MIME type of the file; if it does not match the
                        configured MIME type list, nothing is sent.
        log_filename -- name used for the file in progress/error messages.

        Returns the bytes received from Tika.  Returns an empty string when
        the MIME type is not handled or the file is empty.  Errors are not
        propagated: they are reported with print and whatever was received
        before the failure (possibly nothing) is returned.
        """
        if not self.mime_regexp.match(mime_type):
            # Tika can't handle this mime type, return nothing
            return ''
        s = None
        chunks = []
        fsize = 0
        try:
            # Read original file; it is closed before any network IO starts
            with open(filename, 'rb') as fd:
                data = fd.read()
            fsize = len(data)
            if not fsize:
                return ''
            # Connect to Tika
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.connect(self.addr)
            s.setblocking(0)
            sockfd = s.fileno()
            # Tika is somewhat delicate about network IO, so:
            # Read and write using poll(2) system call
            p = select.poll()
            p.register(sockfd)
            error_events = select.POLLERR | select.POLLHUP | select.POLLNVAL
            while 1:
                fds = p.poll()
                if not fds:
                    break
                (pollfd, event) = fds[0]
                if event & select.POLLIN:
                    # Read error or empty data means EOF...
                    try:
                        part = os.read(sockfd, self.READ_SIZE)
                    except OSError:
                        break
                    if not part:
                        break
                    chunks.append(part)
                elif event & error_events:
                    # Error/hangup with nothing left to read: stop instead
                    # of polling the same condition forever.
                    break
                if event & select.POLLOUT:
                    if not len(data):
                        # Shutdown output and forget about POLLOUT
                        s.shutdown(socket.SHUT_WR)
                        p.modify(sockfd, select.POLLIN)
                    else:
                        # Write and consume some data
                        written = os.write(sockfd, data)
                        data = data[written:]
            received = sum(len(c) for c in chunks)
            if received == 0:
                raise Exception('Empty response from Tika server')
            print("Extracted %d bytes from %s (%s) of size %d" % (received, log_filename, mime_type, fsize))
        except Exception as e:
            print("Error extracting text from %s (%s) of size %d: %s" % (log_filename, mime_type, fsize, str(e)))
        finally:
            if s:
                s.close()
        return b''.join(chunks)
|
||||||
|
|
||||||
class SvnRev:
|
class SvnRev:
|
||||||
"""Class used to hold information about a particular revision of
|
"""Class used to hold information about a particular revision of
|
||||||
|
@ -151,7 +229,7 @@ class SvnRev:
|
||||||
|
|
||||||
# get a root for the current revisions
|
# get a root for the current revisions
|
||||||
fsroot = self._get_root_for_rev(rev)
|
fsroot = self._get_root_for_rev(rev)
|
||||||
|
|
||||||
# find changes in the revision
|
# find changes in the revision
|
||||||
editor = svn.repos.RevisionChangeCollector(repo.fs, rev)
|
editor = svn.repos.RevisionChangeCollector(repo.fs, rev)
|
||||||
e_ptr, e_baton = svn.delta.make_editor(editor)
|
e_ptr, e_baton = svn.delta.make_editor(editor)
|
||||||
|
@ -168,7 +246,7 @@ class SvnRev:
|
||||||
base_root = None
|
base_root = None
|
||||||
if change.base_path:
|
if change.base_path:
|
||||||
base_root = self._get_root_for_rev(change.base_rev)
|
base_root = self._get_root_for_rev(change.base_rev)
|
||||||
|
|
||||||
if not change.path:
|
if not change.path:
|
||||||
action = 'remove'
|
action = 'remove'
|
||||||
elif change.added:
|
elif change.added:
|
||||||
|
@ -184,19 +262,53 @@ class SvnRev:
|
||||||
['-b', '-B'])
|
['-b', '-B'])
|
||||||
diff_fp = diffobj.get_pipe()
|
diff_fp = diffobj.get_pipe()
|
||||||
plus, minus = _get_diff_counts(diff_fp)
|
plus, minus = _get_diff_counts(diff_fp)
|
||||||
# TODO Indexing file contents
|
|
||||||
# For binary files: svn.fs.contents_changed(root1, path1, root2, path2)
|
|
||||||
# Temp file with contents is at: diffobj.tempfile2
|
|
||||||
# Apache Tika server may even be at another host!
|
|
||||||
|
|
||||||
# CustIS Bug 50473: a workaround for svnlib behaviour in file movements (FILE1 -> FILE2 + FILE1 -> null)
|
# CustIS Bug 50473: a workaround for svnlib behaviour in file movements (FILE1 -> FILE2 + FILE1 -> null)
|
||||||
if change.base_path:
|
if change.base_path:
|
||||||
if not change.path and changes_hash.get(change.base_path, '') != '':
|
if not change.path and change.base_path in changes_hash:
|
||||||
minus = 0
|
minus = 0
|
||||||
elif change.path:
|
elif change.path:
|
||||||
changes_hash[change.base_path] = change.path
|
changes_hash[change.base_path] = change.path
|
||||||
|
|
||||||
self.changes.append((path, action, plus, minus))
|
content = ''
|
||||||
|
mime = ''
|
||||||
|
# need to check if binary file's content changed when copying,
|
||||||
|
# if not, don't extract it, just get it from previous revision later
|
||||||
|
if repo.index_content and change.path and (not change.base_path
|
||||||
|
or svn.fs.contents_changed(
|
||||||
|
base_root and base_root or None,
|
||||||
|
base_root and change.base_path or None,
|
||||||
|
fsroot, change.path
|
||||||
|
)):
|
||||||
|
props = svn.fs.node_proplist(fsroot, change.path)
|
||||||
|
if not repo.svn_ignore_mimetype:
|
||||||
|
mime = props.get('svn:mime-type', None)
|
||||||
|
else:
|
||||||
|
mime = None
|
||||||
|
mime = repo.guesser.guess_mime(
|
||||||
|
mime,
|
||||||
|
os.path.basename(change.path),
|
||||||
|
diffobj.tempfile2
|
||||||
|
)
|
||||||
|
# Read and guess charset by ourselves for text files
|
||||||
|
if mime.startswith('text/') or (mime.startswith('application/') and mime.endswith('xml')):
|
||||||
|
try:
|
||||||
|
fd = open(diffobj.tempfile2, 'rb')
|
||||||
|
content = fd.read()
|
||||||
|
fd.close()
|
||||||
|
except: pass
|
||||||
|
# Guess charset
|
||||||
|
if content:
|
||||||
|
content, charset = repo.guesser.guess_charset(content)
|
||||||
|
if charset:
|
||||||
|
content = content.encode('utf-8')
|
||||||
|
print 'Guessed %s for %s' % (charset, change.path)
|
||||||
|
else:
|
||||||
|
print 'Failed to guess charset for %s, not indexing' % (change.path, )
|
||||||
|
# Try to extract content using Tika from binary documents
|
||||||
|
elif repo.tika_client:
|
||||||
|
content = repo.tika_client.get_text(diffobj.tempfile2, mime, change.path)
|
||||||
|
self.changes.append((path, action, plus, minus, content, mime))
|
||||||
|
|
||||||
def _get_root_for_rev(self, rev):
|
def _get_root_for_rev(self, rev):
|
||||||
"""Fetch a revision root from a cache of such, or a fresh root
|
"""Fetch a revision root from a cache of such, or a fresh root
|
||||||
|
@ -217,7 +329,7 @@ def handle_revision(db, command, repo, rev, verbose, force=0):
|
||||||
if verbose: print "skipped (no changes)."
|
if verbose: print "skipped (no changes)."
|
||||||
return
|
return
|
||||||
|
|
||||||
for (path, action, plus, minus) in revision.changes:
|
for (path, action, plus, minus, content, mime) in revision.changes:
|
||||||
directory, file = os.path.split(path)
|
directory, file = os.path.split(path)
|
||||||
commit = cvsdb.CreateCommit()
|
commit = cvsdb.CreateCommit()
|
||||||
commit.SetRepository(repo.path)
|
commit.SetRepository(repo.path)
|
||||||
|
@ -230,6 +342,8 @@ def handle_revision(db, command, repo, rev, verbose, force=0):
|
||||||
commit.SetPlusCount(plus)
|
commit.SetPlusCount(plus)
|
||||||
commit.SetMinusCount(minus)
|
commit.SetMinusCount(minus)
|
||||||
commit.SetBranch(None)
|
commit.SetBranch(None)
|
||||||
|
commit.SetContent(content)
|
||||||
|
commit.SetMimeType(mime)
|
||||||
|
|
||||||
if action == 'add':
|
if action == 'add':
|
||||||
commit.SetTypeAdd()
|
commit.SetTypeAdd()
|
||||||
|
@ -268,7 +382,16 @@ def main(command, repository, revs=[], verbose=0, force=0):
|
||||||
sys.stderr.write("ERROR: " + str(e) + "\n")
|
sys.stderr.write("ERROR: " + str(e) + "\n")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
repo = SvnRepo(repository)
|
tika_client = None
|
||||||
|
if cfg.utilities.tika_server:
|
||||||
|
tika_client = TikaClient(cfg.utilities.tika_server, cfg.utilities.tika_mime_types)
|
||||||
|
repo = SvnRepo(
|
||||||
|
path = repository,
|
||||||
|
index_content = cfg.cvsdb.index_content,
|
||||||
|
tika_client = tika_client,
|
||||||
|
guesser = cfg.guesser(),
|
||||||
|
svn_ignore_mimetype = cfg.options.svn_ignore_mimetype,
|
||||||
|
)
|
||||||
if command == 'rebuild' or (command == 'update' and not revs):
|
if command == 'rebuild' or (command == 'update' and not revs):
|
||||||
for rev in range(repo.rev_max+1):
|
for rev in range(repo.rev_max+1):
|
||||||
handle_revision(db, command, repo, rev, verbose)
|
handle_revision(db, command, repo, rev, verbose)
|
||||||
|
@ -312,7 +435,7 @@ Usage: 1. %s [-v] rebuild REPOS-PATH
|
||||||
the database. If a range is specified, the revisions will be
|
the database. If a range is specified, the revisions will be
|
||||||
processed in ascending order, and you may specify "HEAD" to
|
processed in ascending order, and you may specify "HEAD" to
|
||||||
indicate "the youngest revision currently in the repository".
|
indicate "the youngest revision currently in the repository".
|
||||||
|
|
||||||
3. Purge information specific to the repository located at REPOS-PATH
|
3. Purge information specific to the repository located at REPOS-PATH
|
||||||
from the database.
|
from the database.
|
||||||
|
|
||||||
|
@ -337,7 +460,7 @@ if __name__ == '__main__':
|
||||||
del args[index]
|
del args[index]
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if len(args) < 3:
|
if len(args) < 3:
|
||||||
usage()
|
usage()
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#---------------------------------------------------------------------------
|
#---------------------------------------------------------------------------
|
||||||
#
|
#
|
||||||
# Configuration file for ViewVC
|
# Configuration file for ViewVC (4IntraNet patched version)
|
||||||
#
|
#
|
||||||
# Information on ViewVC is located at the following web site:
|
# Information on ViewVC is located at the following web site:
|
||||||
# http://viewvc.org/
|
# http://viewvc.org/
|
||||||
|
@ -9,7 +9,7 @@
|
||||||
|
|
||||||
# THE FORMAT OF THIS CONFIGURATION FILE
|
# THE FORMAT OF THIS CONFIGURATION FILE
|
||||||
#
|
#
|
||||||
# This file is delineated by sections, specified in [brackets]. Within
|
# This file is delineated by sections, specified in [brackets]. Within
|
||||||
# each section, are a number of configuration settings. These settings
|
# each section, are a number of configuration settings. These settings
|
||||||
# take the form of: name = value. Values may be continued on the
|
# take the form of: name = value. Values may be continued on the
|
||||||
# following line by indenting the continued line.
|
# following line by indenting the continued line.
|
||||||
|
@ -17,14 +17,14 @@
|
||||||
# WARNING: Indentation *always* means continuation. Name=value lines
|
# WARNING: Indentation *always* means continuation. Name=value lines
|
||||||
# should always start in column zero.
|
# should always start in column zero.
|
||||||
#
|
#
|
||||||
# Comments should always start in column zero, and are identified
|
# Comments should always start in column zero, and are identified
|
||||||
# with "#".
|
# with "#".
|
||||||
#
|
#
|
||||||
# Certain configuration settings may have multiple values. These should
|
# Certain configuration settings may have multiple values. These should
|
||||||
# be separated by a comma. The settings where this is allowed are noted
|
# be separated by a comma. The settings where this is allowed are noted
|
||||||
# below. Any other setting that requires special syntax is noted at that
|
# below. Any other setting that requires special syntax is noted at that
|
||||||
# setting.
|
# setting.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# SOME TERMINOLOGY USED HEREIN
|
# SOME TERMINOLOGY USED HEREIN
|
||||||
#
|
#
|
||||||
|
@ -50,10 +50,10 @@
|
||||||
# recommend you pay attention to. Of course, don't try to change the
|
# recommend you pay attention to. Of course, don't try to change the
|
||||||
# options here -- do so in the relevant section of the configuration
|
# options here -- do so in the relevant section of the configuration
|
||||||
# file below.
|
# file below.
|
||||||
#
|
#
|
||||||
# For correct operation, you will probably need to change the following
|
# For correct operation, you will probably need to change the following
|
||||||
# configuration variables:
|
# configuration variables:
|
||||||
#
|
#
|
||||||
# cvs_roots (for CVS)
|
# cvs_roots (for CVS)
|
||||||
# svn_roots (for Subversion)
|
# svn_roots (for Subversion)
|
||||||
# root_parents (for CVS or Subversion)
|
# root_parents (for CVS or Subversion)
|
||||||
|
@ -62,18 +62,18 @@
|
||||||
# rcs_dir
|
# rcs_dir
|
||||||
# mime_types_file
|
# mime_types_file
|
||||||
# the many options in the [utilities] section
|
# the many options in the [utilities] section
|
||||||
#
|
#
|
||||||
# It is usually desirable to change the following variables:
|
# It is usually desirable to change the following variables:
|
||||||
#
|
#
|
||||||
# address
|
# address
|
||||||
# forbidden
|
# forbidden
|
||||||
#
|
#
|
||||||
# To optimize delivery of ViewVC static files:
|
# To optimize delivery of ViewVC static files:
|
||||||
#
|
#
|
||||||
# docroot
|
# docroot
|
||||||
#
|
#
|
||||||
# To customize the display of ViewVC for your site:
|
# To customize the display of ViewVC for your site:
|
||||||
#
|
#
|
||||||
# template_dir
|
# template_dir
|
||||||
# the [templates] override section
|
# the [templates] override section
|
||||||
#
|
#
|
||||||
|
@ -139,7 +139,7 @@ default_root = cvs
|
||||||
# provided only as a convenience for ViewVC installations which are
|
# provided only as a convenience for ViewVC installations which are
|
||||||
# using the default template set, where the value of this option will
|
# using the default template set, where the value of this option will
|
||||||
# be displayed in the footer of every ViewVC page.)
|
# be displayed in the footer of every ViewVC page.)
|
||||||
address =
|
address =
|
||||||
|
|
||||||
#
|
#
|
||||||
# This option provides a mechanism for custom key/value pairs to be
|
# This option provides a mechanism for custom key/value pairs to be
|
||||||
|
@ -244,21 +244,47 @@ cvsnt =
|
||||||
|
|
||||||
# See also bin/cvsnt-rcsfile-inetd.pl
|
# See also bin/cvsnt-rcsfile-inetd.pl
|
||||||
|
|
||||||
#rcsfile_socket = 'host:port'
|
#rcsfile_socket = host:port
|
||||||
# Example: rcsfile_socket = '127.0.0.1:8071'
|
# Example: rcsfile_socket = 127.0.0.1:8071
|
||||||
|
|
||||||
# Subversion command-line client, used for viewing Subversion repositories
|
# Subversion command-line client, used for viewing Subversion repositories
|
||||||
svn =
|
svn =
|
||||||
# svn = /usr/bin/svn
|
# svn = /usr/bin/svn
|
||||||
|
|
||||||
# GNU diff, used for showing file version differences
|
# GNU diff, used for showing file version differences
|
||||||
diff =
|
diff =
|
||||||
# diff = /usr/bin/diff
|
# diff = /usr/bin/diff
|
||||||
|
|
||||||
# CvsGraph, a graphical CVS version graph generator (see options.use_cvsgraph)
|
# CvsGraph, a graphical CVS version graph generator (see options.use_cvsgraph)
|
||||||
cvsgraph =
|
cvsgraph =
|
||||||
# cvsgraph = /usr/local/bin/cvsgraph
|
# cvsgraph = /usr/local/bin/cvsgraph
|
||||||
|
|
||||||
|
# Apache Tika TCP server host and port, used to extract text from binary documents
|
||||||
|
# Note that as of 2011-09-12, Tika 0.9 has a bug which leads to hangs when processing
|
||||||
|
# MS Word documents in server mode. So you must use the fixed version, downloaded from:
|
||||||
|
# http://wiki.4intra.net/public/tika-app-0.9-fix-TIKA709.jar
|
||||||
|
# (mirror) http://code.google.com/p/mediawiki4intranet/downloads/detail?name=tika-app-0.9-fix-TIKA709.jar
|
||||||
|
# Or apply the patch yourself and rebuild Tika from source, see patch here:
|
||||||
|
# https://issues.apache.org/jira/browse/TIKA-709
|
||||||
|
# Tika server should be started with command 'java -jar tika-app-0.9.jar -p PORT -t -eutf-8'
|
||||||
|
|
||||||
|
#tika_server = host:port
|
||||||
|
# Example: tika_server = 127.0.0.1:8072
|
||||||
|
|
||||||
|
# This lists MIME types that can be processed by Tika
|
||||||
|
# You may change it if your Tika is newer than 0.9 and supports more formats
|
||||||
|
# (note) *+xml examples: xhtml+xml, rss+xml, atom+xml, docbook+xml, rdf+xml
|
||||||
|
tika_mime_types =
|
||||||
|
text/*
|
||||||
|
application/*+xml
|
||||||
|
application/xml
|
||||||
|
application/vnd.oasis.opendocument.*
|
||||||
|
application/vnd.openxmlformats
|
||||||
|
application/vnd.ms-*
|
||||||
|
application/msaccess
|
||||||
|
application/msword
|
||||||
|
application/pdf
|
||||||
|
application/rtf
|
||||||
|
|
||||||
#---------------------------------------------------------------------------
|
#---------------------------------------------------------------------------
|
||||||
[options]
|
[options]
|
||||||
|
@ -358,7 +384,7 @@ svn_ignore_mimetype = 0
|
||||||
# directory ViewVC should consult for various things, including cached
|
# directory ViewVC should consult for various things, including cached
|
||||||
# remote authentication credentials. If unset, Subversion will use
|
# remote authentication credentials. If unset, Subversion will use
|
||||||
# the default location(s) ($HOME/.subversion, etc.)
|
# the default location(s) ($HOME/.subversion, etc.)
|
||||||
svn_config_dir =
|
svn_config_dir =
|
||||||
|
|
||||||
# use the rcsparse Python module to retrieve CVS repository
|
# use the rcsparse Python module to retrieve CVS repository
|
||||||
# information instead of invoking rcs utilities [EXPERIMENTAL]
|
# information instead of invoking rcs utilities [EXPERIMENTAL]
|
||||||
|
@ -494,12 +520,18 @@ short_log_len = 80
|
||||||
# should we colorize known file content syntaxes? (requires Pygments module)
|
# should we colorize known file content syntaxes? (requires Pygments module)
|
||||||
enable_syntax_coloration = 1
|
enable_syntax_coloration = 1
|
||||||
|
|
||||||
|
# detect_encoding: Should we attempt to detect versioned file
|
||||||
|
# character encodings? [Requires 'chardet' module]
|
||||||
|
# Used in file list, file content display and indexing
|
||||||
|
# See also options.encodings for naive guessing.
|
||||||
|
detect_encoding = 1
|
||||||
|
|
||||||
# Use CvsGraph. See http://www.akhphd.au.dk/~bertho/cvsgraph/ for
|
# Use CvsGraph. See http://www.akhphd.au.dk/~bertho/cvsgraph/ for
|
||||||
# documentation and download.
|
# documentation and download.
|
||||||
use_cvsgraph = 0
|
use_cvsgraph = 0
|
||||||
#use_cvsgraph = 1
|
#use_cvsgraph = 1
|
||||||
|
|
||||||
# Location of the customized cvsgraph configuration file.
|
# Location of the customized cvsgraph configuration file.
|
||||||
cvsgraph_conf = cvsgraph.conf
|
cvsgraph_conf = cvsgraph.conf
|
||||||
|
|
||||||
#
|
#
|
||||||
|
@ -544,6 +576,17 @@ use_pagesize = 0
|
||||||
# Set to 0 to disable the limit.
|
# Set to 0 to disable the limit.
|
||||||
limit_changes = 100
|
limit_changes = 100
|
||||||
|
|
||||||
|
# You can also use primitive charset guessing instead of chardet (options.detect_encoding)
|
||||||
|
# Just set this to the list of possible charsets in your repository.
|
||||||
|
# ViewVC will simply try to decode content using each of them, and pick
|
||||||
|
# the first which succeeds. UTF-8 is always tried automatically.
|
||||||
|
#encodings = cp1251:iso-8859-1
|
||||||
|
|
||||||
|
# Sadly this is also required - for back-links from query results to files
|
||||||
|
# in CVS, because it doesn't recode file names to UTF-8 as Subversion does.
|
||||||
|
# Just set to cp1251 if you work with your CVS from Windows.
|
||||||
|
#cvs_ondisk_charset = cp1251
|
||||||
|
|
||||||
#---------------------------------------------------------------------------
|
#---------------------------------------------------------------------------
|
||||||
[templates]
|
[templates]
|
||||||
|
|
||||||
|
@ -554,7 +597,7 @@ limit_changes = 100
|
||||||
# use a different template for a particular view, simply uncomment the
|
# use a different template for a particular view, simply uncomment the
|
||||||
# appropriate option below and specify the correct location of the EZT
|
# appropriate option below and specify the correct location of the EZT
|
||||||
# template file you wish to use for that view.
|
# template file you wish to use for that view.
|
||||||
#
|
#
|
||||||
# Templates are specified relative to the configured template
|
# Templates are specified relative to the configured template
|
||||||
# directory (see the "template_dir" option), but absolute paths may
|
# directory (see the "template_dir" option), but absolute paths may
|
||||||
# also be used.
|
# also be used.
|
||||||
|
@ -569,13 +612,13 @@ limit_changes = 100
|
||||||
#diff = diff.ezt
|
#diff = diff.ezt
|
||||||
#directory = directory.ezt
|
#directory = directory.ezt
|
||||||
### an alternative directory view
|
### an alternative directory view
|
||||||
#directory = dir_new.ezt
|
#directory = dir_new.ezt
|
||||||
#error = error.ezt
|
#error = error.ezt
|
||||||
#file = file.ezt
|
#file = file.ezt
|
||||||
#graph = graph.ezt
|
#graph = graph.ezt
|
||||||
#log = log.ezt
|
#log = log.ezt
|
||||||
### a table-based alternative log view
|
### a table-based alternative log view
|
||||||
#log = log_table.ezt
|
#log = log_table.ezt
|
||||||
#query = query.ezt
|
#query = query.ezt
|
||||||
#query_form = query_form.ezt
|
#query_form = query_form.ezt
|
||||||
#query_results = query_results.ezt
|
#query_results = query_results.ezt
|
||||||
|
@ -588,22 +631,51 @@ limit_changes = 100
|
||||||
# Set to 1 to enable the database integration feature, 0 otherwise.
|
# Set to 1 to enable the database integration feature, 0 otherwise.
|
||||||
enabled = 0
|
enabled = 0
|
||||||
|
|
||||||
# Database hostname and port.
|
# Set to 1 to enable indexing of file contents using Sphinx and Tika
|
||||||
|
index_content = 0
|
||||||
|
|
||||||
|
# Database hostname, port, and socket
|
||||||
#host = localhost
|
#host = localhost
|
||||||
#port = 3306
|
#port = 3306
|
||||||
|
# On Debian Linux, enable this:
|
||||||
|
#socket = /var/run/mysqld/mysqld.sock
|
||||||
|
|
||||||
# ViewVC database name.
|
# ViewVC database name.
|
||||||
#database_name = ViewVC
|
#database_name = ViewVC
|
||||||
|
|
||||||
# Username and password of user with read/write privileges to the ViewVC
|
# Username and password of user with read/write privileges to the ViewVC
|
||||||
# database.
|
# database.
|
||||||
#user =
|
#user =
|
||||||
#passwd =
|
#passwd =
|
||||||
|
|
||||||
# Username and password of user with read privileges to the ViewVC
|
# Username and password of user with read privileges to the ViewVC
|
||||||
# database.
|
# database.
|
||||||
#readonly_user =
|
#readonly_user =
|
||||||
#readonly_passwd =
|
#readonly_passwd =
|
||||||
|
|
||||||
|
# ViewVC can use Sphinx (http://sphinxsearch.com) full-text search engine
|
||||||
|
# to index file contents with full history and then search over them.
|
||||||
|
# Also, Apache Tika console application can be used in TCP server mode to
|
||||||
|
# add support for indexing binary documents (MS Word, PDF, etc.).
|
||||||
|
# See tika_server in [utilities].
|
||||||
|
# Requires Sphinx >= 0.9.9 with a real-time updatable SphinxQL index.
|
||||||
|
# Index must be created in sphinx.conf by hand and have the following fields:
|
||||||
|
# rt_field = content
|
||||||
|
# rt_attr_string = content
|
||||||
|
# rt_attr_string = mimetype
|
||||||
|
# rt_attr_timestamp = ci_when
|
||||||
|
# rt_attr_uint = whoid
|
||||||
|
# rt_attr_uint = repositoryid
|
||||||
|
# rt_attr_uint = dirid
|
||||||
|
# rt_attr_uint = fileid
|
||||||
|
# rt_attr_uint = revision
|
||||||
|
# rt_attr_uint = branchid
|
||||||
|
|
||||||
|
# Sphinx connection parameters:
|
||||||
|
#sphinx_host =
|
||||||
|
#sphinx_port =
|
||||||
|
#sphinx_socket = /var/run/sphinxql.sock
|
||||||
|
#sphinx_index = viewvc
|
||||||
|
|
||||||
# Limit the number of rows returned by a given query to this number.
|
# Limit the number of rows returned by a given query to this number.
|
||||||
#row_limit = 1000
|
#row_limit = 1000
|
||||||
|
@ -616,7 +688,7 @@ enabled = 0
|
||||||
|
|
||||||
# Check if the repository is found in the database before showing
|
# Check if the repository is found in the database before showing
|
||||||
# the query link and RSS feeds. Set to 1 to enable check.
|
# the query link and RSS feeds. Set to 1 to enable check.
|
||||||
#
|
#
|
||||||
# WARNING: Enabling this check adds the cost of a database connection
|
# WARNING: Enabling this check adds the cost of a database connection
|
||||||
# and query to most ViewVC requests. If all your roots are represented
|
# and query to most ViewVC requests. If all your roots are represented
|
||||||
# in the commits database, or if you don't care about the creation of
|
# in the commits database, or if you don't care about the creation of
|
||||||
|
@ -640,7 +712,7 @@ enabled = 0
|
||||||
#
|
#
|
||||||
# ViewVC allows you to customize its configuration options for
|
# ViewVC allows you to customize its configuration options for
|
||||||
# individual virtual hosts. You might, for example, wish to expose
|
# individual virtual hosts. You might, for example, wish to expose
|
||||||
# all of your Subversion repositories at http://svn.yourdomain.com/viewvc/
|
# all of your Subversion repositories at http://svn.yourdomain.com/viewvc/
|
||||||
# and all your CVS ones at http://cvs.yourdomain.com/viewvc/, with no
|
# and all your CVS ones at http://cvs.yourdomain.com/viewvc/, with no
|
||||||
# cross-exposure. Using ViewVC's virtual host (vhost) configuration
|
# cross-exposure. Using ViewVC's virtual host (vhost) configuration
|
||||||
# support, you can do this. Simply create two vhost configurations
|
# support, you can do this. Simply create two vhost configurations
|
||||||
|
@ -671,7 +743,7 @@ enabled = 0
|
||||||
# gui = guiproject.yourdomain.*
|
# gui = guiproject.yourdomain.*
|
||||||
#
|
#
|
||||||
# [vhost-libs/general]
|
# [vhost-libs/general]
|
||||||
# cvs_roots =
|
# cvs_roots =
|
||||||
# svn_roots = svnroot: /var/svn/libs-repos
|
# svn_roots = svnroot: /var/svn/libs-repos
|
||||||
# default_root = svnroot
|
# default_root = svnroot
|
||||||
#
|
#
|
||||||
|
@ -680,7 +752,7 @@ enabled = 0
|
||||||
#
|
#
|
||||||
# [vhost-gui/general]
|
# [vhost-gui/general]
|
||||||
# cvs_roots = cvsroot: /var/cvs/guiproject
|
# cvs_roots = cvsroot: /var/cvs/guiproject
|
||||||
# svn_roots =
|
# svn_roots =
|
||||||
# default_root = cvsroot
|
# default_root = cvsroot
|
||||||
#
|
#
|
||||||
|
|
||||||
|
@ -697,7 +769,7 @@ enabled = 0
|
||||||
#
|
#
|
||||||
# Here is an example showing how to enable Subversion authz-based
|
# Here is an example showing how to enable Subversion authz-based
|
||||||
# authorization for only the single root named "svnroot":
|
# authorization for only the single root named "svnroot":
|
||||||
#
|
#
|
||||||
# [root-svnroot/options]
|
# [root-svnroot/options]
|
||||||
# authorizer = svnauthz
|
# authorizer = svnauthz
|
||||||
#
|
#
|
||||||
|
@ -726,7 +798,7 @@ enabled = 0
|
||||||
#
|
#
|
||||||
# Tests are case-sensitive.
|
# Tests are case-sensitive.
|
||||||
#
|
#
|
||||||
# NOTE: Again, this is for the hiding of modules within repositories, *not*
|
# NOTE: Again, this is for the hiding of modules within repositories, *not*
|
||||||
# for the hiding of repositories (roots) themselves.
|
# for the hiding of repositories (roots) themselves.
|
||||||
#
|
#
|
||||||
# Some examples:
|
# Some examples:
|
||||||
|
@ -749,7 +821,7 @@ enabled = 0
|
||||||
# Allow "xml", forbid other modules starting with "x", and allow the rest:
|
# Allow "xml", forbid other modules starting with "x", and allow the rest:
|
||||||
# forbidden = !xml, x*, !*
|
# forbidden = !xml, x*, !*
|
||||||
#
|
#
|
||||||
forbidden =
|
forbidden =
|
||||||
|
|
||||||
#---------------------------------------------------------------------------
|
#---------------------------------------------------------------------------
|
||||||
[authz-forbiddenre]
|
[authz-forbiddenre]
|
||||||
|
@ -792,7 +864,7 @@ forbidden =
|
||||||
# Only allow visibility of HTML files and the directories that hold them:
|
# Only allow visibility of HTML files and the directories that hold them:
|
||||||
# forbiddenre = !^([^/]+|.*(/|\.html))$
|
# forbiddenre = !^([^/]+|.*(/|\.html))$
|
||||||
#
|
#
|
||||||
forbiddenre =
|
forbiddenre =
|
||||||
|
|
||||||
#---------------------------------------------------------------------------
|
#---------------------------------------------------------------------------
|
||||||
[authz-svnauthz]
|
[authz-svnauthz]
|
||||||
|
|
|
@ -24,6 +24,7 @@ import vclib.ccvs
|
||||||
import vclib.svn
|
import vclib.svn
|
||||||
import cvsdb
|
import cvsdb
|
||||||
import viewvc
|
import viewvc
|
||||||
|
from viewvcmagic import ContentMagic
|
||||||
|
|
||||||
#########################################################################
|
#########################################################################
|
||||||
#
|
#
|
||||||
|
@ -47,6 +48,7 @@ class Config:
|
||||||
'root_parents', 'allowed_views', 'mime_types_files')
|
'root_parents', 'allowed_views', 'mime_types_files')
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
self.__guesser = None
|
||||||
for section in self._sections:
|
for section in self._sections:
|
||||||
setattr(self, section, _sub_config())
|
setattr(self, section, _sub_config())
|
||||||
|
|
||||||
|
@ -66,7 +68,6 @@ class Config:
|
||||||
if rootname:
|
if rootname:
|
||||||
self._process_root_options(self.parser, rootname)
|
self._process_root_options(self.parser, rootname)
|
||||||
self.expand_root_parents()
|
self.expand_root_parents()
|
||||||
cvsdb.setencs(self.options.encodings.split(':'))
|
|
||||||
r = {}
|
r = {}
|
||||||
for i in self.rewritehtml.__dict__.keys():
|
for i in self.rewritehtml.__dict__.keys():
|
||||||
if i[-8:] == '.replace':
|
if i[-8:] == '.replace':
|
||||||
|
@ -201,7 +202,7 @@ class Config:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
raise IllegalOverrideSection('root', section)
|
raise IllegalOverrideSection('root', section)
|
||||||
|
|
||||||
def overlay_root_options(self, rootname):
|
def overlay_root_options(self, rootname):
|
||||||
"Overly per-root options atop the existing option set."
|
"Overly per-root options atop the existing option set."
|
||||||
if not self.conf_path:
|
if not self.conf_path:
|
||||||
|
@ -217,7 +218,7 @@ class Config:
|
||||||
for option in parser.options(section):
|
for option in parser.options(section):
|
||||||
d[option] = parser.get(section, option)
|
d[option] = parser.get(section, option)
|
||||||
return d.items()
|
return d.items()
|
||||||
|
|
||||||
def get_authorizer_params(self, authorizer, rootname=None):
|
def get_authorizer_params(self, authorizer, rootname=None):
|
||||||
if not self.conf_path:
|
if not self.conf_path:
|
||||||
return {}
|
return {}
|
||||||
|
@ -236,7 +237,12 @@ class Config:
|
||||||
params[key] = value
|
params[key] = value
|
||||||
params['__config'] = self
|
params['__config'] = self
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
def guesser(self):
|
||||||
|
if not self.__guesser:
|
||||||
|
self.__guesser = ContentMagic(self.options.encodings)
|
||||||
|
return self.__guesser
|
||||||
|
|
||||||
def set_defaults(self):
|
def set_defaults(self):
|
||||||
"Set some default values in the configuration."
|
"Set some default values in the configuration."
|
||||||
|
|
||||||
|
@ -258,6 +264,8 @@ class Config:
|
||||||
self.utilities.svn = ''
|
self.utilities.svn = ''
|
||||||
self.utilities.diff = ''
|
self.utilities.diff = ''
|
||||||
self.utilities.cvsgraph = ''
|
self.utilities.cvsgraph = ''
|
||||||
|
self.utilities.tika_server = ''
|
||||||
|
self.utilities.tika_mime_types = ''
|
||||||
|
|
||||||
self.options.root_as_url_component = 1
|
self.options.root_as_url_component = 1
|
||||||
self.options.checkout_magic = 0
|
self.options.checkout_magic = 0
|
||||||
|
@ -302,7 +310,7 @@ class Config:
|
||||||
self.options.limit_changes = 100
|
self.options.limit_changes = 100
|
||||||
self.options.cvs_ondisk_charset = 'cp1251'
|
self.options.cvs_ondisk_charset = 'cp1251'
|
||||||
self.options.binary_mime_re = '^(?!text/|.*\Wxml)'
|
self.options.binary_mime_re = '^(?!text/|.*\Wxml)'
|
||||||
self.options.encodings = 'utf-8:cp1251:iso-8859-1'
|
self.options.encodings = 'cp1251:iso-8859-1'
|
||||||
|
|
||||||
self.templates.diff = None
|
self.templates.diff = None
|
||||||
self.templates.directory = None
|
self.templates.directory = None
|
||||||
|
@ -316,6 +324,7 @@ class Config:
|
||||||
self.templates.roots = None
|
self.templates.roots = None
|
||||||
|
|
||||||
self.cvsdb.enabled = 0
|
self.cvsdb.enabled = 0
|
||||||
|
self.cvsdb.index_content = 0
|
||||||
self.cvsdb.host = ''
|
self.cvsdb.host = ''
|
||||||
self.cvsdb.port = 3306
|
self.cvsdb.port = 3306
|
||||||
self.cvsdb.socket = ''
|
self.cvsdb.socket = ''
|
||||||
|
@ -323,12 +332,17 @@ class Config:
|
||||||
self.cvsdb.user = ''
|
self.cvsdb.user = ''
|
||||||
self.cvsdb.passwd = ''
|
self.cvsdb.passwd = ''
|
||||||
self.cvsdb.readonly_user = ''
|
self.cvsdb.readonly_user = ''
|
||||||
self.cvsdb.readonly_passwd = ''
|
self.cvsdb.readonly_passwd = ''
|
||||||
self.cvsdb.row_limit = 1000
|
self.cvsdb.row_limit = 1000
|
||||||
self.cvsdb.rss_row_limit = 100
|
self.cvsdb.rss_row_limit = 100
|
||||||
self.cvsdb.check_database_for_root = 0
|
self.cvsdb.check_database_for_root = 0
|
||||||
self.cvsdb.fulltext_min_relevance = 0.2
|
self.cvsdb.fulltext_min_relevance = 0.2
|
||||||
|
|
||||||
|
self.cvsdb.sphinx_host = ''
|
||||||
|
self.cvsdb.sphinx_port = 3307
|
||||||
|
self.cvsdb.sphinx_socket = ''
|
||||||
|
self.cvsdb.sphinx_index = ''
|
||||||
|
|
||||||
def _startswith(somestr, substr):
|
def _startswith(somestr, substr):
|
||||||
return somestr[:len(substr)] == substr
|
return somestr[:len(substr)] == substr
|
||||||
|
|
||||||
|
|
465
lib/cvsdb.py
465
lib/cvsdb.py
|
@ -15,6 +15,7 @@ import sys
|
||||||
import string
|
import string
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
|
import cgi
|
||||||
|
|
||||||
import vclib
|
import vclib
|
||||||
import dbi
|
import dbi
|
||||||
|
@ -36,22 +37,12 @@ error = "cvsdb error"
|
||||||
## defined to actually be complete; it should run well off of any DBI 2.0
|
## defined to actually be complete; it should run well off of any DBI 2.0
|
||||||
## complient database interface
|
## complient database interface
|
||||||
|
|
||||||
encs = [ "utf-8", "cp1251", "iso-8859-1" ]
|
|
||||||
|
|
||||||
def utf8string(value):
|
|
||||||
for e in encs:
|
|
||||||
try:
|
|
||||||
value = value.decode(e)
|
|
||||||
break
|
|
||||||
except: pass
|
|
||||||
return value.encode("utf-8")
|
|
||||||
|
|
||||||
def setencs(e):
|
|
||||||
global encs
|
|
||||||
encs = e
|
|
||||||
|
|
||||||
class CheckinDatabase:
|
class CheckinDatabase:
|
||||||
def __init__(self, host, port, socket, user, passwd, database, row_limit, min_relevance, authorizer = None):
|
def __init__(self, host, port, socket, user, passwd, database, row_limit, min_relevance, cfg,
|
||||||
|
authorizer = None, index_content = 0, sphinx_host = None, sphinx_port = None,
|
||||||
|
sphinx_socket = None, sphinx_index = None):
|
||||||
|
self.cfg = cfg
|
||||||
|
|
||||||
self._host = host
|
self._host = host
|
||||||
self._port = port
|
self._port = port
|
||||||
self._socket = socket
|
self._socket = socket
|
||||||
|
@ -63,11 +54,21 @@ class CheckinDatabase:
|
||||||
self._min_relevance = min_relevance
|
self._min_relevance = min_relevance
|
||||||
self.authorizer = authorizer
|
self.authorizer = authorizer
|
||||||
|
|
||||||
|
# Sphinx settings
|
||||||
|
self.index_content = index_content
|
||||||
|
self.sphinx_host = sphinx_host
|
||||||
|
self.sphinx_port = sphinx_port
|
||||||
|
self.sphinx_socket = sphinx_socket
|
||||||
|
self.sphinx_index = sphinx_index
|
||||||
|
|
||||||
## database lookup caches
|
## database lookup caches
|
||||||
self._get_cache = {}
|
self._get_cache = {}
|
||||||
self._get_id_cache = {}
|
self._get_id_cache = {}
|
||||||
self._desc_id_cache = {}
|
self._desc_id_cache = {}
|
||||||
|
|
||||||
|
# Sphinx connection None by default
|
||||||
|
self.sphinx = None
|
||||||
|
|
||||||
def Connect(self):
|
def Connect(self):
|
||||||
self.db = dbi.connect(
|
self.db = dbi.connect(
|
||||||
self._host, self._port, self._socket, self._user, self._passwd, self._database)
|
self._host, self._port, self._socket, self._user, self._passwd, self._database)
|
||||||
|
@ -83,12 +84,17 @@ class CheckinDatabase:
|
||||||
else:
|
else:
|
||||||
self._version = 0
|
self._version = 0
|
||||||
if self._version > CURRENT_SCHEMA_VERSION:
|
if self._version > CURRENT_SCHEMA_VERSION:
|
||||||
raise DatabaseVersionError("Database version %d is newer than the "
|
raise DatabaseVersionError("Database version %d is newer than the "
|
||||||
"last version supported by this "
|
"last version supported by this "
|
||||||
"software." % (self._version))
|
"software." % (self._version))
|
||||||
|
if self.index_content:
|
||||||
|
self.sphinx = dbi.connect(self.sphinx_host, self.sphinx_port, self.sphinx_socket, '', '', '')
|
||||||
|
|
||||||
|
def utf8(self, value):
|
||||||
|
return self.cfg.guesser().utf8(value)
|
||||||
|
|
||||||
def sql_get_id(self, table, column, value, auto_set):
|
def sql_get_id(self, table, column, value, auto_set):
|
||||||
value = utf8string(value)
|
value = self.utf8(value)
|
||||||
|
|
||||||
sql = "SELECT id FROM %s WHERE %s=%%s" % (table, column)
|
sql = "SELECT id FROM %s WHERE %s=%%s" % (table, column)
|
||||||
sql_args = (value, )
|
sql_args = (value, )
|
||||||
|
@ -172,7 +178,7 @@ class CheckinDatabase:
|
||||||
|
|
||||||
temp2[id] = value
|
temp2[id] = value
|
||||||
return value
|
return value
|
||||||
|
|
||||||
def get_list(self, table, field_index):
|
def get_list(self, table, field_index):
|
||||||
sql = "SELECT * FROM %s" % (table)
|
sql = "SELECT * FROM %s" % (table)
|
||||||
cursor = self.db.cursor()
|
cursor = self.db.cursor()
|
||||||
|
@ -198,7 +204,7 @@ class CheckinDatabase:
|
||||||
break
|
break
|
||||||
list.append(row[0])
|
list.append(row[0])
|
||||||
return list
|
return list
|
||||||
|
|
||||||
def GetMetadataValue(self, name):
|
def GetMetadataValue(self, name):
|
||||||
sql = "SELECT value FROM metadata WHERE name=%s"
|
sql = "SELECT value FROM metadata WHERE name=%s"
|
||||||
sql_args = (name)
|
sql_args = (name)
|
||||||
|
@ -209,7 +215,7 @@ class CheckinDatabase:
|
||||||
except TypeError:
|
except TypeError:
|
||||||
return None
|
return None
|
||||||
return value
|
return value
|
||||||
|
|
||||||
def SetMetadataValue(self, name, value):
|
def SetMetadataValue(self, name, value):
|
||||||
assert(self._version > 0)
|
assert(self._version > 0)
|
||||||
sql = "REPLACE INTO metadata (name, value) VALUES (%s, %s)"
|
sql = "REPLACE INTO metadata (name, value) VALUES (%s, %s)"
|
||||||
|
@ -222,7 +228,7 @@ class CheckinDatabase:
|
||||||
"\tname = %s\n"
|
"\tname = %s\n"
|
||||||
"\tvalue = %s\n"
|
"\tvalue = %s\n"
|
||||||
% (str(e), name, value))
|
% (str(e), name, value))
|
||||||
|
|
||||||
def GetBranchID(self, branch, auto_set = 1):
|
def GetBranchID(self, branch, auto_set = 1):
|
||||||
return self.get_id("branches", "branch", branch, auto_set)
|
return self.get_id("branches", "branch", branch, auto_set)
|
||||||
|
|
||||||
|
@ -240,13 +246,13 @@ class CheckinDatabase:
|
||||||
|
|
||||||
def GetFile(self, id):
|
def GetFile(self, id):
|
||||||
return self.get("files", "file", id)
|
return self.get("files", "file", id)
|
||||||
|
|
||||||
def GetAuthorID(self, author, auto_set = 1):
|
def GetAuthorID(self, author, auto_set = 1):
|
||||||
return self.get_id("people", "who", author, auto_set)
|
return self.get_id("people", "who", author, auto_set)
|
||||||
|
|
||||||
def GetAuthor(self, id):
|
def GetAuthor(self, id):
|
||||||
return self.get("people", "who", id)
|
return self.get("people", "who", id)
|
||||||
|
|
||||||
def GetRepositoryID(self, repository, auto_set = 1):
|
def GetRepositoryID(self, repository, auto_set = 1):
|
||||||
return self.get_id("repositories", "repository", repository, auto_set)
|
return self.get_id("repositories", "repository", repository, auto_set)
|
||||||
|
|
||||||
|
@ -257,7 +263,7 @@ class CheckinDatabase:
|
||||||
return self.get_list("repositories", repository)
|
return self.get_list("repositories", repository)
|
||||||
|
|
||||||
def SQLGetDescriptionID(self, description, auto_set = 1):
|
def SQLGetDescriptionID(self, description, auto_set = 1):
|
||||||
description = utf8string(description)
|
description = self.utf8(description)
|
||||||
## lame string hash, blame Netscape -JMP
|
## lame string hash, blame Netscape -JMP
|
||||||
hash = len(description)
|
hash = len(description)
|
||||||
|
|
||||||
|
@ -330,7 +336,7 @@ class CheckinDatabase:
|
||||||
ci_when = cursor.fetchone()[0]
|
ci_when = cursor.fetchone()[0]
|
||||||
except TypeError:
|
except TypeError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return dbi.TicksFromDateTime(ci_when)
|
return dbi.TicksFromDateTime(ci_when)
|
||||||
|
|
||||||
def AddCommitList(self, commit_list):
|
def AddCommitList(self, commit_list):
|
||||||
|
@ -338,48 +344,55 @@ class CheckinDatabase:
|
||||||
self.AddCommit(commit)
|
self.AddCommit(commit)
|
||||||
|
|
||||||
def AddCommit(self, commit):
|
def AddCommit(self, commit):
|
||||||
ci_when = dbi.DateTimeFromTicks(commit.GetTime() or 0.0)
|
props = {
|
||||||
ci_type = commit.GetTypeString()
|
'type' : commit.GetTypeString(),
|
||||||
who_id = self.GetAuthorID(commit.GetAuthor())
|
'ci_when' : dbi.DateTimeFromTicks(commit.GetTime() or 0.0),
|
||||||
repository_id = self.GetRepositoryID(commit.GetRepository())
|
'whoid' : self.GetAuthorID(commit.GetAuthor()),
|
||||||
directory_id = self.GetDirectoryID(commit.GetDirectory())
|
'repositoryid' : self.GetRepositoryID(commit.GetRepository()),
|
||||||
file_id = self.GetFileID(commit.GetFile())
|
'dirid' : self.GetDirectoryID(commit.GetDirectory()),
|
||||||
revision = commit.GetRevision()
|
'fileid' : self.GetFileID(commit.GetFile()),
|
||||||
sticky_tag = "NULL"
|
'revision' : commit.GetRevision(),
|
||||||
branch_id = self.GetBranchID(commit.GetBranch())
|
'branchid' : self.GetBranchID(commit.GetBranch()),
|
||||||
plus_count = commit.GetPlusCount() or '0'
|
'addedlines' : commit.GetPlusCount() or '0',
|
||||||
minus_count = commit.GetMinusCount() or '0'
|
'removedlines' : commit.GetMinusCount() or '0',
|
||||||
description_id = self.GetDescriptionID(commit.GetDescription())
|
'descid' : self.GetDescriptionID(commit.GetDescription()),
|
||||||
|
}
|
||||||
|
|
||||||
commits_table = self._version >= 1 and 'commits' or 'checkins'
|
commits_table = self._version >= 1 and 'commits' or 'checkins'
|
||||||
sql = "REPLACE INTO %s" % (commits_table)
|
|
||||||
sql = sql + \
|
|
||||||
" (type,ci_when,whoid,repositoryid,dirid,fileid,revision,"\
|
|
||||||
" stickytag,branchid,addedlines,removedlines,descid)"\
|
|
||||||
"VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
|
|
||||||
sql_args = (ci_type, ci_when, who_id, repository_id,
|
|
||||||
directory_id, file_id, revision, sticky_tag, branch_id,
|
|
||||||
plus_count, minus_count, description_id)
|
|
||||||
|
|
||||||
cursor = self.db.cursor()
|
cursor = self.db.cursor()
|
||||||
try:
|
try:
|
||||||
cursor.execute(sql, sql_args)
|
# MySQL-specific INSERT-or-UPDATE with ID retrieval
|
||||||
|
cursor.execute(
|
||||||
|
'INSERT INTO '+commits_table+'('+','.join(i for i in props)+') VALUES ('+
|
||||||
|
', '.join('%s' for i in props)+') ON DUPLICATE KEY UPDATE id=LAST_INSERT_ID(id), '+
|
||||||
|
', '.join(i+'=VALUES('+i+')' for i in props),
|
||||||
|
tuple(props[i] for i in props)
|
||||||
|
)
|
||||||
|
commit_id = cursor.lastrowid
|
||||||
|
if self.index_content:
|
||||||
|
sphcur = self.sphinx.cursor()
|
||||||
|
content = commit.GetContent()
|
||||||
|
props['ci_when'] = str(int(commit.GetTime() or 0))
|
||||||
|
if len(content):
|
||||||
|
props['content'] = content
|
||||||
|
# Now, stored MIME type is only needed while searching
|
||||||
|
# It is guessed again when the file is displayed
|
||||||
|
props['mimetype'] = commit.GetMimeType()
|
||||||
|
props['id'] = str(commit_id)
|
||||||
|
del props['addedlines']
|
||||||
|
del props['removedlines']
|
||||||
|
del props['descid']
|
||||||
|
del props['type']
|
||||||
|
sphcur.execute(
|
||||||
|
'INSERT INTO '+self.sphinx_index+'('+','.join(i for i in props)+') VALUES ('+
|
||||||
|
','.join('%s' for i in props)+')',
|
||||||
|
tuple(props[i] for i in props)
|
||||||
|
)
|
||||||
except Exception, e:
|
except Exception, e:
|
||||||
raise Exception("Error adding commit: '%s'\n"
|
print ("Error adding commit: '"+str(e)+"'\nValues were:\n"+
|
||||||
"Values were:\n"
|
"\n".join(i+'='+str(props[i]) for i in props))
|
||||||
"\ttype = %s\n"
|
raise
|
||||||
"\tci_when = %s\n"
|
|
||||||
"\twhoid = %s\n"
|
|
||||||
"\trepositoryid = %s\n"
|
|
||||||
"\tdirid = %s\n"
|
|
||||||
"\tfileid = %s\n"
|
|
||||||
"\trevision = %s\n"
|
|
||||||
"\tstickytag = %s\n"
|
|
||||||
"\tbranchid = %s\n"
|
|
||||||
"\taddedlines = %s\n"
|
|
||||||
"\tremovedlines = %s\n"
|
|
||||||
"\tdescid = %s\n"
|
|
||||||
% ((str(e), ) + sql_args))
|
|
||||||
|
|
||||||
def SQLQueryListString(self, field, query_entry_list):
|
def SQLQueryListString(self, field, query_entry_list):
|
||||||
sqlList = []
|
sqlList = []
|
||||||
|
@ -414,6 +427,67 @@ class CheckinDatabase:
|
||||||
|
|
||||||
return "(%s)" % (string.join(sqlList, " OR "))
|
return "(%s)" % (string.join(sqlList, " OR "))
|
||||||
|
|
||||||
|
def query_ids(self, in_field, table, id_field, name_field, lst):
|
||||||
|
if not len(lst):
|
||||||
|
return None
|
||||||
|
cond = self.SQLQueryListString(name_field, lst)
|
||||||
|
cursor = self.db.cursor()
|
||||||
|
cursor.execute('SELECT %s FROM %s WHERE %s' % (id_field, table, cond))
|
||||||
|
ids = list(str(row[0]) for row in cursor)
|
||||||
|
if not len(ids):
|
||||||
|
return None
|
||||||
|
return "%s IN (%s)" % (in_field, ','.join(ids))
|
||||||
|
|
||||||
|
def CreateSphinxQueryString(self, query):
|
||||||
|
condList = [
|
||||||
|
'MATCH(%s)' % (self.db.literal(query.content_query), ),
|
||||||
|
self.query_ids('repositoryid', 'repositories', 'id', 'repository', query.repository_list),
|
||||||
|
self.query_ids('branchid', 'branches', 'id', 'branch', query.branch_list),
|
||||||
|
self.query_ids('dirid', 'dirs', 'id', 'dir', query.directory_list),
|
||||||
|
self.query_ids('fileid', 'files', 'id', 'file', query.file_list),
|
||||||
|
self.query_ids('authorid', 'people', 'id', 'who', query.author_list),
|
||||||
|
self.query_ids('descid', 'descs', 'id', 'description', query.comment_list),
|
||||||
|
]
|
||||||
|
|
||||||
|
if len(query.revision_list):
|
||||||
|
condList.append("revision IN ("+','.join(self.db.literal(s) for s in query.revision_list)+")")
|
||||||
|
if query.from_date:
|
||||||
|
condList.append('ci_when>='+str(dbi.TicksFromDateTime(query.from_date)))
|
||||||
|
if query.to_date:
|
||||||
|
condList.append('ci_when<='+str(dbi.TicksFromDateTime(query.to_date)))
|
||||||
|
|
||||||
|
if query.sort == 'date':
|
||||||
|
order_by = 'ORDER BY `ci_when` DESC, `relevance` DESC'
|
||||||
|
elif query.sort == 'date_rev':
|
||||||
|
order_by = 'ORDER BY `ci_when` ASC, `relevance` DESC'
|
||||||
|
else: # /* if query.sort == 'relevance' */
|
||||||
|
order_by = 'ORDER BY `relevance` DESC'
|
||||||
|
|
||||||
|
conditions = string.join((i for i in condList if i), " AND ")
|
||||||
|
conditions = conditions and "WHERE %s" % conditions
|
||||||
|
|
||||||
|
## limit the number of rows requested or we could really slam
|
||||||
|
## a server with a large database
|
||||||
|
limit = ""
|
||||||
|
if query.limit:
|
||||||
|
limit = "LIMIT %s" % (str(query.limit))
|
||||||
|
elif self._row_limit:
|
||||||
|
limit = "LIMIT %s" % (str(self._row_limit))
|
||||||
|
|
||||||
|
fields = "id `id`, WEIGHT() `relevance`, `content`, `mimetype`"
|
||||||
|
|
||||||
|
return "SELECT %s FROM %s %s %s %s" % (fields, self.sphinx_index, conditions, order_by, limit)
|
||||||
|
|
||||||
|
# Get commits by their IDs
|
||||||
|
def CreateIdQueryString(self, ids):
|
||||||
|
commits_table = self._version >= 1 and 'commits' or 'checkins'
|
||||||
|
return (
|
||||||
|
'SELECT %s.*, repositories.repository AS repository_name, dirs.dir AS dir_name, files.file AS file_name'
|
||||||
|
' FROM %s, repositories, dirs, files'
|
||||||
|
' WHERE %s.id IN (%s) AND repositoryid=repositories.id'
|
||||||
|
' AND dirid=dirs.id AND fileid=files.id' % (commits_table, commits_table, commits_table, ','.join(ids))
|
||||||
|
)
|
||||||
|
|
||||||
def CreateSQLQueryString(self, query):
|
def CreateSQLQueryString(self, query):
|
||||||
commits_table = self._version >= 1 and 'commits' or 'checkins'
|
commits_table = self._version >= 1 and 'commits' or 'checkins'
|
||||||
fields = [
|
fields = [
|
||||||
|
@ -427,7 +501,7 @@ class CheckinDatabase:
|
||||||
("dirs", "(%s.dirid=dirs.id)" % (commits_table)),
|
("dirs", "(%s.dirid=dirs.id)" % (commits_table)),
|
||||||
("files", "(%s.fileid=files.id)" % (commits_table))]
|
("files", "(%s.fileid=files.id)" % (commits_table))]
|
||||||
condList = []
|
condList = []
|
||||||
|
|
||||||
if len(query.text_query):
|
if len(query.text_query):
|
||||||
tableList.append(("descs", "(descs.id=%s.descid)" % (commits_table)))
|
tableList.append(("descs", "(descs.id=%s.descid)" % (commits_table)))
|
||||||
temp = "MATCH (descs.description) AGAINST (%s" % (self.db.literal(query.text_query))
|
temp = "MATCH (descs.description) AGAINST (%s" % (self.db.literal(query.text_query))
|
||||||
|
@ -435,6 +509,7 @@ class CheckinDatabase:
|
||||||
fields.append("%s) AS relevance" % temp)
|
fields.append("%s) AS relevance" % temp)
|
||||||
else:
|
else:
|
||||||
fields.append("'' AS relevance")
|
fields.append("'' AS relevance")
|
||||||
|
fields.append("'' AS snippet")
|
||||||
|
|
||||||
if len(query.repository_list):
|
if len(query.repository_list):
|
||||||
temp = self.SQLQueryListString("repositories.repository",
|
temp = self.SQLQueryListString("repositories.repository",
|
||||||
|
@ -478,16 +553,18 @@ class CheckinDatabase:
|
||||||
temp = "(%s.ci_when<=\"%s\")" % (commits_table, str(query.to_date))
|
temp = "(%s.ci_when<=\"%s\")" % (commits_table, str(query.to_date))
|
||||||
condList.append(temp)
|
condList.append(temp)
|
||||||
|
|
||||||
if query.sort == "date":
|
if query.sort == "relevance" and len(query.text_query):
|
||||||
order_by = "ORDER BY %s.ci_when DESC,descid,%s.repositoryid" % (commits_table, commits_table)
|
order_by = "ORDER BY relevance DESC,%s.ci_when DESC,descid,%s.repositoryid" % (commits_table, commits_table)
|
||||||
|
elif query.sort == "date_rev":
|
||||||
|
order_by = "ORDER BY %s.ci_when ASC,descid,%s.repositoryid" % (commits_table, commits_table)
|
||||||
elif query.sort == "author":
|
elif query.sort == "author":
|
||||||
tableList.append(("people", "(%s.whoid=people.id)" % (commits_table)))
|
tableList.append(("people", "(%s.whoid=people.id)" % (commits_table)))
|
||||||
order_by = "ORDER BY people.who,descid,%s.repositoryid" % (commits_table)
|
order_by = "ORDER BY people.who,descid,%s.repositoryid" % (commits_table)
|
||||||
elif query.sort == "file":
|
elif query.sort == "file":
|
||||||
tableList.append(("files", "(%s.fileid=files.id)" % (commits_table)))
|
tableList.append(("files", "(%s.fileid=files.id)" % (commits_table)))
|
||||||
order_by = "ORDER BY files.file,descid,%s.repositoryid" % (commits_table)
|
order_by = "ORDER BY files.file,descid,%s.repositoryid" % (commits_table)
|
||||||
elif query.sort == "relevance" and len(query.text_query):
|
else: # /* if query.sort == "date": */
|
||||||
order_by = "ORDER BY relevance DESC,%s.ci_when DESC,descid,%s.repositoryid" % (commits_table, commits_table)
|
order_by = "ORDER BY %s.ci_when DESC,descid,%s.repositoryid" % (commits_table, commits_table)
|
||||||
|
|
||||||
## exclude duplicates from the table list, and split out join
|
## exclude duplicates from the table list, and split out join
|
||||||
## conditions from table names. In future, the join conditions
|
## conditions from table names. In future, the join conditions
|
||||||
|
@ -517,7 +594,7 @@ class CheckinDatabase:
|
||||||
fields, tables, conditions, order_by, limit)
|
fields, tables, conditions, order_by, limit)
|
||||||
|
|
||||||
return sql
|
return sql
|
||||||
|
|
||||||
def check_commit_access(self, repos, dir, file, rev):
|
def check_commit_access(self, repos, dir, file, rev):
|
||||||
if self.authorizer:
|
if self.authorizer:
|
||||||
rootname = repos.split('/')
|
rootname = repos.split('/')
|
||||||
|
@ -528,19 +605,60 @@ class CheckinDatabase:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def RunQuery(self, query):
|
def RunQuery(self, query):
|
||||||
sql = self.CreateSQLQueryString(query)
|
if len(query.content_query) and self.sphinx:
|
||||||
cursor = self.db.cursor()
|
# Use Sphinx to search on document content
|
||||||
cursor.execute(sql)
|
sql = self.CreateSphinxQueryString(query)
|
||||||
|
cursor = self.sphinx.cursor()
|
||||||
|
cursor.execute(sql)
|
||||||
|
sphinx_rows = list((str(docid), rel, content, mimetype) for docid, rel, content, mimetype in cursor)
|
||||||
|
if len(sphinx_rows):
|
||||||
|
# Fetch snippets
|
||||||
|
snippet_options = {
|
||||||
|
'around': 15,
|
||||||
|
'limit': 200,
|
||||||
|
'before_match': '<span style="color:red">',
|
||||||
|
'after_match': '</span>',
|
||||||
|
'chunk_separator': ' ... ',
|
||||||
|
}
|
||||||
|
preformatted_mime = 'text/(?!html|xml).*'
|
||||||
|
snippets = {}
|
||||||
|
bm_html = cgi.escape(snippet_options['before_match'])
|
||||||
|
am_html = cgi.escape(snippet_options['after_match'])
|
||||||
|
for docid, rel, content, mimetype in sphinx_rows:
|
||||||
|
cursor.execute(
|
||||||
|
'CALL SNIPPETS(%s, %s, %s'+''.join(', %s AS '+i for i in snippet_options)+')',
|
||||||
|
(content, self.sphinx_index, query.content_query) + tuple(snippet_options.values())
|
||||||
|
)
|
||||||
|
s, = cursor.fetchone()
|
||||||
|
s = cgi.escape(s)
|
||||||
|
if re.match(preformatted_mime, mimetype):
|
||||||
|
s = s.replace('\n', '<br />')
|
||||||
|
s = s.replace(bm_html, snippet_options['before_match'])
|
||||||
|
s = s.replace(am_html, snippet_options['after_match'])
|
||||||
|
snippets[docid] = s
|
||||||
|
# Fetch all fields from MySQL
|
||||||
|
sql = self.CreateIdQueryString((docid for (docid, _, _, _) in sphinx_rows))
|
||||||
|
cursor = self.db.cursor()
|
||||||
|
cursor.execute(sql)
|
||||||
|
byid = {}
|
||||||
|
for row in cursor:
|
||||||
|
byid[str(row[0])] = row
|
||||||
|
rows = list(byid[docid] + (rel, snippets[docid]) for (docid, rel, _, _) in sphinx_rows if docid in byid)
|
||||||
|
else:
|
||||||
|
rows = []
|
||||||
|
else:
|
||||||
|
# Use regular queries when document content is not searched
|
||||||
|
sql = self.CreateSQLQueryString(query)
|
||||||
|
cursor = self.db.cursor()
|
||||||
|
cursor.execute(sql)
|
||||||
|
rows = list(cursor)
|
||||||
|
|
||||||
while 1:
|
# Convert rows to commit objects
|
||||||
row = cursor.fetchone()
|
for row in rows:
|
||||||
if not row:
|
(dbId, dbType, dbCI_When, dbAuthorID, dbRepositoryID, dbDirID,
|
||||||
break
|
|
||||||
|
|
||||||
(dbType, dbCI_When, dbAuthorID, dbRepositoryID, dbDirID,
|
|
||||||
dbFileID, dbRevision, dbStickyTag, dbBranchID, dbAddedLines,
|
dbFileID, dbRevision, dbStickyTag, dbBranchID, dbAddedLines,
|
||||||
dbRemovedLines, dbDescID, dbRepositoryName, dbDirName,
|
dbRemovedLines, dbDescID, dbRepositoryName, dbDirName,
|
||||||
dbFileName, dbRelevance) = row
|
dbFileName, dbRelevance, dbSnippet) = row
|
||||||
|
|
||||||
if not self.check_commit_access(dbRepositoryName, dbDirName, dbFileName, dbRevision):
|
if not self.check_commit_access(dbRepositoryName, dbDirName, dbFileName, dbRevision):
|
||||||
continue
|
continue
|
||||||
|
@ -564,6 +682,7 @@ class CheckinDatabase:
|
||||||
commit.SetMinusCount(dbRemovedLines)
|
commit.SetMinusCount(dbRemovedLines)
|
||||||
commit.SetDescriptionID(dbDescID)
|
commit.SetDescriptionID(dbDescID)
|
||||||
commit.SetRelevance(dbRelevance)
|
commit.SetRelevance(dbRelevance)
|
||||||
|
commit.SetSnippet(dbSnippet)
|
||||||
|
|
||||||
query.AddCommit(commit)
|
query.AddCommit(commit)
|
||||||
|
|
||||||
|
@ -623,46 +742,21 @@ class CheckinDatabase:
|
||||||
raise UnknownRepositoryError("Unknown repository '%s'"
|
raise UnknownRepositoryError("Unknown repository '%s'"
|
||||||
% (repository))
|
% (repository))
|
||||||
|
|
||||||
if (self._version >= 1):
|
checkins_table = self._version >= 1 and 'commits' or 'checkins'
|
||||||
self.sql_delete('repositories', 'id', rep_id)
|
self.sql_delete('repositories', 'id', rep_id)
|
||||||
self.sql_purge('commits', 'repositoryid', 'id', 'repositories')
|
self.sql_purge(checkins_table, 'repositoryid', 'id', 'repositories')
|
||||||
self.sql_purge('files', 'id', 'fileid', 'commits')
|
self.sql_purge('files', 'id', 'fileid', checkins_table)
|
||||||
self.sql_purge('dirs', 'id', 'dirid', 'commits')
|
self.sql_purge('dirs', 'id', 'dirid', checkins_table)
|
||||||
self.sql_purge('branches', 'id', 'branchid', 'commits')
|
self.sql_purge('branches', 'id', 'branchid', checkins_table)
|
||||||
self.sql_purge('descs', 'id', 'descid', 'commits')
|
self.sql_purge('descs', 'id', 'descid', checkins_table)
|
||||||
self.sql_purge('people', 'id', 'whoid', 'commits')
|
self.sql_purge('people', 'id', 'whoid', checkins_table)
|
||||||
else:
|
|
||||||
sql = "SELECT * FROM checkins WHERE repositoryid=%s"
|
|
||||||
sql_args = (rep_id, )
|
|
||||||
cursor = self.db.cursor()
|
|
||||||
cursor.execute(sql, sql_args)
|
|
||||||
checkins = []
|
|
||||||
while 1:
|
|
||||||
try:
|
|
||||||
(ci_type, ci_when, who_id, repository_id,
|
|
||||||
dir_id, file_id, revision, sticky_tag, branch_id,
|
|
||||||
plus_count, minus_count, description_id) = \
|
|
||||||
cursor.fetchone()
|
|
||||||
except TypeError:
|
|
||||||
break
|
|
||||||
checkins.append([file_id, dir_id, branch_id,
|
|
||||||
description_id, who_id])
|
|
||||||
|
|
||||||
#self.sql_delete('repositories', 'id', rep_id)
|
|
||||||
self.sql_delete('checkins', 'repositoryid', rep_id)
|
|
||||||
for checkin in checkins:
|
|
||||||
self.sql_delete('files', 'id', checkin[0], 'fileid')
|
|
||||||
self.sql_delete('dirs', 'id', checkin[1], 'dirid')
|
|
||||||
self.sql_delete('branches', 'id', checkin[2], 'branchid')
|
|
||||||
self.sql_delete('descs', 'id', checkin[3], 'descid')
|
|
||||||
self.sql_delete('people', 'id', checkin[4], 'whoid')
|
|
||||||
|
|
||||||
# Reset all internal id caches. We could be choosier here,
|
# Reset all internal id caches. We could be choosier here,
|
||||||
# but let's just be as safe as possible.
|
# but let's just be as safe as possible.
|
||||||
self._get_cache = {}
|
self._get_cache = {}
|
||||||
self._get_id_cache = {}
|
self._get_id_cache = {}
|
||||||
self._desc_id_cache = {}
|
self._desc_id_cache = {}
|
||||||
|
|
||||||
|
|
||||||
class DatabaseVersionError(Exception):
|
class DatabaseVersionError(Exception):
|
||||||
pass
|
pass
|
||||||
|
@ -678,7 +772,7 @@ class Commit:
|
||||||
CHANGE = 0
|
CHANGE = 0
|
||||||
ADD = 1
|
ADD = 1
|
||||||
REMOVE = 2
|
REMOVE = 2
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.__directory = ''
|
self.__directory = ''
|
||||||
self.__file = ''
|
self.__file = ''
|
||||||
|
@ -690,15 +784,20 @@ class Commit:
|
||||||
self.__minuscount = ''
|
self.__minuscount = ''
|
||||||
self.__description = ''
|
self.__description = ''
|
||||||
self.__relevance = ''
|
self.__relevance = ''
|
||||||
|
self.__snippet = ''
|
||||||
self.__gmt_time = 0.0
|
self.__gmt_time = 0.0
|
||||||
self.__type = Commit.CHANGE
|
self.__type = Commit.CHANGE
|
||||||
|
self.__content = ''
|
||||||
|
self.__mimetype = ''
|
||||||
|
self.__base_path = ''
|
||||||
|
self.__base_rev = ''
|
||||||
|
|
||||||
def SetRepository(self, repository):
|
def SetRepository(self, repository):
|
||||||
self.__repository = repository
|
self.__repository = repository
|
||||||
|
|
||||||
def GetRepository(self):
|
def GetRepository(self):
|
||||||
return self.__repository
|
return self.__repository
|
||||||
|
|
||||||
def SetDirectory(self, dir):
|
def SetDirectory(self, dir):
|
||||||
self.__directory = dir
|
self.__directory = dir
|
||||||
|
|
||||||
|
@ -710,7 +809,7 @@ class Commit:
|
||||||
|
|
||||||
def GetFile(self):
|
def GetFile(self):
|
||||||
return self.__file
|
return self.__file
|
||||||
|
|
||||||
def SetRevision(self, revision):
|
def SetRevision(self, revision):
|
||||||
self.__revision = revision
|
self.__revision = revision
|
||||||
|
|
||||||
|
@ -758,12 +857,19 @@ class Commit:
|
||||||
def GetDescription(self):
|
def GetDescription(self):
|
||||||
return self.__description
|
return self.__description
|
||||||
|
|
||||||
|
# Relevance and snippet are used when querying commit database
|
||||||
def SetRelevance(self, relevance):
|
def SetRelevance(self, relevance):
|
||||||
self.__relevance = relevance
|
self.__relevance = relevance
|
||||||
|
|
||||||
def GetRelevance(self):
|
def GetRelevance(self):
|
||||||
return self.__relevance
|
return self.__relevance
|
||||||
|
|
||||||
|
def SetSnippet(self, snippet):
|
||||||
|
self.__snippet = snippet
|
||||||
|
|
||||||
|
def GetSnippet(self):
|
||||||
|
return self.__snippet
|
||||||
|
|
||||||
def SetTypeChange(self):
|
def SetTypeChange(self):
|
||||||
self.__type = Commit.CHANGE
|
self.__type = Commit.CHANGE
|
||||||
|
|
||||||
|
@ -784,66 +890,80 @@ class Commit:
|
||||||
elif self.__type == Commit.REMOVE:
|
elif self.__type == Commit.REMOVE:
|
||||||
return 'Remove'
|
return 'Remove'
|
||||||
|
|
||||||
|
# File content (extracted text), optional, indexed with Sphinx
|
||||||
|
def SetContent(self, content):
|
||||||
|
self.__content = content
|
||||||
|
|
||||||
|
def GetContent(self):
|
||||||
|
return self.__content
|
||||||
|
|
||||||
|
# MIME type, optional, now only stored in Sphinx
|
||||||
|
def SetMimeType(self, mimetype):
|
||||||
|
self.__mimetype = mimetype
|
||||||
|
|
||||||
|
def GetMimeType(self):
|
||||||
|
return self.__mimetype
|
||||||
|
|
||||||
## LazyCommit overrides a few methods of Commit to only retrieve
|
## LazyCommit overrides a few methods of Commit to only retrieve
|
||||||
## it's properties as they are needed
|
## it's properties as they are needed
|
||||||
class LazyCommit(Commit):
|
class LazyCommit(Commit):
|
||||||
def __init__(self, db):
|
def __init__(self, db):
|
||||||
Commit.__init__(self)
|
Commit.__init__(self)
|
||||||
self.__db = db
|
self.__db = db
|
||||||
|
|
||||||
def SetFileID(self, dbFileID):
|
def SetFileID(self, dbFileID):
|
||||||
self.__dbFileID = dbFileID
|
self.__dbFileID = dbFileID
|
||||||
|
|
||||||
def GetFileID(self):
|
def GetFileID(self):
|
||||||
return self.__dbFileID
|
return self.__dbFileID
|
||||||
|
|
||||||
def GetFile(self):
|
def GetFile(self):
|
||||||
return self.__db.GetFile(self.__dbFileID)
|
return self.__db.GetFile(self.__dbFileID)
|
||||||
|
|
||||||
def SetDirectoryID(self, dbDirID):
|
def SetDirectoryID(self, dbDirID):
|
||||||
self.__dbDirID = dbDirID
|
self.__dbDirID = dbDirID
|
||||||
|
|
||||||
def GetDirectoryID(self):
|
def GetDirectoryID(self):
|
||||||
return self.__dbDirID
|
return self.__dbDirID
|
||||||
|
|
||||||
def GetDirectory(self):
|
def GetDirectory(self):
|
||||||
return self.__db.GetDirectory(self.__dbDirID)
|
return self.__db.GetDirectory(self.__dbDirID)
|
||||||
|
|
||||||
def SetRepositoryID(self, dbRepositoryID):
|
def SetRepositoryID(self, dbRepositoryID):
|
||||||
self.__dbRepositoryID = dbRepositoryID
|
self.__dbRepositoryID = dbRepositoryID
|
||||||
|
|
||||||
def GetRepositoryID(self):
|
def GetRepositoryID(self):
|
||||||
return self.__dbRepositoryID
|
return self.__dbRepositoryID
|
||||||
|
|
||||||
def GetRepository(self):
|
def GetRepository(self):
|
||||||
return self.__db.GetRepository(self.__dbRepositoryID)
|
return self.__db.GetRepository(self.__dbRepositoryID)
|
||||||
|
|
||||||
def SetAuthorID(self, dbAuthorID):
|
def SetAuthorID(self, dbAuthorID):
|
||||||
self.__dbAuthorID = dbAuthorID
|
self.__dbAuthorID = dbAuthorID
|
||||||
|
|
||||||
def GetAuthorID(self):
|
def GetAuthorID(self):
|
||||||
return self.__dbAuthorID
|
return self.__dbAuthorID
|
||||||
|
|
||||||
def GetAuthor(self):
|
def GetAuthor(self):
|
||||||
return self.__db.GetAuthor(self.__dbAuthorID)
|
return self.__db.GetAuthor(self.__dbAuthorID)
|
||||||
|
|
||||||
def SetBranchID(self, dbBranchID):
|
def SetBranchID(self, dbBranchID):
|
||||||
self.__dbBranchID = dbBranchID
|
self.__dbBranchID = dbBranchID
|
||||||
|
|
||||||
def GetBranchID(self):
|
def GetBranchID(self):
|
||||||
return self.__dbBranchID
|
return self.__dbBranchID
|
||||||
|
|
||||||
def GetBranch(self):
|
def GetBranch(self):
|
||||||
return self.__db.GetBranch(self.__dbBranchID)
|
return self.__db.GetBranch(self.__dbBranchID)
|
||||||
|
|
||||||
def SetDescriptionID(self, dbDescID):
|
def SetDescriptionID(self, dbDescID):
|
||||||
self.__dbDescID = dbDescID
|
self.__dbDescID = dbDescID
|
||||||
|
|
||||||
def GetDescriptionID(self):
|
def GetDescriptionID(self):
|
||||||
return self.__dbDescID
|
return self.__dbDescID
|
||||||
|
|
||||||
def GetDescription(self):
|
def GetDescription(self):
|
||||||
return self.__db.GetDescription(self.__dbDescID)
|
return self.__db.GetDescription(self.__dbDescID)
|
||||||
|
|
||||||
## QueryEntry holds data on one match-type in the SQL database
|
## QueryEntry holds data on one match-type in the SQL database
|
||||||
## match is: "exact", "like", or "regex"
|
## match is: "exact", "like", or "regex"
|
||||||
|
@ -858,8 +978,8 @@ class CheckinDatabaseQuery:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
## sorting
|
## sorting
|
||||||
self.sort = "date"
|
self.sort = "date"
|
||||||
|
|
||||||
## repository to query
|
## repository, branch, etc to query
|
||||||
self.repository_list = []
|
self.repository_list = []
|
||||||
self.branch_list = []
|
self.branch_list = []
|
||||||
self.directory_list = []
|
self.directory_list = []
|
||||||
|
@ -867,7 +987,11 @@ class CheckinDatabaseQuery:
|
||||||
self.revision_list = []
|
self.revision_list = []
|
||||||
self.author_list = []
|
self.author_list = []
|
||||||
self.comment_list = []
|
self.comment_list = []
|
||||||
|
|
||||||
|
## text_query = Fulltext query on comments
|
||||||
|
## content_query = Fulltext query on content
|
||||||
self.text_query = ""
|
self.text_query = ""
|
||||||
|
self.content_query = ""
|
||||||
|
|
||||||
## date range in DBI 2.0 timedate objects
|
## date range in DBI 2.0 timedate objects
|
||||||
self.from_date = None
|
self.from_date = None
|
||||||
|
@ -886,6 +1010,9 @@ class CheckinDatabaseQuery:
|
||||||
def SetTextQuery(self, query):
|
def SetTextQuery(self, query):
|
||||||
self.text_query = query
|
self.text_query = query
|
||||||
|
|
||||||
|
def SetContentQuery(self, query):
|
||||||
|
self.content_query = query
|
||||||
|
|
||||||
def SetRepository(self, repository, match = "exact"):
|
def SetRepository(self, repository, match = "exact"):
|
||||||
self.repository_list.append(QueryEntry(repository, match))
|
self.repository_list.append(QueryEntry(repository, match))
|
||||||
|
|
||||||
|
@ -921,7 +1048,7 @@ class CheckinDatabaseQuery:
|
||||||
def SetFromDateHoursAgo(self, hours_ago):
|
def SetFromDateHoursAgo(self, hours_ago):
|
||||||
ticks = time.time() - (3600 * hours_ago)
|
ticks = time.time() - (3600 * hours_ago)
|
||||||
self.from_date = dbi.DateTimeFromTicks(ticks)
|
self.from_date = dbi.DateTimeFromTicks(ticks)
|
||||||
|
|
||||||
def SetFromDateDaysAgo(self, days_ago):
|
def SetFromDateDaysAgo(self, days_ago):
|
||||||
ticks = time.time() - (86400 * days_ago)
|
ticks = time.time() - (86400 * days_ago)
|
||||||
self.from_date = dbi.DateTimeFromTicks(ticks)
|
self.from_date = dbi.DateTimeFromTicks(ticks)
|
||||||
|
@ -942,7 +1069,7 @@ class CheckinDatabaseQuery:
|
||||||
##
|
##
|
||||||
def CreateCommit():
|
def CreateCommit():
|
||||||
return Commit()
|
return Commit()
|
||||||
|
|
||||||
def CreateCheckinQuery():
|
def CreateCheckinQuery():
|
||||||
return CheckinDatabaseQuery()
|
return CheckinDatabaseQuery()
|
||||||
|
|
||||||
|
@ -953,9 +1080,23 @@ def ConnectDatabase(cfg, authorizer=None, readonly=0):
|
||||||
else:
|
else:
|
||||||
user = cfg.cvsdb.user
|
user = cfg.cvsdb.user
|
||||||
passwd = cfg.cvsdb.passwd
|
passwd = cfg.cvsdb.passwd
|
||||||
db = CheckinDatabase(cfg.cvsdb.host, cfg.cvsdb.port, cfg.cvsdb.socket, user, passwd,
|
db = CheckinDatabase(
|
||||||
cfg.cvsdb.database_name, cfg.cvsdb.row_limit, cfg.cvsdb.fulltext_min_relevance,
|
host = cfg.cvsdb.host,
|
||||||
authorizer)
|
port = cfg.cvsdb.port,
|
||||||
|
socket = cfg.cvsdb.socket,
|
||||||
|
user = user,
|
||||||
|
passwd = passwd,
|
||||||
|
database = cfg.cvsdb.database_name,
|
||||||
|
row_limit = cfg.cvsdb.row_limit,
|
||||||
|
min_relevance = cfg.cvsdb.fulltext_min_relevance,
|
||||||
|
authorizer = authorizer,
|
||||||
|
index_content = cfg.cvsdb.index_content,
|
||||||
|
sphinx_host = cfg.cvsdb.sphinx_host,
|
||||||
|
sphinx_port = int(cfg.cvsdb.sphinx_port),
|
||||||
|
sphinx_socket = cfg.cvsdb.sphinx_socket,
|
||||||
|
sphinx_index = cfg.cvsdb.sphinx_index,
|
||||||
|
cfg = cfg,
|
||||||
|
)
|
||||||
db.Connect()
|
db.Connect()
|
||||||
return db
|
return db
|
||||||
|
|
||||||
|
|
|
@ -31,8 +31,8 @@ import popen
|
||||||
class BaseCVSRepository(vclib.Repository):
|
class BaseCVSRepository(vclib.Repository):
|
||||||
def __init__(self, name, rootpath, authorizer, utilities):
|
def __init__(self, name, rootpath, authorizer, utilities):
|
||||||
if not os.path.isdir(rootpath):
|
if not os.path.isdir(rootpath):
|
||||||
raise vclib.ReposNotFound(name)
|
raise vclib.ReposNotFound(name)
|
||||||
|
|
||||||
self.name = name
|
self.name = name
|
||||||
self.rootpath = rootpath
|
self.rootpath = rootpath
|
||||||
self.auth = authorizer
|
self.auth = authorizer
|
||||||
|
@ -53,7 +53,7 @@ class BaseCVSRepository(vclib.Repository):
|
||||||
|
|
||||||
def authorizer(self):
|
def authorizer(self):
|
||||||
return self.auth
|
return self.auth
|
||||||
|
|
||||||
def itemtype(self, path_parts, rev):
|
def itemtype(self, path_parts, rev):
|
||||||
basepath = self._getpath(path_parts)
|
basepath = self._getpath(path_parts)
|
||||||
kind = None
|
kind = None
|
||||||
|
@ -74,12 +74,12 @@ class BaseCVSRepository(vclib.Repository):
|
||||||
def itemprops(self, path_parts, rev):
|
def itemprops(self, path_parts, rev):
|
||||||
self.itemtype(path_parts, rev) # does auth-check
|
self.itemtype(path_parts, rev) # does auth-check
|
||||||
return {} # CVS doesn't support properties
|
return {} # CVS doesn't support properties
|
||||||
|
|
||||||
def listdir(self, path_parts, rev, options):
|
def listdir(self, path_parts, rev, options):
|
||||||
if self.itemtype(path_parts, rev) != vclib.DIR: # does auth-check
|
if self.itemtype(path_parts, rev) != vclib.DIR: # does auth-check
|
||||||
raise vclib.Error("Path '%s' is not a directory."
|
raise vclib.Error("Path '%s' is not a directory."
|
||||||
% (string.join(path_parts, "/")))
|
% (string.join(path_parts, "/")))
|
||||||
|
|
||||||
# Only RCS files (*,v) and subdirs are returned.
|
# Only RCS files (*,v) and subdirs are returned.
|
||||||
data = [ ]
|
data = [ ]
|
||||||
full_name = self._getpath(path_parts)
|
full_name = self._getpath(path_parts)
|
||||||
|
@ -115,7 +115,7 @@ class BaseCVSRepository(vclib.Repository):
|
||||||
data.append(CVSDirEntry(name, kind, errors, 1))
|
data.append(CVSDirEntry(name, kind, errors, 1))
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def _getpath(self, path_parts):
|
def _getpath(self, path_parts):
|
||||||
return apply(os.path.join, (self.rootpath,) + tuple(path_parts))
|
return apply(os.path.join, (self.rootpath,) + tuple(path_parts))
|
||||||
|
|
||||||
|
@ -177,7 +177,7 @@ class BinCVSRepository(BaseCVSRepository):
|
||||||
used_rlog = 0
|
used_rlog = 0
|
||||||
tip_rev = None # used only if we have to fallback to using rlog
|
tip_rev = None # used only if we have to fallback to using rlog
|
||||||
|
|
||||||
fp = self.rcs_popen('co', (rev_flag, full_name), 'rb')
|
fp = self.rcs_popen('co', (rev_flag, full_name), 'rb')
|
||||||
try:
|
try:
|
||||||
filename, revision = _parse_co_header(fp)
|
filename, revision = _parse_co_header(fp)
|
||||||
except COMissingRevision:
|
except COMissingRevision:
|
||||||
|
@ -191,14 +191,14 @@ class BinCVSRepository(BaseCVSRepository):
|
||||||
used_rlog = 1
|
used_rlog = 1
|
||||||
if not tip_rev:
|
if not tip_rev:
|
||||||
raise vclib.Error("Unable to find valid revision")
|
raise vclib.Error("Unable to find valid revision")
|
||||||
fp = self.rcs_popen('co', ('-p' + tip_rev.string, full_name), 'rb')
|
fp = self.rcs_popen('co', ('-p' + tip_rev.string, full_name), 'rb')
|
||||||
filename, revision = _parse_co_header(fp)
|
filename, revision = _parse_co_header(fp)
|
||||||
|
|
||||||
if filename is None:
|
if filename is None:
|
||||||
# CVSNT's co exits without any output if a dead revision is requested.
|
# CVSNT's co exits without any output if a dead revision is requested.
|
||||||
# Bug at http://www.cvsnt.org/cgi-bin/bugzilla/show_bug.cgi?id=190
|
# Bug at http://www.cvsnt.org/cgi-bin/bugzilla/show_bug.cgi?id=190
|
||||||
# As a workaround, we invoke rlog to find the first non-dead revision
|
# As a workaround, we invoke rlog to find the first non-dead revision
|
||||||
# that precedes it and check out that revision instead. Of course,
|
# that precedes it and check out that revision instead. Of course,
|
||||||
# if we've already invoked rlog above, we just reuse its output.
|
# if we've already invoked rlog above, we just reuse its output.
|
||||||
if not used_rlog:
|
if not used_rlog:
|
||||||
tip_rev = self._get_tip_revision(full_name + ',v', rev)
|
tip_rev = self._get_tip_revision(full_name + ',v', rev)
|
||||||
|
@ -207,7 +207,7 @@ class BinCVSRepository(BaseCVSRepository):
|
||||||
raise vclib.Error(
|
raise vclib.Error(
|
||||||
'Could not find non-dead revision preceding "%s"' % rev)
|
'Could not find non-dead revision preceding "%s"' % rev)
|
||||||
fp = self.rcs_popen('co', ('-p' + tip_rev.undead.string,
|
fp = self.rcs_popen('co', ('-p' + tip_rev.undead.string,
|
||||||
full_name), 'rb')
|
full_name), 'rb')
|
||||||
filename, revision = _parse_co_header(fp)
|
filename, revision = _parse_co_header(fp)
|
||||||
|
|
||||||
if filename is None:
|
if filename is None:
|
||||||
|
@ -278,7 +278,7 @@ class BinCVSRepository(BaseCVSRepository):
|
||||||
if self.itemtype(path_parts, rev) != vclib.FILE: # does auth-check
|
if self.itemtype(path_parts, rev) != vclib.FILE: # does auth-check
|
||||||
raise vclib.Error("Path '%s' is not a file."
|
raise vclib.Error("Path '%s' is not a file."
|
||||||
% (string.join(path_parts, "/")))
|
% (string.join(path_parts, "/")))
|
||||||
|
|
||||||
# Invoke rlog
|
# Invoke rlog
|
||||||
rcsfile = self.rcsfile(path_parts, 1)
|
rcsfile = self.rcsfile(path_parts, 1)
|
||||||
if rev and options.get('cvs_pass_rev', 0):
|
if rev and options.get('cvs_pass_rev', 0):
|
||||||
|
@ -341,7 +341,7 @@ class BinCVSRepository(BaseCVSRepository):
|
||||||
|
|
||||||
def revinfo(self, rev):
|
def revinfo(self, rev):
|
||||||
raise vclib.UnsupportedFeature
|
raise vclib.UnsupportedFeature
|
||||||
|
|
||||||
def rawdiff(self, path_parts1, rev1, path_parts2, rev2, type, options={}):
|
def rawdiff(self, path_parts1, rev1, path_parts2, rev2, type, options={}):
|
||||||
"""see vclib.Repository.rawdiff docstring
|
"""see vclib.Repository.rawdiff docstring
|
||||||
|
|
||||||
|
@ -439,9 +439,9 @@ def _match_revs_tags(revlist, taglist):
|
||||||
example: if revision is 1.2.3.4, parent is 1.2
|
example: if revision is 1.2.3.4, parent is 1.2
|
||||||
|
|
||||||
"undead"
|
"undead"
|
||||||
If the revision is dead, then this is a reference to the first
|
If the revision is dead, then this is a reference to the first
|
||||||
previous revision which isn't dead, otherwise it's a reference
|
previous revision which isn't dead, otherwise it's a reference
|
||||||
to itself. If all the previous revisions are dead it's None.
|
to itself. If all the previous revisions are dead it's None.
|
||||||
|
|
||||||
"branch_number"
|
"branch_number"
|
||||||
tuple representing branch number or empty tuple if on trunk
|
tuple representing branch number or empty tuple if on trunk
|
||||||
|
@ -653,7 +653,7 @@ def _parse_co_header(fp):
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
|
||||||
raise COMalformedOutput, "Unable to find revision in co output stream"
|
raise COMalformedOutput, "Unable to find revision in co output stream"
|
||||||
|
|
||||||
# if your rlog doesn't use 77 '=' characters, then this must change
|
# if your rlog doesn't use 77 '=' characters, then this must change
|
||||||
|
@ -674,7 +674,7 @@ _EOF_ERROR = 'error message found' # rlog issued an error
|
||||||
# ^rlog\: (.*)(?:\:\d+)?\: (.*)$
|
# ^rlog\: (.*)(?:\:\d+)?\: (.*)$
|
||||||
#
|
#
|
||||||
# But for some reason the windows version of rlog omits the "rlog: " prefix
|
# But for some reason the windows version of rlog omits the "rlog: " prefix
|
||||||
# for the first error message when the standard error stream has been
|
# for the first error message when the standard error stream has been
|
||||||
# redirected to a file or pipe. (the prefix is present in subsequent errors
|
# redirected to a file or pipe. (the prefix is present in subsequent errors
|
||||||
# and when rlog is run from the console). So the expression below is more
|
# and when rlog is run from the console). So the expression below is more
|
||||||
# complicated
|
# complicated
|
||||||
|
@ -703,7 +703,7 @@ def _parse_log_header(fp):
|
||||||
Returns: filename, default branch, tag dictionary, lock dictionary,
|
Returns: filename, default branch, tag dictionary, lock dictionary,
|
||||||
rlog error message, and eof flag
|
rlog error message, and eof flag
|
||||||
"""
|
"""
|
||||||
|
|
||||||
filename = head = branch = msg = ""
|
filename = head = branch = msg = ""
|
||||||
taginfo = { } # tag name => number
|
taginfo = { } # tag name => number
|
||||||
lockinfo = { } # revision => locker
|
lockinfo = { } # revision => locker
|
||||||
|
@ -732,7 +732,7 @@ def _parse_log_header(fp):
|
||||||
else:
|
else:
|
||||||
# oops. this line isn't lock info. stop parsing tags.
|
# oops. this line isn't lock info. stop parsing tags.
|
||||||
state = 0
|
state = 0
|
||||||
|
|
||||||
if state == 0:
|
if state == 0:
|
||||||
if line[:9] == 'RCS file:':
|
if line[:9] == 'RCS file:':
|
||||||
filename = line[10:-1]
|
filename = line[10:-1]
|
||||||
|
@ -902,7 +902,7 @@ def _file_log(revs, taginfo, lockinfo, cur_branch, filter):
|
||||||
except ValueError:
|
except ValueError:
|
||||||
view_tag = None
|
view_tag = None
|
||||||
else:
|
else:
|
||||||
tags.append(view_tag)
|
tags.append(view_tag)
|
||||||
|
|
||||||
# Match up tags and revisions
|
# Match up tags and revisions
|
||||||
_match_revs_tags(revs, tags)
|
_match_revs_tags(revs, tags)
|
||||||
|
@ -910,13 +910,13 @@ def _file_log(revs, taginfo, lockinfo, cur_branch, filter):
|
||||||
# Match up lockinfo and revision
|
# Match up lockinfo and revision
|
||||||
for rev in revs:
|
for rev in revs:
|
||||||
rev.lockinfo = lockinfo.get(rev.string)
|
rev.lockinfo = lockinfo.get(rev.string)
|
||||||
|
|
||||||
# Add artificial ViewVC tag HEAD, which acts like a non-branch tag pointing
|
# Add artificial ViewVC tag HEAD, which acts like a non-branch tag pointing
|
||||||
# at the latest revision on the MAIN branch. The HEAD revision doesn't have
|
# at the latest revision on the MAIN branch. The HEAD revision doesn't have
|
||||||
# anything to do with the "head" revision number specified in the RCS file
|
# anything to do with the "head" revision number specified in the RCS file
|
||||||
# and in rlog output. HEAD refers to the revision that the CVS and RCS co
|
# and in rlog output. HEAD refers to the revision that the CVS and RCS co
|
||||||
# commands will check out by default, whereas the "head" field just refers
|
# commands will check out by default, whereas the "head" field just refers
|
||||||
# to the highest revision on the trunk.
|
# to the highest revision on the trunk.
|
||||||
taginfo['HEAD'] = _add_tag('HEAD', taginfo['MAIN'].co_rev)
|
taginfo['HEAD'] = _add_tag('HEAD', taginfo['MAIN'].co_rev)
|
||||||
|
|
||||||
# Determine what revisions to return
|
# Determine what revisions to return
|
||||||
|
@ -954,7 +954,7 @@ def _file_log(revs, taginfo, lockinfo, cur_branch, filter):
|
||||||
_remove_tag(view_tag)
|
_remove_tag(view_tag)
|
||||||
else:
|
else:
|
||||||
filtered_revs = revs
|
filtered_revs = revs
|
||||||
|
|
||||||
return filtered_revs
|
return filtered_revs
|
||||||
|
|
||||||
def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
|
def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
|
||||||
|
@ -1004,7 +1004,7 @@ def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
|
||||||
= _parse_log_header(rlog)
|
= _parse_log_header(rlog)
|
||||||
|
|
||||||
if eof == _EOF_LOG:
|
if eof == _EOF_LOG:
|
||||||
# the rlog output ended early. this can happen on errors that rlog
|
# the rlog output ended early. this can happen on errors that rlog
|
||||||
# thinks are so serious that it stops parsing the current file and
|
# thinks are so serious that it stops parsing the current file and
|
||||||
# refuses to parse any of the files that come after it. one of the
|
# refuses to parse any of the files that come after it. one of the
|
||||||
# errors that triggers this obnoxious behavior looks like:
|
# errors that triggers this obnoxious behavior looks like:
|
||||||
|
@ -1052,8 +1052,8 @@ def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
|
||||||
tag = None
|
tag = None
|
||||||
|
|
||||||
# we don't care about the specific values -- just the keys and whether
|
# we don't care about the specific values -- just the keys and whether
|
||||||
# the values point to branches or revisions. this the fastest way to
|
# the values point to branches or revisions. this the fastest way to
|
||||||
# merge the set of keys and keep values that allow us to make the
|
# merge the set of keys and keep values that allow us to make the
|
||||||
# distinction between branch tags and normal tags
|
# distinction between branch tags and normal tags
|
||||||
alltags.update(taginfo)
|
alltags.update(taginfo)
|
||||||
|
|
||||||
|
@ -1098,7 +1098,7 @@ def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
|
||||||
file.dead = 0
|
file.dead = 0
|
||||||
#file.errors.append("No revisions exist on %s" % (view_tag or "MAIN"))
|
#file.errors.append("No revisions exist on %s" % (view_tag or "MAIN"))
|
||||||
file.absent = 1
|
file.absent = 1
|
||||||
|
|
||||||
# done with this file now, skip the rest of this file's revisions
|
# done with this file now, skip the rest of this file's revisions
|
||||||
if not eof:
|
if not eof:
|
||||||
_skip_file(rlog)
|
_skip_file(rlog)
|
||||||
|
@ -1211,7 +1211,7 @@ def _newest_file(dirpath):
|
||||||
newest_time = 0
|
newest_time = 0
|
||||||
|
|
||||||
### FIXME: This sucker is leaking unauthorized paths! ###
|
### FIXME: This sucker is leaking unauthorized paths! ###
|
||||||
|
|
||||||
for subfile in os.listdir(dirpath):
|
for subfile in os.listdir(dirpath):
|
||||||
### filter CVS locks? stale NFS handles?
|
### filter CVS locks? stale NFS handles?
|
||||||
if subfile[-2:] != ',v':
|
if subfile[-2:] != ',v':
|
||||||
|
|
148
lib/viewvc.py
148
lib/viewvc.py
|
@ -1,4 +1,3 @@
|
||||||
#
|
|
||||||
# Copyright (C) 1999-2009 The ViewCVS Group. All Rights Reserved.
|
# Copyright (C) 1999-2009 The ViewCVS Group. All Rights Reserved.
|
||||||
#
|
#
|
||||||
# By using this file, you agree to the terms and conditions set forth in
|
# By using this file, you agree to the terms and conditions set forth in
|
||||||
|
@ -68,7 +67,6 @@ docroot_magic_path = '*docroot*'
|
||||||
viewcvs_mime_type = 'text/vnd.viewcvs-markup'
|
viewcvs_mime_type = 'text/vnd.viewcvs-markup'
|
||||||
alt_mime_type = 'text/x-cvsweb-markup'
|
alt_mime_type = 'text/x-cvsweb-markup'
|
||||||
view_roots_magic = '*viewroots*'
|
view_roots_magic = '*viewroots*'
|
||||||
magic_buf_size = 4096
|
|
||||||
default_mime_type = 'application/octet-stream'
|
default_mime_type = 'application/octet-stream'
|
||||||
|
|
||||||
# Put here the variables we need in order to hold our state - they
|
# Put here the variables we need in order to hold our state - they
|
||||||
|
@ -121,9 +119,8 @@ class Request:
|
||||||
# check for an authenticated username
|
# check for an authenticated username
|
||||||
self.username = server.getenv('REMOTE_USER')
|
self.username = server.getenv('REMOTE_USER')
|
||||||
|
|
||||||
# construct MIME magic
|
# repository object cache
|
||||||
self.ms = None
|
self.all_repos = {}
|
||||||
self.ms_fail = 0
|
|
||||||
|
|
||||||
# if we allow compressed output, see if the client does too
|
# if we allow compressed output, see if the client does too
|
||||||
self.gzip_compress_level = 0
|
self.gzip_compress_level = 0
|
||||||
|
@ -134,6 +131,9 @@ class Request:
|
||||||
string.split(http_accept_encoding, ","))):
|
string.split(http_accept_encoding, ","))):
|
||||||
self.gzip_compress_level = 9 # make this configurable?
|
self.gzip_compress_level = 9 # make this configurable?
|
||||||
|
|
||||||
|
def utf8(self, value):
|
||||||
|
return self.cfg.guesser().utf8(value)
|
||||||
|
|
||||||
def create_repos(self, rootname):
|
def create_repos(self, rootname):
|
||||||
if not rootname:
|
if not rootname:
|
||||||
return None
|
return None
|
||||||
|
@ -677,7 +677,7 @@ def _validate_mimetype(value):
|
||||||
return value in (viewcvs_mime_type, alt_mime_type, 'text/plain')
|
return value in (viewcvs_mime_type, alt_mime_type, 'text/plain')
|
||||||
|
|
||||||
# obvious things here. note that we don't need uppercase for alpha.
|
# obvious things here. note that we don't need uppercase for alpha.
|
||||||
_re_validate_alpha = re.compile('^[a-z]+$')
|
_re_validate_alpha = re.compile('^[a-z_]+$')
|
||||||
_re_validate_number = re.compile('^[0-9]+$')
|
_re_validate_number = re.compile('^[0-9]+$')
|
||||||
_re_validate_boolint = re.compile('^[01]$')
|
_re_validate_boolint = re.compile('^[01]$')
|
||||||
|
|
||||||
|
@ -743,6 +743,7 @@ _legal_params = {
|
||||||
'who_match' : _re_validate_alpha,
|
'who_match' : _re_validate_alpha,
|
||||||
'comment' : None,
|
'comment' : None,
|
||||||
'comment_match' : _re_validate_alpha,
|
'comment_match' : _re_validate_alpha,
|
||||||
|
'search_content': None,
|
||||||
'querysort' : _re_validate_alpha,
|
'querysort' : _re_validate_alpha,
|
||||||
'date' : _re_validate_alpha,
|
'date' : _re_validate_alpha,
|
||||||
'hours' : _re_validate_number,
|
'hours' : _re_validate_number,
|
||||||
|
@ -988,7 +989,7 @@ def nav_path(request):
|
||||||
is_last = len(path_parts) == len(request.path_parts)
|
is_last = len(path_parts) == len(request.path_parts)
|
||||||
|
|
||||||
if request.roottype == 'cvs':
|
if request.roottype == 'cvs':
|
||||||
item = _item(name=cvsdb.utf8string(part), href=None)
|
item = _item(name=request.utf8(part), href=None)
|
||||||
else:
|
else:
|
||||||
item = _item(name=part, href=None)
|
item = _item(name=part, href=None)
|
||||||
|
|
||||||
|
@ -1248,7 +1249,7 @@ def common_template_data(request, revision=None, mime_type=None):
|
||||||
cfg = request.cfg
|
cfg = request.cfg
|
||||||
where = request.where
|
where = request.where
|
||||||
if request.roottype == 'cvs':
|
if request.roottype == 'cvs':
|
||||||
where = cvsdb.utf8string(where)
|
where = request.utf8(where)
|
||||||
where = request.server.escape(where)
|
where = request.server.escape(where)
|
||||||
|
|
||||||
# Initialize data dictionary members (sorted alphanumerically)
|
# Initialize data dictionary members (sorted alphanumerically)
|
||||||
|
@ -1444,28 +1445,31 @@ def markup_stream_pygments(request, cfg, blame_data, fp, filename, mime_type):
|
||||||
get_lexer_for_mimetype, \
|
get_lexer_for_mimetype, \
|
||||||
get_lexer_for_filename
|
get_lexer_for_filename
|
||||||
from pygments.lexers._mapping import LEXERS
|
from pygments.lexers._mapping import LEXERS
|
||||||
|
# Hack for shell mime types:
|
||||||
LEXERS['BashLexer'] = ('pygments.lexers.other', 'Bash', ('bash', 'sh'), ('*.sh',), ('application/x-sh', 'application/x-shellscript', 'text/x-sh', 'text/x-shellscript'))
|
LEXERS['BashLexer'] = ('pygments.lexers.other', 'Bash', ('bash', 'sh'), ('*.sh',), ('application/x-sh', 'application/x-shellscript', 'text/x-sh', 'text/x-shellscript'))
|
||||||
encoding = 'guess'
|
|
||||||
if cfg.options.detect_encoding:
|
|
||||||
try:
|
|
||||||
import chardet
|
|
||||||
encoding = 'chardet'
|
|
||||||
except (SyntaxError, ImportError):
|
|
||||||
pass
|
|
||||||
try:
|
try:
|
||||||
lexer = get_lexer_for_mimetype(mime_type,
|
lexer = get_lexer_for_mimetype(mime_type,
|
||||||
encoding=encoding,
|
encoding='utf-8',
|
||||||
stripnl=False)
|
stripnl=False)
|
||||||
except ClassNotFound:
|
except ClassNotFound:
|
||||||
try:
|
try:
|
||||||
lexer = get_lexer_for_filename(filename,
|
lexer = get_lexer_for_filename(filename,
|
||||||
encoding=encoding,
|
encoding='utf-8',
|
||||||
stripnl=False)
|
stripnl=False)
|
||||||
except ClassNotFound:
|
except ClassNotFound:
|
||||||
use_pygments = 0
|
use_pygments = 0
|
||||||
except ImportError:
|
except ImportError:
|
||||||
use_pygments = 0
|
use_pygments = 0
|
||||||
|
|
||||||
|
# Detect encoding by calling chardet ourselves,
|
||||||
|
# to support it in non-highlighting mode
|
||||||
|
content = fp.read()
|
||||||
|
c, encoding = cfg.guesser().guess_charset(content)
|
||||||
|
if encoding:
|
||||||
|
content = c
|
||||||
|
else:
|
||||||
|
encoding = 'unknown'
|
||||||
|
|
||||||
# If we aren't going to be highlighting anything, just return the
|
# If we aren't going to be highlighting anything, just return the
|
||||||
# BLAME_SOURCE. If there's no blame_source, we'll generate a fake
|
# BLAME_SOURCE. If there's no blame_source, we'll generate a fake
|
||||||
# one from the file contents we fetch with PATH and REV.
|
# one from the file contents we fetch with PATH and REV.
|
||||||
|
@ -1475,11 +1479,7 @@ def markup_stream_pygments(request, cfg, blame_data, fp, filename, mime_type):
|
||||||
else:
|
else:
|
||||||
lines = []
|
lines = []
|
||||||
line_no = 0
|
line_no = 0
|
||||||
while 1:
|
for line in content.split('\n'):
|
||||||
line = fp.readline()
|
|
||||||
if not line:
|
|
||||||
break
|
|
||||||
line = cvsdb.utf8string(line)
|
|
||||||
line_no = line_no + 1
|
line_no = line_no + 1
|
||||||
item = vclib.Annotation(cgi.escape(line), line_no,
|
item = vclib.Annotation(cgi.escape(line), line_no,
|
||||||
None, None, None, None)
|
None, None, None, None)
|
||||||
|
@ -1508,19 +1508,11 @@ def markup_stream_pygments(request, cfg, blame_data, fp, filename, mime_type):
|
||||||
self.blame_data.append(item)
|
self.blame_data.append(item)
|
||||||
self.line_no = self.line_no + 1
|
self.line_no = self.line_no + 1
|
||||||
ps = PygmentsSink(blame_source)
|
ps = PygmentsSink(blame_source)
|
||||||
fpd = fp.read()
|
highlight(content, lexer,
|
||||||
try:
|
|
||||||
fpdat = unicode(fpd,'utf-8')
|
|
||||||
except:
|
|
||||||
try:
|
|
||||||
fpdat = unicode(fpd,'cp1251')
|
|
||||||
except:
|
|
||||||
fpdat = fpd
|
|
||||||
highlight(fpdat, lexer,
|
|
||||||
HtmlFormatter(nowrap=True,
|
HtmlFormatter(nowrap=True,
|
||||||
classprefix='pygments-',
|
classprefix='pygments-',
|
||||||
encoding='utf-8'), ps)
|
encoding='utf-8'), ps)
|
||||||
return ps.blame_data
|
return ps.blame_data, encoding
|
||||||
|
|
||||||
def make_time_string(date, cfg):
|
def make_time_string(date, cfg):
|
||||||
"""Returns formatted date string in either local time or UTC.
|
"""Returns formatted date string in either local time or UTC.
|
||||||
|
@ -1594,6 +1586,7 @@ def calculate_mime_type(request, path_parts, rev):
|
||||||
return mime_type
|
return mime_type
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
# FIXME rewrite to use viewvcmagic
|
||||||
return guess_mime(path_parts[-1])
|
return guess_mime(path_parts[-1])
|
||||||
|
|
||||||
def markup_or_annotate(request, is_annotate):
|
def markup_or_annotate(request, is_annotate):
|
||||||
|
@ -1605,21 +1598,12 @@ def markup_or_annotate(request, is_annotate):
|
||||||
mime_type = calculate_mime_type(request, path, rev)
|
mime_type = calculate_mime_type(request, path, rev)
|
||||||
|
|
||||||
if not mime_type or mime_type == default_mime_type:
|
if not mime_type or mime_type == default_mime_type:
|
||||||
if request.ms is None and not request.ms_fail:
|
try:
|
||||||
try:
|
fp, revision = request.repos.openfile(path, rev)
|
||||||
import magic
|
mime_type = request.cfg.guesser().guess_mime(None, None, fp)
|
||||||
request.ms = magic.open(magic.MAGIC_NONE | magic.MAGIC_MIME)
|
fp.close()
|
||||||
request.ms.load()
|
except:
|
||||||
except:
|
raise
|
||||||
request.ms_fail = 1
|
|
||||||
if request.ms:
|
|
||||||
try:
|
|
||||||
fp, revision = request.repos.openfile(path, rev)
|
|
||||||
buffer = fp.read(magic_buf_size)
|
|
||||||
fp.close()
|
|
||||||
mime_type = request.ms.buffer(buffer)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Is this a binary type?
|
# Is this a binary type?
|
||||||
if is_binary(request.cfg, mime_type):
|
if is_binary(request.cfg, mime_type):
|
||||||
|
@ -1657,9 +1641,10 @@ def markup_or_annotate(request, is_annotate):
|
||||||
if check_freshness(request, None, revision, weak=1):
|
if check_freshness(request, None, revision, weak=1):
|
||||||
fp.close()
|
fp.close()
|
||||||
return
|
return
|
||||||
lines = markup_stream_pygments(request, cfg, blame_source, fp,
|
lines, charset = markup_stream_pygments(request, cfg, blame_source, fp, path[-1], mime_type)
|
||||||
path[-1], mime_type)
|
|
||||||
fp.close()
|
fp.close()
|
||||||
|
if mime_type.find(';') < 0:
|
||||||
|
mime_type = mime_type+'; charset='+charset
|
||||||
|
|
||||||
data = common_template_data(request, revision)
|
data = common_template_data(request, revision)
|
||||||
data.merge(ezt.TemplateData({
|
data.merge(ezt.TemplateData({
|
||||||
|
@ -1910,7 +1895,7 @@ def view_directory(request):
|
||||||
row.short_log = format_log(file.log, cfg)
|
row.short_log = format_log(file.log, cfg)
|
||||||
row.log = htmlify(file.log, cfg.options.mangle_email_addresses)
|
row.log = htmlify(file.log, cfg.options.mangle_email_addresses)
|
||||||
row.lockinfo = file.lockinfo
|
row.lockinfo = file.lockinfo
|
||||||
row.name = request.server.escape(cvsdb.utf8string(file.name))
|
row.name = request.server.escape(request.utf8(file.name))
|
||||||
row.anchor = row.name
|
row.anchor = row.name
|
||||||
row.pathtype = (file.kind == vclib.FILE and 'file') or \
|
row.pathtype = (file.kind == vclib.FILE and 'file') or \
|
||||||
(file.kind == vclib.DIR and 'dir')
|
(file.kind == vclib.DIR and 'dir')
|
||||||
|
@ -2285,7 +2270,7 @@ def view_log(request):
|
||||||
entry.ago = html_time(request, rev.date, 1)
|
entry.ago = html_time(request, rev.date, 1)
|
||||||
entry.log = rev.log or ""
|
entry.log = rev.log or ""
|
||||||
if cvs:
|
if cvs:
|
||||||
entry.log = cvsdb.utf8string(entry.log)
|
entry.log = request.utf8(entry.log)
|
||||||
entry.log = htmlify(entry.log, cfg.options.mangle_email_addresses)
|
entry.log = htmlify(entry.log, cfg.options.mangle_email_addresses)
|
||||||
entry.size = rev.size
|
entry.size = rev.size
|
||||||
entry.lockinfo = rev.lockinfo
|
entry.lockinfo = rev.lockinfo
|
||||||
|
@ -2770,7 +2755,7 @@ class DiffSource:
|
||||||
self.save_line = None
|
self.save_line = None
|
||||||
self.line_number = None
|
self.line_number = None
|
||||||
self.prev_line_number = None
|
self.prev_line_number = None
|
||||||
|
|
||||||
# keep track of where we are during an iteration
|
# keep track of where we are during an iteration
|
||||||
self.idx = -1
|
self.idx = -1
|
||||||
self.last = None
|
self.last = None
|
||||||
|
@ -2867,7 +2852,7 @@ class DiffSource:
|
||||||
|
|
||||||
diff_code = line[0]
|
diff_code = line[0]
|
||||||
output = self._format_text(line[1:])
|
output = self._format_text(line[1:])
|
||||||
output = cvsdb.utf8string(output)
|
output = self.cfg.guesser().utf8(output)
|
||||||
|
|
||||||
if diff_code == '+':
|
if diff_code == '+':
|
||||||
if self.state == 'dump':
|
if self.state == 'dump':
|
||||||
|
@ -3644,6 +3629,7 @@ def view_queryform(request):
|
||||||
'who_match' : request.query_dict.get('who_match', 'exact'),
|
'who_match' : request.query_dict.get('who_match', 'exact'),
|
||||||
'comment' : request.query_dict.get('comment', ''),
|
'comment' : request.query_dict.get('comment', ''),
|
||||||
'comment_match' : request.query_dict.get('comment_match', 'fulltext'),
|
'comment_match' : request.query_dict.get('comment_match', 'fulltext'),
|
||||||
|
'search_content' : request.query_dict.get('search_content', ''),
|
||||||
'querysort' : request.query_dict.get('querysort', 'date'),
|
'querysort' : request.query_dict.get('querysort', 'date'),
|
||||||
'date' : request.query_dict.get('date', 'hours'),
|
'date' : request.query_dict.get('date', 'hours'),
|
||||||
'hours' : request.query_dict.get('hours', '2'),
|
'hours' : request.query_dict.get('hours', '2'),
|
||||||
|
@ -3653,6 +3639,7 @@ def view_queryform(request):
|
||||||
'query_hidden_values' : query_hidden_values,
|
'query_hidden_values' : query_hidden_values,
|
||||||
'limit_changes' : limit_changes,
|
'limit_changes' : limit_changes,
|
||||||
'dir_href' : dir_href,
|
'dir_href' : dir_href,
|
||||||
|
'enable_search_content' : request.cfg.cvsdb.index_content,
|
||||||
}))
|
}))
|
||||||
|
|
||||||
generate_page(request, "query_form", data)
|
generate_page(request, "query_form", data)
|
||||||
|
@ -3791,7 +3778,8 @@ def build_commit(request, files, max_files, dir_strip, format):
|
||||||
plus_count = 0
|
plus_count = 0
|
||||||
minus_count = 0
|
minus_count = 0
|
||||||
found_unreadable = 0
|
found_unreadable = 0
|
||||||
all_repos = {}
|
if not request.all_repos:
|
||||||
|
request.all_repos = {}
|
||||||
|
|
||||||
for f in files:
|
for f in files:
|
||||||
dirname = f.GetDirectory()
|
dirname = f.GetDirectory()
|
||||||
|
@ -3810,17 +3798,19 @@ def build_commit(request, files, max_files, dir_strip, format):
|
||||||
|
|
||||||
# Check path access (since the commits database logic bypasses the
|
# Check path access (since the commits database logic bypasses the
|
||||||
# vclib layer and, thus, the vcauth stuff that layer uses).
|
# vclib layer and, thus, the vcauth stuff that layer uses).
|
||||||
my_repos = all_repos.get(f.GetRepository(), '')
|
my_repos = request.all_repos.get(f.GetRepository(), '')
|
||||||
if not my_repos:
|
if not my_repos:
|
||||||
try:
|
try:
|
||||||
my_repos = all_repos[f.GetRepository()] = request.create_repos(f.GetRepository())
|
my_repos = request.all_repos[f.GetRepository()] = request.create_repos(f.GetRepository())
|
||||||
except:
|
except:
|
||||||
my_repos = None
|
my_repos = None
|
||||||
if not my_repos:
|
if not my_repos:
|
||||||
return None
|
return None
|
||||||
if my_repos['roottype'] == 'cvs':
|
if my_repos['roottype'] == 'cvs':
|
||||||
try: where = unicode(where,'utf-8')
|
# we store UTF-8 in the DB
|
||||||
|
try: where = where.decode('utf-8')
|
||||||
except: pass
|
except: pass
|
||||||
|
# FIXME maybe store "real" filesystem path in the DB instead of having such setting?
|
||||||
try: where = where.encode(cfg.options.cvs_ondisk_charset)
|
try: where = where.encode(cfg.options.cvs_ondisk_charset)
|
||||||
except: pass
|
except: pass
|
||||||
path_parts = _path_parts(where)
|
path_parts = _path_parts(where)
|
||||||
|
@ -3907,24 +3897,27 @@ def build_commit(request, files, max_files, dir_strip, format):
|
||||||
if max_files and num_allowed > max_files:
|
if max_files and num_allowed > max_files:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
commit_files.append(_item(date=commit_time,
|
commit_files.append(_item(
|
||||||
dir=request.server.escape(dirname),
|
date=commit_time,
|
||||||
file=request.server.escape(filename),
|
dir=request.server.escape(dirname),
|
||||||
author=request.server.escape(f.GetAuthor()),
|
file=request.server.escape(filename),
|
||||||
rev=rev,
|
author=request.server.escape(f.GetAuthor()),
|
||||||
branch=f.GetBranch(),
|
rev=rev,
|
||||||
plus=plus,
|
branch=f.GetBranch(),
|
||||||
minus=minus,
|
plus=plus,
|
||||||
type=change_type,
|
minus=minus,
|
||||||
dir_href=dir_href,
|
type=change_type,
|
||||||
log_href=log_href,
|
snippet=f.GetSnippet(),
|
||||||
view_href=view_href,
|
dir_href=dir_href,
|
||||||
download_href=download_href,
|
log_href=log_href,
|
||||||
prefer_markup=prefer_markup,
|
view_href=view_href,
|
||||||
diff_href=diff_href,
|
download_href=download_href,
|
||||||
root=my_repos,
|
prefer_markup=prefer_markup,
|
||||||
path=where,
|
diff_href=diff_href,
|
||||||
path_prev=path_prev))
|
root=my_repos,
|
||||||
|
path=where,
|
||||||
|
path_prev=path_prev,
|
||||||
|
))
|
||||||
|
|
||||||
# No files survived authz checks? Let's just pretend this
|
# No files survived authz checks? Let's just pretend this
|
||||||
# little commit didn't happen, shall we?
|
# little commit didn't happen, shall we?
|
||||||
|
@ -4115,6 +4108,7 @@ def view_query(request):
|
||||||
who_match = request.query_dict.get('who_match', 'exact')
|
who_match = request.query_dict.get('who_match', 'exact')
|
||||||
comment = request.query_dict.get('comment', '')
|
comment = request.query_dict.get('comment', '')
|
||||||
comment_match = request.query_dict.get('comment_match', 'fulltext')
|
comment_match = request.query_dict.get('comment_match', 'fulltext')
|
||||||
|
search_content = request.query_dict.get('search_content', '')
|
||||||
querysort = request.query_dict.get('querysort', 'date')
|
querysort = request.query_dict.get('querysort', 'date')
|
||||||
date = request.query_dict.get('date', 'hours')
|
date = request.query_dict.get('date', 'hours')
|
||||||
hours = request.query_dict.get('hours', '2')
|
hours = request.query_dict.get('hours', '2')
|
||||||
|
@ -4126,7 +4120,7 @@ def view_query(request):
|
||||||
cfg.options.limit_changes))
|
cfg.options.limit_changes))
|
||||||
|
|
||||||
match_types = { 'exact':1, 'like':1, 'glob':1, 'regex':1, 'notregex':1 }
|
match_types = { 'exact':1, 'like':1, 'glob':1, 'regex':1, 'notregex':1 }
|
||||||
sort_types = { 'date':1, 'author':1, 'file':1 }
|
sort_types = { 'date':1, 'date_rev':1, 'author':1, 'file':1, 'relevance':1 }
|
||||||
date_types = { 'hours':1, 'day':1, 'week':1, 'month':1,
|
date_types = { 'hours':1, 'day':1, 'week':1, 'month':1,
|
||||||
'all':1, 'explicit':1 }
|
'all':1, 'explicit':1 }
|
||||||
|
|
||||||
|
@ -4193,6 +4187,8 @@ def view_query(request):
|
||||||
query.SetComment(comment, comment_match)
|
query.SetComment(comment, comment_match)
|
||||||
else:
|
else:
|
||||||
query.SetTextQuery(comment)
|
query.SetTextQuery(comment)
|
||||||
|
if search_content:
|
||||||
|
query.SetContentQuery(search_content)
|
||||||
query.SetSortMethod(querysort)
|
query.SetSortMethod(querysort)
|
||||||
if date == 'hours':
|
if date == 'hours':
|
||||||
query.SetFromDateHoursAgo(int(hours))
|
query.SetFromDateHoursAgo(int(hours))
|
||||||
|
|
|
@ -0,0 +1,70 @@
|
||||||
|
#!/usr/bin/python
|
||||||
|
|
||||||
|
import mimetypes
|
||||||
|
|
||||||
|
have_chardet = 0
try:
  import chardet
  have_chardet = 1
except Exception:
  # chardet is optional: any import-time failure simply disables it and
  # guess_charset() falls back to UTF-8 plus the configured encodings.
  pass


class ContentMagic:
  """Best-effort MIME type and character set guessing for file content.

  `encodings` is a colon-separated list of codec names (for example
  'utf-8:cp1251') tried in order when chardet is unavailable or fails.
  Problems hit while loading the optional `magic` module are collected
  in `self.errors` instead of being raised.
  """

  def __init__(self, encodings):
    self.encodings = encodings.split(':')
    self.mime_magic = None
    self.errors = []
    # Try to load magic
    try:
      import magic
      cookie = magic.open(magic.MAGIC_MIME_TYPE)
      cookie.load()
      # Publish the cookie only once load() did not raise, so a half
      # initialised object is never used by guess_mime().
      self.mime_magic = cookie
    except Exception as e:
      self.errors.append(e)

  def guess_mime(self, mime, filename, tempfile):
    """Return a MIME type for the content, or a false value if unknown.

    mime     -- MIME type already known to the caller (may be None);
                'application/octet-stream' is treated as "unknown".
    filename -- file name used for extension based guessing (may be None).
    tempfile -- a path string, or an object with read(), handed to the
                magic library when the first two steps gave nothing.
    """
    if mime == 'application/octet-stream':
      mime = ''
    if not mime and filename:
      mime = mimetypes.guess_type(filename)[0]
    if not mime and tempfile and self.mime_magic:
      if isinstance(tempfile, str):
        mime = self.mime_magic.file(tempfile)
      else:
        # Only the first 4096 bytes are sniffed; the stream is not rewound.
        mime = self.mime_magic.buffer(tempfile.read(4096))
    return mime

  def guess_charset(self, content):
    """Return (decoded_content, charset) for a byte string.

    charset is None, and content is returned unchanged, when no
    candidate encoding could decode the data.
    """
    charset = None
    if have_chardet:
      # Try chardet
      try:
        detected = chardet.detect(content)
        name = None
        if detected:
          name = detected['encoding']
        # chardet reports encoding=None when it cannot decide (e.g. for
        # empty input).  Keep charset as None in that case so that the
        # configured encodings below still get their chance and a dict
        # is never returned in place of a charset name.
        if name:
          content = content.decode(name)
          charset = name
      except Exception:
        charset = None
    else:
      # Try UTF-8
      try:
        content = content.decode('utf-8')
        charset = 'utf-8'
      except Exception:
        charset = None
    # Then try to guess primitively
    if charset is None:
      for candidate in self.encodings:
        try:
          content = content.decode(candidate)
        except Exception:
          # Undecodable data or unknown codec name: try the next one.
          continue
        charset = candidate
        break
    return (content, charset)

  def utf8(self, content):
    """Return content re-encoded as UTF-8, or unchanged if undecodable."""
    (uni, charset) = self.guess_charset(content)
    if charset:
      return uni.encode('utf-8')
    return content
|
|
@ -144,7 +144,7 @@ Browse Directory</a></p>
|
||||||
<tr>
|
<tr>
|
||||||
<th style="text-align:right;vertical-align:top;">Comment:</th>
|
<th style="text-align:right;vertical-align:top;">Comment:</th>
|
||||||
<td>
|
<td>
|
||||||
<input type="text" name="comment" value="[comment]" /><br />
|
<input type="text" name="comment" value="[comment]" size="40" /><br />
|
||||||
<label for="comment_match_exact">
|
<label for="comment_match_exact">
|
||||||
<input type="radio" name="comment_match" id="comment_match_fulltext"
|
<input type="radio" name="comment_match" id="comment_match_fulltext"
|
||||||
value="fulltext" [is comment_match "fulltext"]checked=""[end] />
|
value="fulltext" [is comment_match "fulltext"]checked=""[end] />
|
||||||
|
@ -172,13 +172,21 @@ Browse Directory</a></p>
|
||||||
</label>
|
</label>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
[if-any enable_search_content]
|
||||||
|
<tr>
|
||||||
|
<th style="text-align:right;vertical-align:top;">Search content:</th>
|
||||||
|
<td><input type="text" name="search_content" value="[search_content]" size="60" /></td>
|
||||||
|
</tr>
|
||||||
|
[end]
|
||||||
<tr>
|
<tr>
|
||||||
<th style="text-align:right;vertical-align:top;">Sort By:</th>
|
<th style="text-align:right;vertical-align:top;">Sort By:</th>
|
||||||
<td>
|
<td>
|
||||||
<select name="querysort">
|
<select name="querysort">
|
||||||
<option value="date" [is querysort "date"]selected="selected"[end]>Date</option>
|
<option value="date" [is querysort "date"]selected="selected"[end]>Date</option>
|
||||||
|
<option value="date_rev" [is querysort "date_rev"]selected="selected"[end]>Date (oldest first)</option>
|
||||||
<option value="author" [is querysort "author"]selected="selected"[end]>Author</option>
|
<option value="author" [is querysort "author"]selected="selected"[end]>Author</option>
|
||||||
<option value="file" [is querysort "file"]selected="selected"[end]>File</option>
|
<option value="file" [is querysort "file"]selected="selected"[end]>File</option>
|
||||||
|
<option value="relevance" [is querysort "relevance"]selected="selected"[end]>Relevance</option>
|
||||||
</select>
|
</select>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
|
|
@ -46,15 +46,18 @@
|
||||||
<tr class="vc_row_[if-index commits even]even[else]odd[end]">
|
<tr class="vc_row_[if-index commits even]even[else]odd[end]">
|
||||||
<td style="vertical-align: top;">
|
<td style="vertical-align: top;">
|
||||||
[define rev_href][if-any commits.files.prefer_markup][commits.files.view_href][else][if-any commits.files.download_href][commits.files.download_href][end][end][end]
|
[define rev_href][if-any commits.files.prefer_markup][commits.files.view_href][else][if-any commits.files.download_href][commits.files.download_href][end][end][end]
|
||||||
[if-any commits.files.rev][if-any rev_href]<a href="[rev_href]">[end][commits.files.rev][if-any rev_href]</a>[end][else] [end]
|
[if-any commits.files.rev][if-any rev_href]<a href="[rev_href]">[end][commits.files.rev][if-any rev_href]</a>[end][else] [end]
|
||||||
</td>
|
</td>
|
||||||
<td style="vertical-align: top;">
|
<td style="vertical-align: top;">
|
||||||
<a href="[commits.files.dir_href]">[commits.files.dir]/</a>
|
<a href="[commits.files.dir_href]">[commits.files.dir]/</a>
|
||||||
<a href="[commits.files.log_href]">[commits.files.file]</a>
|
<a href="[commits.files.log_href]">[commits.files.file]</a>
|
||||||
|
[if-any commits.files.snippet]
|
||||||
|
<div class="snippet">[commits.files.snippet]</div>
|
||||||
|
[end]
|
||||||
</td>
|
</td>
|
||||||
[if-any show_branch]
|
[if-any show_branch]
|
||||||
<td style="vertical-align: top;">
|
<td style="vertical-align: top;">
|
||||||
[if-any commits.files.branch][commits.files.branch][else] [end]
|
[if-any commits.files.branch][commits.files.branch][else] [end]
|
||||||
</td>
|
</td>
|
||||||
[end]
|
[end]
|
||||||
<td style="vertical-align: top;">
|
<td style="vertical-align: top;">
|
||||||
|
@ -68,10 +71,10 @@
|
||||||
[is commits.files.type "Remove"]</del>[end]
|
[is commits.files.type "Remove"]</del>[end]
|
||||||
</td>
|
</td>
|
||||||
<td style="vertical-align: top;">
|
<td style="vertical-align: top;">
|
||||||
[if-any commits.files.date][commits.files.date][else] [end]
|
[if-any commits.files.date][commits.files.date][else] [end]
|
||||||
</td>
|
</td>
|
||||||
<td style="vertical-align: top;">
|
<td style="vertical-align: top;">
|
||||||
[if-any commits.files.author][commits.files.author][else] [end]
|
[if-any commits.files.author][commits.files.author][else] [end]
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
[end]
|
[end]
|
||||||
|
|
Loading…
Reference in New Issue