Bug 82651 - Tika&Sphinx&chardet content indexing (done!)

git-svn-id: svn://svn.office.custis.ru/3rdparty/viewvc.org/trunk@1388 6955db30-a419-402b-8a0d-67ecbb4d7f56
remotes/github/custis
vfilippov 2011-09-27 16:13:53 +00:00 committed by Vitaliy Filippov
parent 83c7e6fe49
commit e363cf19b1
10 changed files with 761 additions and 332 deletions

View File

@ -44,6 +44,7 @@ CREATE TABLE branches (
DROP TABLE IF EXISTS checkins;
CREATE TABLE checkins (
id int NOT NULL AUTO_INCREMENT PRIMARY KEY,
type enum('Change','Add','Remove'),
ci_when datetime DEFAULT '0000-00-00 00:00:00' NOT NULL,
whoid mediumint(9) DEFAULT '0' NOT NULL,
@ -57,7 +58,7 @@ CREATE TABLE checkins (
removedlines int(11) DEFAULT '0' NOT NULL,
descid mediumint(9),
UNIQUE repositoryid (repositoryid,dirid,fileid,revision),
KEY repository_when (repositoryid,ci_when),
KEY repositoryid_when (repositoryid,ci_when),
KEY ci_when (ci_when),
KEY whoid (whoid,ci_when),
KEY dirid (dirid),
@ -138,6 +139,7 @@ CREATE TABLE branches (
DROP TABLE IF EXISTS commits;
CREATE TABLE commits (
id int NOT NULL AUTO_INCREMENT PRIMARY KEY,
type enum('Change','Add','Remove'),
ci_when datetime DEFAULT '0000-00-00 00:00:00' NOT NULL,
whoid mediumint(9) DEFAULT '0' NOT NULL,
@ -151,9 +153,9 @@ CREATE TABLE commits (
removedlines int(11) DEFAULT '0' NOT NULL,
descid mediumint(9),
UNIQUE repositoryid (repositoryid,dirid,fileid,revision),
KEY repositoryid_when (repositoryid,ci_when),
KEY ci_when (ci_when),
KEY whoid (whoid),
KEY repositoryid_2 (repositoryid),
KEY whoid (whoid,ci_when),
KEY dirid (dirid),
KEY fileid (fileid),
KEY branchid (branchid),
@ -253,7 +255,7 @@ Options:
[Default: ViewVC]
--help Show this usage message.
--hostname=ARG Use ARG as the hostname for the MySQL connection.
[Default: localhost]
@ -264,7 +266,7 @@ Options:
--version=ARG Create the database using the schema employed by
version ARG of ViewVC. Valid values are:
[ "1.0" ]
""" % (os.path.basename(sys.argv[0])))
if errmsg is not None:
stream.write("[ERROR] %s.\n" % (errmsg))

View File

@ -58,7 +58,11 @@ else:
import os
import string
import socket
import select
import re
import mimetypes
import time
import svn.core
import svn.repos
@ -68,14 +72,20 @@ import svn.delta
import cvsdb
import viewvc
import vclib
from viewvcmagic import ContentMagic
class SvnRepo:
"""Class used to manage a connection to a SVN repository."""
def __init__(self, path):
def __init__(self, path, index_content = None, tika_client = None, guesser = None,
    svn_ignore_mimetype = False):
    # path: filesystem path of the Subversion repository to open.
    # index_content: when true, file contents are extracted for indexing
    #   (see cvsdb.index_content in the configuration).
    # tika_client: optional TikaClient used to extract text from binary files.
    # guesser: ContentMagic instance used for MIME type / charset guessing.
    # svn_ignore_mimetype: when true, the svn:mime-type property is ignored
    #   and the MIME type is always guessed from the file itself.
    self.path = path
    self.repo = svn.repos.svn_repos_open(path)
    self.fs = svn.repos.svn_repos_fs(self.repo)
    # Youngest (HEAD) revision number of the repository.
    self.rev_max = svn.fs.youngest_rev(self.fs)
    self.index_content = index_content
    self.tika_client = tika_client
    self.guesser = guesser
    self.svn_ignore_mimetype = svn_ignore_mimetype
def __getitem__(self, rev):
if rev is None:
rev = self.rev_max
@ -128,6 +138,74 @@ def _get_diff_counts(diff_fp):
line = diff_fp.readline()
return plus, minus
class TikaClient:
# Create tika client
def __init__(self, tika_server, mime_types):
self.tika_server = tika_server
self.mime_types = mime_types
self.addr = tika_server.split(':')
# Split address
if len(self.addr) != 2:
raise Exception('tika_server value is incorrect: \''+tika_server+'\', please use \'host:port\' format')
self.addr = (self.addr[0], int(self.addr[1]))
# Build regexp for MIME types
m = re.split('\s+', mime_types.strip())
self.mime_regexp = re.compile('|'.join('^'+re.escape(i).replace('\\*', '.*')+'$' for i in m))
# Extract text content from file using Tika which runs in server mode
def get_text(self, filename, mime_type, log_filename):
if not self.mime_regexp.match(mime_type):
# Tika can't handle this mime type, return nothing
return ''
fd = None
s = None
text = ''
fsize = 0
try:
# Read original file
fd = open(filename, 'rb')
data = fd.read()
fsize = len(data)
if not fsize:
return ''
# Connect to Tika
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect(self.addr)
s.setblocking(0)
sockfd = s.fileno()
# Tika is somewhat delicate about network IO, so:
# Read and write using poll(2) system call
p = select.poll()
p.register(sockfd)
while 1:
fds = p.poll()
if not fds:
break
(pollfd, event) = fds[0]
if event & select.POLLIN:
# Exception or empty data means EOF...
try: part = os.read(sockfd, 65536)
except: break
if not part: break
text += part
if event & select.POLLOUT:
if not len(data):
# Shutdown output and forget about POLLOUT
s.shutdown(socket.SHUT_WR)
p.modify(sockfd, select.POLLIN)
else:
# Write and consume some data
l = os.write(sockfd, data)
data = data[l:]
if len(text) == 0:
raise Exception('Empty response from Tika server')
print "Extracted %d bytes from %s (%s) of size %d" % (len(text), log_filename, mime_type, fsize)
except Exception, e:
print "Error extracting text from %s (%s) of size %d: %s" % (log_filename, mime_type, fsize, str(e))
finally:
if fd: fd.close()
if s: s.close()
return text
class SvnRev:
"""Class used to hold information about a particular revision of
@ -151,7 +229,7 @@ class SvnRev:
# get a root for the current revisions
fsroot = self._get_root_for_rev(rev)
# find changes in the revision
editor = svn.repos.RevisionChangeCollector(repo.fs, rev)
e_ptr, e_baton = svn.delta.make_editor(editor)
@ -168,7 +246,7 @@ class SvnRev:
base_root = None
if change.base_path:
base_root = self._get_root_for_rev(change.base_rev)
if not change.path:
action = 'remove'
elif change.added:
@ -184,19 +262,53 @@ class SvnRev:
['-b', '-B'])
diff_fp = diffobj.get_pipe()
plus, minus = _get_diff_counts(diff_fp)
# TODO Indexing file contents
# For binary files: svn.fs.contents_changed(root1, path1, root2, path2)
# Temp file with contents is at: diffobj.tempfile2
# Apache Tika server may even be at another host!
# CustIS Bug 50473: a workaround for svnlib behaviour in file movements (FILE1 -> FILE2 + FILE1 -> null)
if change.base_path:
if not change.path and changes_hash.get(change.base_path, '') != '':
if not change.path and change.base_path in changes_hash:
minus = 0
elif change.path:
changes_hash[change.base_path] = change.path
self.changes.append((path, action, plus, minus))
content = ''
mime = ''
# need to check if binary file's content changed when copying,
# if not, don't extract it, just get it from previous revision later
if repo.index_content and change.path and (not change.base_path
or svn.fs.contents_changed(
base_root and base_root or None,
base_root and change.base_path or None,
fsroot, change.path
)):
props = svn.fs.node_proplist(fsroot, change.path)
if not repo.svn_ignore_mimetype:
mime = props.get('svn:mime-type', None)
else:
mime = None
mime = repo.guesser.guess_mime(
mime,
os.path.basename(change.path),
diffobj.tempfile2
)
# Read and guess charset by ourselves for text files
if mime.startswith('text/') or (mime.startswith('application/') and mime.endswith('xml')):
try:
fd = open(diffobj.tempfile2, 'rb')
content = fd.read()
fd.close()
except: pass
# Guess charset
if content:
content, charset = repo.guesser.guess_charset(content)
if charset:
content = content.encode('utf-8')
print 'Guessed %s for %s' % (charset, change.path)
else:
print 'Failed to guess charset for %s, not indexing' % (change.path, )
# Try to extract content using Tika from binary documents
elif repo.tika_client:
content = repo.tika_client.get_text(diffobj.tempfile2, mime, change.path)
self.changes.append((path, action, plus, minus, content, mime))
def _get_root_for_rev(self, rev):
"""Fetch a revision root from a cache of such, or a fresh root
@ -217,7 +329,7 @@ def handle_revision(db, command, repo, rev, verbose, force=0):
if verbose: print "skipped (no changes)."
return
for (path, action, plus, minus) in revision.changes:
for (path, action, plus, minus, content, mime) in revision.changes:
directory, file = os.path.split(path)
commit = cvsdb.CreateCommit()
commit.SetRepository(repo.path)
@ -230,6 +342,8 @@ def handle_revision(db, command, repo, rev, verbose, force=0):
commit.SetPlusCount(plus)
commit.SetMinusCount(minus)
commit.SetBranch(None)
commit.SetContent(content)
commit.SetMimeType(mime)
if action == 'add':
commit.SetTypeAdd()
@ -268,7 +382,16 @@ def main(command, repository, revs=[], verbose=0, force=0):
sys.stderr.write("ERROR: " + str(e) + "\n")
sys.exit(1)
repo = SvnRepo(repository)
tika_client = None
if cfg.utilities.tika_server:
tika_client = TikaClient(cfg.utilities.tika_server, cfg.utilities.tika_mime_types)
repo = SvnRepo(
path = repository,
index_content = cfg.cvsdb.index_content,
tika_client = tika_client,
guesser = cfg.guesser(),
svn_ignore_mimetype = cfg.options.svn_ignore_mimetype,
)
if command == 'rebuild' or (command == 'update' and not revs):
for rev in range(repo.rev_max+1):
handle_revision(db, command, repo, rev, verbose)
@ -312,7 +435,7 @@ Usage: 1. %s [-v] rebuild REPOS-PATH
the database. If a range is specified, the revisions will be
processed in ascending order, and you may specify "HEAD" to
indicate "the youngest revision currently in the repository".
3. Purge information specific to the repository located at REPOS-PATH
from the database.
@ -337,7 +460,7 @@ if __name__ == '__main__':
del args[index]
except ValueError:
pass
if len(args) < 3:
usage()

View File

@ -1,6 +1,6 @@
#---------------------------------------------------------------------------
#
# Configuration file for ViewVC
# Configuration file for ViewVC (4IntraNet patched version)
#
# Information on ViewVC is located at the following web site:
# http://viewvc.org/
@ -9,7 +9,7 @@
# THE FORMAT OF THIS CONFIGURATION FILE
#
# This file is delineated by sections, specified in [brackets]. Within
# This file is delineated by sections, specified in [brackets]. Within
# each section, are a number of configuration settings. These settings
# take the form of: name = value. Values may be continued on the
# following line by indenting the continued line.
@ -17,14 +17,14 @@
# WARNING: Indentation *always* means continuation. Name=value lines
# should always start in column zero.
#
# Comments should always start in column zero, and are identified
# Comments should always start in column zero, and are identified
# with "#".
#
# Certain configuration settings may have multiple values. These should
# be separated by a comma. The settings where this is allowed are noted
# Certain configuration settings may have multiple values. These should
# be separated by a comma. The settings where this is allowed are noted
# below. Any other setting that requires special syntax is noted at that
# setting.
#
#
#
# SOME TERMINOLOGY USED HEREIN
#
@ -50,10 +50,10 @@
# recommend you pay attention to. Of course, don't try to change the
# options here -- do so in the relevant section of the configuration
# file below.
#
#
# For correct operation, you will probably need to change the following
# configuration variables:
#
#
# cvs_roots (for CVS)
# svn_roots (for Subversion)
# root_parents (for CVS or Subversion)
@ -62,18 +62,18 @@
# rcs_dir
# mime_types_file
# the many options in the [utilities] section
#
#
# It is usually desirable to change the following variables:
#
#
# address
# forbidden
#
#
# To optimize delivery of ViewVC static files:
#
#
# docroot
#
#
# To customize the display of ViewVC for your site:
#
#
# template_dir
# the [templates] override section
#
@ -139,7 +139,7 @@ default_root = cvs
# provided only as a convenience for ViewVC installations which are
# using the default template set, where the value of this option will
# be displayed in the footer of every ViewVC page.)
address =
address =
#
# This option provides a mechanism for custom key/value pairs to be
@ -244,21 +244,47 @@ cvsnt =
# See also bin/cvsnt-rcsfile-inetd.pl
#rcsfile_socket = 'host:port'
# Example: rcsfile_socket = '127.0.0.1:8071'
#rcsfile_socket = host:port
# Example: rcsfile_socket = 127.0.0.1:8071
# Subversion command-line client, used for viewing Subversion repositories
svn =
# svn = /usr/bin/svn
# GNU diff, used for showing file version differences
diff =
diff =
# diff = /usr/bin/diff
# CvsGraph, a graphical CVS version graph generator (see options.use_cvsgraph)
cvsgraph =
# cvsgraph = /usr/local/bin/cvsgraph
# Apache Tika TCP server host and port, used to extract text from binary documents
# Note that as of 2011-09-12, Tika 0.9 has a bug which leads to hangups on processing
# MS Word documents in server mode. So you must use the fixed version, downloaded from:
# http://wiki.4intra.net/public/tika-app-0.9-fix-TIKA709.jar
# (mirror) http://code.google.com/p/mediawiki4intranet/downloads/detail?name=tika-app-0.9-fix-TIKA709.jar
# Or apply the patch by yourself and rebuild Tika from source, see patch here:
# https://issues.apache.org/jira/browse/TIKA-709
# Tika server should be started with command 'java -jar tika-app-0.9.jar -p PORT -t -eutf-8'
#tika_server = host:port
# Example: tika_server = 127.0.0.1:8072
# This lists MIME types that can be processed by Tika
# You may change it if your Tika is newer than 0.9 and supports more formats
# (note) *+xml examples: xhtml+xml, rss+xml, atom+xml, docbook+xml, rdf+xml
tika_mime_types =
text/*
application/*+xml
application/xml
application/vnd.oasis.opendocument.*
application/vnd.openxmlformats
application/vnd.ms-*
application/msaccess
application/msword
application/pdf
application/rtf
#---------------------------------------------------------------------------
[options]
@ -358,7 +384,7 @@ svn_ignore_mimetype = 0
# directory ViewVC should consult for various things, including cached
# remote authentication credentials. If unset, Subversion will use
# the default location(s) ($HOME/.subversion, etc.)
svn_config_dir =
svn_config_dir =
# use the rcsparse Python module to retrieve CVS repository
# information instead of invoking rcs utilities [EXPERIMENTAL]
@ -494,12 +520,18 @@ short_log_len = 80
# should we colorize known file content syntaxes? (requires Pygments module)
enable_syntax_coloration = 1
# detect_encoding: Should we attempt to detect versioned file
# character encodings? [Requires 'chardet' module]
# Used in file list, file content display and indexing
# See also options.encodings for naive guessing.
detect_encoding = 1
# Use CvsGraph. See http://www.akhphd.au.dk/~bertho/cvsgraph/ for
# documentation and download.
# documentation and download.
use_cvsgraph = 0
#use_cvsgraph = 1
# Location of the customized cvsgraph configuration file.
# Location of the customized cvsgraph configuration file.
cvsgraph_conf = cvsgraph.conf
#
@ -544,6 +576,17 @@ use_pagesize = 0
# Set to 0 to disable the limit.
limit_changes = 100
# You can also use primitive charset guessing instead of chardet (options.detect_encoding)
# Just set this to the list of possible charsets in your repository.
# ViewVC will simply try to decode content using each of them, and pick
# the first which succeeds. UTF-8 is always tried automatically.
#encodings = cp1251:iso-8859-1
# Sadly this is also required - for back-links from query results to files
# in CVS, because it doesn't recode file names to UTF-8 as Subversion does.
# Just set it to cp1251 if you work with your CVS from Windows.
#cvs_ondisk_charset = cp1251
#---------------------------------------------------------------------------
[templates]
@ -554,7 +597,7 @@ limit_changes = 100
# use a different template for a particular view, simply uncomment the
# appropriate option below and specify the currect location of the EZT
# template file you wish to use for that view.
#
#
# Templates are specified relative to the configured template
# directory (see the "template_dir" option), but absolute paths may
# also be used as well.
@ -569,13 +612,13 @@ limit_changes = 100
#diff = diff.ezt
#directory = directory.ezt
### an alternative directory view
#directory = dir_new.ezt
#directory = dir_new.ezt
#error = error.ezt
#file = file.ezt
#graph = graph.ezt
#log = log.ezt
### a table-based alternative log view
#log = log_table.ezt
#log = log_table.ezt
#query = query.ezt
#query_form = query_form.ezt
#query_results = query_results.ezt
@ -588,22 +631,51 @@ limit_changes = 100
# Set to 1 to enable the database integration feature, 0 otherwise.
enabled = 0
# Database hostname and port.
# Set to 1 to enable indexing of file contents using Sphinx and Tika
index_content = 0
# Database hostname, port, and socket
#host = localhost
#port = 3306
# On Debian Linux, enable this:
#socket = /var/run/mysqld/mysqld.sock
# ViewVC database name.
#database_name = ViewVC
# Username and password of user with read/write privileges to the ViewVC
# database.
#user =
#passwd =
#user =
#passwd =
# Username and password of user with read privileges to the ViewVC
# database.
#readonly_user =
#readonly_passwd =
#readonly_user =
#readonly_passwd =
# ViewVC can use Sphinx (http://sphinxsearch.com) full-text search engine
# to index file contents with full history and then search over them.
# Also, Apache Tika console application can be used in TCP server mode to
# add support for indexing binary documents (MS Word, PDF, etc.).
# See tika_server in [utilities].
# Requires Sphinx >= 0.9.9 with a real-time updatable SphinxQL index.
# Index must be created in sphinx.conf by hand and have the following fields:
# rt_field = content
# rt_attr_string = content
# rt_attr_string = mimetype
# rt_attr_timestamp = ci_when
# rt_attr_uint = whoid
# rt_attr_uint = repositoryid
# rt_attr_uint = dirid
# rt_attr_uint = fileid
# rt_attr_uint = revision
# rt_attr_uint = branchid
# Sphinx connection parameters:
#sphinx_host =
#sphinx_port =
#sphinx_socket = /var/run/sphinxql.sock
#sphinx_index = viewvc
# Limit the number of rows returned by a given query to this number.
#row_limit = 1000
@ -616,7 +688,7 @@ enabled = 0
# Check if the repository is found in the database before showing
# the query link and RSS feeds. Set to 1 to enable check.
#
#
# WARNING: Enabling this check adds the cost of a database connection
# and query to most ViewVC requests. If all your roots are represented
# in the commits database, or if you don't care about the creation of
@ -640,7 +712,7 @@ enabled = 0
#
# ViewVC allows you to customize its configuration options for
# individual virtual hosts. You might, for example, wish to expose
# all of your Subversion repositories at http://svn.yourdomain.com/viewvc/
# all of your Subversion repositories at http://svn.yourdomain.com/viewvc/
# and all your CVS ones at http://cvs.yourdomain.com/viewvc/, with no
# cross-exposure. Using ViewVC's virtual host (vhost) configuration
# support, you can do this. Simply create two vhost configurations
@ -671,7 +743,7 @@ enabled = 0
# gui = guiproject.yourdomain.*
#
# [vhost-libs/general]
# cvs_roots =
# cvs_roots =
# svn_roots = svnroot: /var/svn/libs-repos
# default_root = svnroot
#
@ -680,7 +752,7 @@ enabled = 0
#
# [vhost-gui/general]
# cvs_roots = cvsroot: /var/cvs/guiproject
# svn_roots =
# svn_roots =
# default_root = cvsroot
#
@ -697,7 +769,7 @@ enabled = 0
#
# Here is an example showing how to enable Subversion authz-based
# authorization for only the single root named "svnroot":
#
#
# [root-svnroot/options]
# authorizer = svnauthz
#
@ -726,7 +798,7 @@ enabled = 0
#
# Tests are case-sensitive.
#
# NOTE: Again, this is for the hiding of modules within repositories, *not*
# NOTE: Again, this is for the hiding of modules within repositories, *not*
# for the hiding of repositories (roots) themselves.
#
# Some examples:
@ -749,7 +821,7 @@ enabled = 0
# Allow "xml", forbid other modules starting with "x", and allow the rest:
# forbidden = !xml, x*, !*
#
forbidden =
forbidden =
#---------------------------------------------------------------------------
[authz-forbiddenre]
@ -792,7 +864,7 @@ forbidden =
# Only allow visibility of HTML files and the directories that hold them:
# forbiddenre = !^([^/]+|.*(/|\.html))$
#
forbiddenre =
forbiddenre =
#---------------------------------------------------------------------------
[authz-svnauthz]

View File

@ -24,6 +24,7 @@ import vclib.ccvs
import vclib.svn
import cvsdb
import viewvc
from viewvcmagic import ContentMagic
#########################################################################
#
@ -47,6 +48,7 @@ class Config:
'root_parents', 'allowed_views', 'mime_types_files')
def __init__(self):
self.__guesser = None
for section in self._sections:
setattr(self, section, _sub_config())
@ -66,7 +68,6 @@ class Config:
if rootname:
self._process_root_options(self.parser, rootname)
self.expand_root_parents()
cvsdb.setencs(self.options.encodings.split(':'))
r = {}
for i in self.rewritehtml.__dict__.keys():
if i[-8:] == '.replace':
@ -201,7 +202,7 @@ class Config:
pass
else:
raise IllegalOverrideSection('root', section)
def overlay_root_options(self, rootname):
"Overly per-root options atop the existing option set."
if not self.conf_path:
@ -217,7 +218,7 @@ class Config:
for option in parser.options(section):
d[option] = parser.get(section, option)
return d.items()
def get_authorizer_params(self, authorizer, rootname=None):
if not self.conf_path:
return {}
@ -236,7 +237,12 @@ class Config:
params[key] = value
params['__config'] = self
return params
def guesser(self):
    # Lazily construct and cache a single shared ContentMagic instance,
    # configured with the options.encodings charset list; used for MIME
    # type and charset guessing throughout ViewVC.
    if not self.__guesser:
        self.__guesser = ContentMagic(self.options.encodings)
    return self.__guesser
def set_defaults(self):
"Set some default values in the configuration."
@ -258,6 +264,8 @@ class Config:
self.utilities.svn = ''
self.utilities.diff = ''
self.utilities.cvsgraph = ''
self.utilities.tika_server = ''
self.utilities.tika_mime_types = ''
self.options.root_as_url_component = 1
self.options.checkout_magic = 0
@ -302,7 +310,7 @@ class Config:
self.options.limit_changes = 100
self.options.cvs_ondisk_charset = 'cp1251'
self.options.binary_mime_re = '^(?!text/|.*\Wxml)'
self.options.encodings = 'utf-8:cp1251:iso-8859-1'
self.options.encodings = 'cp1251:iso-8859-1'
self.templates.diff = None
self.templates.directory = None
@ -316,6 +324,7 @@ class Config:
self.templates.roots = None
self.cvsdb.enabled = 0
self.cvsdb.index_content = 0
self.cvsdb.host = ''
self.cvsdb.port = 3306
self.cvsdb.socket = ''
@ -323,12 +332,17 @@ class Config:
self.cvsdb.user = ''
self.cvsdb.passwd = ''
self.cvsdb.readonly_user = ''
self.cvsdb.readonly_passwd = ''
self.cvsdb.readonly_passwd = ''
self.cvsdb.row_limit = 1000
self.cvsdb.rss_row_limit = 100
self.cvsdb.check_database_for_root = 0
self.cvsdb.fulltext_min_relevance = 0.2
self.cvsdb.sphinx_host = ''
self.cvsdb.sphinx_port = 3307
self.cvsdb.sphinx_socket = ''
self.cvsdb.sphinx_index = ''
def _startswith(somestr, substr):
return somestr[:len(substr)] == substr

View File

@ -15,6 +15,7 @@ import sys
import string
import time
import re
import cgi
import vclib
import dbi
@ -36,22 +37,12 @@ error = "cvsdb error"
## defined to actually be complete; it should run well off of any DBI 2.0
## complient database interface
encs = [ "utf-8", "cp1251", "iso-8859-1" ]
def utf8string(value):
for e in encs:
try:
value = value.decode(e)
break
except: pass
return value.encode("utf-8")
def setencs(e):
global encs
encs = e
class CheckinDatabase:
def __init__(self, host, port, socket, user, passwd, database, row_limit, min_relevance, authorizer = None):
def __init__(self, host, port, socket, user, passwd, database, row_limit, min_relevance, cfg,
authorizer = None, index_content = 0, sphinx_host = None, sphinx_port = None,
sphinx_socket = None, sphinx_index = None):
self.cfg = cfg
self._host = host
self._port = port
self._socket = socket
@ -63,11 +54,21 @@ class CheckinDatabase:
self._min_relevance = min_relevance
self.authorizer = authorizer
# Sphinx settings
self.index_content = index_content
self.sphinx_host = sphinx_host
self.sphinx_port = sphinx_port
self.sphinx_socket = sphinx_socket
self.sphinx_index = sphinx_index
## database lookup caches
self._get_cache = {}
self._get_id_cache = {}
self._desc_id_cache = {}
# Sphinx connection None by default
self.sphinx = None
def Connect(self):
self.db = dbi.connect(
self._host, self._port, self._socket, self._user, self._passwd, self._database)
@ -83,12 +84,17 @@ class CheckinDatabase:
else:
self._version = 0
if self._version > CURRENT_SCHEMA_VERSION:
raise DatabaseVersionError("Database version %d is newer than the "
"last version supported by this "
"software." % (self._version))
raise DatabaseVersionError("Database version %d is newer than the "
"last version supported by this "
"software." % (self._version))
if self.index_content:
self.sphinx = dbi.connect(self.sphinx_host, self.sphinx_port, self.sphinx_socket, '', '', '')
def utf8(self, value):
    # Recode *value* to UTF-8 via the configured ContentMagic guesser
    # (replaces the old module-level utf8string() helper); used before
    # storing strings such as descriptions and lookup values in the DB.
    return self.cfg.guesser().utf8(value)
def sql_get_id(self, table, column, value, auto_set):
value = utf8string(value)
value = self.utf8(value)
sql = "SELECT id FROM %s WHERE %s=%%s" % (table, column)
sql_args = (value, )
@ -172,7 +178,7 @@ class CheckinDatabase:
temp2[id] = value
return value
def get_list(self, table, field_index):
sql = "SELECT * FROM %s" % (table)
cursor = self.db.cursor()
@ -198,7 +204,7 @@ class CheckinDatabase:
break
list.append(row[0])
return list
def GetMetadataValue(self, name):
sql = "SELECT value FROM metadata WHERE name=%s"
sql_args = (name)
@ -209,7 +215,7 @@ class CheckinDatabase:
except TypeError:
return None
return value
def SetMetadataValue(self, name, value):
assert(self._version > 0)
sql = "REPLACE INTO metadata (name, value) VALUES (%s, %s)"
@ -222,7 +228,7 @@ class CheckinDatabase:
"\tname = %s\n"
"\tvalue = %s\n"
% (str(e), name, value))
def GetBranchID(self, branch, auto_set = 1):
return self.get_id("branches", "branch", branch, auto_set)
@ -240,13 +246,13 @@ class CheckinDatabase:
def GetFile(self, id):
return self.get("files", "file", id)
def GetAuthorID(self, author, auto_set = 1):
return self.get_id("people", "who", author, auto_set)
def GetAuthor(self, id):
return self.get("people", "who", id)
def GetRepositoryID(self, repository, auto_set = 1):
return self.get_id("repositories", "repository", repository, auto_set)
@ -257,7 +263,7 @@ class CheckinDatabase:
return self.get_list("repositories", repository)
def SQLGetDescriptionID(self, description, auto_set = 1):
description = utf8string(description)
description = self.utf8(description)
## lame string hash, blame Netscape -JMP
hash = len(description)
@ -330,7 +336,7 @@ class CheckinDatabase:
ci_when = cursor.fetchone()[0]
except TypeError:
return None
return dbi.TicksFromDateTime(ci_when)
def AddCommitList(self, commit_list):
@ -338,48 +344,55 @@ class CheckinDatabase:
self.AddCommit(commit)
def AddCommit(self, commit):
ci_when = dbi.DateTimeFromTicks(commit.GetTime() or 0.0)
ci_type = commit.GetTypeString()
who_id = self.GetAuthorID(commit.GetAuthor())
repository_id = self.GetRepositoryID(commit.GetRepository())
directory_id = self.GetDirectoryID(commit.GetDirectory())
file_id = self.GetFileID(commit.GetFile())
revision = commit.GetRevision()
sticky_tag = "NULL"
branch_id = self.GetBranchID(commit.GetBranch())
plus_count = commit.GetPlusCount() or '0'
minus_count = commit.GetMinusCount() or '0'
description_id = self.GetDescriptionID(commit.GetDescription())
props = {
'type' : commit.GetTypeString(),
'ci_when' : dbi.DateTimeFromTicks(commit.GetTime() or 0.0),
'whoid' : self.GetAuthorID(commit.GetAuthor()),
'repositoryid' : self.GetRepositoryID(commit.GetRepository()),
'dirid' : self.GetDirectoryID(commit.GetDirectory()),
'fileid' : self.GetFileID(commit.GetFile()),
'revision' : commit.GetRevision(),
'branchid' : self.GetBranchID(commit.GetBranch()),
'addedlines' : commit.GetPlusCount() or '0',
'removedlines' : commit.GetMinusCount() or '0',
'descid' : self.GetDescriptionID(commit.GetDescription()),
}
commits_table = self._version >= 1 and 'commits' or 'checkins'
sql = "REPLACE INTO %s" % (commits_table)
sql = sql + \
" (type,ci_when,whoid,repositoryid,dirid,fileid,revision,"\
" stickytag,branchid,addedlines,removedlines,descid)"\
"VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
sql_args = (ci_type, ci_when, who_id, repository_id,
directory_id, file_id, revision, sticky_tag, branch_id,
plus_count, minus_count, description_id)
cursor = self.db.cursor()
try:
cursor.execute(sql, sql_args)
# MySQL-specific INSERT-or-UPDATE with ID retrieval
cursor.execute(
'INSERT INTO '+commits_table+'('+','.join(i for i in props)+') VALUES ('+
', '.join('%s' for i in props)+') ON DUPLICATE KEY UPDATE id=LAST_INSERT_ID(id), '+
', '.join(i+'=VALUES('+i+')' for i in props),
tuple(props[i] for i in props)
)
commit_id = cursor.lastrowid
if self.index_content:
sphcur = self.sphinx.cursor()
content = commit.GetContent()
props['ci_when'] = str(int(commit.GetTime() or 0))
if len(content):
props['content'] = content
# Now, stored MIME type is only needed while searching
# It is guessed again when the file is displayed
props['mimetype'] = commit.GetMimeType()
props['id'] = str(commit_id)
del props['addedlines']
del props['removedlines']
del props['descid']
del props['type']
sphcur.execute(
'INSERT INTO '+self.sphinx_index+'('+','.join(i for i in props)+') VALUES ('+
','.join('%s' for i in props)+')',
tuple(props[i] for i in props)
)
except Exception, e:
raise Exception("Error adding commit: '%s'\n"
"Values were:\n"
"\ttype = %s\n"
"\tci_when = %s\n"
"\twhoid = %s\n"
"\trepositoryid = %s\n"
"\tdirid = %s\n"
"\tfileid = %s\n"
"\trevision = %s\n"
"\tstickytag = %s\n"
"\tbranchid = %s\n"
"\taddedlines = %s\n"
"\tremovedlines = %s\n"
"\tdescid = %s\n"
% ((str(e), ) + sql_args))
print ("Error adding commit: '"+str(e)+"'\nValues were:\n"+
"\n".join(i+'='+str(props[i]) for i in props))
raise
def SQLQueryListString(self, field, query_entry_list):
sqlList = []
@ -414,6 +427,67 @@ class CheckinDatabase:
return "(%s)" % (string.join(sqlList, " OR "))
def query_ids(self, in_field, table, id_field, name_field, lst):
    # Resolve a list of names into an "<in_field> IN (id, id, ...)" SQL
    # condition by looking the names up in the given lookup table.
    # Returns None when nothing was requested or nothing matched.
    if not lst:
        return None
    name_cond = self.SQLQueryListString(name_field, lst)
    cur = self.db.cursor()
    cur.execute('SELECT %s FROM %s WHERE %s' % (id_field, table, name_cond))
    matched_ids = [str(r[0]) for r in cur]
    if not matched_ids:
        return None
    return "%s IN (%s)" % (in_field, ','.join(matched_ids))
def CreateSphinxQueryString(self, query):
    """Build a SphinxQL SELECT statement for a full-text content search.

    Attribute filter names must match the Sphinx RT index schema
    (whoid, repositoryid, dirid, fileid, revision, branchid, ci_when).
    Returns the complete SphinxQL query string.
    """
    condList = [
        'MATCH(%s)' % (self.db.literal(query.content_query), ),
        self.query_ids('repositoryid', 'repositories', 'id', 'repository', query.repository_list),
        self.query_ids('branchid', 'branches', 'id', 'branch', query.branch_list),
        self.query_ids('dirid', 'dirs', 'id', 'dir', query.directory_list),
        self.query_ids('fileid', 'files', 'id', 'file', query.file_list),
        # The index stores the author attribute as 'whoid' (same name as
        # in the commits table), not 'authorid'.
        self.query_ids('whoid', 'people', 'id', 'who', query.author_list),
        # NOTE(review): 'descid' is not listed in the documented RT index
        # schema and is deleted before the Sphinx INSERT in AddCommit --
        # confirm this attribute actually exists in the index.
        self.query_ids('descid', 'descs', 'id', 'description', query.comment_list),
    ]
    if len(query.revision_list):
        condList.append("revision IN ("+','.join(self.db.literal(s) for s in query.revision_list)+")")
    if query.from_date:
        condList.append('ci_when>='+str(dbi.TicksFromDateTime(query.from_date)))
    if query.to_date:
        condList.append('ci_when<='+str(dbi.TicksFromDateTime(query.to_date)))
    # Relevance is always used as a secondary ordering key.
    if query.sort == 'date':
        order_by = 'ORDER BY `ci_when` DESC, `relevance` DESC'
    elif query.sort == 'date_rev':
        order_by = 'ORDER BY `ci_when` ASC, `relevance` DESC'
    else: # /* if query.sort == 'relevance' */
        order_by = 'ORDER BY `relevance` DESC'
    conditions = string.join((i for i in condList if i), " AND ")
    conditions = conditions and "WHERE %s" % conditions
    ## limit the number of rows requested or we could really slam
    ## a server with a large database
    limit = ""
    if query.limit:
        limit = "LIMIT %s" % (str(query.limit))
    elif self._row_limit:
        limit = "LIMIT %s" % (str(self._row_limit))
    fields = "id `id`, WEIGHT() `relevance`, `content`, `mimetype`"
    return "SELECT %s FROM %s %s %s %s" % (fields, self.sphinx_index, conditions, order_by, limit)
def CreateIdQueryString(self, ids):
    # Build an SQL statement fetching full commit rows (plus the resolved
    # repository, directory and file names) for the given list of commit
    # IDs, used to load the commits found by a Sphinx full-text search.
    if self._version >= 1:
        table = 'commits'
    else:
        table = 'checkins'
    sql = ('SELECT %s.*, repositories.repository AS repository_name, dirs.dir AS dir_name, files.file AS file_name'
           ' FROM %s, repositories, dirs, files'
           ' WHERE %s.id IN (%s) AND repositoryid=repositories.id'
           ' AND dirid=dirs.id AND fileid=files.id')
    return sql % (table, table, table, ','.join(ids))
def CreateSQLQueryString(self, query):
commits_table = self._version >= 1 and 'commits' or 'checkins'
fields = [
@ -427,7 +501,7 @@ class CheckinDatabase:
("dirs", "(%s.dirid=dirs.id)" % (commits_table)),
("files", "(%s.fileid=files.id)" % (commits_table))]
condList = []
if len(query.text_query):
tableList.append(("descs", "(descs.id=%s.descid)" % (commits_table)))
temp = "MATCH (descs.description) AGAINST (%s" % (self.db.literal(query.text_query))
@ -435,6 +509,7 @@ class CheckinDatabase:
fields.append("%s) AS relevance" % temp)
else:
fields.append("'' AS relevance")
fields.append("'' AS snippet")
if len(query.repository_list):
temp = self.SQLQueryListString("repositories.repository",
@ -478,16 +553,18 @@ class CheckinDatabase:
temp = "(%s.ci_when<=\"%s\")" % (commits_table, str(query.to_date))
condList.append(temp)
if query.sort == "date":
order_by = "ORDER BY %s.ci_when DESC,descid,%s.repositoryid" % (commits_table, commits_table)
if query.sort == "relevance" and len(query.text_query):
order_by = "ORDER BY relevance DESC,%s.ci_when DESC,descid,%s.repositoryid" % (commits_table, commits_table)
elif query.sort == "date_rev":
order_by = "ORDER BY %s.ci_when ASC,descid,%s.repositoryid" % (commits_table, commits_table)
elif query.sort == "author":
tableList.append(("people", "(%s.whoid=people.id)" % (commits_table)))
order_by = "ORDER BY people.who,descid,%s.repositoryid" % (commits_table)
elif query.sort == "file":
tableList.append(("files", "(%s.fileid=files.id)" % (commits_table)))
order_by = "ORDER BY files.file,descid,%s.repositoryid" % (commits_table)
elif query.sort == "relevance" and len(query.text_query):
order_by = "ORDER BY relevance DESC,%s.ci_when DESC,descid,%s.repositoryid" % (commits_table, commits_table)
else: # /* if query.sort == "date": */
order_by = "ORDER BY %s.ci_when DESC,descid,%s.repositoryid" % (commits_table, commits_table)
## exclude duplicates from the table list, and split out join
## conditions from table names. In future, the join conditions
@ -517,7 +594,7 @@ class CheckinDatabase:
fields, tables, conditions, order_by, limit)
return sql
def check_commit_access(self, repos, dir, file, rev):
if self.authorizer:
rootname = repos.split('/')
@ -528,19 +605,60 @@ class CheckinDatabase:
return True
def RunQuery(self, query):
sql = self.CreateSQLQueryString(query)
cursor = self.db.cursor()
cursor.execute(sql)
if len(query.content_query) and self.sphinx:
# Use Sphinx to search on document content
sql = self.CreateSphinxQueryString(query)
cursor = self.sphinx.cursor()
cursor.execute(sql)
sphinx_rows = list((str(docid), rel, content, mimetype) for docid, rel, content, mimetype in cursor)
if len(sphinx_rows):
# Fetch snippets
snippet_options = {
'around': 15,
'limit': 200,
'before_match': '<span style="color:red">',
'after_match': '</span>',
'chunk_separator': ' ... ',
}
preformatted_mime = 'text/(?!html|xml).*'
snippets = {}
bm_html = cgi.escape(snippet_options['before_match'])
am_html = cgi.escape(snippet_options['after_match'])
for docid, rel, content, mimetype in sphinx_rows:
cursor.execute(
'CALL SNIPPETS(%s, %s, %s'+''.join(', %s AS '+i for i in snippet_options)+')',
(content, self.sphinx_index, query.content_query) + tuple(snippet_options.values())
)
s, = cursor.fetchone()
s = cgi.escape(s)
if re.match(preformatted_mime, mimetype):
s = s.replace('\n', '<br />')
s = s.replace(bm_html, snippet_options['before_match'])
s = s.replace(am_html, snippet_options['after_match'])
snippets[docid] = s
# Fetch all fields from MySQL
sql = self.CreateIdQueryString((docid for (docid, _, _, _) in sphinx_rows))
cursor = self.db.cursor()
cursor.execute(sql)
byid = {}
for row in cursor:
byid[str(row[0])] = row
rows = list(byid[docid] + (rel, snippets[docid]) for (docid, rel, _, _) in sphinx_rows if docid in byid)
else:
rows = []
else:
# Use regular queries when document content is not searched
sql = self.CreateSQLQueryString(query)
cursor = self.db.cursor()
cursor.execute(sql)
rows = list(cursor)
while 1:
row = cursor.fetchone()
if not row:
break
(dbType, dbCI_When, dbAuthorID, dbRepositoryID, dbDirID,
# Convert rows to commit objects
for row in rows:
(dbId, dbType, dbCI_When, dbAuthorID, dbRepositoryID, dbDirID,
dbFileID, dbRevision, dbStickyTag, dbBranchID, dbAddedLines,
dbRemovedLines, dbDescID, dbRepositoryName, dbDirName,
dbFileName, dbRelevance) = row
dbFileName, dbRelevance, dbSnippet) = row
if not self.check_commit_access(dbRepositoryName, dbDirName, dbFileName, dbRevision):
continue
@ -564,6 +682,7 @@ class CheckinDatabase:
commit.SetMinusCount(dbRemovedLines)
commit.SetDescriptionID(dbDescID)
commit.SetRelevance(dbRelevance)
commit.SetSnippet(dbSnippet)
query.AddCommit(commit)
@ -623,46 +742,21 @@ class CheckinDatabase:
raise UnknownRepositoryError("Unknown repository '%s'"
% (repository))
if (self._version >= 1):
self.sql_delete('repositories', 'id', rep_id)
self.sql_purge('commits', 'repositoryid', 'id', 'repositories')
self.sql_purge('files', 'id', 'fileid', 'commits')
self.sql_purge('dirs', 'id', 'dirid', 'commits')
self.sql_purge('branches', 'id', 'branchid', 'commits')
self.sql_purge('descs', 'id', 'descid', 'commits')
self.sql_purge('people', 'id', 'whoid', 'commits')
else:
sql = "SELECT * FROM checkins WHERE repositoryid=%s"
sql_args = (rep_id, )
cursor = self.db.cursor()
cursor.execute(sql, sql_args)
checkins = []
while 1:
try:
(ci_type, ci_when, who_id, repository_id,
dir_id, file_id, revision, sticky_tag, branch_id,
plus_count, minus_count, description_id) = \
cursor.fetchone()
except TypeError:
break
checkins.append([file_id, dir_id, branch_id,
description_id, who_id])
#self.sql_delete('repositories', 'id', rep_id)
self.sql_delete('checkins', 'repositoryid', rep_id)
for checkin in checkins:
self.sql_delete('files', 'id', checkin[0], 'fileid')
self.sql_delete('dirs', 'id', checkin[1], 'dirid')
self.sql_delete('branches', 'id', checkin[2], 'branchid')
self.sql_delete('descs', 'id', checkin[3], 'descid')
self.sql_delete('people', 'id', checkin[4], 'whoid')
checkins_table = self._version >= 1 and 'commits' or 'checkins'
self.sql_delete('repositories', 'id', rep_id)
self.sql_purge(checkins_table, 'repositoryid', 'id', 'repositories')
self.sql_purge('files', 'id', 'fileid', checkins_table)
self.sql_purge('dirs', 'id', 'dirid', checkins_table)
self.sql_purge('branches', 'id', 'branchid', checkins_table)
self.sql_purge('descs', 'id', 'descid', checkins_table)
self.sql_purge('people', 'id', 'whoid', checkins_table)
# Reset all internal id caches. We could be choosier here,
# but let's just be as safe as possible.
self._get_cache = {}
self._get_id_cache = {}
self._desc_id_cache = {}
class DatabaseVersionError(Exception):
pass
@ -678,7 +772,7 @@ class Commit:
# Commit type codes; GetTypeString maps them to the strings
# 'Change' / 'Add' / 'Remove' used elsewhere.
CHANGE = 0
ADD = 1
REMOVE = 2
def __init__(self):
self.__directory = ''
self.__file = ''
@ -690,15 +784,20 @@ class Commit:
self.__minuscount = ''
self.__description = ''
self.__relevance = ''
self.__snippet = ''
self.__gmt_time = 0.0
self.__type = Commit.CHANGE
self.__content = ''
self.__mimetype = ''
self.__base_path = ''
self.__base_rev = ''
def SetRepository(self, repository):
    # Repository this commit belongs to (path/name as stored in the DB).
    self.__repository = repository
def GetRepository(self):
    # Return the repository set by SetRepository().
    return self.__repository
def SetDirectory(self, dir):
    # Directory (within the repository) of the committed file.
    self.__directory = dir
@ -710,7 +809,7 @@ class Commit:
def GetFile(self):
    # Return the file name set on this commit.
    return self.__file
def SetRevision(self, revision):
    # Revision string of the committed file (e.g. a CVS rev number).
    self.__revision = revision
@ -758,12 +857,19 @@ class Commit:
def GetDescription(self):
    # Return the stored description (commit log message text).
    return self.__description
# Relevance and snippet are used when querying commit database
def SetRelevance(self, relevance):
    # Full-text search relevance score for this commit; '' when the
    # query had no full-text component.
    self.__relevance = relevance
def GetRelevance(self):
    # Return the full-text relevance score set by SetRelevance().
    return self.__relevance
def SetSnippet(self, snippet):
    # HTML snippet of matched file content (built with Sphinx
    # CALL SNIPPETS during RunQuery); '' when content was not searched.
    self.__snippet = snippet
def GetSnippet(self):
    # Return the content-match snippet set by SetSnippet().
    return self.__snippet
def SetTypeChange(self):
    # Mark this commit as a modification (vs. add/remove).
    self.__type = Commit.CHANGE
@ -784,66 +890,80 @@ class Commit:
elif self.__type == Commit.REMOVE:
return 'Remove'
# File content (extracted text), optional, indexed with Sphinx
def SetContent(self, content):
    # Extracted text of the committed file, destined for the Sphinx
    # full-text index.
    self.__content = content
def GetContent(self):
    # Return the extracted file content set by SetContent().
    return self.__content
# MIME type, optional, now only stored in Sphinx
def SetMimeType(self, mimetype):
    # MIME type of the file content; kept only in the Sphinx index.
    self.__mimetype = mimetype
def GetMimeType(self):
    # Return the MIME type set by SetMimeType().
    return self.__mimetype
## LazyCommit overrides a few methods of Commit to only retrieve
## it's properties as they are needed
class LazyCommit(Commit):
def __init__(self, db):
Commit.__init__(self)
self.__db = db
def __init__(self, db):
Commit.__init__(self)
self.__db = db
def SetFileID(self, dbFileID):
self.__dbFileID = dbFileID
def SetFileID(self, dbFileID):
self.__dbFileID = dbFileID
def GetFileID(self):
return self.__dbFileID
def GetFileID(self):
return self.__dbFileID
def GetFile(self):
return self.__db.GetFile(self.__dbFileID)
def GetFile(self):
return self.__db.GetFile(self.__dbFileID)
def SetDirectoryID(self, dbDirID):
self.__dbDirID = dbDirID
def SetDirectoryID(self, dbDirID):
self.__dbDirID = dbDirID
def GetDirectoryID(self):
return self.__dbDirID
def GetDirectoryID(self):
return self.__dbDirID
def GetDirectory(self):
return self.__db.GetDirectory(self.__dbDirID)
def GetDirectory(self):
return self.__db.GetDirectory(self.__dbDirID)
def SetRepositoryID(self, dbRepositoryID):
self.__dbRepositoryID = dbRepositoryID
def SetRepositoryID(self, dbRepositoryID):
self.__dbRepositoryID = dbRepositoryID
def GetRepositoryID(self):
return self.__dbRepositoryID
def GetRepositoryID(self):
return self.__dbRepositoryID
def GetRepository(self):
return self.__db.GetRepository(self.__dbRepositoryID)
def GetRepository(self):
return self.__db.GetRepository(self.__dbRepositoryID)
def SetAuthorID(self, dbAuthorID):
self.__dbAuthorID = dbAuthorID
def SetAuthorID(self, dbAuthorID):
self.__dbAuthorID = dbAuthorID
def GetAuthorID(self):
return self.__dbAuthorID
def GetAuthorID(self):
return self.__dbAuthorID
def GetAuthor(self):
return self.__db.GetAuthor(self.__dbAuthorID)
def GetAuthor(self):
return self.__db.GetAuthor(self.__dbAuthorID)
def SetBranchID(self, dbBranchID):
self.__dbBranchID = dbBranchID
def SetBranchID(self, dbBranchID):
self.__dbBranchID = dbBranchID
def GetBranchID(self):
return self.__dbBranchID
def GetBranchID(self):
return self.__dbBranchID
def GetBranch(self):
return self.__db.GetBranch(self.__dbBranchID)
def GetBranch(self):
return self.__db.GetBranch(self.__dbBranchID)
def SetDescriptionID(self, dbDescID):
self.__dbDescID = dbDescID
def SetDescriptionID(self, dbDescID):
self.__dbDescID = dbDescID
def GetDescriptionID(self):
return self.__dbDescID
def GetDescriptionID(self):
return self.__dbDescID
def GetDescription(self):
return self.__db.GetDescription(self.__dbDescID)
def GetDescription(self):
return self.__db.GetDescription(self.__dbDescID)
## QueryEntry holds data on one match-type in the SQL database
## match is: "exact", "like", or "regex"
@ -858,8 +978,8 @@ class CheckinDatabaseQuery:
def __init__(self):
## sorting
self.sort = "date"
## repository to query
## repository, branch, etc to query
self.repository_list = []
self.branch_list = []
self.directory_list = []
@ -867,7 +987,11 @@ class CheckinDatabaseQuery:
self.revision_list = []
self.author_list = []
self.comment_list = []
## text_query = Fulltext query on comments
## content_query = Fulltext query on content
self.text_query = ""
self.content_query = ""
## date range in DBI 2.0 timedate objects
self.from_date = None
@ -886,6 +1010,9 @@ class CheckinDatabaseQuery:
def SetTextQuery(self, query):
self.text_query = query
def SetContentQuery(self, query):
self.content_query = query
def SetRepository(self, repository, match = "exact"):
self.repository_list.append(QueryEntry(repository, match))
@ -921,7 +1048,7 @@ class CheckinDatabaseQuery:
def SetFromDateHoursAgo(self, hours_ago):
ticks = time.time() - (3600 * hours_ago)
self.from_date = dbi.DateTimeFromTicks(ticks)
def SetFromDateDaysAgo(self, days_ago):
ticks = time.time() - (86400 * days_ago)
self.from_date = dbi.DateTimeFromTicks(ticks)
@ -942,7 +1069,7 @@ class CheckinDatabaseQuery:
##
def CreateCommit():
    # Module-level factory: return a new, empty Commit object.
    return Commit()
def CreateCheckinQuery():
    # Module-level factory: return a new, empty CheckinDatabaseQuery.
    return CheckinDatabaseQuery()
@ -953,9 +1080,23 @@ def ConnectDatabase(cfg, authorizer=None, readonly=0):
else:
user = cfg.cvsdb.user
passwd = cfg.cvsdb.passwd
db = CheckinDatabase(cfg.cvsdb.host, cfg.cvsdb.port, cfg.cvsdb.socket, user, passwd,
cfg.cvsdb.database_name, cfg.cvsdb.row_limit, cfg.cvsdb.fulltext_min_relevance,
authorizer)
db = CheckinDatabase(
host = cfg.cvsdb.host,
port = cfg.cvsdb.port,
socket = cfg.cvsdb.socket,
user = user,
passwd = passwd,
database = cfg.cvsdb.database_name,
row_limit = cfg.cvsdb.row_limit,
min_relevance = cfg.cvsdb.fulltext_min_relevance,
authorizer = authorizer,
index_content = cfg.cvsdb.index_content,
sphinx_host = cfg.cvsdb.sphinx_host,
sphinx_port = int(cfg.cvsdb.sphinx_port),
sphinx_socket = cfg.cvsdb.sphinx_socket,
sphinx_index = cfg.cvsdb.sphinx_index,
cfg = cfg,
)
db.Connect()
return db

View File

@ -31,8 +31,8 @@ import popen
class BaseCVSRepository(vclib.Repository):
def __init__(self, name, rootpath, authorizer, utilities):
if not os.path.isdir(rootpath):
raise vclib.ReposNotFound(name)
raise vclib.ReposNotFound(name)
self.name = name
self.rootpath = rootpath
self.auth = authorizer
@ -53,7 +53,7 @@ class BaseCVSRepository(vclib.Repository):
def authorizer(self):
    # Return the authorizer this repository was constructed with.
    return self.auth
def itemtype(self, path_parts, rev):
basepath = self._getpath(path_parts)
kind = None
@ -74,12 +74,12 @@ class BaseCVSRepository(vclib.Repository):
def itemprops(self, path_parts, rev):
    # itemtype() is called only for its authorization check side effect.
    self.itemtype(path_parts, rev) # does auth-check
    return {} # CVS doesn't support properties
def listdir(self, path_parts, rev, options):
if self.itemtype(path_parts, rev) != vclib.DIR: # does auth-check
raise vclib.Error("Path '%s' is not a directory."
% (string.join(path_parts, "/")))
# Only RCS files (*,v) and subdirs are returned.
data = [ ]
full_name = self._getpath(path_parts)
@ -115,7 +115,7 @@ class BaseCVSRepository(vclib.Repository):
data.append(CVSDirEntry(name, kind, errors, 1))
return data
def _getpath(self, path_parts):
return apply(os.path.join, (self.rootpath,) + tuple(path_parts))
@ -177,7 +177,7 @@ class BinCVSRepository(BaseCVSRepository):
used_rlog = 0
tip_rev = None # used only if we have to fallback to using rlog
fp = self.rcs_popen('co', (rev_flag, full_name), 'rb')
fp = self.rcs_popen('co', (rev_flag, full_name), 'rb')
try:
filename, revision = _parse_co_header(fp)
except COMissingRevision:
@ -191,14 +191,14 @@ class BinCVSRepository(BaseCVSRepository):
used_rlog = 1
if not tip_rev:
raise vclib.Error("Unable to find valid revision")
fp = self.rcs_popen('co', ('-p' + tip_rev.string, full_name), 'rb')
fp = self.rcs_popen('co', ('-p' + tip_rev.string, full_name), 'rb')
filename, revision = _parse_co_header(fp)
if filename is None:
# CVSNT's co exits without any output if a dead revision is requested.
# Bug at http://www.cvsnt.org/cgi-bin/bugzilla/show_bug.cgi?id=190
# As a workaround, we invoke rlog to find the first non-dead revision
# that precedes it and check out that revision instead. Of course,
# that precedes it and check out that revision instead. Of course,
# if we've already invoked rlog above, we just reuse its output.
if not used_rlog:
tip_rev = self._get_tip_revision(full_name + ',v', rev)
@ -207,7 +207,7 @@ class BinCVSRepository(BaseCVSRepository):
raise vclib.Error(
'Could not find non-dead revision preceding "%s"' % rev)
fp = self.rcs_popen('co', ('-p' + tip_rev.undead.string,
full_name), 'rb')
full_name), 'rb')
filename, revision = _parse_co_header(fp)
if filename is None:
@ -278,7 +278,7 @@ class BinCVSRepository(BaseCVSRepository):
if self.itemtype(path_parts, rev) != vclib.FILE: # does auth-check
raise vclib.Error("Path '%s' is not a file."
% (string.join(path_parts, "/")))
# Invoke rlog
rcsfile = self.rcsfile(path_parts, 1)
if rev and options.get('cvs_pass_rev', 0):
@ -341,7 +341,7 @@ class BinCVSRepository(BaseCVSRepository):
def revinfo(self, rev):
    # Per-revision metadata lookup is not available for this backend.
    raise vclib.UnsupportedFeature
def rawdiff(self, path_parts1, rev1, path_parts2, rev2, type, options={}):
"""see vclib.Repository.rawdiff docstring
@ -439,9 +439,9 @@ def _match_revs_tags(revlist, taglist):
example: if revision is 1.2.3.4, parent is 1.2
"undead"
If the revision is dead, then this is a reference to the first
If the revision is dead, then this is a reference to the first
previous revision which isn't dead, otherwise it's a reference
to itself. If all the previous revisions are dead it's None.
to itself. If all the previous revisions are dead it's None.
"branch_number"
tuple representing branch number or empty tuple if on trunk
@ -653,7 +653,7 @@ def _parse_co_header(fp):
pass
else:
break
raise COMalformedOutput, "Unable to find revision in co output stream"
# if your rlog doesn't use 77 '=' characters, then this must change
@ -674,7 +674,7 @@ _EOF_ERROR = 'error message found' # rlog issued an error
# ^rlog\: (.*)(?:\:\d+)?\: (.*)$
#
# But for some reason the windows version of rlog omits the "rlog: " prefix
# for the first error message when the standard error stream has been
# for the first error message when the standard error stream has been
# redirected to a file or pipe. (the prefix is present in subsequent errors
# and when rlog is run from the console). So the expression below is more
# complicated
@ -703,7 +703,7 @@ def _parse_log_header(fp):
Returns: filename, default branch, tag dictionary, lock dictionary,
rlog error message, and eof flag
"""
filename = head = branch = msg = ""
taginfo = { } # tag name => number
lockinfo = { } # revision => locker
@ -732,7 +732,7 @@ def _parse_log_header(fp):
else:
# oops. this line isn't lock info. stop parsing tags.
state = 0
if state == 0:
if line[:9] == 'RCS file:':
filename = line[10:-1]
@ -902,7 +902,7 @@ def _file_log(revs, taginfo, lockinfo, cur_branch, filter):
except ValueError:
view_tag = None
else:
tags.append(view_tag)
tags.append(view_tag)
# Match up tags and revisions
_match_revs_tags(revs, tags)
@ -910,13 +910,13 @@ def _file_log(revs, taginfo, lockinfo, cur_branch, filter):
# Match up lockinfo and revision
for rev in revs:
rev.lockinfo = lockinfo.get(rev.string)
# Add artificial ViewVC tag HEAD, which acts like a non-branch tag pointing
# at the latest revision on the MAIN branch. The HEAD revision doesn't have
# anything to do with the "head" revision number specified in the RCS file
# and in rlog output. HEAD refers to the revision that the CVS and RCS co
# commands will check out by default, whereas the "head" field just refers
# to the highest revision on the trunk.
# to the highest revision on the trunk.
taginfo['HEAD'] = _add_tag('HEAD', taginfo['MAIN'].co_rev)
# Determine what revisions to return
@ -954,7 +954,7 @@ def _file_log(revs, taginfo, lockinfo, cur_branch, filter):
_remove_tag(view_tag)
else:
filtered_revs = revs
return filtered_revs
def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
@ -1004,7 +1004,7 @@ def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
= _parse_log_header(rlog)
if eof == _EOF_LOG:
# the rlog output ended early. this can happen on errors that rlog
# the rlog output ended early. this can happen on errors that rlog
# thinks are so serious that it stops parsing the current file and
# refuses to parse any of the files that come after it. one of the
# errors that triggers this obnoxious behavior looks like:
@ -1052,8 +1052,8 @@ def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
tag = None
# we don't care about the specific values -- just the keys and whether
# the values point to branches or revisions. this the fastest way to
# merge the set of keys and keep values that allow us to make the
# the values point to branches or revisions. this the fastest way to
# merge the set of keys and keep values that allow us to make the
# distinction between branch tags and normal tags
alltags.update(taginfo)
@ -1098,7 +1098,7 @@ def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
file.dead = 0
#file.errors.append("No revisions exist on %s" % (view_tag or "MAIN"))
file.absent = 1
# done with this file now, skip the rest of this file's revisions
if not eof:
_skip_file(rlog)
@ -1211,7 +1211,7 @@ def _newest_file(dirpath):
newest_time = 0
### FIXME: This sucker is leaking unauthorized paths! ###
for subfile in os.listdir(dirpath):
### filter CVS locks? stale NFS handles?
if subfile[-2:] != ',v':

View File

@ -1,4 +1,3 @@
#
# Copyright (C) 1999-2009 The ViewCVS Group. All Rights Reserved.
#
# By using this file, you agree to the terms and conditions set forth in
@ -68,7 +67,6 @@ docroot_magic_path = '*docroot*'
viewcvs_mime_type = 'text/vnd.viewcvs-markup'
alt_mime_type = 'text/x-cvsweb-markup'
view_roots_magic = '*viewroots*'
magic_buf_size = 4096
default_mime_type = 'application/octet-stream'
# Put here the variables we need in order to hold our state - they
@ -121,9 +119,8 @@ class Request:
# check for an authenticated username
self.username = server.getenv('REMOTE_USER')
# construct MIME magic
self.ms = None
self.ms_fail = 0
# repository object cache
self.all_repos = {}
# if we allow compressed output, see if the client does too
self.gzip_compress_level = 0
@ -134,6 +131,9 @@ class Request:
string.split(http_accept_encoding, ","))):
self.gzip_compress_level = 9 # make this configurable?
def utf8(self, value):
    # Guess value's charset and re-encode it to UTF-8 via the
    # configured charset guesser (cfg.guesser()).
    return self.cfg.guesser().utf8(value)
def create_repos(self, rootname):
if not rootname:
return None
@ -677,7 +677,7 @@ def _validate_mimetype(value):
return value in (viewcvs_mime_type, alt_mime_type, 'text/plain')
# obvious things here. note that we don't need uppercase for alpha.
_re_validate_alpha = re.compile('^[a-z]+$')
_re_validate_alpha = re.compile('^[a-z_]+$')
_re_validate_number = re.compile('^[0-9]+$')
_re_validate_boolint = re.compile('^[01]$')
@ -743,6 +743,7 @@ _legal_params = {
'who_match' : _re_validate_alpha,
'comment' : None,
'comment_match' : _re_validate_alpha,
'search_content': None,
'querysort' : _re_validate_alpha,
'date' : _re_validate_alpha,
'hours' : _re_validate_number,
@ -988,7 +989,7 @@ def nav_path(request):
is_last = len(path_parts) == len(request.path_parts)
if request.roottype == 'cvs':
item = _item(name=cvsdb.utf8string(part), href=None)
item = _item(name=request.utf8(part), href=None)
else:
item = _item(name=part, href=None)
@ -1248,7 +1249,7 @@ def common_template_data(request, revision=None, mime_type=None):
cfg = request.cfg
where = request.where
if request.roottype == 'cvs':
where = cvsdb.utf8string(where)
where = request.utf8(where)
where = request.server.escape(where)
# Initialize data dictionary members (sorted alphanumerically)
@ -1444,28 +1445,31 @@ def markup_stream_pygments(request, cfg, blame_data, fp, filename, mime_type):
get_lexer_for_mimetype, \
get_lexer_for_filename
from pygments.lexers._mapping import LEXERS
# Hack for shell mime types:
LEXERS['BashLexer'] = ('pygments.lexers.other', 'Bash', ('bash', 'sh'), ('*.sh',), ('application/x-sh', 'application/x-shellscript', 'text/x-sh', 'text/x-shellscript'))
encoding = 'guess'
if cfg.options.detect_encoding:
try:
import chardet
encoding = 'chardet'
except (SyntaxError, ImportError):
pass
try:
lexer = get_lexer_for_mimetype(mime_type,
encoding=encoding,
encoding='utf-8',
stripnl=False)
except ClassNotFound:
try:
lexer = get_lexer_for_filename(filename,
encoding=encoding,
encoding='utf-8',
stripnl=False)
except ClassNotFound:
use_pygments = 0
except ImportError:
use_pygments = 0
# Detect encoding by calling chardet ourselves,
# to support it in non-highlighting mode
content = fp.read()
c, encoding = cfg.guesser().guess_charset(content)
if encoding:
content = c
else:
encoding = 'unknown'
# If we aren't going to be highlighting anything, just return the
# BLAME_SOURCE. If there's no blame_source, we'll generate a fake
# one from the file contents we fetch with PATH and REV.
@ -1475,11 +1479,7 @@ def markup_stream_pygments(request, cfg, blame_data, fp, filename, mime_type):
else:
lines = []
line_no = 0
while 1:
line = fp.readline()
if not line:
break
line = cvsdb.utf8string(line)
for line in content.split('\n'):
line_no = line_no + 1
item = vclib.Annotation(cgi.escape(line), line_no,
None, None, None, None)
@ -1508,19 +1508,11 @@ def markup_stream_pygments(request, cfg, blame_data, fp, filename, mime_type):
self.blame_data.append(item)
self.line_no = self.line_no + 1
ps = PygmentsSink(blame_source)
fpd = fp.read()
try:
fpdat = unicode(fpd,'utf-8')
except:
try:
fpdat = unicode(fpd,'cp1251')
except:
fpdat = fpd
highlight(fpdat, lexer,
highlight(content, lexer,
HtmlFormatter(nowrap=True,
classprefix='pygments-',
encoding='utf-8'), ps)
return ps.blame_data
return ps.blame_data, encoding
def make_time_string(date, cfg):
"""Returns formatted date string in either local time or UTC.
@ -1594,6 +1586,7 @@ def calculate_mime_type(request, path_parts, rev):
return mime_type
except:
pass
# FIXME rewrite to use viewvcmagic
return guess_mime(path_parts[-1])
def markup_or_annotate(request, is_annotate):
@ -1605,21 +1598,12 @@ def markup_or_annotate(request, is_annotate):
mime_type = calculate_mime_type(request, path, rev)
if not mime_type or mime_type == default_mime_type:
if request.ms is None and not request.ms_fail:
try:
import magic
request.ms = magic.open(magic.MAGIC_NONE | magic.MAGIC_MIME)
request.ms.load()
except:
request.ms_fail = 1
if request.ms:
try:
fp, revision = request.repos.openfile(path, rev)
buffer = fp.read(magic_buf_size)
fp.close()
mime_type = request.ms.buffer(buffer)
except:
pass
try:
fp, revision = request.repos.openfile(path, rev)
mime_type = request.cfg.guesser().guess_mime(None, None, fp)
fp.close()
except:
raise
# Is this a binary type?
if is_binary(request.cfg, mime_type):
@ -1657,9 +1641,10 @@ def markup_or_annotate(request, is_annotate):
if check_freshness(request, None, revision, weak=1):
fp.close()
return
lines = markup_stream_pygments(request, cfg, blame_source, fp,
path[-1], mime_type)
lines, charset = markup_stream_pygments(request, cfg, blame_source, fp, path[-1], mime_type)
fp.close()
if mime_type.find(';') < 0:
mime_type = mime_type+'; charset='+charset
data = common_template_data(request, revision)
data.merge(ezt.TemplateData({
@ -1910,7 +1895,7 @@ def view_directory(request):
row.short_log = format_log(file.log, cfg)
row.log = htmlify(file.log, cfg.options.mangle_email_addresses)
row.lockinfo = file.lockinfo
row.name = request.server.escape(cvsdb.utf8string(file.name))
row.name = request.server.escape(request.utf8(file.name))
row.anchor = row.name
row.pathtype = (file.kind == vclib.FILE and 'file') or \
(file.kind == vclib.DIR and 'dir')
@ -2285,7 +2270,7 @@ def view_log(request):
entry.ago = html_time(request, rev.date, 1)
entry.log = rev.log or ""
if cvs:
entry.log = cvsdb.utf8string(entry.log)
entry.log = request.utf8(entry.log)
entry.log = htmlify(entry.log, cfg.options.mangle_email_addresses)
entry.size = rev.size
entry.lockinfo = rev.lockinfo
@ -2770,7 +2755,7 @@ class DiffSource:
self.save_line = None
self.line_number = None
self.prev_line_number = None
# keep track of where we are during an iteration
self.idx = -1
self.last = None
@ -2867,7 +2852,7 @@ class DiffSource:
diff_code = line[0]
output = self._format_text(line[1:])
output = cvsdb.utf8string(output)
output = self.cfg.guesser().utf8(output)
if diff_code == '+':
if self.state == 'dump':
@ -3644,6 +3629,7 @@ def view_queryform(request):
'who_match' : request.query_dict.get('who_match', 'exact'),
'comment' : request.query_dict.get('comment', ''),
'comment_match' : request.query_dict.get('comment_match', 'fulltext'),
'search_content' : request.query_dict.get('search_content', ''),
'querysort' : request.query_dict.get('querysort', 'date'),
'date' : request.query_dict.get('date', 'hours'),
'hours' : request.query_dict.get('hours', '2'),
@ -3653,6 +3639,7 @@ def view_queryform(request):
'query_hidden_values' : query_hidden_values,
'limit_changes' : limit_changes,
'dir_href' : dir_href,
'enable_search_content' : request.cfg.cvsdb.index_content,
}))
generate_page(request, "query_form", data)
@ -3791,7 +3778,8 @@ def build_commit(request, files, max_files, dir_strip, format):
plus_count = 0
minus_count = 0
found_unreadable = 0
all_repos = {}
if not request.all_repos:
request.all_repos = {}
for f in files:
dirname = f.GetDirectory()
@ -3810,17 +3798,19 @@ def build_commit(request, files, max_files, dir_strip, format):
# Check path access (since the commits database logic bypasses the
# vclib layer and, thus, the vcauth stuff that layer uses).
my_repos = all_repos.get(f.GetRepository(), '')
my_repos = request.all_repos.get(f.GetRepository(), '')
if not my_repos:
try:
my_repos = all_repos[f.GetRepository()] = request.create_repos(f.GetRepository())
my_repos = request.all_repos[f.GetRepository()] = request.create_repos(f.GetRepository())
except:
my_repos = None
if not my_repos:
return None
if my_repos['roottype'] == 'cvs':
try: where = unicode(where,'utf-8')
# we store UTF-8 in the DB
try: where = where.decode('utf-8')
except: pass
# FIXME maybe store "real" filesystem path in the DB instead of having such setting?
try: where = where.encode(cfg.options.cvs_ondisk_charset)
except: pass
path_parts = _path_parts(where)
@ -3907,24 +3897,27 @@ def build_commit(request, files, max_files, dir_strip, format):
if max_files and num_allowed > max_files:
continue
commit_files.append(_item(date=commit_time,
dir=request.server.escape(dirname),
file=request.server.escape(filename),
author=request.server.escape(f.GetAuthor()),
rev=rev,
branch=f.GetBranch(),
plus=plus,
minus=minus,
type=change_type,
dir_href=dir_href,
log_href=log_href,
view_href=view_href,
download_href=download_href,
prefer_markup=prefer_markup,
diff_href=diff_href,
root=my_repos,
path=where,
path_prev=path_prev))
commit_files.append(_item(
date=commit_time,
dir=request.server.escape(dirname),
file=request.server.escape(filename),
author=request.server.escape(f.GetAuthor()),
rev=rev,
branch=f.GetBranch(),
plus=plus,
minus=minus,
type=change_type,
snippet=f.GetSnippet(),
dir_href=dir_href,
log_href=log_href,
view_href=view_href,
download_href=download_href,
prefer_markup=prefer_markup,
diff_href=diff_href,
root=my_repos,
path=where,
path_prev=path_prev,
))
# No files survived authz checks? Let's just pretend this
# little commit didn't happen, shall we?
@ -4115,6 +4108,7 @@ def view_query(request):
who_match = request.query_dict.get('who_match', 'exact')
comment = request.query_dict.get('comment', '')
comment_match = request.query_dict.get('comment_match', 'fulltext')
search_content = request.query_dict.get('search_content', '')
querysort = request.query_dict.get('querysort', 'date')
date = request.query_dict.get('date', 'hours')
hours = request.query_dict.get('hours', '2')
@ -4126,7 +4120,7 @@ def view_query(request):
cfg.options.limit_changes))
match_types = { 'exact':1, 'like':1, 'glob':1, 'regex':1, 'notregex':1 }
sort_types = { 'date':1, 'author':1, 'file':1 }
sort_types = { 'date':1, 'date_rev':1, 'author':1, 'file':1, 'relevance':1 }
date_types = { 'hours':1, 'day':1, 'week':1, 'month':1,
'all':1, 'explicit':1 }
@ -4193,6 +4187,8 @@ def view_query(request):
query.SetComment(comment, comment_match)
else:
query.SetTextQuery(comment)
if search_content:
query.SetContentQuery(search_content)
query.SetSortMethod(querysort)
if date == 'hours':
query.SetFromDateHoursAgo(int(hours))

70
lib/viewvcmagic.py Normal file
View File

@ -0,0 +1,70 @@
#!/usr/bin/python
# Charset and MIME type guessing helpers ("content magic") for ViewVC.
import mimetypes

# chardet is optional; when unavailable we fall back to trying UTF-8
# and then the configured list of candidate encodings.
have_chardet = 0
try:
    import chardet
    have_chardet = 1
except ImportError:
    pass

class ContentMagic:

    def __init__(self, encodings):
        # encodings: colon-separated candidate charsets, e.g. "utf-8:cp1251",
        # tried in order when automatic detection fails.
        self.encodings = encodings.split(':')
        self.mime_magic = None
        self.errors = []
        # Try to load libmagic for content-based MIME detection. It is
        # optional, so remember the failure instead of raising.
        try:
            import magic
            self.mime_magic = magic.open(magic.MAGIC_MIME_TYPE)
            self.mime_magic.load()
        except Exception as e:
            self.errors.append(e)

    # returns MIME type
    def guess_mime(self, mime, filename, tempfile):
        # mime: previously known MIME type, if any ('application/octet-stream'
        # is treated as unknown). filename: used for extension-based guessing.
        # tempfile: a path string or a readable file object; used with libmagic
        # as the last resort. NOTE: when tempfile is a file object, up to 4096
        # bytes are consumed from it (the caller must rewind if needed).
        if mime == 'application/octet-stream':
            mime = ''
        if not mime and filename:
            mime = mimetypes.guess_type(filename)[0]
        if not mime and tempfile and self.mime_magic:
            if isinstance(tempfile, str):
                mime = self.mime_magic.file(tempfile)
            else:
                mime = self.mime_magic.buffer(tempfile.read(4096))
        return mime

    # returns (unicode_content, charset); charset is None when undecodable
    def guess_charset(self, content):
        charset = None
        if have_chardet:
            try:
                guess = chardet.detect(content)
                # BUGFIX: previously, when chardet found no encoding, the raw
                # result dict stayed bound to `charset` and leaked out as the
                # return value (also skipping the fallback loop below).
                if guess and guess.get('encoding'):
                    charset = guess['encoding']
                    content = content.decode(charset)
            except Exception:
                charset = None
        else:
            # No chardet: optimistically try UTF-8 first.
            charset = 'utf-8'
            try:
                content = content.decode('utf-8')
            except Exception:
                charset = None
        # Then try the configured candidate encodings, in order.
        if charset is None:
            for charset in self.encodings:
                try:
                    content = content.decode(charset)
                    break
                except Exception:
                    charset = None
        return (content, charset)

    # guess and encode return value into UTF-8
    def utf8(self, content):
        # Returns the original byte string unchanged when no charset
        # could be guessed.
        (uni, charset) = self.guess_charset(content)
        if charset:
            return uni.encode('utf-8')
        return content

View File

@ -144,7 +144,7 @@ Browse Directory</a></p>
<tr>
<th style="text-align:right;vertical-align:top;">Comment:</th>
<td>
<input type="text" name="comment" value="[comment]" /><br />
<input type="text" name="comment" value="[comment]" size="40" /><br />
<label for="comment_match_exact">
<input type="radio" name="comment_match" id="comment_match_fulltext"
value="fulltext" [is comment_match "fulltext"]checked=""[end] />
@ -172,13 +172,21 @@ Browse Directory</a></p>
</label>
</td>
</tr>
[if-any enable_search_content]
<tr>
<th style="text-align:right;vertical-align:top;">Search content:</th>
<td><input type="text" name="search_content" value="[search_content]" size="60" /></td>
</tr>
[end]
<tr>
<th style="text-align:right;vertical-align:top;">Sort By:</th>
<td>
<select name="querysort">
<option value="date" [is querysort "date"]selected="selected"[end]>Date</option>
<option value="date_rev" [is querysort "date_rev"]selected="selected"[end]>Date (oldest first)</option>
<option value="author" [is querysort "author"]selected="selected"[end]>Author</option>
<option value="file" [is querysort "file"]selected="selected"[end]>File</option>
<option value="relevance" [is querysort "relevance"]selected="selected"[end]>Relevance</option>
</select>
</td>
</tr>

View File

@ -46,15 +46,18 @@
<tr class="vc_row_[if-index commits even]even[else]odd[end]">
<td style="vertical-align: top;">
[define rev_href][if-any commits.files.prefer_markup][commits.files.view_href][else][if-any commits.files.download_href][commits.files.download_href][end][end][end]
[if-any commits.files.rev][if-any rev_href]<a href="[rev_href]">[end][commits.files.rev][if-any rev_href]</a>[end][else]&nbsp;[end]
[if-any commits.files.rev][if-any rev_href]<a href="[rev_href]">[end][commits.files.rev][if-any rev_href]</a>[end][else]&nbsp;[end]
</td>
<td style="vertical-align: top;">
<a href="[commits.files.dir_href]">[commits.files.dir]/</a>
<a href="[commits.files.log_href]">[commits.files.file]</a>
[if-any commits.files.snippet]
<div class="snippet">[commits.files.snippet]</div>
[end]
</td>
[if-any show_branch]
<td style="vertical-align: top;">
[if-any commits.files.branch][commits.files.branch][else]&nbsp;[end]
[if-any commits.files.branch][commits.files.branch][else]&nbsp;[end]
</td>
[end]
<td style="vertical-align: top;">
@ -68,10 +71,10 @@
[is commits.files.type "Remove"]</del>[end]
</td>
<td style="vertical-align: top;">
[if-any commits.files.date][commits.files.date][else]&nbsp;[end]
[if-any commits.files.date][commits.files.date][else]&nbsp;[end]
</td>
<td style="vertical-align: top;">
[if-any commits.files.author][commits.files.author][else]&nbsp;[end]
[if-any commits.files.author][commits.files.author][else]&nbsp;[end]
</td>
</tr>
[end]