From e363cf19b162e7770ab9ebe2e8a5a2857212db87 Mon Sep 17 00:00:00 2001
From: vfilippov
')
+ s = s.replace(bm_html, snippet_options['before_match'])
+ s = s.replace(am_html, snippet_options['after_match'])
+ snippets[docid] = s
+ # Fetch all fields from MySQL
+ sql = self.CreateIdQueryString((docid for (docid, _, _, _) in sphinx_rows))
+ cursor = self.db.cursor()
+ cursor.execute(sql)
+ byid = {}
+ for row in cursor:
+ byid[str(row[0])] = row
+ rows = list(byid[docid] + (rel, snippets[docid]) for (docid, rel, _, _) in sphinx_rows if docid in byid)
+ else:
+ rows = []
+ else:
+ # Use regular queries when document content is not searched
+ sql = self.CreateSQLQueryString(query)
+ cursor = self.db.cursor()
+ cursor.execute(sql)
+ rows = list(cursor)
- while 1:
- row = cursor.fetchone()
- if not row:
- break
-
- (dbType, dbCI_When, dbAuthorID, dbRepositoryID, dbDirID,
+ # Convert rows to commit objects
+ for row in rows:
+ (dbId, dbType, dbCI_When, dbAuthorID, dbRepositoryID, dbDirID,
dbFileID, dbRevision, dbStickyTag, dbBranchID, dbAddedLines,
dbRemovedLines, dbDescID, dbRepositoryName, dbDirName,
- dbFileName, dbRelevance) = row
+ dbFileName, dbRelevance, dbSnippet) = row
if not self.check_commit_access(dbRepositoryName, dbDirName, dbFileName, dbRevision):
continue
@@ -564,6 +682,7 @@ class CheckinDatabase:
commit.SetMinusCount(dbRemovedLines)
commit.SetDescriptionID(dbDescID)
commit.SetRelevance(dbRelevance)
+ commit.SetSnippet(dbSnippet)
query.AddCommit(commit)
@@ -623,46 +742,21 @@ class CheckinDatabase:
raise UnknownRepositoryError("Unknown repository '%s'"
% (repository))
- if (self._version >= 1):
- self.sql_delete('repositories', 'id', rep_id)
- self.sql_purge('commits', 'repositoryid', 'id', 'repositories')
- self.sql_purge('files', 'id', 'fileid', 'commits')
- self.sql_purge('dirs', 'id', 'dirid', 'commits')
- self.sql_purge('branches', 'id', 'branchid', 'commits')
- self.sql_purge('descs', 'id', 'descid', 'commits')
- self.sql_purge('people', 'id', 'whoid', 'commits')
- else:
- sql = "SELECT * FROM checkins WHERE repositoryid=%s"
- sql_args = (rep_id, )
- cursor = self.db.cursor()
- cursor.execute(sql, sql_args)
- checkins = []
- while 1:
- try:
- (ci_type, ci_when, who_id, repository_id,
- dir_id, file_id, revision, sticky_tag, branch_id,
- plus_count, minus_count, description_id) = \
- cursor.fetchone()
- except TypeError:
- break
- checkins.append([file_id, dir_id, branch_id,
- description_id, who_id])
-
- #self.sql_delete('repositories', 'id', rep_id)
- self.sql_delete('checkins', 'repositoryid', rep_id)
- for checkin in checkins:
- self.sql_delete('files', 'id', checkin[0], 'fileid')
- self.sql_delete('dirs', 'id', checkin[1], 'dirid')
- self.sql_delete('branches', 'id', checkin[2], 'branchid')
- self.sql_delete('descs', 'id', checkin[3], 'descid')
- self.sql_delete('people', 'id', checkin[4], 'whoid')
+ checkins_table = self._version >= 1 and 'commits' or 'checkins'
+ self.sql_delete('repositories', 'id', rep_id)
+ self.sql_purge(checkins_table, 'repositoryid', 'id', 'repositories')
+ self.sql_purge('files', 'id', 'fileid', checkins_table)
+ self.sql_purge('dirs', 'id', 'dirid', checkins_table)
+ self.sql_purge('branches', 'id', 'branchid', checkins_table)
+ self.sql_purge('descs', 'id', 'descid', checkins_table)
+ self.sql_purge('people', 'id', 'whoid', checkins_table)
# Reset all internal id caches. We could be choosier here,
# but let's just be as safe as possible.
self._get_cache = {}
self._get_id_cache = {}
self._desc_id_cache = {}
-
+
class DatabaseVersionError(Exception):
pass
@@ -678,7 +772,7 @@ class Commit:
CHANGE = 0
ADD = 1
REMOVE = 2
-
+
def __init__(self):
self.__directory = ''
self.__file = ''
@@ -690,15 +784,20 @@ class Commit:
self.__minuscount = ''
self.__description = ''
self.__relevance = ''
+ self.__snippet = ''
self.__gmt_time = 0.0
self.__type = Commit.CHANGE
+ self.__content = ''
+ self.__mimetype = ''
+ self.__base_path = ''
+ self.__base_rev = ''
def SetRepository(self, repository):
self.__repository = repository
def GetRepository(self):
return self.__repository
-
+
def SetDirectory(self, dir):
self.__directory = dir
@@ -710,7 +809,7 @@ class Commit:
def GetFile(self):
return self.__file
-
+
def SetRevision(self, revision):
self.__revision = revision
@@ -758,12 +857,19 @@ class Commit:
def GetDescription(self):
return self.__description
+ # Relevance and snippet are used when querying the commit database
def SetRelevance(self, relevance):
self.__relevance = relevance
def GetRelevance(self):
return self.__relevance
+ def SetSnippet(self, snippet):
+ self.__snippet = snippet
+
+ def GetSnippet(self):
+ return self.__snippet
+
def SetTypeChange(self):
self.__type = Commit.CHANGE
@@ -784,66 +890,80 @@ class Commit:
elif self.__type == Commit.REMOVE:
return 'Remove'
+ # File content (extracted text), optional, indexed with Sphinx
+ def SetContent(self, content):
+ self.__content = content
+
+ def GetContent(self):
+ return self.__content
+
+ # MIME type, optional; currently stored only in Sphinx
+ def SetMimeType(self, mimetype):
+ self.__mimetype = mimetype
+
+ def GetMimeType(self):
+ return self.__mimetype
+
## LazyCommit overrides a few methods of Commit to only retrieve
## it's properties as they are needed
class LazyCommit(Commit):
- def __init__(self, db):
- Commit.__init__(self)
- self.__db = db
+ def __init__(self, db):
+ Commit.__init__(self)
+ self.__db = db
- def SetFileID(self, dbFileID):
- self.__dbFileID = dbFileID
+ def SetFileID(self, dbFileID):
+ self.__dbFileID = dbFileID
- def GetFileID(self):
- return self.__dbFileID
+ def GetFileID(self):
+ return self.__dbFileID
- def GetFile(self):
- return self.__db.GetFile(self.__dbFileID)
+ def GetFile(self):
+ return self.__db.GetFile(self.__dbFileID)
- def SetDirectoryID(self, dbDirID):
- self.__dbDirID = dbDirID
+ def SetDirectoryID(self, dbDirID):
+ self.__dbDirID = dbDirID
- def GetDirectoryID(self):
- return self.__dbDirID
+ def GetDirectoryID(self):
+ return self.__dbDirID
- def GetDirectory(self):
- return self.__db.GetDirectory(self.__dbDirID)
+ def GetDirectory(self):
+ return self.__db.GetDirectory(self.__dbDirID)
- def SetRepositoryID(self, dbRepositoryID):
- self.__dbRepositoryID = dbRepositoryID
+ def SetRepositoryID(self, dbRepositoryID):
+ self.__dbRepositoryID = dbRepositoryID
- def GetRepositoryID(self):
- return self.__dbRepositoryID
+ def GetRepositoryID(self):
+ return self.__dbRepositoryID
- def GetRepository(self):
- return self.__db.GetRepository(self.__dbRepositoryID)
+ def GetRepository(self):
+ return self.__db.GetRepository(self.__dbRepositoryID)
- def SetAuthorID(self, dbAuthorID):
- self.__dbAuthorID = dbAuthorID
+ def SetAuthorID(self, dbAuthorID):
+ self.__dbAuthorID = dbAuthorID
- def GetAuthorID(self):
- return self.__dbAuthorID
+ def GetAuthorID(self):
+ return self.__dbAuthorID
- def GetAuthor(self):
- return self.__db.GetAuthor(self.__dbAuthorID)
+ def GetAuthor(self):
+ return self.__db.GetAuthor(self.__dbAuthorID)
- def SetBranchID(self, dbBranchID):
- self.__dbBranchID = dbBranchID
+ def SetBranchID(self, dbBranchID):
+ self.__dbBranchID = dbBranchID
- def GetBranchID(self):
- return self.__dbBranchID
+ def GetBranchID(self):
+ return self.__dbBranchID
- def GetBranch(self):
- return self.__db.GetBranch(self.__dbBranchID)
+ def GetBranch(self):
+ return self.__db.GetBranch(self.__dbBranchID)
- def SetDescriptionID(self, dbDescID):
- self.__dbDescID = dbDescID
+ def SetDescriptionID(self, dbDescID):
+ self.__dbDescID = dbDescID
- def GetDescriptionID(self):
- return self.__dbDescID
+ def GetDescriptionID(self):
+ return self.__dbDescID
- def GetDescription(self):
- return self.__db.GetDescription(self.__dbDescID)
+ def GetDescription(self):
+ return self.__db.GetDescription(self.__dbDescID)
## QueryEntry holds data on one match-type in the SQL database
## match is: "exact", "like", or "regex"
@@ -858,8 +978,8 @@ class CheckinDatabaseQuery:
def __init__(self):
## sorting
self.sort = "date"
-
- ## repository to query
+
+ ## repository, branch, etc to query
self.repository_list = []
self.branch_list = []
self.directory_list = []
@@ -867,7 +987,11 @@ class CheckinDatabaseQuery:
self.revision_list = []
self.author_list = []
self.comment_list = []
+
+ ## text_query = Fulltext query on comments
+ ## content_query = Fulltext query on content
self.text_query = ""
+ self.content_query = ""
## date range in DBI 2.0 timedate objects
self.from_date = None
@@ -886,6 +1010,9 @@ class CheckinDatabaseQuery:
def SetTextQuery(self, query):
self.text_query = query
+ def SetContentQuery(self, query):
+ self.content_query = query
+
def SetRepository(self, repository, match = "exact"):
self.repository_list.append(QueryEntry(repository, match))
@@ -921,7 +1048,7 @@ class CheckinDatabaseQuery:
def SetFromDateHoursAgo(self, hours_ago):
ticks = time.time() - (3600 * hours_ago)
self.from_date = dbi.DateTimeFromTicks(ticks)
-
+
def SetFromDateDaysAgo(self, days_ago):
ticks = time.time() - (86400 * days_ago)
self.from_date = dbi.DateTimeFromTicks(ticks)
@@ -942,7 +1069,7 @@ class CheckinDatabaseQuery:
##
def CreateCommit():
return Commit()
-
+
def CreateCheckinQuery():
return CheckinDatabaseQuery()
@@ -953,9 +1080,23 @@ def ConnectDatabase(cfg, authorizer=None, readonly=0):
else:
user = cfg.cvsdb.user
passwd = cfg.cvsdb.passwd
- db = CheckinDatabase(cfg.cvsdb.host, cfg.cvsdb.port, cfg.cvsdb.socket, user, passwd,
- cfg.cvsdb.database_name, cfg.cvsdb.row_limit, cfg.cvsdb.fulltext_min_relevance,
- authorizer)
+ db = CheckinDatabase(
+ host = cfg.cvsdb.host,
+ port = cfg.cvsdb.port,
+ socket = cfg.cvsdb.socket,
+ user = user,
+ passwd = passwd,
+ database = cfg.cvsdb.database_name,
+ row_limit = cfg.cvsdb.row_limit,
+ min_relevance = cfg.cvsdb.fulltext_min_relevance,
+ authorizer = authorizer,
+ index_content = cfg.cvsdb.index_content,
+ sphinx_host = cfg.cvsdb.sphinx_host,
+ sphinx_port = int(cfg.cvsdb.sphinx_port),
+ sphinx_socket = cfg.cvsdb.sphinx_socket,
+ sphinx_index = cfg.cvsdb.sphinx_index,
+ cfg = cfg,
+ )
db.Connect()
return db
diff --git a/lib/vclib/ccvs/bincvs.py b/lib/vclib/ccvs/bincvs.py
index c2b9430d..fee243fb 100644
--- a/lib/vclib/ccvs/bincvs.py
+++ b/lib/vclib/ccvs/bincvs.py
@@ -31,8 +31,8 @@ import popen
class BaseCVSRepository(vclib.Repository):
def __init__(self, name, rootpath, authorizer, utilities):
if not os.path.isdir(rootpath):
- raise vclib.ReposNotFound(name)
-
+ raise vclib.ReposNotFound(name)
+
self.name = name
self.rootpath = rootpath
self.auth = authorizer
@@ -53,7 +53,7 @@ class BaseCVSRepository(vclib.Repository):
def authorizer(self):
return self.auth
-
+
def itemtype(self, path_parts, rev):
basepath = self._getpath(path_parts)
kind = None
@@ -74,12 +74,12 @@ class BaseCVSRepository(vclib.Repository):
def itemprops(self, path_parts, rev):
self.itemtype(path_parts, rev) # does auth-check
return {} # CVS doesn't support properties
-
+
def listdir(self, path_parts, rev, options):
if self.itemtype(path_parts, rev) != vclib.DIR: # does auth-check
raise vclib.Error("Path '%s' is not a directory."
% (string.join(path_parts, "/")))
-
+
# Only RCS files (*,v) and subdirs are returned.
data = [ ]
full_name = self._getpath(path_parts)
@@ -115,7 +115,7 @@ class BaseCVSRepository(vclib.Repository):
data.append(CVSDirEntry(name, kind, errors, 1))
return data
-
+
def _getpath(self, path_parts):
return apply(os.path.join, (self.rootpath,) + tuple(path_parts))
@@ -177,7 +177,7 @@ class BinCVSRepository(BaseCVSRepository):
used_rlog = 0
tip_rev = None # used only if we have to fallback to using rlog
- fp = self.rcs_popen('co', (rev_flag, full_name), 'rb')
+ fp = self.rcs_popen('co', (rev_flag, full_name), 'rb')
try:
filename, revision = _parse_co_header(fp)
except COMissingRevision:
@@ -191,14 +191,14 @@ class BinCVSRepository(BaseCVSRepository):
used_rlog = 1
if not tip_rev:
raise vclib.Error("Unable to find valid revision")
- fp = self.rcs_popen('co', ('-p' + tip_rev.string, full_name), 'rb')
+ fp = self.rcs_popen('co', ('-p' + tip_rev.string, full_name), 'rb')
filename, revision = _parse_co_header(fp)
-
+
if filename is None:
# CVSNT's co exits without any output if a dead revision is requested.
# Bug at http://www.cvsnt.org/cgi-bin/bugzilla/show_bug.cgi?id=190
# As a workaround, we invoke rlog to find the first non-dead revision
- # that precedes it and check out that revision instead. Of course,
+ # that precedes it and check out that revision instead. Of course,
# if we've already invoked rlog above, we just reuse its output.
if not used_rlog:
tip_rev = self._get_tip_revision(full_name + ',v', rev)
@@ -207,7 +207,7 @@ class BinCVSRepository(BaseCVSRepository):
raise vclib.Error(
'Could not find non-dead revision preceding "%s"' % rev)
fp = self.rcs_popen('co', ('-p' + tip_rev.undead.string,
- full_name), 'rb')
+ full_name), 'rb')
filename, revision = _parse_co_header(fp)
if filename is None:
@@ -278,7 +278,7 @@ class BinCVSRepository(BaseCVSRepository):
if self.itemtype(path_parts, rev) != vclib.FILE: # does auth-check
raise vclib.Error("Path '%s' is not a file."
% (string.join(path_parts, "/")))
-
+
# Invoke rlog
rcsfile = self.rcsfile(path_parts, 1)
if rev and options.get('cvs_pass_rev', 0):
@@ -341,7 +341,7 @@ class BinCVSRepository(BaseCVSRepository):
def revinfo(self, rev):
raise vclib.UnsupportedFeature
-
+
def rawdiff(self, path_parts1, rev1, path_parts2, rev2, type, options={}):
"""see vclib.Repository.rawdiff docstring
@@ -439,9 +439,9 @@ def _match_revs_tags(revlist, taglist):
example: if revision is 1.2.3.4, parent is 1.2
"undead"
- If the revision is dead, then this is a reference to the first
+ If the revision is dead, then this is a reference to the first
previous revision which isn't dead, otherwise it's a reference
- to itself. If all the previous revisions are dead it's None.
+ to itself. If all the previous revisions are dead it's None.
"branch_number"
tuple representing branch number or empty tuple if on trunk
@@ -653,7 +653,7 @@ def _parse_co_header(fp):
pass
else:
break
-
+
raise COMalformedOutput, "Unable to find revision in co output stream"
# if your rlog doesn't use 77 '=' characters, then this must change
@@ -674,7 +674,7 @@ _EOF_ERROR = 'error message found' # rlog issued an error
# ^rlog\: (.*)(?:\:\d+)?\: (.*)$
#
# But for some reason the windows version of rlog omits the "rlog: " prefix
-# for the first error message when the standard error stream has been
+# for the first error message when the standard error stream has been
# redirected to a file or pipe. (the prefix is present in subsequent errors
# and when rlog is run from the console). So the expression below is more
# complicated
@@ -703,7 +703,7 @@ def _parse_log_header(fp):
Returns: filename, default branch, tag dictionary, lock dictionary,
rlog error message, and eof flag
"""
-
+
filename = head = branch = msg = ""
taginfo = { } # tag name => number
lockinfo = { } # revision => locker
@@ -732,7 +732,7 @@ def _parse_log_header(fp):
else:
# oops. this line isn't lock info. stop parsing tags.
state = 0
-
+
if state == 0:
if line[:9] == 'RCS file:':
filename = line[10:-1]
@@ -902,7 +902,7 @@ def _file_log(revs, taginfo, lockinfo, cur_branch, filter):
except ValueError:
view_tag = None
else:
- tags.append(view_tag)
+ tags.append(view_tag)
# Match up tags and revisions
_match_revs_tags(revs, tags)
@@ -910,13 +910,13 @@ def _file_log(revs, taginfo, lockinfo, cur_branch, filter):
# Match up lockinfo and revision
for rev in revs:
rev.lockinfo = lockinfo.get(rev.string)
-
+
# Add artificial ViewVC tag HEAD, which acts like a non-branch tag pointing
# at the latest revision on the MAIN branch. The HEAD revision doesn't have
# anything to do with the "head" revision number specified in the RCS file
# and in rlog output. HEAD refers to the revision that the CVS and RCS co
# commands will check out by default, whereas the "head" field just refers
- # to the highest revision on the trunk.
+ # to the highest revision on the trunk.
taginfo['HEAD'] = _add_tag('HEAD', taginfo['MAIN'].co_rev)
# Determine what revisions to return
@@ -954,7 +954,7 @@ def _file_log(revs, taginfo, lockinfo, cur_branch, filter):
_remove_tag(view_tag)
else:
filtered_revs = revs
-
+
return filtered_revs
def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
@@ -1004,7 +1004,7 @@ def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
= _parse_log_header(rlog)
if eof == _EOF_LOG:
- # the rlog output ended early. this can happen on errors that rlog
+ # the rlog output ended early. this can happen on errors that rlog
# thinks are so serious that it stops parsing the current file and
# refuses to parse any of the files that come after it. one of the
# errors that triggers this obnoxious behavior looks like:
@@ -1052,8 +1052,8 @@ def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
tag = None
# we don't care about the specific values -- just the keys and whether
- # the values point to branches or revisions. this the fastest way to
- # merge the set of keys and keep values that allow us to make the
+ # the values point to branches or revisions. this the fastest way to
+ # merge the set of keys and keep values that allow us to make the
# distinction between branch tags and normal tags
alltags.update(taginfo)
@@ -1098,7 +1098,7 @@ def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
file.dead = 0
#file.errors.append("No revisions exist on %s" % (view_tag or "MAIN"))
file.absent = 1
-
+
# done with this file now, skip the rest of this file's revisions
if not eof:
_skip_file(rlog)
@@ -1211,7 +1211,7 @@ def _newest_file(dirpath):
newest_time = 0
### FIXME: This sucker is leaking unauthorized paths! ###
-
+
for subfile in os.listdir(dirpath):
### filter CVS locks? stale NFS handles?
if subfile[-2:] != ',v':
diff --git a/lib/viewvc.py b/lib/viewvc.py
index 84f5db9a..44a93a5d 100644
--- a/lib/viewvc.py
+++ b/lib/viewvc.py
@@ -1,4 +1,3 @@
-#
# Copyright (C) 1999-2009 The ViewCVS Group. All Rights Reserved.
#
# By using this file, you agree to the terms and conditions set forth in
@@ -68,7 +67,6 @@ docroot_magic_path = '*docroot*'
viewcvs_mime_type = 'text/vnd.viewcvs-markup'
alt_mime_type = 'text/x-cvsweb-markup'
view_roots_magic = '*viewroots*'
-magic_buf_size = 4096
default_mime_type = 'application/octet-stream'
# Put here the variables we need in order to hold our state - they
@@ -121,9 +119,8 @@ class Request:
# check for an authenticated username
self.username = server.getenv('REMOTE_USER')
- # construct MIME magic
- self.ms = None
- self.ms_fail = 0
+ # repository object cache
+ self.all_repos = {}
# if we allow compressed output, see if the client does too
self.gzip_compress_level = 0
@@ -134,6 +131,9 @@ class Request:
string.split(http_accept_encoding, ","))):
self.gzip_compress_level = 9 # make this configurable?
+ def utf8(self, value):
+ return self.cfg.guesser().utf8(value)
+
def create_repos(self, rootname):
if not rootname:
return None
@@ -677,7 +677,7 @@ def _validate_mimetype(value):
return value in (viewcvs_mime_type, alt_mime_type, 'text/plain')
# obvious things here. note that we don't need uppercase for alpha.
-_re_validate_alpha = re.compile('^[a-z]+$')
+_re_validate_alpha = re.compile('^[a-z_]+$')
_re_validate_number = re.compile('^[0-9]+$')
_re_validate_boolint = re.compile('^[01]$')
@@ -743,6 +743,7 @@ _legal_params = {
'who_match' : _re_validate_alpha,
'comment' : None,
'comment_match' : _re_validate_alpha,
+ 'search_content': None,
'querysort' : _re_validate_alpha,
'date' : _re_validate_alpha,
'hours' : _re_validate_number,
@@ -988,7 +989,7 @@ def nav_path(request):
is_last = len(path_parts) == len(request.path_parts)
if request.roottype == 'cvs':
- item = _item(name=cvsdb.utf8string(part), href=None)
+ item = _item(name=request.utf8(part), href=None)
else:
item = _item(name=part, href=None)
@@ -1248,7 +1249,7 @@ def common_template_data(request, revision=None, mime_type=None):
cfg = request.cfg
where = request.where
if request.roottype == 'cvs':
- where = cvsdb.utf8string(where)
+ where = request.utf8(where)
where = request.server.escape(where)
# Initialize data dictionary members (sorted alphanumerically)
@@ -1444,28 +1445,31 @@ def markup_stream_pygments(request, cfg, blame_data, fp, filename, mime_type):
get_lexer_for_mimetype, \
get_lexer_for_filename
from pygments.lexers._mapping import LEXERS
+ # Hack for shell mime types:
LEXERS['BashLexer'] = ('pygments.lexers.other', 'Bash', ('bash', 'sh'), ('*.sh',), ('application/x-sh', 'application/x-shellscript', 'text/x-sh', 'text/x-shellscript'))
- encoding = 'guess'
- if cfg.options.detect_encoding:
- try:
- import chardet
- encoding = 'chardet'
- except (SyntaxError, ImportError):
- pass
try:
lexer = get_lexer_for_mimetype(mime_type,
- encoding=encoding,
+ encoding='utf-8',
stripnl=False)
except ClassNotFound:
try:
lexer = get_lexer_for_filename(filename,
- encoding=encoding,
+ encoding='utf-8',
stripnl=False)
except ClassNotFound:
use_pygments = 0
except ImportError:
use_pygments = 0
+ # Detect encoding by calling chardet ourselves,
+ # to support it in non-highlighting mode
+ content = fp.read()
+ c, encoding = cfg.guesser().guess_charset(content)
+ if encoding:
+ content = c
+ else:
+ encoding = 'unknown'
+
# If we aren't going to be highlighting anything, just return the
# BLAME_SOURCE. If there's no blame_source, we'll generate a fake
# one from the file contents we fetch with PATH and REV.
@@ -1475,11 +1479,7 @@ def markup_stream_pygments(request, cfg, blame_data, fp, filename, mime_type):
else:
lines = []
line_no = 0
- while 1:
- line = fp.readline()
- if not line:
- break
- line = cvsdb.utf8string(line)
+ for line in content.split('\n'):
line_no = line_no + 1
item = vclib.Annotation(cgi.escape(line), line_no,
None, None, None, None)
@@ -1508,19 +1508,11 @@ def markup_stream_pygments(request, cfg, blame_data, fp, filename, mime_type):
self.blame_data.append(item)
self.line_no = self.line_no + 1
ps = PygmentsSink(blame_source)
- fpd = fp.read()
- try:
- fpdat = unicode(fpd,'utf-8')
- except:
- try:
- fpdat = unicode(fpd,'cp1251')
- except:
- fpdat = fpd
- highlight(fpdat, lexer,
+ highlight(content, lexer,
HtmlFormatter(nowrap=True,
classprefix='pygments-',
encoding='utf-8'), ps)
- return ps.blame_data
+ return ps.blame_data, encoding
def make_time_string(date, cfg):
"""Returns formatted date string in either local time or UTC.
@@ -1594,6 +1586,7 @@ def calculate_mime_type(request, path_parts, rev):
return mime_type
except:
pass
+ # FIXME rewrite to use viewvcmagic
return guess_mime(path_parts[-1])
def markup_or_annotate(request, is_annotate):
@@ -1605,21 +1598,12 @@ def markup_or_annotate(request, is_annotate):
mime_type = calculate_mime_type(request, path, rev)
if not mime_type or mime_type == default_mime_type:
- if request.ms is None and not request.ms_fail:
- try:
- import magic
- request.ms = magic.open(magic.MAGIC_NONE | magic.MAGIC_MIME)
- request.ms.load()
- except:
- request.ms_fail = 1
- if request.ms:
- try:
- fp, revision = request.repos.openfile(path, rev)
- buffer = fp.read(magic_buf_size)
- fp.close()
- mime_type = request.ms.buffer(buffer)
- except:
- pass
+ try:
+ fp, revision = request.repos.openfile(path, rev)
+ mime_type = request.cfg.guesser().guess_mime(None, None, fp)
+ fp.close()
+ except:
+ raise
# Is this a binary type?
if is_binary(request.cfg, mime_type):
@@ -1657,9 +1641,10 @@ def markup_or_annotate(request, is_annotate):
if check_freshness(request, None, revision, weak=1):
fp.close()
return
- lines = markup_stream_pygments(request, cfg, blame_source, fp,
- path[-1], mime_type)
+ lines, charset = markup_stream_pygments(request, cfg, blame_source, fp, path[-1], mime_type)
fp.close()
+ if mime_type.find(';') < 0:
+ mime_type = mime_type+'; charset='+charset
data = common_template_data(request, revision)
data.merge(ezt.TemplateData({
@@ -1910,7 +1895,7 @@ def view_directory(request):
row.short_log = format_log(file.log, cfg)
row.log = htmlify(file.log, cfg.options.mangle_email_addresses)
row.lockinfo = file.lockinfo
- row.name = request.server.escape(cvsdb.utf8string(file.name))
+ row.name = request.server.escape(request.utf8(file.name))
row.anchor = row.name
row.pathtype = (file.kind == vclib.FILE and 'file') or \
(file.kind == vclib.DIR and 'dir')
@@ -2285,7 +2270,7 @@ def view_log(request):
entry.ago = html_time(request, rev.date, 1)
entry.log = rev.log or ""
if cvs:
- entry.log = cvsdb.utf8string(entry.log)
+ entry.log = request.utf8(entry.log)
entry.log = htmlify(entry.log, cfg.options.mangle_email_addresses)
entry.size = rev.size
entry.lockinfo = rev.lockinfo
@@ -2770,7 +2755,7 @@ class DiffSource:
self.save_line = None
self.line_number = None
self.prev_line_number = None
-
+
# keep track of where we are during an iteration
self.idx = -1
self.last = None
@@ -2867,7 +2852,7 @@ class DiffSource:
diff_code = line[0]
output = self._format_text(line[1:])
- output = cvsdb.utf8string(output)
+ output = self.cfg.guesser().utf8(output)
if diff_code == '+':
if self.state == 'dump':
@@ -3644,6 +3629,7 @@ def view_queryform(request):
'who_match' : request.query_dict.get('who_match', 'exact'),
'comment' : request.query_dict.get('comment', ''),
'comment_match' : request.query_dict.get('comment_match', 'fulltext'),
+ 'search_content' : request.query_dict.get('search_content', ''),
'querysort' : request.query_dict.get('querysort', 'date'),
'date' : request.query_dict.get('date', 'hours'),
'hours' : request.query_dict.get('hours', '2'),
@@ -3653,6 +3639,7 @@ def view_queryform(request):
'query_hidden_values' : query_hidden_values,
'limit_changes' : limit_changes,
'dir_href' : dir_href,
+ 'enable_search_content' : request.cfg.cvsdb.index_content,
}))
generate_page(request, "query_form", data)
@@ -3791,7 +3778,8 @@ def build_commit(request, files, max_files, dir_strip, format):
plus_count = 0
minus_count = 0
found_unreadable = 0
- all_repos = {}
+ if not request.all_repos:
+ request.all_repos = {}
for f in files:
dirname = f.GetDirectory()
@@ -3810,17 +3798,19 @@ def build_commit(request, files, max_files, dir_strip, format):
# Check path access (since the commits database logic bypasses the
# vclib layer and, thus, the vcauth stuff that layer uses).
- my_repos = all_repos.get(f.GetRepository(), '')
+ my_repos = request.all_repos.get(f.GetRepository(), '')
if not my_repos:
try:
- my_repos = all_repos[f.GetRepository()] = request.create_repos(f.GetRepository())
+ my_repos = request.all_repos[f.GetRepository()] = request.create_repos(f.GetRepository())
except:
my_repos = None
if not my_repos:
return None
if my_repos['roottype'] == 'cvs':
- try: where = unicode(where,'utf-8')
+ # we store UTF-8 in the DB
+ try: where = where.decode('utf-8')
except: pass
+ # FIXME maybe store the "real" filesystem path in the DB instead of relying on the cvs_ondisk_charset setting?
try: where = where.encode(cfg.options.cvs_ondisk_charset)
except: pass
path_parts = _path_parts(where)
@@ -3907,24 +3897,27 @@ def build_commit(request, files, max_files, dir_strip, format):
if max_files and num_allowed > max_files:
continue
- commit_files.append(_item(date=commit_time,
- dir=request.server.escape(dirname),
- file=request.server.escape(filename),
- author=request.server.escape(f.GetAuthor()),
- rev=rev,
- branch=f.GetBranch(),
- plus=plus,
- minus=minus,
- type=change_type,
- dir_href=dir_href,
- log_href=log_href,
- view_href=view_href,
- download_href=download_href,
- prefer_markup=prefer_markup,
- diff_href=diff_href,
- root=my_repos,
- path=where,
- path_prev=path_prev))
+ commit_files.append(_item(
+ date=commit_time,
+ dir=request.server.escape(dirname),
+ file=request.server.escape(filename),
+ author=request.server.escape(f.GetAuthor()),
+ rev=rev,
+ branch=f.GetBranch(),
+ plus=plus,
+ minus=minus,
+ type=change_type,
+ snippet=f.GetSnippet(),
+ dir_href=dir_href,
+ log_href=log_href,
+ view_href=view_href,
+ download_href=download_href,
+ prefer_markup=prefer_markup,
+ diff_href=diff_href,
+ root=my_repos,
+ path=where,
+ path_prev=path_prev,
+ ))
# No files survived authz checks? Let's just pretend this
# little commit didn't happen, shall we?
@@ -4115,6 +4108,7 @@ def view_query(request):
who_match = request.query_dict.get('who_match', 'exact')
comment = request.query_dict.get('comment', '')
comment_match = request.query_dict.get('comment_match', 'fulltext')
+ search_content = request.query_dict.get('search_content', '')
querysort = request.query_dict.get('querysort', 'date')
date = request.query_dict.get('date', 'hours')
hours = request.query_dict.get('hours', '2')
@@ -4126,7 +4120,7 @@ def view_query(request):
cfg.options.limit_changes))
match_types = { 'exact':1, 'like':1, 'glob':1, 'regex':1, 'notregex':1 }
- sort_types = { 'date':1, 'author':1, 'file':1 }
+ sort_types = { 'date':1, 'date_rev':1, 'author':1, 'file':1, 'relevance':1 }
date_types = { 'hours':1, 'day':1, 'week':1, 'month':1,
'all':1, 'explicit':1 }
@@ -4193,6 +4187,8 @@ def view_query(request):
query.SetComment(comment, comment_match)
else:
query.SetTextQuery(comment)
+ if search_content:
+ query.SetContentQuery(search_content)
query.SetSortMethod(querysort)
if date == 'hours':
query.SetFromDateHoursAgo(int(hours))
diff --git a/lib/viewvcmagic.py b/lib/viewvcmagic.py
new file mode 100644
index 00000000..5f8b3ea8
--- /dev/null
+++ b/lib/viewvcmagic.py
@@ -0,0 +1,102 @@
+#!/usr/bin/python
+"""Guess MIME types and character sets of repository file content."""
+
+import mimetypes
+
+# chardet is optional; charset guessing degrades gracefully without it
+have_chardet = 0
+try:
+    import chardet
+    have_chardet = 1
+except (SyntaxError, ImportError):
+    pass
+
+class ContentMagic:
+    """Best-effort guesser for MIME types and character sets.
+
+    Uses the optional `magic` (libmagic) and `chardet` modules when they
+    can be imported and falls back to simpler checks otherwise.
+    """
+
+    def __init__(self, encodings):
+        """Create a guesser.
+
+        encodings -- colon-separated charset names tried in order by
+                     guess_charset() when other detection fails.
+
+        A failure to load libmagic is appended to self.errors, not raised.
+        """
+        self.encodings = encodings.split(':')
+        self.mime_magic = None
+        self.errors = []
+        try:
+            import magic
+            detector = magic.open(magic.MAGIC_MIME_TYPE)
+            detector.load()
+            # publish the handle only once it has loaded without error
+            self.mime_magic = detector
+        except Exception, e:
+            self.errors.append(e)
+
+    def guess_mime(self, mime, filename, tempfile):
+        """Return a MIME type, or a false value when none can be guessed.
+
+        mime     -- already known type; 'application/octet-stream' counts
+                    as unknown.
+        filename -- optional file name passed to mimetypes.guess_type().
+        tempfile -- optional path string or readable file object handed to
+                    libmagic; only the first 4096 bytes of an object are read.
+        """
+        if mime == 'application/octet-stream':
+            mime = ''
+        if not mime and filename:
+            mime = mimetypes.guess_type(filename)[0]
+        if not mime and tempfile and self.mime_magic:
+            # basestring: unicode paths are paths too, not file objects
+            if isinstance(tempfile, basestring):
+                mime = self.mime_magic.file(tempfile)
+            else:
+                mime = self.mime_magic.buffer(tempfile.read(4096))
+        return mime
+
+    def guess_charset(self, content):
+        """Return (decoded_content, charset) for content.
+
+        charset is always an encoding name or None; when it is None the
+        content is returned undecoded.
+        """
+        charset = None
+        if have_chardet:
+            try:
+                detected = chardet.detect(content)
+                # detect() may report no encoding; never leak the result
+                # dict itself to callers as the charset
+                charset = (detected and detected['encoding']) or None
+                if charset:
+                    content = content.decode(charset)
+            except Exception:
+                charset = None
+        else:
+            # without chardet, UTF-8 is the first guess
+            try:
+                content = content.decode('utf-8')
+                charset = 'utf-8'
+            except Exception:
+                charset = None
+        if charset is None:
+            # fall back to the configured encodings, first success wins
+            for candidate in self.encodings:
+                try:
+                    content = content.decode(candidate)
+                except Exception:
+                    continue
+                charset = candidate
+                break
+        return (content, charset)
+
+    def utf8(self, content):
+        """Return content as UTF-8 bytes, or unchanged if its charset is unknown."""
+        (uni, charset) = self.guess_charset(content)
+        if charset:
+            return uni.encode('utf-8')
+        return content
diff --git a/templates/query_form.ezt b/templates/query_form.ezt
index ec28ffdc..4919bc5b 100644
--- a/templates/query_form.ezt
+++ b/templates/query_form.ezt
@@ -144,7 +144,7 @@ Browse Directory