Bug 82651 - Remove the rest of the ugly UTF-8 hacks

git-svn-id: svn://svn.office.custis.ru/3rdparty/viewvc.org/trunk@1394 6955db30-a419-402b-8a0d-67ecbb4d7f56
remotes/github/custis
vfilippov 2011-09-29 12:27:02 +00:00 committed by Vitaliy Filippov
parent 8dc02448bc
commit ecadbf9fd3
7 changed files with 34 additions and 22 deletions

View File

@ -195,7 +195,7 @@ if __name__ == '__main__':
if command in ('rebuild', 'update'):
repository = vclib.ccvs.CVSRepository(None, rootpath, None,
cfg.utilities, 0)
cfg.utilities, 0, cfg.guesser())
latest_checkin = db.GetLatestCheckinTime(repository)
if latest_checkin is None:
command = 'rebuild'

View File

@ -373,6 +373,9 @@ class CheckinDatabase:
if self.index_content:
sphcur = self.sphinx.cursor()
content = commit.GetContent()
# Sphinx has 4 MB text field limit
if len(content) >= 4*1024*1024:
content = content[0:4*1024*1024]
props['ci_when'] = str(int(commit.GetTime() or 0))
if len(content):
props['content'] = content
@ -461,7 +464,7 @@ class CheckinDatabase:
elif query.sort == 'date_rev':
order_by = 'ORDER BY `ci_when` ASC, `relevance` DESC'
else: # /* if query.sort == 'relevance' */
order_by = 'ORDER BY `relevance` DESC'
order_by = 'ORDER BY `relevance` DESC, `ci_when` DESC'
conditions = string.join((i for i in condList if i), " AND ")
conditions = conditions and "WHERE %s" % conditions
@ -618,7 +621,7 @@ class CheckinDatabase:
'limit': 200,
'before_match': '<span style="color:red">',
'after_match': '</span>',
'chunk_separator': ' ... ',
'chunk_separator': ' ...\n',
}
preformatted_mime = 'text/(?!html|xml).*'
snippets = {}
@ -700,7 +703,7 @@ class CheckinDatabase:
return None
commits_table = self._version >= 1 and 'commits' or 'checkins'
sql = "SELECT * FROM %s WHERE "\
sql = "SELECT whoid FROM %s WHERE "\
" repositoryid=%%s "\
" AND dirid=%%s"\
" AND fileid=%%s"\
@ -711,9 +714,7 @@ class CheckinDatabase:
cursor = self.db.cursor()
cursor.execute(sql, sql_args)
try:
(ci_type, ci_when, who_id, repository_id,
dir_id, file_id, revision, sticky_tag, branch_id,
plus_count, minus_count, description_id) = cursor.fetchone()
who_id, = cursor.fetchone()
except TypeError:
return None

View File

@ -33,11 +33,11 @@ def expand_root_parent(parent_path):
return roots
def CVSRepository(name, rootpath, authorizer, utilities, use_rcsparse):
def CVSRepository(name, rootpath, authorizer, utilities, use_rcsparse, charset_guesser = None):
rootpath = canonicalize_rootpath(rootpath)
if use_rcsparse:
import ccvs
return ccvs.CCVSRepository(name, rootpath, authorizer, utilities)
return ccvs.CCVSRepository(name, rootpath, authorizer, utilities, charset_guesser)
else:
import bincvs
return bincvs.BinCVSRepository(name, rootpath, authorizer, utilities)
return bincvs.BinCVSRepository(name, rootpath, authorizer, utilities, charset_guesser)

View File

@ -29,7 +29,7 @@ import compat
import popen
class BaseCVSRepository(vclib.Repository):
def __init__(self, name, rootpath, authorizer, utilities):
def __init__(self, name, rootpath, authorizer, utilities, charset_guesser = None):
if not os.path.isdir(rootpath):
raise vclib.ReposNotFound(name)
@ -37,6 +37,7 @@ class BaseCVSRepository(vclib.Repository):
self.rootpath = rootpath
self.auth = authorizer
self.utilities = utilities
self.guesser = charset_guesser
# See if this repository is even viewable, authz-wise.
if not vclib.check_root_access(self):
@ -156,7 +157,7 @@ class BinCVSRepository(BaseCVSRepository):
filename, default_branch, tags, lockinfo, msg, eof = _parse_log_header(fp)
revs = []
while not eof:
revision, eof = _parse_log_entry(fp)
revision, eof = _parse_log_entry(fp, self.guesser)
if revision:
revs.append(revision)
revs = _file_log(revs, tags, lockinfo, default_branch, rev)
@ -246,7 +247,7 @@ class BinCVSRepository(BaseCVSRepository):
for entry in entries:
if vclib.check_path_access(self, path_parts + [entry.name], None, rev):
entries_to_fetch.append(entry)
alltags = _get_logs(self, path_parts, entries_to_fetch, rev, subdirs)
alltags = _get_logs(self, path_parts, entries_to_fetch, rev, subdirs, self.guesser)
branches = options['cvs_branches'] = []
tags = options['cvs_tags'] = []
for name, rev in alltags.items():
@ -292,7 +293,7 @@ class BinCVSRepository(BaseCVSRepository):
# Retrieve revision objects
revs = []
while not eof:
revision, eof = _parse_log_entry(fp)
revision, eof = _parse_log_entry(fp, self.guesser)
if revision:
revs.append(revision)
@ -783,7 +784,7 @@ _re_log_info = re.compile(r'^date:\s+([^;]+);'
r'(\s+commitid:\s+([a-zA-Z0-9]+))?\n$')
### _re_rev should be updated to extract the "locked" flag
_re_rev = re.compile(r'^revision\s+([0-9.]+).*')
def _parse_log_entry(fp):
def _parse_log_entry(fp, guesser):
"""Parse a single log entry.
On entry, fp should point to the first line of the entry (the "revision"
@ -849,7 +850,8 @@ def _parse_log_entry(fp):
raise ValueError, 'invalid year'
date = compat.timegm(tm)
log = cvsdb.utf8string(log)
if guesser:
log = guesser.utf8(log)
return Revision(rev, date,
# author, state, lines changed
@ -957,7 +959,7 @@ def _file_log(revs, taginfo, lockinfo, cur_branch, filter):
return filtered_revs
def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs, guesser):
alltags = { # all the tags seen in the files of this dir
'MAIN' : '',
'HEAD' : '1.1'
@ -1062,7 +1064,7 @@ def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
while not eof:
# fetch one of the log entries
entry, eof = _parse_log_entry(rlog)
entry, eof = _parse_log_entry(rlog, guesser)
if not entry:
# parsing error

View File

@ -415,7 +415,7 @@ class CVSParser(rcsparse.Sink):
class BlameSource:
def __init__(self, rcs_file, opt_rev=None):
def __init__(self, rcs_file, opt_rev=None, charset_guesser=None):
# Parse the CVS file
parser = CVSParser()
revision = parser.parse_cvs_file(rcs_file, opt_rev)
@ -429,6 +429,7 @@ class BlameSource:
self.lines = lines
self.num_lines = count
self.parser = parser
self.guesser = charset_guesser
# keep track of where we are during an iteration
self.idx = -1
@ -447,7 +448,10 @@ class BlameSource:
prev_rev = self.parser.prev_revision.get(rev)
line_number = idx + 1
author = self.parser.revision_author[rev]
thisline = cvsdb.utf8string(self.lines[idx])
if self.guesser:
thisline = self.guesser.utf8(self.lines[idx])
### TODO: Put a real date in here.
item = vclib.Annotation(thisline, line_number, rev, prev_rev, author, None)
self.last = item

View File

@ -67,7 +67,8 @@ class CCVSRepository(BaseCVSRepository):
entry.path = path
try:
rcsparse.parse(open(path, 'rb'), InfoSink(entry, rev, alltags))
entry.log = cvsdb.utf8string(entry.log)
if self.guesser:
entry.log = self.guesser.utf8(entry.log)
except IOError, e:
entry.errors.append("rcsparse error: %s" % e)
except RuntimeError, e:

View File

@ -40,7 +40,11 @@ class ContentMagic:
if have_chardet:
# Try chardet
try:
charset = chardet.detect(content)
# Only detect on first 256KB if content is longer
if len(content) > 256*1024:
charset = chardet.detect(content[0:256*1024])
else:
charset = chardet.detect(content)
if charset and charset['encoding']:
charset = charset['encoding']
if charset == 'MacCyrillic':