Bug 82651 - Remove the rest of the ugly UTF-8 hacks
git-svn-id: svn://svn.office.custis.ru/3rdparty/viewvc.org/trunk@1394 6955db30-a419-402b-8a0d-67ecbb4d7f56
remotes/github/custis
parent
8dc02448bc
commit
ecadbf9fd3
|
@ -195,7 +195,7 @@ if __name__ == '__main__':
|
|||
|
||||
if command in ('rebuild', 'update'):
|
||||
repository = vclib.ccvs.CVSRepository(None, rootpath, None,
|
||||
cfg.utilities, 0)
|
||||
cfg.utilities, 0, cfg.guesser())
|
||||
latest_checkin = db.GetLatestCheckinTime(repository)
|
||||
if latest_checkin is None:
|
||||
command = 'rebuild'
|
||||
|
|
13
lib/cvsdb.py
13
lib/cvsdb.py
|
@ -373,6 +373,9 @@ class CheckinDatabase:
|
|||
if self.index_content:
|
||||
sphcur = self.sphinx.cursor()
|
||||
content = commit.GetContent()
|
||||
# Sphinx has 4 MB text field limit
|
||||
if len(content) >= 4*1024*1024:
|
||||
content = content[0:4*1024*1024]
|
||||
props['ci_when'] = str(int(commit.GetTime() or 0))
|
||||
if len(content):
|
||||
props['content'] = content
|
||||
|
@ -461,7 +464,7 @@ class CheckinDatabase:
|
|||
elif query.sort == 'date_rev':
|
||||
order_by = 'ORDER BY `ci_when` ASC, `relevance` DESC'
|
||||
else: # /* if query.sort == 'relevance' */
|
||||
order_by = 'ORDER BY `relevance` DESC'
|
||||
order_by = 'ORDER BY `relevance` DESC, `ci_when` DESC'
|
||||
|
||||
conditions = string.join((i for i in condList if i), " AND ")
|
||||
conditions = conditions and "WHERE %s" % conditions
|
||||
|
@ -618,7 +621,7 @@ class CheckinDatabase:
|
|||
'limit': 200,
|
||||
'before_match': '<span style="color:red">',
|
||||
'after_match': '</span>',
|
||||
'chunk_separator': ' ... ',
|
||||
'chunk_separator': ' ...\n',
|
||||
}
|
||||
preformatted_mime = 'text/(?!html|xml).*'
|
||||
snippets = {}
|
||||
|
@ -700,7 +703,7 @@ class CheckinDatabase:
|
|||
return None
|
||||
|
||||
commits_table = self._version >= 1 and 'commits' or 'checkins'
|
||||
sql = "SELECT * FROM %s WHERE "\
|
||||
sql = "SELECT whoid FROM %s WHERE "\
|
||||
" repositoryid=%%s "\
|
||||
" AND dirid=%%s"\
|
||||
" AND fileid=%%s"\
|
||||
|
@ -711,9 +714,7 @@ class CheckinDatabase:
|
|||
cursor = self.db.cursor()
|
||||
cursor.execute(sql, sql_args)
|
||||
try:
|
||||
(ci_type, ci_when, who_id, repository_id,
|
||||
dir_id, file_id, revision, sticky_tag, branch_id,
|
||||
plus_count, minus_count, description_id) = cursor.fetchone()
|
||||
who_id, = cursor.fetchone()
|
||||
except TypeError:
|
||||
return None
|
||||
|
||||
|
|
|
@ -33,11 +33,11 @@ def expand_root_parent(parent_path):
|
|||
return roots
|
||||
|
||||
|
||||
def CVSRepository(name, rootpath, authorizer, utilities, use_rcsparse):
|
||||
def CVSRepository(name, rootpath, authorizer, utilities, use_rcsparse, charset_guesser = None):
|
||||
rootpath = canonicalize_rootpath(rootpath)
|
||||
if use_rcsparse:
|
||||
import ccvs
|
||||
return ccvs.CCVSRepository(name, rootpath, authorizer, utilities)
|
||||
return ccvs.CCVSRepository(name, rootpath, authorizer, utilities, charset_guesser)
|
||||
else:
|
||||
import bincvs
|
||||
return bincvs.BinCVSRepository(name, rootpath, authorizer, utilities)
|
||||
return bincvs.BinCVSRepository(name, rootpath, authorizer, utilities, charset_guesser)
|
||||
|
|
|
@ -29,7 +29,7 @@ import compat
|
|||
import popen
|
||||
|
||||
class BaseCVSRepository(vclib.Repository):
|
||||
def __init__(self, name, rootpath, authorizer, utilities):
|
||||
def __init__(self, name, rootpath, authorizer, utilities, charset_guesser = None):
|
||||
if not os.path.isdir(rootpath):
|
||||
raise vclib.ReposNotFound(name)
|
||||
|
||||
|
@ -37,6 +37,7 @@ class BaseCVSRepository(vclib.Repository):
|
|||
self.rootpath = rootpath
|
||||
self.auth = authorizer
|
||||
self.utilities = utilities
|
||||
self.guesser = charset_guesser
|
||||
|
||||
# See if this repository is even viewable, authz-wise.
|
||||
if not vclib.check_root_access(self):
|
||||
|
@ -156,7 +157,7 @@ class BinCVSRepository(BaseCVSRepository):
|
|||
filename, default_branch, tags, lockinfo, msg, eof = _parse_log_header(fp)
|
||||
revs = []
|
||||
while not eof:
|
||||
revision, eof = _parse_log_entry(fp)
|
||||
revision, eof = _parse_log_entry(fp, self.guesser)
|
||||
if revision:
|
||||
revs.append(revision)
|
||||
revs = _file_log(revs, tags, lockinfo, default_branch, rev)
|
||||
|
@ -246,7 +247,7 @@ class BinCVSRepository(BaseCVSRepository):
|
|||
for entry in entries:
|
||||
if vclib.check_path_access(self, path_parts + [entry.name], None, rev):
|
||||
entries_to_fetch.append(entry)
|
||||
alltags = _get_logs(self, path_parts, entries_to_fetch, rev, subdirs)
|
||||
alltags = _get_logs(self, path_parts, entries_to_fetch, rev, subdirs, self.guesser)
|
||||
branches = options['cvs_branches'] = []
|
||||
tags = options['cvs_tags'] = []
|
||||
for name, rev in alltags.items():
|
||||
|
@ -292,7 +293,7 @@ class BinCVSRepository(BaseCVSRepository):
|
|||
# Retrieve revision objects
|
||||
revs = []
|
||||
while not eof:
|
||||
revision, eof = _parse_log_entry(fp)
|
||||
revision, eof = _parse_log_entry(fp, self.guesser)
|
||||
if revision:
|
||||
revs.append(revision)
|
||||
|
||||
|
@ -783,7 +784,7 @@ _re_log_info = re.compile(r'^date:\s+([^;]+);'
|
|||
r'(\s+commitid:\s+([a-zA-Z0-9]+))?\n$')
|
||||
### _re_rev should be updated to extract the "locked" flag
|
||||
_re_rev = re.compile(r'^revision\s+([0-9.]+).*')
|
||||
def _parse_log_entry(fp):
|
||||
def _parse_log_entry(fp, guesser):
|
||||
"""Parse a single log entry.
|
||||
|
||||
On entry, fp should point to the first line of the entry (the "revision"
|
||||
|
@ -849,7 +850,8 @@ def _parse_log_entry(fp):
|
|||
raise ValueError, 'invalid year'
|
||||
date = compat.timegm(tm)
|
||||
|
||||
log = cvsdb.utf8string(log)
|
||||
if guesser:
|
||||
log = guesser.utf8(log)
|
||||
|
||||
return Revision(rev, date,
|
||||
# author, state, lines changed
|
||||
|
@ -957,7 +959,7 @@ def _file_log(revs, taginfo, lockinfo, cur_branch, filter):
|
|||
|
||||
return filtered_revs
|
||||
|
||||
def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
|
||||
def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs, guesser):
|
||||
alltags = { # all the tags seen in the files of this dir
|
||||
'MAIN' : '',
|
||||
'HEAD' : '1.1'
|
||||
|
@ -1062,7 +1064,7 @@ def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
|
|||
while not eof:
|
||||
|
||||
# fetch one of the log entries
|
||||
entry, eof = _parse_log_entry(rlog)
|
||||
entry, eof = _parse_log_entry(rlog, guesser)
|
||||
|
||||
if not entry:
|
||||
# parsing error
|
||||
|
|
|
@ -415,7 +415,7 @@ class CVSParser(rcsparse.Sink):
|
|||
|
||||
|
||||
class BlameSource:
|
||||
def __init__(self, rcs_file, opt_rev=None):
|
||||
def __init__(self, rcs_file, opt_rev=None, charset_guesser=None):
|
||||
# Parse the CVS file
|
||||
parser = CVSParser()
|
||||
revision = parser.parse_cvs_file(rcs_file, opt_rev)
|
||||
|
@ -429,6 +429,7 @@ class BlameSource:
|
|||
self.lines = lines
|
||||
self.num_lines = count
|
||||
self.parser = parser
|
||||
self.guesser = charset_guesser
|
||||
|
||||
# keep track of where we are during an iteration
|
||||
self.idx = -1
|
||||
|
@ -447,7 +448,10 @@ class BlameSource:
|
|||
prev_rev = self.parser.prev_revision.get(rev)
|
||||
line_number = idx + 1
|
||||
author = self.parser.revision_author[rev]
|
||||
thisline = cvsdb.utf8string(self.lines[idx])
|
||||
|
||||
if self.guesser:
|
||||
thisline = self.guesser.utf8(self.lines[idx])
|
||||
|
||||
### TODO: Put a real date in here.
|
||||
item = vclib.Annotation(thisline, line_number, rev, prev_rev, author, None)
|
||||
self.last = item
|
||||
|
|
|
@ -67,7 +67,8 @@ class CCVSRepository(BaseCVSRepository):
|
|||
entry.path = path
|
||||
try:
|
||||
rcsparse.parse(open(path, 'rb'), InfoSink(entry, rev, alltags))
|
||||
entry.log = cvsdb.utf8string(entry.log)
|
||||
if self.guesser:
|
||||
entry.log = self.guesser.utf8(entry.log)
|
||||
except IOError, e:
|
||||
entry.errors.append("rcsparse error: %s" % e)
|
||||
except RuntimeError, e:
|
||||
|
|
|
@ -40,7 +40,11 @@ class ContentMagic:
|
|||
if have_chardet:
|
||||
# Try chardet
|
||||
try:
|
||||
charset = chardet.detect(content)
|
||||
# Only detect on first 256KB if content is longer
|
||||
if len(content) > 256*1024:
|
||||
charset = chardet.detect(content[0:256*1024])
|
||||
else:
|
||||
charset = chardet.detect(content)
|
||||
if charset and charset['encoding']:
|
||||
charset = charset['encoding']
|
||||
if charset == 'MacCyrillic':
|
||||
|
|
Loading…
Reference in New Issue