diff --git a/bin/cvsdbadmin b/bin/cvsdbadmin index 45189ce0..7aa031c4 100755 --- a/bin/cvsdbadmin +++ b/bin/cvsdbadmin @@ -195,7 +195,7 @@ if __name__ == '__main__': if command in ('rebuild', 'update'): repository = vclib.ccvs.CVSRepository(None, rootpath, None, - cfg.utilities, 0) + cfg.utilities, 0, cfg.guesser()) latest_checkin = db.GetLatestCheckinTime(repository) if latest_checkin is None: command = 'rebuild' diff --git a/lib/cvsdb.py b/lib/cvsdb.py index a614c5e0..d137329d 100644 --- a/lib/cvsdb.py +++ b/lib/cvsdb.py @@ -373,6 +373,9 @@ class CheckinDatabase: if self.index_content: sphcur = self.sphinx.cursor() content = commit.GetContent() + # Sphinx has 4 MB text field limit + if len(content) >= 4*1024*1024: + content = content[0:4*1024*1024] props['ci_when'] = str(int(commit.GetTime() or 0)) if len(content): props['content'] = content @@ -461,7 +464,7 @@ class CheckinDatabase: elif query.sort == 'date_rev': order_by = 'ORDER BY `ci_when` ASC, `relevance` DESC' else: # /* if query.sort == 'relevance' */ - order_by = 'ORDER BY `relevance` DESC' + order_by = 'ORDER BY `relevance` DESC, `ci_when` DESC' conditions = string.join((i for i in condList if i), " AND ") conditions = conditions and "WHERE %s" % conditions @@ -618,7 +621,7 @@ class CheckinDatabase: 'limit': 200, 'before_match': '', 'after_match': '', - 'chunk_separator': ' ... ', + 'chunk_separator': ' ...\n', } preformatted_mime = 'text/(?!html|xml).*' snippets = {} @@ -700,7 +703,7 @@ class CheckinDatabase: return None commits_table = self._version >= 1 and 'commits' or 'checkins' - sql = "SELECT * FROM %s WHERE "\ + sql = "SELECT whoid FROM %s WHERE "\ " repositoryid=%%s "\ " AND dirid=%%s"\ " AND fileid=%%s"\ @@ -711,9 +714,7 @@ class CheckinDatabase: cursor = self.db.cursor() cursor.execute(sql, sql_args) try: - (ci_type, ci_when, who_id, repository_id, - dir_id, file_id, revision, sticky_tag, branch_id, - plus_count, minus_count, description_id) = cursor.fetchone() + who_id, = cursor.fetchone() except TypeError: return None diff --git a/lib/vclib/ccvs/__init__.py b/lib/vclib/ccvs/__init__.py index 374eb522..f54c47b8 100644 --- a/lib/vclib/ccvs/__init__.py +++ b/lib/vclib/ccvs/__init__.py @@ -33,11 +33,11 @@ def expand_root_parent(parent_path): return roots -def CVSRepository(name, rootpath, authorizer, utilities, use_rcsparse): +def CVSRepository(name, rootpath, authorizer, utilities, use_rcsparse, charset_guesser = None): rootpath = canonicalize_rootpath(rootpath) if use_rcsparse: import ccvs - return ccvs.CCVSRepository(name, rootpath, authorizer, utilities) + return ccvs.CCVSRepository(name, rootpath, authorizer, utilities, charset_guesser) else: import bincvs - return bincvs.BinCVSRepository(name, rootpath, authorizer, utilities) + return bincvs.BinCVSRepository(name, rootpath, authorizer, utilities, charset_guesser) diff --git a/lib/vclib/ccvs/bincvs.py b/lib/vclib/ccvs/bincvs.py index fee243fb..3dce52cc 100644 --- a/lib/vclib/ccvs/bincvs.py +++ b/lib/vclib/ccvs/bincvs.py @@ -29,7 +29,7 @@ import compat import popen class BaseCVSRepository(vclib.Repository): - def __init__(self, name, rootpath, authorizer, utilities): + def __init__(self, name, rootpath, authorizer, utilities, charset_guesser = None): if not os.path.isdir(rootpath): raise vclib.ReposNotFound(name) @@ -37,6 +37,7 @@ class BaseCVSRepository(vclib.Repository): self.rootpath = rootpath self.auth = authorizer self.utilities = utilities + self.guesser = charset_guesser # See if this repository is even viewable, authz-wise. if not vclib.check_root_access(self): @@ -156,7 +157,7 @@ class BinCVSRepository(BaseCVSRepository): filename, default_branch, tags, lockinfo, msg, eof = _parse_log_header(fp) revs = [] while not eof: - revision, eof = _parse_log_entry(fp) + revision, eof = _parse_log_entry(fp, self.guesser) if revision: revs.append(revision) revs = _file_log(revs, tags, lockinfo, default_branch, rev) @@ -246,7 +247,7 @@ class BinCVSRepository(BaseCVSRepository): for entry in entries: if vclib.check_path_access(self, path_parts + [entry.name], None, rev): entries_to_fetch.append(entry) - alltags = _get_logs(self, path_parts, entries_to_fetch, rev, subdirs) + alltags = _get_logs(self, path_parts, entries_to_fetch, rev, subdirs, self.guesser) branches = options['cvs_branches'] = [] tags = options['cvs_tags'] = [] for name, rev in alltags.items(): @@ -292,7 +293,7 @@ class BinCVSRepository(BaseCVSRepository): # Retrieve revision objects revs = [] while not eof: - revision, eof = _parse_log_entry(fp) + revision, eof = _parse_log_entry(fp, self.guesser) if revision: revs.append(revision) @@ -783,7 +784,7 @@ _re_log_info = re.compile(r'^date:\s+([^;]+);' r'(\s+commitid:\s+([a-zA-Z0-9]+))?\n$') ### _re_rev should be updated to extract the "locked" flag _re_rev = re.compile(r'^revision\s+([0-9.]+).*') -def _parse_log_entry(fp): +def _parse_log_entry(fp, guesser): """Parse a single log entry. On entry, fp should point to the first line of the entry (the "revision" @@ -849,7 +850,8 @@ def _parse_log_entry(fp): raise ValueError, 'invalid year' date = compat.timegm(tm) - log = cvsdb.utf8string(log) + if guesser: + log = guesser.utf8(log) return Revision(rev, date, # author, state, lines changed @@ -957,7 +959,7 @@ def _file_log(revs, taginfo, lockinfo, cur_branch, filter): return filtered_revs -def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs): +def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs, guesser): alltags = { # all the tags seen in the files of this dir 'MAIN' : '', 'HEAD' : '1.1' @@ -1062,7 +1064,7 @@ def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs): while not eof: # fetch one of the log entries - entry, eof = _parse_log_entry(rlog) + entry, eof = _parse_log_entry(rlog, guesser) if not entry: # parsing error diff --git a/lib/vclib/ccvs/blame.py b/lib/vclib/ccvs/blame.py index c748ade5..4714da6f 100644 --- a/lib/vclib/ccvs/blame.py +++ b/lib/vclib/ccvs/blame.py @@ -415,7 +415,7 @@ class CVSParser(rcsparse.Sink): class BlameSource: - def __init__(self, rcs_file, opt_rev=None): + def __init__(self, rcs_file, opt_rev=None, charset_guesser=None): # Parse the CVS file parser = CVSParser() revision = parser.parse_cvs_file(rcs_file, opt_rev) @@ -429,6 +429,7 @@ class BlameSource: self.lines = lines self.num_lines = count self.parser = parser + self.guesser = charset_guesser # keep track of where we are during an iteration self.idx = -1 @@ -447,7 +448,10 @@ class BlameSource: prev_rev = self.parser.prev_revision.get(rev) line_number = idx + 1 author = self.parser.revision_author[rev] - thisline = cvsdb.utf8string(self.lines[idx]) + + if self.guesser: + thisline = self.guesser.utf8(self.lines[idx]) + ### TODO: Put a real date in here. item = vclib.Annotation(thisline, line_number, rev, prev_rev, author, None) self.last = item diff --git a/lib/vclib/ccvs/ccvs.py b/lib/vclib/ccvs/ccvs.py index 9a9979b0..28dc46ee 100644 --- a/lib/vclib/ccvs/ccvs.py +++ b/lib/vclib/ccvs/ccvs.py @@ -67,7 +67,8 @@ class CCVSRepository(BaseCVSRepository): entry.path = path try: rcsparse.parse(open(path, 'rb'), InfoSink(entry, rev, alltags)) - entry.log = cvsdb.utf8string(entry.log) + if self.guesser: + entry.log = self.guesser.utf8(entry.log) except IOError, e: entry.errors.append("rcsparse error: %s" % e) except RuntimeError, e: diff --git a/lib/viewvcmagic.py b/lib/viewvcmagic.py index 6b4e72ce..b76236f6 100644 --- a/lib/viewvcmagic.py +++ b/lib/viewvcmagic.py @@ -40,7 +40,11 @@ class ContentMagic: if have_chardet: # Try chardet try: - charset = chardet.detect(content) + # Only detect on first 256KB if content is longer + if len(content) > 256*1024: + charset = chardet.detect(content[0:256*1024]) + else: + charset = chardet.detect(content) if charset and charset['encoding']: charset = charset['encoding'] if charset == 'MacCyrillic':