Bug 82651 - Remove rest of UTF-8 ugly hacks

git-svn-id: svn://svn.office.custis.ru/3rdparty/viewvc.org/trunk@1394 6955db30-a419-402b-8a0d-67ecbb4d7f56
2011-09-29 12:27:02 +00:00 · 2011-09-29 12:27:02 +00:00 · ecadbf9fd3
parent 8dc02448bc
commit ecadbf9fd3
7 changed files with 34 additions and 22 deletions
--- a/bin/cvsdbadmin
+++ b/bin/cvsdbadmin
@ -195,7 +195,7 @@ if __name__ == '__main__':

        if command in ('rebuild', 'update'):
            repository = vclib.ccvs.CVSRepository(None, rootpath, None,
-                                                  cfg.utilities, 0)
+                                                  cfg.utilities, 0, cfg.guesser())
            latest_checkin = db.GetLatestCheckinTime(repository)
            if latest_checkin is None:
                command = 'rebuild'
--- a/lib/cvsdb.py
+++ b/lib/cvsdb.py
@ -373,6 +373,9 @@ class CheckinDatabase:
            if self.index_content:
                sphcur = self.sphinx.cursor()
                content = commit.GetContent()
+                # Sphinx has 4 MB text field limit
+                if len(content) >= 4*1024*1024:
+                    content = content[0:4*1024*1024]
                props['ci_when'] = str(int(commit.GetTime() or 0))
                if len(content):
                    props['content'] = content
@ -461,7 +464,7 @@ class CheckinDatabase:
        elif query.sort == 'date_rev':
            order_by = 'ORDER BY `ci_when` ASC, `relevance` DESC'
        else: # /* if query.sort == 'relevance' */
-            order_by = 'ORDER BY `relevance` DESC'
+            order_by = 'ORDER BY `relevance` DESC, `ci_when` DESC'

        conditions = string.join((i for i in condList if i), " AND ")
        conditions = conditions and "WHERE %s" % conditions
@ -618,7 +621,7 @@ class CheckinDatabase:
                    'limit': 200,
                    'before_match': '<span style="color:red">',
                    'after_match': '</span>',
-                    'chunk_separator': ' ... ',
+                    'chunk_separator': ' ...\n',
                }
                preformatted_mime = 'text/(?!html|xml).*'
                snippets = {}
@ -700,7 +703,7 @@ class CheckinDatabase:
            return None

        commits_table = self._version >= 1 and 'commits' or 'checkins'
-        sql = "SELECT * FROM %s WHERE "\
+        sql = "SELECT whoid FROM %s WHERE "\
              "  repositoryid=%%s "\
              "  AND dirid=%%s"\
              "  AND fileid=%%s"\
@ -711,9 +714,7 @@ class CheckinDatabase:
        cursor = self.db.cursor()
        cursor.execute(sql, sql_args)
        try:
-            (ci_type, ci_when, who_id, repository_id,
-             dir_id, file_id, revision, sticky_tag, branch_id,
-             plus_count, minus_count, description_id) = cursor.fetchone()
+            who_id, = cursor.fetchone()
        except TypeError:
            return None

--- a/lib/vclib/ccvs/init.py
+++ b/lib/vclib/ccvs/init.py
@ -33,11 +33,11 @@ def expand_root_parent(parent_path):
  return roots


-def CVSRepository(name, rootpath, authorizer, utilities, use_rcsparse):
+def CVSRepository(name, rootpath, authorizer, utilities, use_rcsparse, charset_guesser = None):
  rootpath = canonicalize_rootpath(rootpath)
  if use_rcsparse:
    import ccvs
-    return ccvs.CCVSRepository(name, rootpath, authorizer, utilities)
+    return ccvs.CCVSRepository(name, rootpath, authorizer, utilities, charset_guesser)
  else:
    import bincvs
-    return bincvs.BinCVSRepository(name, rootpath, authorizer, utilities)
+    return bincvs.BinCVSRepository(name, rootpath, authorizer, utilities, charset_guesser)
--- a/lib/vclib/ccvs/bincvs.py
+++ b/lib/vclib/ccvs/bincvs.py
@ -29,7 +29,7 @@ import compat
 import popen

 class BaseCVSRepository(vclib.Repository):
-  def __init__(self, name, rootpath, authorizer, utilities):
+  def __init__(self, name, rootpath, authorizer, utilities, charset_guesser = None):
    if not os.path.isdir(rootpath):
      raise vclib.ReposNotFound(name)

@ -37,6 +37,7 @@ class BaseCVSRepository(vclib.Repository):
    self.rootpath = rootpath
    self.auth = authorizer
    self.utilities = utilities
+    self.guesser = charset_guesser

    # See if this repository is even viewable, authz-wise.
    if not vclib.check_root_access(self):
@ -156,7 +157,7 @@ class BinCVSRepository(BaseCVSRepository):
    filename, default_branch, tags, lockinfo, msg, eof = _parse_log_header(fp)
    revs = []
    while not eof:
-      revision, eof = _parse_log_entry(fp)
+      revision, eof = _parse_log_entry(fp, self.guesser)
      if revision:
        revs.append(revision)
    revs = _file_log(revs, tags, lockinfo, default_branch, rev)
@ -246,7 +247,7 @@ class BinCVSRepository(BaseCVSRepository):
    for entry in entries:
      if vclib.check_path_access(self, path_parts + [entry.name], None, rev):
        entries_to_fetch.append(entry)
-    alltags = _get_logs(self, path_parts, entries_to_fetch, rev, subdirs)
+    alltags = _get_logs(self, path_parts, entries_to_fetch, rev, subdirs, self.guesser)
    branches = options['cvs_branches'] = []
    tags = options['cvs_tags'] = []
    for name, rev in alltags.items():
@ -292,7 +293,7 @@ class BinCVSRepository(BaseCVSRepository):
    # Retrieve revision objects
    revs = []
    while not eof:
-      revision, eof = _parse_log_entry(fp)
+      revision, eof = _parse_log_entry(fp, self.guesser)
      if revision:
        revs.append(revision)

@ -783,7 +784,7 @@ _re_log_info = re.compile(r'^date:\s+([^;]+);'
                          r'(\s+commitid:\s+([a-zA-Z0-9]+))?\n$')
 ### _re_rev should be updated to extract the "locked" flag
 _re_rev = re.compile(r'^revision\s+([0-9.]+).*')
-def _parse_log_entry(fp):
+def _parse_log_entry(fp, guesser):
  """Parse a single log entry.

  On entry, fp should point to the first line of the entry (the "revision"
@ -849,7 +850,8 @@ def _parse_log_entry(fp):
      raise ValueError, 'invalid year'
  date = compat.timegm(tm)

-  log = cvsdb.utf8string(log)
+  if guesser:
+    log = guesser.utf8(log)

  return Revision(rev, date,
                  # author, state, lines changed
@ -957,7 +959,7 @@ def _file_log(revs, taginfo, lockinfo, cur_branch, filter):

  return filtered_revs

-def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
+def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs, guesser):
  alltags = {           # all the tags seen in the files of this dir
    'MAIN' : '',
    'HEAD' : '1.1'
@ -1062,7 +1064,7 @@ def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
      while not eof:

        # fetch one of the log entries
-        entry, eof = _parse_log_entry(rlog)
+        entry, eof = _parse_log_entry(rlog, guesser)

        if not entry:
          # parsing error
--- a/lib/vclib/ccvs/blame.py
+++ b/lib/vclib/ccvs/blame.py
@ -415,7 +415,7 @@ class CVSParser(rcsparse.Sink):


 class BlameSource:
-  def __init__(self, rcs_file, opt_rev=None):
+  def __init__(self, rcs_file, opt_rev=None, charset_guesser=None):
    # Parse the CVS file
    parser = CVSParser()
    revision = parser.parse_cvs_file(rcs_file, opt_rev)
@ -429,6 +429,7 @@ class BlameSource:
    self.lines = lines
    self.num_lines = count
    self.parser = parser
+    self.guesser = charset_guesser

    # keep track of where we are during an iteration
    self.idx = -1
@ -447,7 +448,10 @@ class BlameSource:
    prev_rev = self.parser.prev_revision.get(rev)
    line_number = idx + 1
    author = self.parser.revision_author[rev]
-    thisline = cvsdb.utf8string(self.lines[idx])
+    # Default to the raw line so thisline is always bound, even without a guesser.
+    thisline = self.lines[idx]
+    if self.guesser:
+      thisline = self.guesser.utf8(thisline)
    ### TODO:  Put a real date in here.
    item = vclib.Annotation(thisline, line_number, rev, prev_rev, author, None)
    self.last = item
--- a/lib/vclib/ccvs/ccvs.py
+++ b/lib/vclib/ccvs/ccvs.py
@ -67,7 +67,8 @@ class CCVSRepository(BaseCVSRepository):
        entry.path = path
        try:
          rcsparse.parse(open(path, 'rb'), InfoSink(entry, rev, alltags))
-          entry.log = cvsdb.utf8string(entry.log)
+          if self.guesser:
+            entry.log = self.guesser.utf8(entry.log)
        except IOError, e:
          entry.errors.append("rcsparse error: %s" % e)
        except RuntimeError, e:
--- a/lib/viewvcmagic.py
+++ b/lib/viewvcmagic.py
@ -40,7 +40,11 @@ class ContentMagic:
        if have_chardet:
            # Try chardet
            try:
-                charset = chardet.detect(content)
+                # Only detect on first 256KB if content is longer
+                if len(content) > 256*1024:
+                    charset = chardet.detect(content[0:256*1024])
+                else:
+                    charset = chardet.detect(content)
                if charset and charset['encoding']:
                    charset = charset['encoding']
                if charset == 'MacCyrillic':