diff --git a/bin/cvsdbadmin b/bin/cvsdbadmin index e580c94b..5b8b51f9 100755 --- a/bin/cvsdbadmin +++ b/bin/cvsdbadmin @@ -47,12 +47,12 @@ import vclib.ccvs from stat import * -def UpdateFile(db, repository, path, update, latest_checkin, quiet_level): +def UpdateFile(db, repository, path, update, latest_checkin, quiet_level, encoding = None): try: if update: mtime = os.stat(repository.rcsfile(path, 1))[ST_MTIME] if mtime < latest_checkin: - return + return commit_list = cvsdb.GetUnrecordedCommitList(repository, path, db) else: commit_list = cvsdb.GetCommitListFromRCSFile(repository, path) @@ -75,6 +75,8 @@ def UpdateFile(db, repository, path, update, latest_checkin, quiet_level): ## add the commits into the database for commit in commit_list: + if encoding: + commit.SetFile(commit.GetFile().decode(encoding).encode('utf-8')) db.AddCommit(commit) if printing: sys.stdout.write('.') @@ -84,7 +86,7 @@ def UpdateFile(db, repository, path, update, latest_checkin, quiet_level): def RecurseUpdate(db, repository, directory, update, latest_checkin, - quiet_level): + quiet_level, encoding = None): for entry in repository.listdir(directory, None, {}): path = directory + [entry.name] @@ -93,12 +95,13 @@ def RecurseUpdate(db, repository, directory, update, latest_checkin, if entry.kind is vclib.DIR: RecurseUpdate(db, repository, path, update, latest_checkin, - quiet_level) + quiet_level, encoding) continue if entry.kind is vclib.FILE: UpdateFile(db, repository, path, update, latest_checkin, - quiet_level) + quiet_level, encoding) + def RootPath(path, quiet_level): """Break os path into cvs root path and other parts""" @@ -200,9 +203,10 @@ if __name__ == '__main__': if latest_checkin is None: command = 'rebuild' RecurseUpdate(db, repository, path_parts, - command == 'update', latest_checkin, quiet_level) + command == 'update', latest_checkin, quiet_level, + cfg.options.cvs_ondisk_charset) except KeyboardInterrupt: print print '** break **' - + sys.exit(0) diff --git a/lib/cvsdb.py b/lib/cvsdb.py index b81b9949..0afa5495 100644 --- a/lib/cvsdb.py +++ b/lib/cvsdb.py @@ -1188,6 +1188,7 @@ def ConnectDatabase(cfg, request=None, readonly=0): def ConnectDatabaseReadOnly(cfg, request): return ConnectDatabase(cfg, request, 1) +# Get all commits from rcsfile (CVS) def GetCommitListFromRCSFile(repository, path_parts, revision=None): commit_list = [] @@ -1227,6 +1228,7 @@ def GetCommitListFromRCSFile(repository, path_parts, revision=None): return commit_list +# Get unrecorded commits from rcsfile (CVS) def GetUnrecordedCommitList(repository, path_parts, db): commit_list = GetCommitListFromRCSFile(repository, path_parts) diff --git a/lib/viewvc.py b/lib/viewvc.py index 284966e5..e7750e4e 100644 --- a/lib/viewvc.py +++ b/lib/viewvc.py @@ -3811,11 +3811,11 @@ def build_commit(request, files, max_files, dir_strip, format): return None if my_repos.roottype == 'cvs': # we store UTF8 in the DB - try: where = where.decode('utf-8') - except: pass # FIXME maybe also store "real" non-UTF8 filesystem path in the DB instead of having such setting? - try: where = where.encode(cfg.options.cvs_ondisk_charset) - except: pass + try: + where = where.decode('utf-8').encode(cfg.options.cvs_ondisk_charset) + except: + raise Exception("Invalid string encoding: "+where) path_parts = _path_parts(where) # In CVS, we can actually look at deleted revisions; in Subversion diff --git a/lib/viewvcmagic.py b/lib/viewvcmagic.py index b76236f6..b58b1c91 100644 --- a/lib/viewvcmagic.py +++ b/lib/viewvcmagic.py @@ -35,10 +35,12 @@ class ContentMagic: # returns (utf8_content, charset) def guess_charset(self, content): - # Try to guess with chardet - charset = None - if have_chardet: - # Try chardet + # Try UTF-8 + charset = 'utf-8' + try: content = content.decode('utf-8') + except: charset = None + if charset is None and have_chardet: + # Try to guess with chardet try: # Only detect on first 256KB if content is longer if len(content) > 256*1024: @@ -56,11 +58,6 @@ class ContentMagic: else: content = content.decode(charset) except: charset = None - else: - # Try UTF-8 - charset = 'utf-8' - try: content = content.decode('utf-8') - except: charset = None # Then try to guess primitively if charset is None: for charset in self.encodings: