Bug 88900 - Force cvs_ondisk_charset

git-svn-id: svn://svn.office.custis.ru/3rdparty/viewvc.org/trunk@1451 6955db30-a419-402b-8a0d-67ecbb4d7f56
remotes/github/custis
vfilippov 2011-10-27 13:46:05 +00:00 committed by Vitaliy Filippov
parent 7d06bd21bc
commit 24b979d9e2
4 changed files with 23 additions and 20 deletions

View File

@ -47,12 +47,12 @@ import vclib.ccvs
from stat import *
def UpdateFile(db, repository, path, update, latest_checkin, quiet_level):
def UpdateFile(db, repository, path, update, latest_checkin, quiet_level, encoding = None):
try:
if update:
mtime = os.stat(repository.rcsfile(path, 1))[ST_MTIME]
if mtime < latest_checkin:
return
return
commit_list = cvsdb.GetUnrecordedCommitList(repository, path, db)
else:
commit_list = cvsdb.GetCommitListFromRCSFile(repository, path)
@ -75,6 +75,8 @@ def UpdateFile(db, repository, path, update, latest_checkin, quiet_level):
## add the commits into the database
for commit in commit_list:
if encoding:
commit.SetFile(commit.GetFile().decode(encoding).encode('utf-8'))
db.AddCommit(commit)
if printing:
sys.stdout.write('.')
@ -84,7 +86,7 @@ def UpdateFile(db, repository, path, update, latest_checkin, quiet_level):
def RecurseUpdate(db, repository, directory, update, latest_checkin,
quiet_level):
quiet_level, encoding = None):
for entry in repository.listdir(directory, None, {}):
path = directory + [entry.name]
@ -93,12 +95,13 @@ def RecurseUpdate(db, repository, directory, update, latest_checkin,
if entry.kind is vclib.DIR:
RecurseUpdate(db, repository, path, update, latest_checkin,
quiet_level)
quiet_level, encoding)
continue
if entry.kind is vclib.FILE:
UpdateFile(db, repository, path, update, latest_checkin,
quiet_level)
quiet_level, encoding)
def RootPath(path, quiet_level):
"""Break os path into cvs root path and other parts"""
@ -200,9 +203,10 @@ if __name__ == '__main__':
if latest_checkin is None:
command = 'rebuild'
RecurseUpdate(db, repository, path_parts,
command == 'update', latest_checkin, quiet_level)
command == 'update', latest_checkin, quiet_level,
cfg.options.cvs_ondisk_charset)
except KeyboardInterrupt:
print
print '** break **'
sys.exit(0)

View File

@ -1188,6 +1188,7 @@ def ConnectDatabase(cfg, request=None, readonly=0):
def ConnectDatabaseReadOnly(cfg, request):
return ConnectDatabase(cfg, request, 1)
# Get all commits from rcsfile (CVS)
def GetCommitListFromRCSFile(repository, path_parts, revision=None):
commit_list = []
@ -1227,6 +1228,7 @@ def GetCommitListFromRCSFile(repository, path_parts, revision=None):
return commit_list
# Get unrecorded commits from rcsfile (CVS)
def GetUnrecordedCommitList(repository, path_parts, db):
commit_list = GetCommitListFromRCSFile(repository, path_parts)

View File

@ -3811,11 +3811,11 @@ def build_commit(request, files, max_files, dir_strip, format):
return None
if my_repos.roottype == 'cvs':
# we store UTF8 in the DB
try: where = where.decode('utf-8')
except: pass
# FIXME: maybe also store the "real" non-UTF8 filesystem path in the DB instead of relying on this setting?
try: where = where.encode(cfg.options.cvs_ondisk_charset)
except: pass
try:
where = where.decode('utf-8').encode(cfg.options.cvs_ondisk_charset)
except:
raise Exception("Invalid string encoding: "+where)
path_parts = _path_parts(where)
# In CVS, we can actually look at deleted revisions; in Subversion

View File

@ -35,10 +35,12 @@ class ContentMagic:
# returns (utf8_content, charset)
def guess_charset(self, content):
# Try to guess with chardet
charset = None
if have_chardet:
# Try chardet
# Try UTF-8
charset = 'utf-8'
try: content = content.decode('utf-8')
except: charset = None
if charset is None and have_chardet:
# Try to guess with chardet
try:
# Only detect on first 256KB if content is longer
if len(content) > 256*1024:
@ -56,11 +58,6 @@ class ContentMagic:
else:
content = content.decode(charset)
except: charset = None
else:
# Try UTF-8
charset = 'utf-8'
try: content = content.decode('utf-8')
except: charset = None
# Then try to guess primitively
if charset is None:
for charset in self.encodings: