2006-03-18 05:07:36 +03:00
|
|
|
# -*-python-*-
|
2002-02-12 00:03:31 +03:00
|
|
|
#
|
2013-01-04 23:01:54 +04:00
|
|
|
# Copyright (C) 1999-2013 The ViewCVS Group. All Rights Reserved.
|
2002-02-12 00:03:31 +03:00
|
|
|
#
|
|
|
|
# By using this file, you agree to the terms and conditions set forth in
|
2005-12-17 20:19:28 +03:00
|
|
|
# the LICENSE.html file which can be found at the top level of the ViewVC
|
|
|
|
# distribution or at http://viewvc.org/license-1.html.
|
2002-02-12 00:03:31 +03:00
|
|
|
#
|
2006-03-18 05:07:36 +03:00
|
|
|
# For more information, visit http://viewvc.org/
|
2002-02-12 00:03:31 +03:00
|
|
|
#
|
|
|
|
# -----------------------------------------------------------------------
|
|
|
|
#
|
|
|
|
# This file was originally based on portions of the blame.py script by
|
|
|
|
# Curt Hagenlocher.
|
|
|
|
#
|
|
|
|
# -----------------------------------------------------------------------
|
|
|
|
|
|
|
|
import string
|
2002-02-13 15:17:38 +03:00
|
|
|
import common
|
|
|
|
|
2002-02-12 00:03:31 +03:00
|
|
|
class _TokenStream:
|
2010-05-20 18:56:08 +04:00
|
|
|
token_term = string.whitespace + ";:"
|
|
|
|
try:
|
|
|
|
token_term = frozenset(token_term)
|
|
|
|
except NameError:
|
|
|
|
pass
|
2002-02-12 00:03:31 +03:00
|
|
|
|
|
|
|
# the algorithm is about the same speed for any CHUNK_SIZE chosen.
|
|
|
|
# grab a good-sized chunk, but not too large to overwhelm memory.
|
2002-02-13 15:17:38 +03:00
|
|
|
# note: we use a multiple of a standard block size
|
|
|
|
CHUNK_SIZE = 192 * 512 # about 100k
|
2002-02-12 00:03:31 +03:00
|
|
|
|
2007-04-18 01:07:33 +04:00
|
|
|
# CHUNK_SIZE = 5 # for debugging, make the function grind...
|
2002-02-12 00:03:31 +03:00
|
|
|
|
|
|
|
def __init__(self, file):
|
|
|
|
self.rcsfile = file
|
|
|
|
self.idx = 0
|
|
|
|
self.buf = self.rcsfile.read(self.CHUNK_SIZE)
|
|
|
|
if self.buf == '':
|
|
|
|
raise RuntimeError, 'EOF'
|
|
|
|
|
|
|
|
def get(self):
|
|
|
|
"Get the next token from the RCS file."
|
|
|
|
|
|
|
|
# Note: we can afford to loop within Python, examining individual
|
|
|
|
# characters. For the whitespace and tokens, the number of iterations
|
|
|
|
# is typically quite small. Thus, a simple iterative loop will beat
|
|
|
|
# out more complex solutions.
|
|
|
|
|
|
|
|
buf = self.buf
|
2010-05-20 18:56:08 +04:00
|
|
|
lbuf = len(buf)
|
2002-02-12 00:03:31 +03:00
|
|
|
idx = self.idx
|
|
|
|
|
|
|
|
while 1:
|
2010-05-20 18:56:08 +04:00
|
|
|
if idx == lbuf:
|
2002-02-12 00:03:31 +03:00
|
|
|
buf = self.rcsfile.read(self.CHUNK_SIZE)
|
|
|
|
if buf == '':
|
|
|
|
# signal EOF by returning None as the token
|
2007-04-18 01:07:33 +04:00
|
|
|
del self.buf # so we fail if get() is called again
|
2002-02-12 00:03:31 +03:00
|
|
|
return None
|
2010-05-20 18:56:08 +04:00
|
|
|
lbuf = len(buf)
|
2002-02-12 00:03:31 +03:00
|
|
|
idx = 0
|
|
|
|
|
|
|
|
if buf[idx] not in string.whitespace:
|
|
|
|
break
|
|
|
|
|
|
|
|
idx = idx + 1
|
|
|
|
|
2010-05-20 18:56:08 +04:00
|
|
|
if buf[idx] in ';:':
|
2002-02-12 00:03:31 +03:00
|
|
|
self.buf = buf
|
|
|
|
self.idx = idx + 1
|
Resolve some inconsistency in the rcsparse library, concerning the parsing of
colons, which has an impact on symbols and locks.
Fixes a bug where the texttools backend would fail to parse RCS files
containing locks.
Fixes inelegance in the default backend, which no longer needs to override a
rather large chunk of the common code.
* lib/vclib/ccvs/rcsparse/default.py
(_TokenStream.token_term): Add colon to set.
(_TokenStream.get): Handle colon as a discrete token, just like semicolon.
(Parser.parse_rcs_admin): Remove override definition entirely.
* lib/vclib/ccvs/rcsparse/common.py
(_Parser.parse_rcs_admin): Fix "locks" clause to be consistent with colon
being a token.
Lastly, the tparse backend: the changes made here are roughly congruent to
those made to the default backend, however they are completely untested, since
the current tparse in the repository seems broken - it dies due to memory
corruption.
* tparse/tparse.cpp (Token_term): Add colon to set.
(TokenParser::get): Handle colon as a discrete token, just like semicolon.
(tparseParser::parse_rcs_admin): Remove bizarre code which attempted to
handle both the case of being, and not being, a discrete token via runtime
detection.
git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@1371 8cb11bc2-c004-0410-86c3-e597b4017df7
2006-05-27 03:18:46 +04:00
|
|
|
return buf[idx]
|
2002-02-12 00:03:31 +03:00
|
|
|
|
|
|
|
if buf[idx] != '@':
|
|
|
|
end = idx + 1
|
|
|
|
token = ''
|
|
|
|
while 1:
|
|
|
|
# find token characters in the current buffer
|
2010-05-20 18:56:08 +04:00
|
|
|
while end < lbuf and buf[end] not in self.token_term:
|
2002-02-12 00:03:31 +03:00
|
|
|
end = end + 1
|
|
|
|
token = token + buf[idx:end]
|
|
|
|
|
2010-05-20 18:56:08 +04:00
|
|
|
if end < lbuf:
|
2002-02-12 00:03:31 +03:00
|
|
|
# we stopped before the end, so we have a full token
|
|
|
|
idx = end
|
|
|
|
break
|
|
|
|
|
|
|
|
# we stopped at the end of the buffer, so we may have a partial token
|
|
|
|
buf = self.rcsfile.read(self.CHUNK_SIZE)
|
2010-05-20 18:56:08 +04:00
|
|
|
lbuf = len(buf)
|
2002-02-12 00:03:31 +03:00
|
|
|
idx = end = 0
|
|
|
|
|
|
|
|
self.buf = buf
|
|
|
|
self.idx = idx
|
|
|
|
return token
|
|
|
|
|
|
|
|
# a "string" which starts with the "@" character. we'll skip it when we
|
|
|
|
# search for content.
|
|
|
|
idx = idx + 1
|
|
|
|
|
|
|
|
chunks = [ ]
|
|
|
|
|
|
|
|
while 1:
|
2010-05-20 18:56:08 +04:00
|
|
|
if idx == lbuf:
|
2002-02-12 00:03:31 +03:00
|
|
|
idx = 0
|
|
|
|
buf = self.rcsfile.read(self.CHUNK_SIZE)
|
|
|
|
if buf == '':
|
|
|
|
raise RuntimeError, 'EOF'
|
2010-05-20 18:56:08 +04:00
|
|
|
lbuf = len(buf)
|
2002-02-12 00:03:31 +03:00
|
|
|
i = string.find(buf, '@', idx)
|
|
|
|
if i == -1:
|
|
|
|
chunks.append(buf[idx:])
|
2010-05-20 18:56:08 +04:00
|
|
|
idx = lbuf
|
2002-02-12 00:03:31 +03:00
|
|
|
continue
|
2010-05-20 18:56:08 +04:00
|
|
|
if i == lbuf - 1:
|
2002-02-12 00:03:31 +03:00
|
|
|
chunks.append(buf[idx:i])
|
|
|
|
idx = 0
|
|
|
|
buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
|
|
|
|
if buf == '@':
|
|
|
|
raise RuntimeError, 'EOF'
|
2010-05-20 18:56:08 +04:00
|
|
|
lbuf = len(buf)
|
2002-02-12 00:03:31 +03:00
|
|
|
continue
|
|
|
|
if buf[i + 1] == '@':
|
|
|
|
chunks.append(buf[idx:i+1])
|
|
|
|
idx = i + 2
|
|
|
|
continue
|
|
|
|
|
|
|
|
chunks.append(buf[idx:i])
|
|
|
|
|
|
|
|
self.buf = buf
|
|
|
|
self.idx = i + 1
|
|
|
|
|
|
|
|
return string.join(chunks, '')
|
|
|
|
|
|
|
|
# _get = get
|
|
|
|
# def get(self):
|
|
|
|
token = self._get()
|
|
|
|
print 'T:', `token`
|
|
|
|
return token
|
|
|
|
|
|
|
|
def match(self, match):
|
|
|
|
"Try to match the next token from the input buffer."
|
|
|
|
|
|
|
|
token = self.get()
|
|
|
|
if token != match:
|
2007-11-18 23:56:26 +03:00
|
|
|
raise common.RCSExpected(token, match)
|
2002-02-12 00:03:31 +03:00
|
|
|
|
|
|
|
def unget(self, token):
|
|
|
|
"Put this token back, for the next get() to return."
|
|
|
|
|
|
|
|
# Override the class' .get method with a function which clears the
|
|
|
|
# overridden method then returns the pushed token. Since this function
|
|
|
|
# will not be looked up via the class mechanism, it should be a "normal"
|
|
|
|
# function, meaning it won't have "self" automatically inserted.
|
|
|
|
# Therefore, we need to pass both self and the token thru via defaults.
|
|
|
|
|
|
|
|
# note: we don't put this into the input buffer because it may have been
|
|
|
|
# @-unescaped already.
|
|
|
|
|
|
|
|
def give_it_back(self=self, token=token):
|
|
|
|
del self.get
|
|
|
|
return token
|
|
|
|
|
|
|
|
self.get = give_it_back
|
|
|
|
|
|
|
|
def mget(self, count):
|
|
|
|
"Return multiple tokens. 'next' is at the end."
|
|
|
|
result = [ ]
|
|
|
|
for i in range(count):
|
|
|
|
result.append(self.get())
|
|
|
|
result.reverse()
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
2002-02-13 15:17:38 +03:00
|
|
|
class Parser(common._Parser):
  """RCS parser using the pure-Python _TokenStream tokenizer."""
  # _Parser drives the parse; it only needs to know which token-stream
  # implementation to instantiate over the RCS file.
  stream_class = _TokenStream