168 lines
4.5 KiB
Python
168 lines
4.5 KiB
Python
|
# -*-python-*-
|
||
|
#
|
||
|
# Copyright (C) 1999-2007 The ViewCVS Group. All Rights Reserved.
|
||
|
#
|
||
|
# By using this file, you agree to the terms and conditions set forth in
|
||
|
# the LICENSE.html file which can be found at the top level of the ViewVC
|
||
|
# distribution or at http://viewvc.org/license-1.html.
|
||
|
#
|
||
|
# For more information, visit http://viewvc.org/
|
||
|
#
|
||
|
# -----------------------------------------------------------------------
|
||
|
#
|
||
|
# This file was originally based on portions of the blame.py script by
|
||
|
# Curt Hagenlocher.
|
||
|
#
|
||
|
# -----------------------------------------------------------------------
|
||
|
|
||
|
import string
|
||
|
import common
|
||
|
|
||
|
class _TokenStream:
|
||
|
token_term = string.whitespace + ';:'
|
||
|
|
||
|
# the algorithm is about the same speed for any CHUNK_SIZE chosen.
|
||
|
# grab a good-sized chunk, but not too large to overwhelm memory.
|
||
|
# note: we use a multiple of a standard block size
|
||
|
CHUNK_SIZE = 192 * 512 # about 100k
|
||
|
|
||
|
# CHUNK_SIZE = 5 # for debugging, make the function grind...
|
||
|
|
||
|
def __init__(self, file):
|
||
|
self.rcsfile = file
|
||
|
self.idx = 0
|
||
|
self.buf = self.rcsfile.read(self.CHUNK_SIZE)
|
||
|
if self.buf == '':
|
||
|
raise RuntimeError, 'EOF'
|
||
|
|
||
|
def get(self):
|
||
|
"Get the next token from the RCS file."
|
||
|
|
||
|
# Note: we can afford to loop within Python, examining individual
|
||
|
# characters. For the whitespace and tokens, the number of iterations
|
||
|
# is typically quite small. Thus, a simple iterative loop will beat
|
||
|
# out more complex solutions.
|
||
|
|
||
|
buf = self.buf
|
||
|
idx = self.idx
|
||
|
|
||
|
while 1:
|
||
|
if idx == len(buf):
|
||
|
buf = self.rcsfile.read(self.CHUNK_SIZE)
|
||
|
if buf == '':
|
||
|
# signal EOF by returning None as the token
|
||
|
del self.buf # so we fail if get() is called again
|
||
|
return None
|
||
|
idx = 0
|
||
|
|
||
|
if buf[idx] not in string.whitespace:
|
||
|
break
|
||
|
|
||
|
idx = idx + 1
|
||
|
|
||
|
if buf[idx] == ';' or buf[idx] == ':':
|
||
|
self.buf = buf
|
||
|
self.idx = idx + 1
|
||
|
return buf[idx]
|
||
|
|
||
|
if buf[idx] != '@':
|
||
|
end = idx + 1
|
||
|
token = ''
|
||
|
while 1:
|
||
|
# find token characters in the current buffer
|
||
|
while end < len(buf) and buf[end] not in self.token_term:
|
||
|
end = end + 1
|
||
|
token = token + buf[idx:end]
|
||
|
|
||
|
if end < len(buf):
|
||
|
# we stopped before the end, so we have a full token
|
||
|
idx = end
|
||
|
break
|
||
|
|
||
|
# we stopped at the end of the buffer, so we may have a partial token
|
||
|
buf = self.rcsfile.read(self.CHUNK_SIZE)
|
||
|
idx = end = 0
|
||
|
|
||
|
self.buf = buf
|
||
|
self.idx = idx
|
||
|
return token
|
||
|
|
||
|
# a "string" which starts with the "@" character. we'll skip it when we
|
||
|
# search for content.
|
||
|
idx = idx + 1
|
||
|
|
||
|
chunks = [ ]
|
||
|
|
||
|
while 1:
|
||
|
if idx == len(buf):
|
||
|
idx = 0
|
||
|
buf = self.rcsfile.read(self.CHUNK_SIZE)
|
||
|
if buf == '':
|
||
|
raise RuntimeError, 'EOF'
|
||
|
i = string.find(buf, '@', idx)
|
||
|
if i == -1:
|
||
|
chunks.append(buf[idx:])
|
||
|
idx = len(buf)
|
||
|
continue
|
||
|
if i == len(buf) - 1:
|
||
|
chunks.append(buf[idx:i])
|
||
|
idx = 0
|
||
|
buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
|
||
|
if buf == '@':
|
||
|
raise RuntimeError, 'EOF'
|
||
|
continue
|
||
|
if buf[i + 1] == '@':
|
||
|
chunks.append(buf[idx:i+1])
|
||
|
idx = i + 2
|
||
|
continue
|
||
|
|
||
|
chunks.append(buf[idx:i])
|
||
|
|
||
|
self.buf = buf
|
||
|
self.idx = i + 1
|
||
|
|
||
|
return string.join(chunks, '')
|
||
|
|
||
|
# _get = get
|
||
|
# def get(self):
|
||
|
token = self._get()
|
||
|
print 'T:', `token`
|
||
|
return token
|
||
|
|
||
|
def match(self, match):
|
||
|
"Try to match the next token from the input buffer."
|
||
|
|
||
|
token = self.get()
|
||
|
if token != match:
|
||
|
raise common.RCSExpected(token, match)
|
||
|
|
||
|
def unget(self, token):
|
||
|
"Put this token back, for the next get() to return."
|
||
|
|
||
|
# Override the class' .get method with a function which clears the
|
||
|
# overridden method then returns the pushed token. Since this function
|
||
|
# will not be looked up via the class mechanism, it should be a "normal"
|
||
|
# function, meaning it won't have "self" automatically inserted.
|
||
|
# Therefore, we need to pass both self and the token thru via defaults.
|
||
|
|
||
|
# note: we don't put this into the input buffer because it may have been
|
||
|
# @-unescaped already.
|
||
|
|
||
|
def give_it_back(self=self, token=token):
|
||
|
del self.get
|
||
|
return token
|
||
|
|
||
|
self.get = give_it_back
|
||
|
|
||
|
def mget(self, count):
|
||
|
"Return multiple tokens. 'next' is at the end."
|
||
|
result = [ ]
|
||
|
for i in range(count):
|
||
|
result.append(self.get())
|
||
|
result.reverse()
|
||
|
return result
|
||
|
|
||
|
|
||
|
class Parser(common._Parser):
    """Parser variant that uses the pure-Python _TokenStream tokenizer."""

    # Tokenizer class for this parser; presumably instantiated by
    # common._Parser -- confirm against that module.
    stream_class = _TokenStream
|