viewvc-4intranet/lib/vclib/ccvs/rcsparse/default.py

# -*-python-*-
#
# Copyright (C) 1999-2007 The ViewCVS Group. All Rights Reserved.
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewVC
# distribution or at http://viewvc.org/license-1.html.
#
# For more information, visit http://viewvc.org/
#
# -----------------------------------------------------------------------
#
# This file was originally based on portions of the blame.py script by
# Curt Hagenlocher.
#
# -----------------------------------------------------------------------

import string
import common

class _TokenStream:
  token_term = string.whitespace + ';:'

  # the algorithm is about the same speed for any CHUNK_SIZE chosen.
  # grab a good-sized chunk, but not too large to overwhelm memory.
  # note: we use a multiple of a standard block size
  CHUNK_SIZE  = 192 * 512  # about 100k

# CHUNK_SIZE  = 5   # for debugging, make the function grind...

  def __init__(self, file):
    self.rcsfile = file
    self.idx = 0
    self.buf = self.rcsfile.read(self.CHUNK_SIZE)
    if self.buf == '':
      raise RuntimeError, 'EOF'

  def get(self):
    "Get the next token from the RCS file."

    # Note: we can afford to loop within Python, examining individual
    # characters. For the whitespace and tokens, the number of iterations
    # is typically quite small. Thus, a simple iterative loop will beat
    # out more complex solutions.

    buf = self.buf
    idx = self.idx

    while 1:
      if idx == len(buf):
        buf = self.rcsfile.read(self.CHUNK_SIZE)
        if buf == '':
          # signal EOF by returning None as the token
          del self.buf   # so we fail if get() is called again
          return None
        idx = 0

      if buf[idx] not in string.whitespace:
        break

      idx = idx + 1

    if buf[idx] == ';' or buf[idx] == ':':
      self.buf = buf
      self.idx = idx + 1
      return buf[idx]

    if buf[idx] != '@':
      end = idx + 1
      token = ''
      while 1:
        # find token characters in the current buffer
        while end < len(buf) and buf[end] not in self.token_term:
          end = end + 1
        token = token + buf[idx:end]

        if end < len(buf):
          # we stopped before the end, so we have a full token
          idx = end
          break

        # we stopped at the end of the buffer, so we may have a partial token
        buf = self.rcsfile.read(self.CHUNK_SIZE)
        idx = end = 0

      self.buf = buf
      self.idx = idx
      return token

    # a "string" which starts with the "@" character. we'll skip it when we
    # search for content.
    idx = idx + 1

    chunks = [ ]

    while 1:
      if idx == len(buf):
        idx = 0
        buf = self.rcsfile.read(self.CHUNK_SIZE)
        if buf == '':
          raise RuntimeError, 'EOF'
      i = string.find(buf, '@', idx)
      if i == -1:
        chunks.append(buf[idx:])
        idx = len(buf)
        continue
      if i == len(buf) - 1:
        chunks.append(buf[idx:i])
        idx = 0
        buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
        if buf == '@':
          raise RuntimeError, 'EOF'
        continue
      if buf[i + 1] == '@':
        chunks.append(buf[idx:i+1])
        idx = i + 2
        continue

      chunks.append(buf[idx:i])

      self.buf = buf
      self.idx = i + 1

      return string.join(chunks, '')

#  _get = get
#  def get(self):
    token = self._get()
    print 'T:', `token`
    return token

  def match(self, match):
    "Try to match the next token from the input buffer."

    token = self.get()
    if token != match:
      raise common.RCSExpected(token, match)

  def unget(self, token):
    "Put this token back, for the next get() to return."

    # Override the class' .get method with a function which clears the
    # overridden method then returns the pushed token. Since this function
    # will not be looked up via the class mechanism, it should be a "normal"
    # function, meaning it won't have "self" automatically inserted.
    # Therefore, we need to pass both self and the token thru via defaults.

    # note: we don't put this into the input buffer because it may have been
    # @-unescaped already.

    def give_it_back(self=self, token=token):
      del self.get
      return token

    self.get = give_it_back

  def mget(self, count):
    "Return multiple tokens. 'next' is at the end."
    result = [ ]
    for i in range(count):
      result.append(self.get())
    result.reverse()
    return result


class Parser(common._Parser):
  """RCS parser that tokenizes its input with the pure-Python _TokenStream.

  All parsing behavior comes from common._Parser; this subclass only
  selects the token stream implementation.
  """
  # NOTE(review): presumably common._Parser instantiates stream_class on the
  # file being parsed -- confirm against common.py.
  stream_class = _TokenStream
bug 37020 viewvc 1.1.0-beta1 initial commit
git-svn-id: svn://svn.office.custis.ru/3rdparty/viewvc.org/trunk@4 6955db30-a419-402b-8a0d-67ecbb4d7f56
2008-11-11 17:17:41 +03:00
			`# -*-python-*-`
			`#`
			`# Copyright (C) 1999-2007 The ViewCVS Group. All Rights Reserved.`
			`#`
			`# By using this file, you agree to the terms and conditions set forth in`
			`# the LICENSE.html file which can be found at the top level of the ViewVC`
			`# distribution or at http://viewvc.org/license-1.html.`
			`#`
			`# For more information, visit http://viewvc.org/`
			`#`
			`# -----------------------------------------------------------------------`
			`#`
			`# This file was originally based on portions of the blame.py script by`
			`# Curt Hagenlocher.`
			`#`
			`# -----------------------------------------------------------------------`

			`import string`
			`import common`

			`class _TokenStream:`
			`token_term = string.whitespace + ';:'`

			`# the algorithm is about the same speed for any CHUNK_SIZE chosen.`
			`# grab a good-sized chunk, but not too large to overwhelm memory.`
			`# note: we use a multiple of a standard block size`
			`CHUNK_SIZE = 192 * 512 # about 100k`

			`# CHUNK_SIZE = 5 # for debugging, make the function grind...`

			`def __init__(self, file):`
			`self.rcsfile = file`
			`self.idx = 0`
			`self.buf = self.rcsfile.read(self.CHUNK_SIZE)`
			`if self.buf == '':`
			`raise RuntimeError, 'EOF'`

			`def get(self):`
			`"Get the next token from the RCS file."`

			`# Note: we can afford to loop within Python, examining individual`
			`# characters. For the whitespace and tokens, the number of iterations`
			`# is typically quite small. Thus, a simple iterative loop will beat`
			`# out more complex solutions.`

			`buf = self.buf`
			`idx = self.idx`

			`while 1:`
			`if idx == len(buf):`
			`buf = self.rcsfile.read(self.CHUNK_SIZE)`
			`if buf == '':`
			`# signal EOF by returning None as the token`
			`del self.buf # so we fail if get() is called again`
			`return None`
			`idx = 0`

			`if buf[idx] not in string.whitespace:`
			`break`

			`idx = idx + 1`

			`if buf[idx] == ';' or buf[idx] == ':':`
			`self.buf = buf`
			`self.idx = idx + 1`
			`return buf[idx]`

			`if buf[idx] != '@':`
			`end = idx + 1`
			`token = ''`
			`while 1:`
			`# find token characters in the current buffer`
			`while end < len(buf) and buf[end] not in self.token_term:`
			`end = end + 1`
			`token = token + buf[idx:end]`

			`if end < len(buf):`
			`# we stopped before the end, so we have a full token`
			`idx = end`
			`break`

			`# we stopped at the end of the buffer, so we may have a partial token`
			`buf = self.rcsfile.read(self.CHUNK_SIZE)`
			`idx = end = 0`

			`self.buf = buf`
			`self.idx = idx`
			`return token`

			`# a "string" which starts with the "@" character. we'll skip it when we`
			`# search for content.`
			`idx = idx + 1`

			`chunks = [ ]`

			`while 1:`
			`if idx == len(buf):`
			`idx = 0`
			`buf = self.rcsfile.read(self.CHUNK_SIZE)`
			`if buf == '':`
			`raise RuntimeError, 'EOF'`
			`i = string.find(buf, '@', idx)`
			`if i == -1:`
			`chunks.append(buf[idx:])`
			`idx = len(buf)`
			`continue`
			`if i == len(buf) - 1:`
			`chunks.append(buf[idx:i])`
			`idx = 0`
			`buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)`
			`if buf == '@':`
			`raise RuntimeError, 'EOF'`
			`continue`
			`if buf[i + 1] == '@':`
			`chunks.append(buf[idx:i+1])`
			`idx = i + 2`
			`continue`

			`chunks.append(buf[idx:i])`

			`self.buf = buf`
			`self.idx = i + 1`

			`return string.join(chunks, '')`

			`# _get = get`
			`# def get(self):`
			`token = self._get()`
			print 'T:', `token`
			`return token`

			`def match(self, match):`
			`"Try to match the next token from the input buffer."`

			`token = self.get()`
			`if token != match:`
			`raise common.RCSExpected(token, match)`

			`def unget(self, token):`
			`"Put this token back, for the next get() to return."`

			`# Override the class' .get method with a function which clears the`
			`# overridden method then returns the pushed token. Since this function`
			`# will not be looked up via the class mechanism, it should be a "normal"`
			`# function, meaning it won't have "self" automatically inserted.`
			`# Therefore, we need to pass both self and the token thru via defaults.`

			`# note: we don't put this into the input buffer because it may have been`
			`# @-unescaped already.`

			`def give_it_back(self=self, token=token):`
			`del self.get`
			`return token`

			`self.get = give_it_back`

			`def mget(self, count):`
			`"Return multiple tokens. 'next' is at the end."`
			`result = [ ]`
			`for i in range(count):`
			`result.append(self.get())`
			`result.reverse()`
			`return result`


			`class Parser(common._Parser):`
			`stream_class = _TokenStream`