# -*-python-*-
#
# Copyright (C) 1999-2007 The ViewCVS Group. All Rights Reserved.
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewVC
# distribution or at http://viewvc.org/license-1.html.
#
# For more information, visit http://viewvc.org/
#
# -----------------------------------------------------------------------
|
|
|
|
"""common.py: common classes and functions for the RCS parsing tools."""
|
|
|
|
import calendar
|
|
import string
|
|
|
|
class Sink:
    """Receiver of the events produced while an RCS file is parsed.

    Every method here does nothing, so a subclass only needs to override
    the callbacks it is interested in.  The methods are listed in the
    order in which the parser's parse() method can call them.
    """

    def set_head_revision(self, revision):
        """Receive the token following 'head', when one is given."""

    def set_principal_branch(self, branch_name):
        """Receive the token following 'branch', when one is given."""

    def set_access(self, accessors):
        """Receive the non-empty list of tokens of the 'access' phrase."""

    def define_tag(self, name, revision):
        """Receive one name:revision pair of the 'symbols' phrase."""

    def set_locker(self, revision, locker):
        """Receive one locker:revision pair of the 'locks' phrase."""

    def set_locking(self, mode):
        """Used to signal locking mode.

        Called with mode argument 'strict' if strict locking
        Not called when no locking used.
        """

    def set_comment(self, comment):
        """Receive the token following 'comment'."""

    def set_expansion(self, mode):
        """Receive the token following 'expand'."""

    def admin_completed(self):
        """Called once the admin section has been parsed."""

    def define_revision(self, revision, timestamp, author, state,
                        branches, next):
        """Receive one entry of the delta tree.

        revision  -- the revision token that starts the entry
        timestamp -- the entry's date as returned by calendar.timegm()
        author    -- the author tokens joined by single spaces
        state     -- the state tokens joined by single spaces
        branches  -- list of the tokens of the 'branches' phrase
        next      -- the token following 'next', or None if there is none
        """

    def tree_completed(self):
        """Called once the delta tree has been parsed."""

    def set_description(self, description):
        """Receive the token following 'desc'."""

    def set_revision_info(self, revision, log, text):
        """Receive the log message and text of one deltatext entry."""

    def parse_completed(self):
        """Called once the whole file has been parsed."""
|
|
|
|
|
|
# --------------------------------------------------------------------------
#
# EXCEPTIONS USED BY RCSPARSE
#
|
|
|
|
class RCSParseError(Exception):
    """Base class of the errors raised while parsing an RCS file."""
|
|
|
|
|
|
class RCSIllegalCharacter(RCSParseError):
    """Parse error for an illegal character in the input.

    NOTE(review): the meaning is taken from the class name; nothing in
    this part of the file raises it -- confirm against the tokenizers.
    """
|
|
|
|
|
|
class RCSExpected(RCSParseError):
    """Parse error: one token was wanted but a different one was found.

    The offending and the wanted token are kept in the 'got' and 'wanted'
    attributes in addition to being part of the message.
    """

    def __init__(self, got, wanted):
        """Build the message from the token seen and the token wanted."""
        message = ('Unexpected parsing error in RCS file.\n'
                   'Expected token: %s, but saw: %s'
                   % (wanted, got))
        RCSParseError.__init__(self, message)
        self.got = got
        self.wanted = wanted
|
|
|
|
|
|
class RCSStopParser(Exception):
    """Exception that is deliberately not an RCSParseError.

    NOTE(review): presumably raised to abort a parse early; nothing in
    this part of the file raises or catches it -- confirm against callers.
    """
|
|
|
|
|
|
# --------------------------------------------------------------------------
#
# STANDARD TOKEN STREAM-BASED PARSER
#
|
|
|
|
class _Parser:
    """RCS file parser driven by a token stream and reporting to a sink.

    parse() walks the parts of an RCS file in order -- admin section,
    delta tree, description, deltatext -- and reports what it finds by
    calling the corresponding methods of the sink.

    The token stream is an instance of stream_class, created from the
    file passed to parse().  This class uses these methods of it:

      get()        -- return the next token; None signals end of file
      unget(token) -- push a token back so the next get() returns it
      match(token) -- consume the next token, which is expected to be
                      the given one (presumably raises otherwise)
      mget(count)  -- return count tokens at once (apparently last token
                      first, judging by how parse_rcs_deltatext unpacks
                      the result)
    """

    stream_class = None  # subclasses need to define this

    def _read_until_semicolon(self):
        """Read all tokens up to and including the next semicolon token.

        Return the tokens (not including the semicolon) as a list.
        Raise RCSExpected if the input ends before a semicolon is seen.
        """
        tokens = []
        while True:
            token = self.ts.get()
            if token == ';':
                return tokens
            if token is None:
                # End of input.  A stream that keeps answering None would
                # otherwise keep this loop running forever.
                raise RCSExpected(token, ';')
            tokens.append(token)

    def _parse_admin_head(self, token):
        """Handle 'head': report the head revision if one is given."""
        rev = self.ts.get()
        if rev == ';':
            # The head revision is not specified.  Just drop the semicolon
            # on the floor.
            return
        self.sink.set_head_revision(rev)
        self.ts.match(';')

    def _parse_admin_branch(self, token):
        """Handle 'branch': report the principal branch if one is given."""
        branch = self.ts.get()
        if branch != ';':
            self.sink.set_principal_branch(branch)
            self.ts.match(';')

    def _parse_admin_access(self, token):
        """Handle 'access': report the access list unless it is empty."""
        accessors = self._read_until_semicolon()
        if accessors:
            self.sink.set_access(accessors)

    def _parse_admin_symbols(self, token):
        """Handle 'symbols': report every name:revision pair as a tag."""
        while True:
            tag_name = self.ts.get()
            if tag_name == ';':
                break
            self.ts.match(':')
            tag_rev = self.ts.get()
            self.sink.define_tag(tag_name, tag_rev)

    def _parse_admin_locks(self, token):
        """Handle 'locks': report every locker:revision pair."""
        while True:
            locker = self.ts.get()
            if locker == ';':
                break
            self.ts.match(':')
            rev = self.ts.get()
            self.sink.set_locker(rev, locker)

    def _parse_admin_strict(self, token):
        """Handle 'strict': report strict locking."""
        self.sink.set_locking("strict")
        self.ts.match(';')

    def _parse_admin_comment(self, token):
        """Handle 'comment': report the token that follows."""
        self.sink.set_comment(self.ts.get())
        self.ts.match(';')

    def _parse_admin_expand(self, token):
        """Handle 'expand': report the expansion mode that follows."""
        expand_mode = self.ts.get()
        self.sink.set_expansion(expand_mode)
        self.ts.match(';')

    # Maps the keyword that starts an admin phrase to its handler.  The
    # handlers are the plain functions defined above, hence the explicit
    # self when they are called.  None marks the keyword that ends the
    # admin section.
    admin_token_map = {
        'head': _parse_admin_head,
        'branch': _parse_admin_branch,
        'access': _parse_admin_access,
        'symbols': _parse_admin_symbols,
        'locks': _parse_admin_locks,
        'strict': _parse_admin_strict,
        'comment': _parse_admin_comment,
        'expand': _parse_admin_expand,
        'desc': None,
    }

    def parse_rcs_admin(self):
        """Parse the admin section, reporting each phrase to the sink.

        Returns with the token that ended the section ('desc' or the
        first revision number) pushed back onto the stream.  Phrases with
        an unknown keyword ("newphrase") are skipped through their
        semicolon.  Raises RCSExpected if the input ends first.
        """
        while True:
            # Read initial token at beginning of line
            token = self.ts.get()
            if token is None:
                raise RCSExpected(token, 'desc')

            try:
                handler = self.admin_token_map[token]
            except KeyError:
                # We're done once we reach the description of the RCS tree
                if token[0] in string.digits:
                    self.ts.unget(token)
                    return
                # Chew up "newphrase", including its values: a value such
                # as the 640 in "owner 640;" must not be mistaken for the
                # first revision number of the tree.
                # warn("Unexpected RCS token: $token\n")
                self._read_until_semicolon()
            else:
                if handler is None:
                    self.ts.unget(token)
                    return
                handler(self, token)

    def _date_to_timestamp(self, date):
        """Convert the dotted date of a delta into a calendar.timegm() value.

        Raises ValueError('invalid year') for years before 1970.
        """
        date_fields = date.split('.')
        # According to rcsfile(5): the year "contains just the last two
        # digits of the year for years from 1900 through 1999, and all the
        # digits of years thereafter".
        if len(date_fields[0]) == 2:
            date_fields[0] = '19' + date_fields[0]
        date_fields = [int(field) for field in date_fields]
        EPOCH = 1970
        if date_fields[0] < EPOCH:
            raise ValueError('invalid year')
        return calendar.timegm(tuple(date_fields) + (0, 0, 0,))

    def _parse_rcs_tree_entry(self, revision):
        """Parse one delta of the tree and report it via define_revision().

        revision is the already consumed revision token that starts the
        entry.  Returns with the token that follows the entry ('desc' or
        the next revision number) pushed back onto the stream.
        """
        # Parse date
        self.ts.match('date')
        date = self.ts.get()
        self.ts.match(';')

        # Convert date into timestamp
        timestamp = self._date_to_timestamp(date)

        # Parse author
        ### NOTE: authors containing whitespace are violations of the
        ### RCS specification.  We are making an allowance here because
        ### CVSNT is known to produce these sorts of authors.
        self.ts.match('author')
        author = ' '.join(self._read_until_semicolon())

        # Parse state
        self.ts.match('state')
        state = ' '.join(self._read_until_semicolon())

        # Parse branches
        self.ts.match('branches')
        branches = self._read_until_semicolon()

        # Parse revision of next delta in chain
        self.ts.match('next')
        next_rev = self.ts.get()
        if next_rev == ';':
            next_rev = None
        else:
            self.ts.match(';')

        # there are some files with extra tags in them. for example:
        #    owner 640;
        #    group 15;
        #    permissions 644;
        #    hardlinks @configure.in@;
        # this is "newphrase" in RCSFILE(5). we just want to skip over these.
        while True:
            token = self.ts.get()
            if token is None:
                raise RCSExpected(token, 'desc')
            if token == 'desc' or token[0] in string.digits:
                self.ts.unget(token)
                break
            # consume everything up to the semicolon
            self._read_until_semicolon()

        self.sink.define_revision(revision, timestamp, author, state,
                                  branches, next_rev)

    def parse_rcs_tree(self):
        """Parse all deltas of the tree, stopping in front of 'desc'."""
        while True:
            revision = self.ts.get()

            # End of RCS tree description ?
            if revision == 'desc':
                self.ts.unget(revision)
                return

            self._parse_rcs_tree_entry(revision)

    def parse_rcs_description(self):
        """Parse the 'desc' phrase and report the description."""
        self.ts.match('desc')
        self.sink.set_description(self.ts.get())

    def parse_rcs_deltatext(self):
        """Parse deltatext entries until end of file, reporting each one.

        Raises RCSExpected if the 'log' or 'text' keyword is not where it
        is expected.
        """
        while True:
            revision = self.ts.get()
            if revision is None:
                # EOF
                break
            text, sym2, log, sym1 = self.ts.mget(4)
            if sym1 != 'log':
                # Diagnostic output kept from the original implementation
                print(repr((text[:100], sym2[:100], log[:100], sym1[:100])))
                raise RCSExpected(sym1, 'log')
            if sym2 != 'text':
                raise RCSExpected(sym2, 'text')
            ### need to add code to chew up "newphrase"
            self.sink.set_revision_info(revision, log, text)

    def parse(self, file, sink):
        """Parse file completely, reporting everything found to sink.

        file is handed to stream_class to create the token stream.  The
        references to the stream and the sink are dropped again when
        parsing ends, whether it completed or raised.
        """
        self.ts = self.stream_class(file)
        self.sink = sink
        try:
            self.parse_rcs_admin()

            # let sink know when the admin section has been completed
            self.sink.admin_completed()

            self.parse_rcs_tree()

            # many sinks want to know when the tree has been completed so
            # they can do some work to prep for the arrival of the deltatext
            self.sink.tree_completed()

            self.parse_rcs_description()
            self.parse_rcs_deltatext()

            # easiest for us to tell the sink it is done, rather than worry
            # about higher level software doing it.
            self.sink.parse_completed()
        finally:
            self.ts = self.sink = None
|
|
|
|
# --------------------------------------------------------------------------
|