Initial revision of the C++ RCS parser.

VS: ----------------------------------------------------------------------


git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@453 8cb11bc2-c004-0410-86c3-e597b4017df7
remotes/tags/1.0.0-rc1
lbruand 2002-01-25 17:36:15 +00:00
parent a25b2de3f9
commit a163e8b5b8
9 changed files with 860 additions and 0 deletions

17
tparse/CHANGES Normal file
View File

@ -0,0 +1,17 @@
Modif ( 25-Jan-2002)
* renamed module as tparse
* wrote distutils Setup.py
* added inline (__doc__) documentation in python module.
Modif ( 24-Jan-2002)
* Implementation of the python exceptions in the C++ code.
* Added an exception to stop the parser.
* Fixed bug that added a "@" in the end of string in certain cases.
Modif ( 21-Jan-2002)
* Extensive testing of the memory leaks
* Started to write the python wrapper. (tparsemodule.cpp & tparsemodule.h)
Creation ( 20-Jan-2002 )
* Implementation of the Token parser in C++ ( tparse.cpp & tparse.h)
* Implementation of the parser itself in C++

6
tparse/README Normal file
View File

@ -0,0 +1,6 @@
TPARSE
What is tparse ?
----------------
TPARSE is an RCS file format parser written in C++, with bindings for the Python scripting language.
It was originally modelled on rcsparse.py by Greg Stein and blame.py by Curt Hagenlocher.

12
tparse/Setup.py Normal file
View File

@ -0,0 +1,12 @@
#!/usr/bin/env python
# Distutils build script for the tparse extension module.
from distutils.core import Extension, setup

# tparsemodule.cpp is the only source listed for the extension, which is
# linked against the stdc++ library.
tparse_extension = Extension(
    "tparse",
    ["tparsemodule.cpp"],
    libraries=["stdc++"]
)

setup(
    name="tparse",
    version="1.0",
    description="A quick RCS file format parser",
    author="Lucas Bruand",
    author_email="lbruand@users.sourceforge.net",
    url="http://viewcvs.sourceforge.net",
    ext_modules=[tparse_extension]
)

52
tparse/sink.py Normal file
View File

@ -0,0 +1,52 @@
import tparse
class Sink:
    """Base event handler for tparse.parse(); every callback does nothing.

    Subclass it and override the callbacks of interest.  The class is kept
    old-style (no ``object`` base) because tparse.parse() checks the sink
    argument against the Python instance type.
    """

    def set_head_revision(self, revision):
        """Called with the value of the ``head`` phrase."""
        pass

    def set_principal_branch(self, branch_name):
        """Called with the value of the ``branch`` phrase."""
        pass

    def define_tag(self, name, revision):
        """Called once per ``name:revision`` entry of the ``symbols`` phrase."""
        pass

    def set_comment(self, comment):
        """Called with the value of the ``comment`` phrase."""
        pass

    def set_description(self, description):
        """Called with the text following ``desc``."""
        pass

    def define_revision(self, revision, timestamp, author, state,
                        branches, next):
        """Called once per revision of the tree section.

        ``timestamp`` is an integer and ``branches`` is a list of strings.
        """
        pass

    def set_revision_info(self, revision, log, text):
        """Called once per delta with its log message and text."""
        pass

    def tree_completed(self):
        """Called after the revision tree has been parsed."""
        pass

    def parse_completed(self):
        """Called after the whole file has been parsed."""
        pass
class DebugSink(Sink):
def set_head_revision(self, revision):
print 'head:', revision
def set_principal_branch(self, branch_name):
print 'branch:', branch_name
def define_tag(self, name, revision):
print 'tag:', name, '=', revision
def set_comment(self, comment):
print 'comment:', comment
def set_description(self, description):
print 'description:', description
def define_revision(self, revision, timestamp, author, state,
branches, next):
print 'revision:', revision
print ' timestamp:', timestamp
print ' author:', author
print ' state:', state
print ' branches:', branches
print ' next:', next
def set_revision_info(self, revision, log, text):
print 'revision:', revision
print ' log:', log
print ' text:', text[:100], '...'

5
tparse/testtp.py Normal file
View File

@ -0,0 +1,5 @@
# Smoke test for the tparse module: parse the RCS file named on the command
# line and print every parser event through sink.DebugSink.
import sink
import tparse
import sys

# Fail with a usage message instead of an IndexError when no file is given.
if len(sys.argv) < 2:
    sys.stderr.write("usage: %s <rcsfile>\n" % sys.argv[0])
    sys.exit(2)
tparse.parse(sys.argv[1], sink.DebugSink())

277
tparse/tparse.cpp Normal file
View File

@ -0,0 +1,277 @@
/*
# Copyright (C) 2000-2002 The ViewCVS Group. All Rights Reserved.
# This file has been rewritten in C++ from the rcsparse.py file by
# Lucas Bruand <lucas.bruand@ecl2002.ec-lyon.fr>
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewCVS
# distribution or at http://viewcvs.sourceforge.net/license-1.html.
#
# Contact information:
# Greg Stein, PO Box 760, Palo Alto, CA, 94302
# gstein@lyra.org, http://viewcvs.sourceforge.net/
#
# -----------------------------------------------------------------------
#
# This software is being maintained as part of the ViewCVS project.
# Information is available at:
# http://viewcvs.sourceforge.net/
#
# This file was originally based on portions of the blame.py script by
# Curt Hagenlocher.
#
# -----------------------------------------------------------------------
#
*/
/*
This C++ library offers an API to a performance oriented RCSFILE parser.
It does little syntax checking.
Version: $Id$
*/
#include "tparse.h"
#define __USE_XOPEN
#include <time.h>
/* Character classes used by the tokenizer.  Every macro evaluates its
   argument more than once, so pass expressions without side effects. */
/* Blank, tab, form feed, newline or carriage return. */
#define Whitespace(c) ((c) == ' ' || (c) == '\t' || (c) == '\014' || (c) == '\n' || (c) == '\r')
/* A bare word ends at whitespace or at ';'. */
#define Token_term(c) (Whitespace(c) || (c) == ';')
/* True only for '0'..'9'.  The unsigned comparison rejects characters below
   '0' as well, which a plain signed "(c - '0') < 10" would accept. */
#undef isdigit
#define isdigit(c) (static_cast<unsigned>((c) - '0') < 10u)
/*--------- Tokenparser class -----------*/
/*
 * Return the next token of the RCS input, or NULL once the input is exhausted.
 *
 * Three kinds of token are produced:
 *   - the shared `semicol` sentinel for a ';' (compare by pointer, never free it);
 *   - a bare word, ended by whitespace, ';' or the end of the input;
 *   - an @-quoted string, without the enclosing '@' and with every "@@" pair
 *     collapsed to a single '@'.
 * Words and strings are NUL-terminated buffers obtained from ostrstream::str();
 * the caller owns them and releases them with delstr().
 * A token previously handed to unget() is returned before anything is read.
 *
 * Throws tparseException when the input ends inside an @-quoted string.
 */
char * TokenParser::get() {
  ostrstream ost;
  /* A pushed-back token takes precedence over the stream. */
  if (backget) {
    char *ret = backget;
    backget = NULL;
    return ret;
  }
  /* Skip whitespace, refilling the buffer whenever it runs dry. */
  while (1) {
    if (idx == buflength) {
      input->read(buf, CHUNK_SIZE);
      if ((buflength = input->gcount()) == 0)
        return NULL;
      idx = 0;
    }
    if (!Whitespace(buf[idx]))
      break;
    idx++;
  }
  if (buf[idx] == ';') {
    idx++;
    return semicol;
  }
  if (buf[idx] != '@') {
    /* Bare word: copy up to the next terminator, across chunks if needed. */
    int end = idx + 1;
    while (1) {
      while ((end < buflength) && !(Token_term(buf[end])))
        end++;
      ost.write(buf + idx, end - idx);
      if (end < buflength) {
        idx = end;
        ost.put('\0');
        return ost.str();
      }
      input->read(buf, CHUNK_SIZE);
      buflength = input->gcount();
      idx = 0;
      end = 0;
      if (buflength == 0) {
        /* The word ran up to the end of the input: return what was
           collected instead of re-reading an exhausted stream forever. */
        ost.put('\0');
        return ost.str();
      }
    }
  }
  /* @-quoted string: step over the opening '@'. */
  idx++;
  while (1) {
    int i;
    if (idx == buflength) {
      idx = 0;
      input->read(buf, CHUNK_SIZE);
      if ((buflength = input->gcount()) == 0)
        throw tparseException(" Unterminated string \"@\" missing!");
    }
    for (i = idx; i < buflength && (buf[i] != '@'); i++) ;
    if (i == buflength) {
      /* No '@' in the rest of this chunk: keep it all and refill. */
      if ((buflength - idx) > 0)
        ost.write(buf + idx, buflength - idx);
      idx = buflength;
      continue;
    }
    if (i == buflength - 1) {
      /* The '@' is the last byte of the chunk, so the byte that decides
         between "end of string" and "@@" is not loaded yet.  Emit only the
         text before the '@' (it is looked at again below), move it to the
         front of the buffer and read the following bytes behind it. */
      if ((i - idx) > 0)
        ost.write(buf + idx, i - idx);
      idx = 0;
      buf[0] = '@';
      input->read(buf + 1, CHUNK_SIZE - 1);
      if ((buflength = input->gcount()) == 0) {
        /* Nothing follows: that '@' closed the string and was also the
           last byte of the input.  idx == buflength == 0, so the next
           call reports the end of the input. */
        ost.put('\0');
        return ost.str();
      }
      buflength++;
      continue;
    }
    if (buf[i + 1] == '@') {
      /* Escaped "@@": keep the text and one '@', skip the second one. */
      ost.write(buf + idx, i - idx + 1);
      idx = i + 2;
      continue;
    }
    /* Lone '@': end of the string. */
    if ((i - idx) > 0)
      ost.write(buf + idx, i - idx);
    idx = i + 1;
    ost.put('\0');
    return ost.str();
  }
}
void TokenParser::unget(char *token) {
if (backget) {
throw tparseException(" Error, ungetting a token while already having an ungetted token ");
}
backget=token;
}
/*--------- tparseParser class -----------*/
int tparseParser::parse_rcs_admin() {
while (1) {
char *token =tokenstream->get();
if (isdigit(token[0])) {
tokenstream->unget(token);
return 0;
}
if (strcmp(token,"head")==0) {
if (sink->set_head_revision(tokenstream->get())) { delstr(token);return 1;}
tokenstream->matchsemicol();
}
if (strcmp(token,"branch")==0) {
if (sink->set_principal_branch(tokenstream->get())) { delstr(token);return 1;}
tokenstream->matchsemicol();
}
if (strcmp(token,"symbols")==0) {
while (1) {
char *tag = tokenstream->get();
char *second;
if (tag==tokenstream->semicol) break;
second=index(tag,':');
second[0]='\0';
second++;
if (sink->define_tag(tag,second)) { delstr(token);return 1;}
}
}
if (strcmp(token,"comment")==0) {
if (sink->set_comment(tokenstream->get())) { delstr(token);return 1;}
tokenstream->matchsemicol();
}
if ((strcmp(token,"locks")==0) ||
(strcmp(token,"strict")==0) ||
(strcmp(token,"expand")==0) ||
(strcmp(token,"access")==0)) {
while (1) {
char *tag=tokenstream->get();
if (tag==tokenstream->semicol) break;
delstr(tag);
}
}
delstr(token);
}
};
int tparseParser::parse_rcs_tree() {
while (1) {
char *revision;
char *date;
long timestamp;
char *author;
ostrstream *state;
char *hstate;
char *next;
Branche *branches=NULL;
struct tm tm;
revision=tokenstream->get();
if (strcmp(revision, "desc") ==0) {
tokenstream->unget(revision);
return 0;
}
// Parse date
tokenstream->match("date");
date = tokenstream->get();
tokenstream->matchsemicol();
memset ((void *) &tm, 0, sizeof(struct tm));
strptime(date, "%Y.%m.%d.%H.%M.%S", &tm);
timestamp=mktime(&tm);delstr(date);
tokenstream->match("author");
author= tokenstream->get();
tokenstream->matchsemicol();
tokenstream->match("state");
while (1) {
char *token=tokenstream->get();
if (token==tokenstream->semicol) {
break;
}
state= new ostrstream();
(*state)<<token;delstr(token);
(*state)<<" ";
}
state->put('\0');
hstate=state->str();
delete state;
state=NULL;
tokenstream->match("branches");
while (1) {
char *token=tokenstream->get();
if (token==tokenstream->semicol) {
break;
}
if (branches == NULL)
branches=new Branche(token,NULL);
else
branches=new Branche(token,branches->next);
}
tokenstream->match("next");
next= tokenstream->get();
if (next==tokenstream->semicol) next =NULL;
else tokenstream->matchsemicol();
/**
* there are some files with extra tags in them. for example:
* owner 640;
* group 15;
* permissions 644;
* hardlinks @configure.in@;
* this is "newphrase" in RCSFILE(5). we just want to skip over these.
**/
while (1) {
char *token = tokenstream->get();
if ( (strcmp(token,"desc")==0) || isdigit(token[0]) ) {
tokenstream->unget(token);
break;
};
delstr(token);
while ( tokenstream->get() !=tokenstream->semicol);
}
if (sink->define_revision(revision,timestamp,author, hstate, branches,next)) return 1;
}
return 0;
}
/* Expect the "desc" keyword and pass the token that follows it to
   sink->set_description().  Returns 1 when the sink returns non-zero,
   0 otherwise. */
int tparseParser::parse_rcs_description() {
  tokenstream->match("desc");
  char *description = tokenstream->get();
  return this->sink->set_description(description) ? 1 : 0;
}
/* Read "revision log <string> text <string>" groups until get() reports the
   end of the input, passing each group to sink->set_revision_info().
   Returns 1 as soon as the sink returns non-zero, 0 at the end of the input. */
int tparseParser::parse_rcs_deltatext() {
  for (char *revision = tokenstream->get();
       revision != NULL;
       revision = tokenstream->get()) {
    tokenstream->match("log");
    char *log = tokenstream->get();
    tokenstream->match("text");
    char *text = tokenstream->get();
    if (sink->set_revision_info(revision, log, text))
      return 1;
  }
  return 0;
}

211
tparse/tparse.h Normal file
View File

@ -0,0 +1,211 @@
/*
# Copyright (C) 2000-2002 The ViewCVS Group. All Rights Reserved.
# This file has been rewritten in C++ from the rcsparse.py file by
# Lucas Bruand <lucas.bruand@ecl2002.ec-lyon.fr>
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewCVS
# distribution or at http://viewcvs.sourceforge.net/license-1.html.
#
# Contact information:
# Greg Stein, PO Box 760, Palo Alto, CA, 94302
# gstein@lyra.org, http://viewcvs.sourceforge.net/
#
# -----------------------------------------------------------------------
#
# This software is being maintained as part of the ViewCVS project.
# Information is available at:
# http://viewcvs.sourceforge.net/
#
# This file was originally based on portions of the blame.py script by
# Curt Hagenlocher.
#
# -----------------------------------------------------------------------
#
*/
/*
This C++ library offers an API to a performance-oriented RCSFILE parser.
It does little syntax checking.
Version: $Id$
*/
#define CHUNK_SIZE 30000
#ifndef __PARSE_H
#define __PARSE_H
#include <iostream.h>
#include <strstream.h>
#include <stdio.h>
#include <fstream.h>
#include <string.h>
#include <stdlib.h>
/* Release a string allocated with new[] and reset the pointer to NULL.
   `a` must be a modifiable pointer variable; a NULL pointer is left alone.
   The do/while(0) wrapper makes the macro behave as one statement, so
   "if (x) delstr(p); else ..." compiles and binds as written. */
#define delstr(a) do { if ((a) != NULL) { delete [] (a); (a) = NULL; } } while (0)
/* Exception raised when parsing a file fails.  It stores the message pointer
   it is given as-is (no copy is made) and returns it from getvalue(). */
class tparseException {
  public:
    tparseException(char *myvalue) : value(myvalue) {}
    char *getvalue() { return value; }
  private:
    char *value;
};
/* Singly linked list node holding one branch of a revision.
   A node owns its name (released with delstr) and the rest of the list:
   deleting a node deletes every node after it. */
class Branche {
  public:
    char *name;
    Branche *next;
    Branche(char *myname, Branche *mynext) : name(myname), next(mynext) {}
    ~Branche() {
      delstr(name);
      /* Deleting a NULL pointer does nothing, so no test is needed. */
      delete next;
      next = NULL;
    }
};
/* Handler that receives the events generated by the parser, e.g. when the
   head revision tag is reached.  This default implementation prints every
   event on cout.

   Each callback takes ownership of the strings (and of the branch list) it
   receives and releases them.  A callback returns 0 to continue and a
   non-zero value to make the parser stop. */
class Sink {
  public:
    Sink() {}
    /* Sinks are deleted through a Sink pointer by the parser. */
    virtual ~Sink() {}
    virtual int set_head_revision(char * revision) {
      cout<<" set head revision : "<<revision<<endl;
      delstr(revision);
      return 0;
    }
    virtual int set_principal_branch(char *branch_name) {
      cout<<" set principal branch : "<<branch_name<<endl;
      delstr(branch_name);
      return 0;
    }
    /* Only `name` is released: the parser passes `revision` as a pointer
       into the same buffer. */
    virtual int define_tag(char *name, char *revision) {
      cout<< " Tag: name="<<name<<" revision: "<<revision<<endl;
      delstr(name);
      return 0;
    }
    virtual int set_comment(char *comment) {
      cout<<" Comment: "<<comment<<endl;
      delstr(comment);
      return 0;
    }
    virtual int set_description(char *description) {
      cout<<"description :"<<description<<endl;
      delstr(description);
      return 0;
    }
    /* `branches` may be NULL (no branch) and `next` may be NULL (no next
       revision). */
    virtual int define_revision(char *revision, long timestamp, char *author, char *state, Branche *branches, char *next) {
      cout<<" Define_revision :"<<endl;
      cout<<" |-revision = "<<revision<<endl;delstr(revision);
      cout<<" |-timestamp= "<<timestamp<<endl;
      cout<<" |-author = "<<author<<endl;delstr( author);
      cout<<" |-state = "<<state<<endl;delstr(state);
      cout<<" |-branches = ";
      for (Branche *move=branches; move!=NULL; move=move->next)
        cout<<move->name<<", ";
      /* Deleting the head releases the whole list. */
      if (branches!=NULL) delete branches;
      cout<<endl;
      /* Streaming a NULL char pointer is not allowed, so print an empty
         string when there is no next revision. */
      cout<<" |-next = "<<(next!=NULL ? next : "")<<endl<<endl;delstr(next);
      return 0;
    }
    virtual int set_revision_info(char *revision, char *log, char *text)
    {
      cout << "set revision info :"<<revision<<endl;
      cout << "log :"<<log<<endl;
      cout <<"----text----"<<endl;
      cout << text;
      cout <<"----text----"<<endl;
      delstr(log);
      delstr(text);
      delstr(revision);
      return 0;
    }
    virtual int tree_completed() { cout <<" tree completed"<<endl;return 0;}
    virtual int parse_completed() {cout <<" parse completed"<<endl;return 0;}
};
/* Tokenizer: hands out the tokens of the file one at a time.
   It takes ownership of the stream passed to the constructor and deletes
   it, both on destruction and when the constructor fails. */
class TokenParser {
  private:
    istream *input;
    char buf[CHUNK_SIZE];   /* current chunk of the input */
    int buflength;          /* number of valid bytes in buf */
    int idx;                /* next unread position in buf */
    char *backget;          /* token pushed back by unget(), or NULL */
    /* Release a token returned by get().  NULL is ignored and the shared
       semicol sentinel, which is not heap memory, is left alone. */
    void drop(char *token) {
      if (token != semicol) {
        delstr(token);
      }
    }
  public:
    /* Sentinel returned by get() for every ';'; compare by pointer. */
    char *semicol;
    char *get();
    void unget(char *token);
    /* Non-zero when the last read on the stream returned no data. */
    int eof() {
      return (input->gcount()==0);
    }
    /* Consume one token and throw tparseException unless it is ';'. */
    void matchsemicol() {
      char *ptr=get();
      if (ptr!=semicol) {
        drop(ptr);
        throw tparseException(" Incorrect syntax in the RCSFILE parsed!");
      }
    }
    /* Consume one token and throw tparseException unless it equals `token`.
       The end of the input (NULL token) counts as a mismatch. */
    void match(char *token) {
      char *ptr=get();
      int matched = (ptr!=NULL) && (strcmp(ptr,token)==0);
      drop(ptr);
      if (!matched) throw tparseException(" Incorrect syntax in the RCSFILE parsed!");
    }
    /* Read the first chunk; throws tparseException when nothing can be read. */
    TokenParser(istream *myinput) {
      input=myinput;
      backget=NULL;
      idx=0;semicol=";";
      input->read(buf,CHUNK_SIZE);
      if ( (buflength=input->gcount())==0 ) {
        /* The destructor does not run when the constructor throws, so the
           stream has to be released here. */
        delete input;input=NULL;
        throw tparseException("Non-existing file or empty file");
      }
    }
    ~TokenParser() {
      /* A token may still be pending when parsing stopped early. */
      drop(backget);
      backget=NULL;
      if (input!=NULL) { delete input;input=NULL; }
    }
};
/* This is the class that does the actual job: it reads each part of the file
   in turn and generates events for a sink event-handler.

   All the parsing happens inside the constructor.  The parser takes
   ownership of the sink and, through its TokenParser, of the input stream;
   both are released by the destructor, or by the constructor itself when an
   exception escapes from it. */
class tparseParser {
  private:
    TokenParser *tokenstream;
    Sink *sink;
    int parse_rcs_admin();
    int parse_rcs_tree();
    int parse_rcs_description();
    int parse_rcs_deltatext();
    /* Run the parsing stages in order; stop at the first stage or sink
       callback that returns non-zero. */
    void run() {
      if (parse_rcs_admin()) return;
      if (parse_rcs_tree()) return;
      // many sinks want to know when the tree has been completed so they can
      // do some work to prep for the arrival of the deltatext
      if (sink->tree_completed()) return;
      if (parse_rcs_description()) return;
      if (parse_rcs_deltatext()) return;
      // easiest for us to tell the sink it is done, rather than worry about
      // higher level software doing it.
      if (sink->parse_completed()) return;
    }
  public:
    /* Any istream is accepted; an ifstream pointer converts implicitly. */
    tparseParser(istream *myinput,Sink* mysink) {
      sink=mysink;
      tokenstream=NULL;
      try {
        tokenstream= new TokenParser(myinput);
        run();
      }
      catch (...) {
        /* The destructor is not run for a constructor that throws, so
           release what this object owns before passing the exception on. */
        delete tokenstream;tokenstream=NULL;
        delete sink;sink=NULL;
        throw;
      }
    }
    ~tparseParser() {
      delete tokenstream;
      delete sink;
    }
};
#endif

231
tparse/tparsemodule.cpp Normal file
View File

@ -0,0 +1,231 @@
/*
# Copyright (C) 2000-2002 The ViewCVS Group. All Rights Reserved.
# This file has been rewritten in C++ from the rcsparse.py file by
# Lucas Bruand <lucas.bruand@ecl2002.ec-lyon.fr>
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewCVS
# distribution or at http://viewcvs.sourceforge.net/license-1.html.
#
# Contact information:
# Greg Stein, PO Box 760, Palo Alto, CA, 94302
# gstein@lyra.org, http://viewcvs.sourceforge.net/
#
# -----------------------------------------------------------------------
#
# This software is being maintained as part of the ViewCVS project.
# Information is available at:
# http://viewcvs.sourceforge.net/
#
# This file was originally based on portions of the blame.py script by
# Curt Hagenlocher.
#
# -----------------------------------------------------------------------
#
*/
/*
this python extension module is a binding to the tparse library.
tparse is a C++ library that offers an API to a performance-oriented RCSFILE parser.
It does little syntax checking.
Version: $Id$
*/
#include <Python.h>
#include <stdiostream.h>
#include "tparsemodule.h"
#include "tparse.cpp"
/* Method table of the module: exposes the C++ function tparse() to Python
   under the name "parse", with tparse__doc__ as its documentation string. */
static PyMethodDef tparseMethods[] = {
{"parse", tparse, METH_VARARGS, tparse__doc__},
{NULL, NULL} /* Sentinel */
};
void inittparse()
{
PyObject *m, *d;
m= Py_InitModule3("tparse", tparseMethods,__doc__);
d = PyModule_GetDict(m);
StopParser = PyErr_NewException("tparse.stopparser", NULL, NULL);
PyObject_SetAttrString(StopParser,"__doc__",PyString_FromString(StopParser__doc__));
PyDict_SetItemString(d, "stopparser", StopParser);
}
/* Empty marker type thrown by PythonSink when a call into the Python sink
   fails with an exception other than StopParser. */
class PythonException {
  public:
    PythonException() {}
};
class PythonSink : public Sink {
public:
PyObject *sink;
PythonSink(PyObject *mysink)
{ sink=mysink;};
int set_head_revision(char * revision)
{
if (!PyObject_CallMethod(sink,"set_head_revision", "s", revision)) {
delstr(revision);
if (PyErr_ExceptionMatches(StopParser))
return 1;
else
throw PythonException();
}
delstr(revision);
return 0;
};
int set_principal_branch(char *branch_name)
{
if (!PyObject_CallMethod(sink,"set_principal_branch", "s", branch_name)) {
delstr(branch_name);
if (PyErr_ExceptionMatches(StopParser))
return 1;
else
throw PythonException();
}
delstr(branch_name);
return 0;
};
int define_tag(char *name, char *revision)
{
if (!PyObject_CallMethod(sink,"define_tag", "ss", name,revision)) {
delstr(name);
if (PyErr_ExceptionMatches(StopParser))
return 1;
else
throw PythonException();
}
delstr(name);
return 0;
};
int set_comment(char *comment)
{
if (!PyObject_CallMethod(sink,"set_comment", "s", comment)) {
delstr(comment);
if (PyErr_ExceptionMatches(StopParser))
return 1;
else
throw PythonException();
}
delstr(comment);
return 0;
};
int set_description(char *description)
{
if (!PyObject_CallMethod(sink,"set_description", "s", description)) {
delstr(description);
if (PyErr_ExceptionMatches(StopParser))
return 1;
else
throw PythonException();
}
delstr(description);
return 0;
};
int define_revision(char *revision, long timestamp, char *author, char *state, Branche *branches, char *next)
{
PyObject *pbranchs=PyList_New(0);
Py_INCREF(pbranchs);
Branche *move=branches;
while (move!=NULL) {
PyList_Append(pbranchs, PyString_FromString(move->name) );
move=move->next;
}
if (!PyObject_CallMethod(sink,"define_revision", "slssOs",revision,timestamp,author,state,pbranchs,next))
{
Py_DECREF(pbranchs);
delstr(revision);
delstr(author);
delstr(state);
if (branches!=NULL) delete branches;delstr(next);
if (PyErr_ExceptionMatches(StopParser))
return 1;
else
throw PythonException();
}
Py_DECREF(pbranchs);
delstr(revision);
delstr(author);
delstr(state);
if (branches!=NULL) delete branches;delstr(next);
return 0;
};
int set_revision_info(char *revision, char *log, char *text)
{
if (!PyObject_CallMethod(sink,"set_revision_info", "sss", revision,log,text))
{
delstr(revision);
delstr(log);
delstr(text);
if (PyErr_ExceptionMatches(StopParser))
return 1;
else
throw PythonException();
}
delstr(revision);
delstr(log);
delstr(text);
return 0;
};
int tree_completed()
{
if (!PyObject_CallMethod(sink,"tree_completed", NULL))
{
if (PyErr_ExceptionMatches(StopParser))
return 1;
else
throw PythonException();
}
return 0;
};
int parse_completed()
{
if (!PyObject_CallMethod(sink,"parse_completed", NULL))
{
if (PyErr_ExceptionMatches(StopParser))
return 1;
else
throw PythonException();
}
return 0;
};
};
/* Python entry point tparse.parse(source, sink).

   `source` is either a file name or a Python file object; `sink` must be a
   class instance.  The whole file is parsed before the call returns.
   Returns None on success.  Returns NULL with a Python exception set when
   the arguments are wrong, when the parser reports an error (raised as
   Exception with the parser's message) or when a sink method raised. */
static PyObject * tparse( PyObject *self, PyObject *args)
{
  char *filename;
  ifstream *input;
  PyObject *file=NULL;
  PyObject *hsink;
  if (PyArg_ParseTuple(args, "sO!", &filename,&PyInstance_Type,&hsink))
    input=new ifstream(filename,ios::nocreate|ios::in);
  else {
    /* The failed attempt above left an error set; drop it before trying
       the second signature, otherwise it would survive a success. */
    PyErr_Clear();
    if (PyArg_ParseTuple(args, "O!O!",&PyFile_Type ,&file,&PyInstance_Type, &hsink))
      /* NOTE(review): a stdiobuf is a stream buffer, not an ifstream, so
         this cast does not yield a usable stream.  The file-object form
         needs a real stream built on top of the buffer - to be fixed
         together with the parser's constructor signature. */
      input=(ifstream *) new stdiobuf(PyFile_AsFile(file));
    else
      return NULL;
  }
  /* Keep the Python objects alive while the parser uses them. */
  Py_INCREF(hsink);
  Py_XINCREF(file);
  PyObject *result=NULL;
  try {
    /* The constructor does all the parsing.  The parser owns the stream
       and the sink; leaving this scope destroys it and releases both.
       NOTE(review): the parser deletes the sink through a Sink pointer,
       which requires Sink to have a virtual destructor - confirm. */
    tparseParser tp(input,new PythonSink(hsink) );
    Py_INCREF(Py_None);
    result=Py_None;
  }
  catch (tparseException &e)
  {
    PyErr_SetString(PyExc_Exception,e.getvalue());
  }
  catch (PythonException &)
  {
    /* The Python exception raised by the sink is still set. */
  }
  Py_DECREF(hsink);
  Py_XDECREF(file);
  return result;
}

49
tparse/tparsemodule.h Normal file
View File

@ -0,0 +1,49 @@
/*
# Copyright (C) 2000-2002 The ViewCVS Group. All Rights Reserved.
# This file has been rewritten in C++ from the rcsparse.py file by
# Lucas Bruand <lucas.bruand@ecl2002.ec-lyon.fr>
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewCVS
# distribution or at http://viewcvs.sourceforge.net/license-1.html.
#
# Contact information:
# Greg Stein, PO Box 760, Palo Alto, CA, 94302
# gstein@lyra.org, http://viewcvs.sourceforge.net/
#
# -----------------------------------------------------------------------
#
# This software is being maintained as part of the ViewCVS project.
# Information is available at:
# http://viewcvs.sourceforge.net/
#
# This file was originally based on portions of the blame.py script by
# Curt Hagenlocher.
#
# -----------------------------------------------------------------------
#
*/
/* Documentation string of the module; passed to Py_InitModule3(). */
static char *__doc__= "\
this python extension module is a binding to the tparse library.\n\
tparse is a C++ library that offers an API to a performance-oriented RCSFILE parser.\n\
It does little syntax checking.\n\
\n\
Version: $Id$\n";
/* Documentation string attached to the stopparser exception. */
static char *StopParser__doc__ ="Stop parser exception: to be raised from the sink to abort parsing.";
/* The tparse.stopparser exception object; created by inittparse(). */
static PyObject *StopParser;
/* Documentation string of the parse() function. */
static char *tparse__doc__=" Main function: parse a file and send the result to the sink \n\
Two ways of invoking this function from python:\n\
* tparse.parse(filename, sink) \n\
where filename is a string and sink is an instance of the class Sink \n\
defined in the sink.py module.\n\
* tparse.parse(file, sink)\n\
where file is a python file and sink is an instance of the class Sink\n\
defined in the sink.py module.\n";
/* Implementation of parse(); referenced by the module's method table. */
static PyObject * tparse( PyObject *self, PyObject *args);
/* Init function for this module:
Invoked when the module is imported from Python.
Loads the stopparser exception into the tparse module's namespace. */
extern "C" void inittparse();