From a163e8b5b892dfb559e11073f24747dcdd5d04b5 Mon Sep 17 00:00:00 2001 From: lbruand Date: Fri, 25 Jan 2002 17:36:15 +0000 Subject: [PATCH] Initial revision of the C++ RCS parser. VS: ---------------------------------------------------------------------- git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@453 8cb11bc2-c004-0410-86c3-e597b4017df7 --- tparse/CHANGES | 17 +++ tparse/README | 6 + tparse/Setup.py | 12 ++ tparse/sink.py | 52 ++++++++ tparse/testtp.py | 5 + tparse/tparse.cpp | 277 ++++++++++++++++++++++++++++++++++++++++ tparse/tparse.h | 211 ++++++++++++++++++++++++++++++ tparse/tparsemodule.cpp | 231 +++++++++++++++++++++++++++++++++ tparse/tparsemodule.h | 49 +++++++ 9 files changed, 860 insertions(+) create mode 100644 tparse/CHANGES create mode 100644 tparse/README create mode 100644 tparse/Setup.py create mode 100644 tparse/sink.py create mode 100644 tparse/testtp.py create mode 100644 tparse/tparse.cpp create mode 100644 tparse/tparse.h create mode 100644 tparse/tparsemodule.cpp create mode 100644 tparse/tparsemodule.h diff --git a/tparse/CHANGES b/tparse/CHANGES new file mode 100644 index 00000000..ec7e1b53 --- /dev/null +++ b/tparse/CHANGES @@ -0,0 +1,17 @@ +Modif ( 25-Jan-2002) + * renamed module as tparse + * wrote distutils Setup.py + * added inline (__doc__) documentation in python module. + +Modif ( 24-Jan-2002) + * Implementation of the python exceptions in the C++ code. + * Added an exception to stop the parser. + * Fixed bug that added a "@" in the end of string in certain cases. + +Modif ( 21-Jan-2002) + * Extensive testing of the memory leaks + * Started to write the python wrapper. (tparsemodule.cpp & tparsemodule.h) + +Creation ( 20-Jan-2002 ) + * Implementation of the Token parser in C++ ( tparse.cpp & tparse.h) + * Implementation of the parser itself in C++ \ No newline at end of file diff --git a/tparse/README b/tparse/README new file mode 100644 index 00000000..dc06ec1f --- /dev/null +++ b/tparse/README @@ -0,0 +1,6 @@ + TPARSE + + What is tparse ? + ---------------- +TPARSE is a C++ coded RCS file format parser with bindings for the Python scripting language. +It was originally designed after rcsparser.py from Greg Stein and blame.py from Curt Hagenlocher. diff --git a/tparse/Setup.py b/tparse/Setup.py new file mode 100644 index 00000000..1143421d --- /dev/null +++ b/tparse/Setup.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python + +from distutils.core import setup,Extension + +setup(name="tparse", + version="1.0", + description="A quick RCS file format parser", + author="Lucas Bruand", + author_email="lbruand@users.sourceforge.net", + url="http://viewcvs.sourceforge.net", + ext_modules=[Extension("tparse", ["tparsemodule.cpp"],libraries=["stdc++"])] + ) diff --git a/tparse/sink.py b/tparse/sink.py new file mode 100644 index 00000000..24d05ece --- /dev/null +++ b/tparse/sink.py @@ -0,0 +1,52 @@ +import tparse +class Sink: + def set_head_revision(self, revision): + pass + def set_principal_branch(self, branch_name): + pass + def define_tag(self, name, revision): + pass + def set_comment(self, comment): + pass + def set_description(self, description): + pass + def define_revision(self, revision, timestamp, author, state, + branches, next): + pass + def set_revision_info(self, revision, log, text): + pass + def tree_completed(self): + pass + def parse_completed(self): + pass + +class DebugSink(Sink): + def set_head_revision(self, revision): + print 'head:', revision + + def set_principal_branch(self, branch_name): + print 'branch:', branch_name + + def define_tag(self, name, revision): + print 'tag:', name, '=', revision + + def set_comment(self, comment): + print 'comment:', comment + + def set_description(self, description): + print 'description:', description + + def define_revision(self, revision, timestamp, author, state, + branches, next): + print 'revision:', revision + print ' timestamp:', timestamp + print ' author:', author + print ' state:', state + print ' branches:', branches + print ' next:', next + + def set_revision_info(self, revision, log, text): + print 'revision:', revision + print ' log:', log + print ' text:', text[:100], '...' + diff --git a/tparse/testtp.py b/tparse/testtp.py new file mode 100644 index 00000000..0f0346b8 --- /dev/null +++ b/tparse/testtp.py @@ -0,0 +1,5 @@ +# This python script permits to test the behaviour of the tparse module. +import sink +import tparse +import sys +tparse.parse(sys.argv[1],sink.DebugSink()) diff --git a/tparse/tparse.cpp b/tparse/tparse.cpp new file mode 100644 index 00000000..68165657 --- /dev/null +++ b/tparse/tparse.cpp @@ -0,0 +1,277 @@ +/* + # Copyright (C) 2000-2002 The ViewCVS Group. All Rights Reserved. + # This file has been rewritten in C++ from the rcsparse.py file by + # Lucas Bruand + # + # By using this file, you agree to the terms and conditions set forth in + # the LICENSE.html file which can be found at the top level of the ViewCVS + # distribution or at http://viewcvs.sourceforge.net/license-1.html. + # + # Contact information: + # Greg Stein, PO Box 760, Palo Alto, CA, 94302 + # gstein@lyra.org, http://viewcvs.sourceforge.net/ + # + # ----------------------------------------------------------------------- + # + # This software is being maintained as part of the ViewCVS project. + # Information is available at: + # http://viewcvs.sourceforge.net/ + # + # This file was originally based on portions of the blame.py script by + # Curt Hagenlocher. + # + # ----------------------------------------------------------------------- + # + */ + /* + This C++ library offers an API to a performance oriented RCSFILE parser. + It does little syntax checking. + + Version: $Id$ + */ + +#include "tparse.h" +#define __USE_XOPEN +#include + +#define Whitespace(c) (c == ' ' || c == '\t' || c == '\014' || c == '\n' || c=='\r') +#define Token_term(c) (c == ' ' || c == '\t' || c == '\014' || c == '\n' || c=='\r' || c==';') +#define isdigit(c) ( (c-'0')<10) + +/*--------- Tokenparser class -----------*/ +char * TokenParser::get() { + ostrstream ost; + if (backget) { + char *ret;ret=backget; + backget=NULL; + return ret; + } + + while (1) { + if (idx==buflength) { + input->read(buf,CHUNK_SIZE); + if ( (buflength=input->gcount())==0 ) + return NULL; + idx=0; + } + if (!Whitespace(buf[idx])) + break; + idx++; + } + if (buf[idx]==';') { + idx++; + return semicol; + } + + if (buf[idx]!='@') { + int end=idx+1; + while (1) { + while ( (endread(buf,CHUNK_SIZE); + buflength=input->gcount(); + idx=0; + end=0; + } + } + idx++; + while (1) { + int i; + if (idx==buflength) { + idx=0; + input->read(buf,CHUNK_SIZE); + if ( (buflength=input->gcount())==0 ) + throw tparseException(" Unterminated string \"@\" missing!"); + } + //i=strchr(buf+idx,'@'); + for(i=idx;i0) + ost.write(buf+idx,buflength-idx); + idx= buflength; + continue; + } + if ( i==buflength-1) { + ost.write(buf+idx,i-idx+1); + idx=0; + buf[0]='@'; + input->read(buf+1,CHUNK_SIZE-1); + if ( (buflength=input->gcount())==0 ) + throw tparseException("Unterminated string; @ missing"); + buflength++; + continue; + } + if (buf[i+1]=='@') { + ost.write(buf+idx,i-idx+1); + idx=i+2; + continue; + } + if ((i-idx)>0) + ost.write(buf+idx,i-idx); + idx=i+1; + ost.put('\0'); + return ost.str(); + } +}; + +void TokenParser::unget(char *token) { + if (backget) { + throw tparseException(" Error, ungetting a token while already having an ungetted token "); + + } + backget=token; +} + +/*--------- tparseParser class -----------*/ +int tparseParser::parse_rcs_admin() { + while (1) { + char *token =tokenstream->get(); + if (isdigit(token[0])) { + tokenstream->unget(token); + return 0; + } + if (strcmp(token,"head")==0) { + if (sink->set_head_revision(tokenstream->get())) { delstr(token);return 1;} + tokenstream->matchsemicol(); + } + if (strcmp(token,"branch")==0) { + if (sink->set_principal_branch(tokenstream->get())) { delstr(token);return 1;} + tokenstream->matchsemicol(); + } + if (strcmp(token,"symbols")==0) { + while (1) { + char *tag = tokenstream->get(); + char *second; + if (tag==tokenstream->semicol) break; + second=index(tag,':'); + second[0]='\0'; + second++; + if (sink->define_tag(tag,second)) { delstr(token);return 1;} + } + } + if (strcmp(token,"comment")==0) { + if (sink->set_comment(tokenstream->get())) { delstr(token);return 1;} + tokenstream->matchsemicol(); + } + if ((strcmp(token,"locks")==0) || + (strcmp(token,"strict")==0) || + (strcmp(token,"expand")==0) || + (strcmp(token,"access")==0)) { + while (1) { + char *tag=tokenstream->get(); + if (tag==tokenstream->semicol) break; + delstr(tag); + } + } + delstr(token); + } +}; + +int tparseParser::parse_rcs_tree() { + + while (1) { + char *revision; + char *date; + long timestamp; + char *author; + ostrstream *state; + char *hstate; + char *next; + Branche *branches=NULL; + struct tm tm; + revision=tokenstream->get(); + if (strcmp(revision, "desc") ==0) { + tokenstream->unget(revision); + return 0; + } + // Parse date + tokenstream->match("date"); + date = tokenstream->get(); + tokenstream->matchsemicol(); + memset ((void *) &tm, 0, sizeof(struct tm)); + strptime(date, "%Y.%m.%d.%H.%M.%S", &tm); + timestamp=mktime(&tm);delstr(date); + tokenstream->match("author"); + author= tokenstream->get(); + tokenstream->matchsemicol(); + tokenstream->match("state"); + while (1) { + char *token=tokenstream->get(); + if (token==tokenstream->semicol) { + break; + } + state= new ostrstream(); + (*state)<put('\0'); + hstate=state->str(); + delete state; + state=NULL; + tokenstream->match("branches"); + while (1) { + char *token=tokenstream->get(); + if (token==tokenstream->semicol) { + break; + } + if (branches == NULL) + branches=new Branche(token,NULL); + else + branches=new Branche(token,branches->next); + + } + tokenstream->match("next"); + next= tokenstream->get(); + if (next==tokenstream->semicol) next =NULL; + else tokenstream->matchsemicol(); + /** + * there are some files with extra tags in them. for example: + * owner 640; + * group 15; + * permissions 644; + * hardlinks @configure.in@; + * this is "newphrase" in RCSFILE(5). we just want to skip over these. + **/ + + while (1) { + char *token = tokenstream->get(); + if ( (strcmp(token,"desc")==0) || isdigit(token[0]) ) { + tokenstream->unget(token); + break; + }; + delstr(token); + while ( tokenstream->get() !=tokenstream->semicol); + } + if (sink->define_revision(revision,timestamp,author, hstate, branches,next)) return 1; + + } + return 0; + +} +int tparseParser::parse_rcs_description() { + tokenstream->match("desc"); + if (this->sink->set_description(tokenstream->get())) return 1; + return 0; +} +int tparseParser::parse_rcs_deltatext() { + char *revision; + char *log; + char *text; + while (1) { + revision = tokenstream->get(); + if (revision==NULL) + break; + tokenstream->match("log"); + log = tokenstream->get(); + tokenstream->match("text"); + text = tokenstream->get(); + if (sink->set_revision_info(revision,log,text)) return 1; + } + return 0; +} diff --git a/tparse/tparse.h b/tparse/tparse.h new file mode 100644 index 00000000..4b3818e9 --- /dev/null +++ b/tparse/tparse.h @@ -0,0 +1,211 @@ +/* + # Copyright (C) 2000-2002 The ViewCVS Group. All Rights Reserved. + # This file has been rewritten in C++ from the rcsparse.py file by + # Lucas Bruand + # + # By using this file, you agree to the terms and conditions set forth in + # the LICENSE.html file which can be found at the top level of the ViewCVS + # distribution or at http://viewcvs.sourceforge.net/license-1.html. + # + # Contact information: + # Greg Stein, PO Box 760, Palo Alto, CA, 94302 + # gstein@lyra.org, http://viewcvs.sourceforge.net/ + # + # ----------------------------------------------------------------------- + # + # This software is being maintained as part of the ViewCVS project. + # Information is available at: + # http://viewcvs.sourceforge.net/ + # + # This file was originally based on portions of the blame.py script by + # Curt Hagenlocher. + # + # ----------------------------------------------------------------------- + # + */ + +/* + This C++ library offers an API to a performance-oriented RCSFILE parser. + It does little syntax checking. + + Version: $Id$ + */ +#define CHUNK_SIZE 30000 +#ifndef __PARSE_H +#define __PARSE_H +#include +#include +#include +#include +#include +#include +#define delstr(a) if (a!=NULL) {delete [] a;a=NULL;}; + + +/* This class represents a exception that occured during the parsing of a file */ +class tparseException { + + char *value; + public: + tparseException(char *myvalue) { value=myvalue; }; + char *getvalue() { return value; }; +}; + +/* This class is used to stored a list of the branches of a revision */ +class Branche { + public: + char *name; + Branche *next; + Branche(char *myname, Branche *mynext) { + name=myname; + next=mynext; + }; + ~Branche() { + delstr(name); + name=NULL; + if (next!=NULL) delete next; + next=NULL; + }; +}; +/* This class is a handler that receive the event generated by the parser + i.e.: When we reach the head revision tag, etc... */ +class Sink { + public: + Sink() {}; + virtual int set_head_revision(char * revision) { + cout<<" set head revision : "<next; + }; + if (branches!=NULL) delete branches; + cout<gcount()==0); + }; + void matchsemicol() { + char *ptr=get(); + if (ptr!=semicol) throw tparseException(" Incorrect syntax in the RCSFILE parsed!"); + }; + void match(char *token) { + char *ptr; + if (strcmp(ptr=get(),token)!=0) throw tparseException(" Incorrect syntax in the RCSFILE parsed!"); + delstr( ptr); + }; + + TokenParser(istream *myinput) { + input=myinput; + backget=NULL; + idx=0;semicol=";"; + input->read(buf,CHUNK_SIZE); + if ( (buflength=input->gcount())==0 ) + throw tparseException("Non-existing file or empty file"); + }; + + + ~TokenParser() { + if (input!=NULL) { delete input;input=NULL; }; + }; +}; + +/* this is the class that does the actual job: + by reading each part of the file and thus generate events to a sink event-handler*/ +class tparseParser { + private: + TokenParser *tokenstream; + Sink *sink; + int parse_rcs_admin(); + int parse_rcs_tree(); + int parse_rcs_description(); + int parse_rcs_deltatext(); + public: + tparseParser(ifstream *myinput,Sink* mysink) { + sink=mysink; + tokenstream= new TokenParser(myinput); + + if (parse_rcs_admin()) return; + if (parse_rcs_tree()) return; + + // many sinks want to know when the tree has been completed so they can + // do some work to prep for the arrival of the deltatext + if (sink->tree_completed()) return; + + if (parse_rcs_description()) return; + if (parse_rcs_deltatext()) return; + + // easiest for us to tell the sink it is done, rather than worry about + // higher level software doing it. + if (sink->parse_completed()) return; + } + ~tparseParser() { + delete tokenstream; + delete sink; + } +}; + +#endif diff --git a/tparse/tparsemodule.cpp b/tparse/tparsemodule.cpp new file mode 100644 index 00000000..781b3ba3 --- /dev/null +++ b/tparse/tparsemodule.cpp @@ -0,0 +1,231 @@ +/* + # Copyright (C) 2000-2002 The ViewCVS Group. All Rights Reserved. + # This file has been rewritten in C++ from the rcsparse.py file by + # Lucas Bruand + # + # By using this file, you agree to the terms and conditions set forth in + # the LICENSE.html file which can be found at the top level of the ViewCVS + # distribution or at http://viewcvs.sourceforge.net/license-1.html. + # + # Contact information: + # Greg Stein, PO Box 760, Palo Alto, CA, 94302 + # gstein@lyra.org, http://viewcvs.sourceforge.net/ + # + # ----------------------------------------------------------------------- + # + # This software is being maintained as part of the ViewCVS project. + # Information is available at: + # http://viewcvs.sourceforge.net/ + # + # This file was originally based on portions of the blame.py script by + # Curt Hagenlocher. + # + # ----------------------------------------------------------------------- + # + */ + /* + this python extension module is a binding to the tparse library. + tparse is a C++ library that offers an API to a performance-oriented RCSFILE parser. + It does little syntax checking. + + Version: $Id$ + */ +#include +#include +#include "tparsemodule.h" +#include "tparse.cpp" + +static PyMethodDef tparseMethods[] = { + {"parse", tparse, METH_VARARGS, tparse__doc__}, + {NULL, NULL} /* Sentinel */ +}; + +void inittparse() +{ + PyObject *m, *d; + m= Py_InitModule3("tparse", tparseMethods,__doc__); + d = PyModule_GetDict(m); + StopParser = PyErr_NewException("tparse.stopparser", NULL, NULL); + PyObject_SetAttrString(StopParser,"__doc__",PyString_FromString(StopParser__doc__)); + PyDict_SetItemString(d, "stopparser", StopParser); +} + +class PythonException { + public: + PythonException() {}; +}; + +class PythonSink : public Sink { + public: + PyObject *sink; + PythonSink(PyObject *mysink) + { sink=mysink;}; + int set_head_revision(char * revision) + { + if (!PyObject_CallMethod(sink,"set_head_revision", "s", revision)) { + delstr(revision); + if (PyErr_ExceptionMatches(StopParser)) + return 1; + else + throw PythonException(); + } + delstr(revision); + return 0; + }; + int set_principal_branch(char *branch_name) + { + if (!PyObject_CallMethod(sink,"set_principal_branch", "s", branch_name)) { + delstr(branch_name); + if (PyErr_ExceptionMatches(StopParser)) + return 1; + else + throw PythonException(); + } + delstr(branch_name); + return 0; + }; + int define_tag(char *name, char *revision) + { + if (!PyObject_CallMethod(sink,"define_tag", "ss", name,revision)) { + delstr(name); + if (PyErr_ExceptionMatches(StopParser)) + return 1; + else + throw PythonException(); + } + delstr(name); + return 0; + }; + int set_comment(char *comment) + { + if (!PyObject_CallMethod(sink,"set_comment", "s", comment)) { + delstr(comment); + if (PyErr_ExceptionMatches(StopParser)) + return 1; + else + throw PythonException(); + } + delstr(comment); + return 0; + }; + int set_description(char *description) + { + if (!PyObject_CallMethod(sink,"set_description", "s", description)) { + delstr(description); + if (PyErr_ExceptionMatches(StopParser)) + return 1; + else + throw PythonException(); + } + delstr(description); + return 0; + }; + int define_revision(char *revision, long timestamp, char *author, char *state, Branche *branches, char *next) + { + PyObject *pbranchs=PyList_New(0); + Py_INCREF(pbranchs); + Branche *move=branches; + while (move!=NULL) { + PyList_Append(pbranchs, PyString_FromString(move->name) ); + move=move->next; + } + + if (!PyObject_CallMethod(sink,"define_revision", "slssOs",revision,timestamp,author,state,pbranchs,next)) + { + Py_DECREF(pbranchs); + delstr(revision); + delstr(author); + delstr(state); + if (branches!=NULL) delete branches;delstr(next); + if (PyErr_ExceptionMatches(StopParser)) + return 1; + else + throw PythonException(); + } + Py_DECREF(pbranchs); + delstr(revision); + delstr(author); + delstr(state); + if (branches!=NULL) delete branches;delstr(next); + return 0; + }; + int set_revision_info(char *revision, char *log, char *text) + { + if (!PyObject_CallMethod(sink,"set_revision_info", "sss", revision,log,text)) + { + delstr(revision); + delstr(log); + delstr(text); + if (PyErr_ExceptionMatches(StopParser)) + return 1; + else + throw PythonException(); + } + delstr(revision); + delstr(log); + delstr(text); + return 0; + }; + int tree_completed() + { + if (!PyObject_CallMethod(sink,"tree_completed", NULL)) + { + if (PyErr_ExceptionMatches(StopParser)) + return 1; + else + throw PythonException(); + } + return 0; + }; + int parse_completed() + { + if (!PyObject_CallMethod(sink,"parse_completed", NULL)) + { + if (PyErr_ExceptionMatches(StopParser)) + return 1; + else + throw PythonException(); + } + return 0; + + }; +}; + + +static PyObject * tparse( PyObject *self, PyObject *args) +{ + char *filename; + ifstream *input; + PyObject *file=NULL; + PyObject *hsink; + + if (PyArg_ParseTuple(args, "sO!", &filename,&PyInstance_Type,&hsink)) + input=new ifstream(filename,ios::nocreate|ios::in); + else if (PyArg_ParseTuple(args, "O!O!",&PyFile_Type ,&file,&PyInstance_Type, &hsink)) + input=(ifstream *) new stdiobuf(PyFile_AsFile(file)); + else + return NULL; + Py_INCREF(hsink); + Py_XINCREF(file); + try { + tparseParser *tp=new tparseParser(input,new PythonSink(hsink) ); + } + catch (tparseException e) + { + PyErr_SetString(PyExc_Exception,e.getvalue()); + Py_DECREF(hsink); + Py_XDECREF(file); + return NULL; + } + catch (PythonException e) + { + Py_DECREF(hsink); + Py_XDECREF(file); + return NULL; + } + Py_DECREF(hsink); + Py_XDECREF(file); + Py_INCREF(Py_None); + return Py_None; + +} diff --git a/tparse/tparsemodule.h b/tparse/tparsemodule.h new file mode 100644 index 00000000..e995009e --- /dev/null +++ b/tparse/tparsemodule.h @@ -0,0 +1,49 @@ +/* + # Copyright (C) 2000-2002 The ViewCVS Group. All Rights Reserved. + # This file has been rewritten in C++ from the rcsparse.py file by + # Lucas Bruand + # + # By using this file, you agree to the terms and conditions set forth in + # the LICENSE.html file which can be found at the top level of the ViewCVS + # distribution or at http://viewcvs.sourceforge.net/license-1.html. + # + # Contact information: + # Greg Stein, PO Box 760, Palo Alto, CA, 94302 + # gstein@lyra.org, http://viewcvs.sourceforge.net/ + # + # ----------------------------------------------------------------------- + # + # This software is being maintained as part of the ViewCVS project. + # Information is available at: + # http://viewcvs.sourceforge.net/ + # + # This file was originally based on portions of the blame.py script by + # Curt Hagenlocher. + # + # ----------------------------------------------------------------------- + # + */ +static char *__doc__= "\ +this python extension module is a binding to the tparse library.\n\ +tparse is a C++ library that offers an API to a performance-oriented RCSFILE parser.\n\ +It does little syntax checking.\n\ +\n\ +Version: $Id$\n"; + +static char *StopParser__doc__ ="Stop parser exception: to be raised from the sink to abort parsing."; +static PyObject *StopParser; + +static char *tparse__doc__=" Main function: parse a file and send the result to the sink \n\ +Two ways of invoking this function from python:\n\ + * tparse.parse(filename, sink) \n\ + where filename is a string and sink is an instance of the class Sink \n\ + defined in the sink.py module.\n\ + * tparse.parse(file, sink)\n\ + where file is a python file and sink is an instance of the class Sink\n\ + defined in the sink.py module.\n"; +static PyObject * tparse( PyObject *self, PyObject *args); + + /* Init function for this module: + Invoked when the module is imported from Python + Load the stopparser expression into the tparser's namespace */ +extern "C" void inittparse();