399 lines
8.4 KiB
C++
399 lines
8.4 KiB
C++
/*
|
|
# Copyright (C) 1999-2013 The ViewCVS Group. All Rights Reserved.
|
|
#
|
|
# By using this file, you agree to the terms and conditions set forth in
|
|
# the LICENSE.html file which can be found at the top level of the ViewVC
|
|
# distribution or at http://viewvc.org/license-1.html.
|
|
#
|
|
# For more information, visit http://viewvc.org/
|
|
#
|
|
# -----------------------------------------------------------------------
|
|
#
|
|
# This file has been rewritten in C++ from the rcsparse.py file by
|
|
# Lucas Bruand <lucas.bruand@ecl2002.ec-lyon.fr>
|
|
#
|
|
# This file was originally based on portions of the blame.py script by
|
|
# Curt Hagenlocher.
|
|
#
|
|
# -----------------------------------------------------------------------
|
|
*/
|
|
|
|
/*
|
|
This C++ library offers an API to a performance oriented RCSFILE parser.
|
|
It does little syntax checking.
|
|
|
|
Version: $Id$
|
|
*/
|
|
|
|
#include "tparse.h"

#ifndef __USE_XOPEN
#define __USE_XOPEN
#endif
#include <ctime> /* for strptime */
#include <new>   /* for std::bad_alloc */
|
|
|
|
|
|
using namespace std;
|
|
|
|
/*
 * Character classification helpers used by the tokenizer.
 *
 * Whitespace(c): true for space, tab, form feed, newline and carriage return.
 * Token_term(c): true for any whitespace character and for ';' and ':',
 *                i.e. the characters that end a bare (non-'@') token.
 * isdigit(c):    true only for '0'..'9'.  The subtraction is compared as
 *                unsigned so that characters below '0' (which give a
 *                negative difference) are rejected instead of accepted.
 *
 * Each macro evaluates its argument more than once; do not pass an
 * expression with side effects.
 */
#define Whitespace(c) ((c) == ' ' || (c) == '\t' || (c) == '\014' || \
                       (c) == '\n' || (c) == '\r')
#define Token_term(c) ((c) == ' ' || (c) == '\t' || (c) == '\014' || \
                       (c) == '\n' || (c) == '\r' || (c) == ';' || (c) == ':')
#define isdigit(c) (static_cast<unsigned int>((c) - '0') < 10u)
|
|
|
|
|
|
|
|
void rcstoken::init(const char *mydata, size_t mylen)
|
|
{
|
|
size = DEFAULT_TOKEN_SIZE;
|
|
length = 0;
|
|
delta = DEFAULT_TOKEN_DELTA;
|
|
data = NULL;
|
|
if (mydata && mylen)
|
|
append(mydata, mylen);
|
|
};
|
|
|
|
void rcstoken::append(const char *b, size_t b_len)
|
|
{
|
|
if (b || b_len)
|
|
{
|
|
grow(length + b_len + 1);
|
|
memcpy(&data[length], b, b_len);
|
|
length += b_len;
|
|
data[length] = 0;
|
|
}
|
|
};
|
|
|
|
void rcstoken::grow(size_t new_size)
|
|
{
|
|
if ((! data) || (new_size > size))
|
|
{
|
|
while (new_size > size)
|
|
size += delta;
|
|
|
|
data = (char*) realloc(data, size);
|
|
};
|
|
};
|
|
|
|
/*
 * Build a new heap-allocated token from the bytes in [begin, end) of this
 * token's buffer.  The result is created with `new`; the offsets are not
 * validated here.
 */
rcstoken *rcstoken::copy_begin_end(size_t begin, size_t end)
{
  const size_t span = end - begin;
  char *first = &data[begin];

  return new rcstoken(first, span);
}
|
|
|
|
/*
 * Build a new heap-allocated token from `len` bytes of this token's buffer
 * starting at offset `begin`.  The result is created with `new`; the
 * offset and length are not validated here.
 */
rcstoken *rcstoken::copy_begin_len(size_t begin, size_t len)
{
  char *first = &data[begin];

  return new rcstoken(first, len);
}
|
|
|
|
|
|
/*--------- Tokenparser class -----------*/
|
|
/*
 * Return the next token from the input as a newly allocated rcstoken.
 * The caller receives the raw pointer and is responsible for it.
 *
 * allow_eof - when non-zero, reaching end of input before any token
 *             character yields an empty token; when zero it raises
 *             RCSParseError.
 *
 * Token kinds recognised:
 *   - a token previously handed to unget() is returned first;
 *   - ';' and ':' are returned as one-character tokens;
 *   - an '@'-delimited string is returned without its delimiters, with
 *     each doubled "@@" inside it reduced to a single '@';
 *   - anything else is a bare word that runs up to the next Token_term
 *     character (or to end of input).
 *
 * Throws RCSParseError on unexpected end of input and RCSIllegalCharacter
 * when a string is opened but never closed.
 */
rcstoken *TokenParser::get(int allow_eof)
{
  auto_ptr<rcstoken> token;

  /* A pushed-back token takes priority over the input stream. */
  if (backget)
  {
    token.reset(backget);
    backget = NULL;

    return token.release();
  }

  token.reset(new rcstoken());

  /* Skip leading whitespace, refilling the buffer whenever it runs dry. */
  while (1)
  {
    if (idx == buflength)
    {
      input->read(buf, CHUNK_SIZE);
      buflength = input->gcount();
      /* Reset before the EOF test so idx == buflength still holds
         afterwards and a later call re-reads instead of using stale
         buffer contents. */
      idx = 0;
      if (buflength == 0)
      {
        if (allow_eof)
          return token.release();
        else
          throw RCSParseError("Unexpected end of file.");
      }
    }
    if (!Whitespace(buf[idx]))
      break;
    idx++;
  }

  /* Single-character punctuation tokens.  The character must be stored
     before idx is advanced; doing it the other way round returns the
     character that follows and can read past the buffer. */
  if (buf[idx] == ';' || buf[idx] == ':')
  {
    (*token) = buf[idx];
    idx++;
    return token.release();
  }

  /* Bare word: collect characters up to the next terminator, which may
     lie in a later chunk of input. */
  if (buf[idx] != '@')
  {
    int end = idx + 1;

    while (1)
    {
      while ( (end < buflength) && !(Token_term(buf[end])) )
        end++;
      token->append(buf + idx, end - idx);
      if (end < buflength)
      {
        idx = end;
        return token.release();
      }
      input->read(buf, CHUNK_SIZE);
      buflength = input->gcount();
      idx = 0;
      end = 0;
      /* End of input also ends the word; without this check the loop
         would keep reading zero bytes forever. */
      if (buflength == 0)
        return token.release();
    }
  }

  /* '@'-delimited string: step over the opening delimiter. */
  idx++;

  while (1)
  {
    int i;

    if (idx == buflength)
    {
      idx = 0;
      input->read(buf, CHUNK_SIZE);
      if ( (buflength = input->gcount()) == 0 )
        throw RCSIllegalCharacter("Unterminated string: @ missing!");
    }

    /* Locate the next '@' in the current chunk. */
    for (i = idx; i < buflength && (buf[i] != '@'); i++)
      ;

    if (i == buflength)
    {
      /* No '@' in this chunk: keep everything and fetch more input. */
      if ((buflength - idx) > 0)
        token->append(buf + idx, buflength - idx);
      idx = buflength;
      continue;
    }

    if ( i == buflength - 1)
    {
      /* The '@' is the last buffered byte, so whether it is a closing
         delimiter or the first half of "@@" depends on the next chunk.
         Keep the text before it, move the '@' to the front of the buffer
         and read the following data right behind it. */
      token->append(buf + idx, i - idx);
      idx = 0;
      buf[0] = '@';
      input->read(buf + 1, CHUNK_SIZE - 1);
      if ( (buflength = input->gcount()) == 0 )
      {
        /* Nothing follows: that '@' was the closing delimiter and the
           string is complete.  Leave the buffer empty (idx == buflength)
           so the next call reports end of input. */
        return token.release();
      }
      buflength++;
      continue;
    }

    if (buf[i + 1] == '@')
    {
      /* Escaped "@@": keep one '@' and skip the second. */
      token->append(buf + idx, i - idx + 1);
      idx = i + 2;
      continue;
    }

    /* Closing delimiter: store the remaining text and step past the '@'. */
    if ((i - idx) > 0)
      token->append(buf + idx, i - idx);
    idx = i + 1;
    return token.release();
  }
}
|
|
|
|
/*
 * Push one token back so that the next get() returns it.
 *
 * Only a single token can be held; a second unget() before the held token
 * has been fetched again raises RCSParseError.
 */
void TokenParser::unget(rcstoken *token)
{
  if (!backget)
  {
    backget = token;
    return;
  }

  throw RCSParseError("Ungetting a token while already having "
                      "an ungetted token.");
}
|
|
|
|
/*--------- tparseParser class -----------*/
|
|
void tparseParser::parse_rcs_admin()
|
|
{
|
|
while (1)
|
|
{
|
|
auto_ptr<rcstoken> token(tokenstream->get(FALSE));
|
|
|
|
if (isdigit((*token)[0]))
|
|
{
|
|
tokenstream->unget(token.release());
|
|
return;
|
|
}
|
|
if (*token == "head")
|
|
{
|
|
token.reset(tokenstream->get(FALSE));
|
|
sink->set_head_revision(*token);
|
|
|
|
tokenstream->match(';');
|
|
continue;
|
|
}
|
|
if (*token == "branch")
|
|
{
|
|
token.reset(tokenstream->get(FALSE));
|
|
if (*token != ';')
|
|
{
|
|
sink->set_principal_branch(*token);
|
|
|
|
tokenstream->match(';');
|
|
}
|
|
continue;
|
|
}
|
|
if (*token == "symbols")
|
|
{
|
|
while (1)
|
|
{
|
|
auto_ptr<rcstoken> rev;
|
|
token.reset(tokenstream->get(FALSE));
|
|
if (*token == ';')
|
|
break;
|
|
|
|
tokenstream->match(':');
|
|
rev.reset(tokenstream->get(FALSE));
|
|
sink->define_tag(*token, *rev);
|
|
}
|
|
continue;
|
|
}
|
|
if (*token == "comment")
|
|
{
|
|
token.reset(tokenstream->get(FALSE));
|
|
sink->set_comment((*token));
|
|
|
|
tokenstream->match(';');
|
|
continue;
|
|
}
|
|
if (*token == "locks" ||
|
|
*token == "strict" ||
|
|
*token == "expand" ||
|
|
*token == "access")
|
|
{
|
|
while (1)
|
|
{
|
|
token.reset(tokenstream->get(FALSE));
|
|
if (*token == ';')
|
|
break;
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
};
|
|
|
|
void tparseParser::parse_rcs_tree()
|
|
{
|
|
while (1)
|
|
{
|
|
auto_ptr<rcstoken> revision, date, author, hstate, next;
|
|
long timestamp;
|
|
tokenlist branches;
|
|
struct tm tm;
|
|
|
|
revision.reset(tokenstream->get(FALSE));
|
|
if (*revision == "desc")
|
|
{
|
|
tokenstream->unget(revision.release());
|
|
return;
|
|
}
|
|
|
|
// Parse date
|
|
tokenstream->match("date");
|
|
date.reset(tokenstream->get(FALSE));
|
|
tokenstream->match(";");
|
|
|
|
memset ((void *) &tm, 0, sizeof(struct tm));
|
|
if (strptime((*date).data, "%y.%m.%d.%H.%M.%S", &tm) == NULL)
|
|
strptime((*date).data, "%Y.%m.%d.%H.%M.%S", &tm);
|
|
timestamp = mktime(&tm);
|
|
|
|
|
|
tokenstream->match("author");
|
|
author.reset(tokenstream->get(FALSE));
|
|
tokenstream->match(';');
|
|
|
|
tokenstream->match("state");
|
|
hstate.reset(new rcstoken());
|
|
while (1)
|
|
{
|
|
auto_ptr<rcstoken> token;
|
|
token.reset(tokenstream->get(FALSE));
|
|
if (*token == ';')
|
|
break;
|
|
|
|
if ((*hstate).length)
|
|
(*hstate) += ' ';
|
|
(*hstate) += *token;
|
|
}
|
|
|
|
tokenstream->match("branches");
|
|
while (1)
|
|
{
|
|
auto_ptr<rcstoken> token;
|
|
token.reset(tokenstream->get(FALSE));
|
|
if (*token == ';')
|
|
break;
|
|
|
|
branches.push_front((*token));
|
|
}
|
|
|
|
tokenstream->match("next");
|
|
next.reset(tokenstream->get(FALSE));
|
|
if (*next == ';')
|
|
/* generate null token */
|
|
next.reset(new rcstoken());
|
|
else
|
|
tokenstream->match(';');
|
|
|
|
/*
|
|
* there are some files with extra tags in them. for example:
|
|
* owner 640;
|
|
* group 15;
|
|
* permissions 644;
|
|
* hardlinks @configure.in@;
|
|
* this is "newphrase" in RCSFILE(5). we just want to skip over these.
|
|
*/
|
|
while (1)
|
|
{
|
|
auto_ptr<rcstoken> token;
|
|
token.reset(tokenstream->get(FALSE));
|
|
|
|
if ((*token == "desc") || isdigit((*token)[0]) )
|
|
{
|
|
tokenstream->unget(token.release());
|
|
break;
|
|
};
|
|
|
|
while (*token != ";")
|
|
token.reset(tokenstream->get(FALSE));
|
|
}
|
|
|
|
sink->define_revision(*revision, timestamp, *author,
|
|
*hstate, branches, *next);
|
|
}
|
|
return;
|
|
}
|
|
|
|
void tparseParser::parse_rcs_description()
|
|
{
|
|
auto_ptr<rcstoken> token;
|
|
tokenstream->match("desc");
|
|
|
|
token.reset(tokenstream->get(FALSE));
|
|
sink->set_description(*token);
|
|
}
|
|
|
|
void tparseParser::parse_rcs_deltatext()
|
|
{
|
|
auto_ptr<rcstoken> revision, log, text;
|
|
|
|
while (1)
|
|
{
|
|
revision.reset(tokenstream->get(TRUE));
|
|
if ((*revision).null_token())
|
|
break;
|
|
|
|
tokenstream->match("log");
|
|
log.reset(tokenstream->get(FALSE));
|
|
|
|
tokenstream->match("text");
|
|
text.reset(tokenstream->get(FALSE));
|
|
|
|
sink->set_revision_info(*revision, *log, *text);
|
|
}
|
|
return;
|
|
}
|