viewvc-4intranet/misc/tparse/tparse.cpp

/*
# Copyright (C) 1999-2013 The ViewCVS Group. All Rights Reserved.
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewVC
# distribution or at http://viewvc.org/license-1.html.
#
# For more information, visit http://viewvc.org/
#
# -----------------------------------------------------------------------
#
# This file has been rewritten in C++ from the rcsparse.py file by
# Lucas Bruand <lucas.bruand@ecl2002.ec-lyon.fr>
#
# This file was originally based on portions of the blame.py script by
# Curt Hagenlocher.
#
# -----------------------------------------------------------------------
*/

/*
   This C++ library offers an API to a performance oriented RCSFILE parser.
   It does little syntax checking.

   Version: $Id$
*/

#include "tparse.h"

#ifndef __USE_XOPEN
#define __USE_XOPEN
#endif
#include <ctime>   /* for strptime */


using namespace std;

#define Whitespace(c) (c == ' ' || c == '\t' || c == '\014' || c == '\n' || \
                       c == '\r')
#define Token_term(c) (c == ' ' || c == '\t' || c == '\014' || c == '\n' || \
                       c == '\r' || c == ';' || c == ':')
#define isdigit(c) ((c-'0')<10)


void rcstoken::init(const char *mydata, size_t mylen)
{
  size = DEFAULT_TOKEN_SIZE;
  length = 0;
  delta = DEFAULT_TOKEN_DELTA;
  data = NULL;
  if (mydata && mylen)
    append(mydata, mylen);
};

void rcstoken::append(const char *b, size_t b_len)
{
  if (b || b_len)
    {
      grow(length + b_len + 1);
      memcpy(&data[length], b, b_len);
      length += b_len;
      data[length] = 0;
    }
};

void rcstoken::grow(size_t new_size)
{
  if ((! data) || (new_size > size))
    {
      while (new_size > size)
        size += delta;

      data = (char*) realloc(data, size);
    };
};

rcstoken *rcstoken::copy_begin_end(size_t begin, size_t end)
{
  return new rcstoken(&data[begin], end - begin);
};

rcstoken *rcstoken::copy_begin_len(size_t begin, size_t len)
{
  return new rcstoken(&data[begin], len);
};


/*--------- Tokenparser class -----------*/
rcstoken *TokenParser::get(int allow_eof)
{
  auto_ptr<rcstoken> token;

  if (backget)
  {
    token.reset(backget);
    backget = NULL;

    return token.release();
  }

  token.reset(new rcstoken());
  while (1)
  {
    if (idx == buflength)
    {
      input->read(buf, CHUNK_SIZE);
      if ( (buflength = input->gcount()) == 0 )
      {
        if (allow_eof)
          return token.release();
        else
          throw RCSParseError("Unexpected end of file.");
      };

      idx = 0;
    }
    if (!Whitespace(buf[idx]))
      break;
    idx++;
  }

  if (buf[idx] == ';' || buf[idx] == ':')
  {
    idx++;
    (*token) = buf[idx];
    return token.release();
  }

  if (buf[idx] != '@')
  {
    int end = idx + 1;

    while (1)
    {
      while ( (end < buflength) && !(Token_term(buf[end])) )
        end++;
      token->append(buf + idx, end - idx);
      if (end < buflength)
      {
        idx = end;
        return token.release();
      }
      input->read(buf, CHUNK_SIZE);
      buflength = input->gcount();
      idx = 0;
      end = 0;
    }
  }
  idx++;

  while (1)
  {
    int i;

    if (idx == buflength)
    {
      idx = 0;
      input->read(buf, CHUNK_SIZE);
      if ( (buflength = input->gcount()) == 0 )
        throw RCSIllegalCharacter("Unterminated string: @ missing!");
    }
    //i=strchr(buf+idx,'@');
    for (i = idx;i < buflength && (buf[i] != '@');i++)
      ;
    if (i == buflength)
    {
      if ((buflength - idx) > 0)
        token->append(buf + idx, buflength - idx);
      idx = buflength;
      continue;
    }
    if ( i == buflength - 1)
    {
      token->append(buf + idx, i - idx);
      idx = 0;
      buf[0] = '@';
      input->read(buf + 1, CHUNK_SIZE - 1);
      if ( (buflength = input->gcount()) == 0 )
        throw RCSIllegalCharacter("Unterminated string: @ missing!");
      buflength++;
      continue;
    }
    if (buf[i + 1] == '@')
    {
      token->append(buf + idx, i - idx + 1);
      idx = i + 2;
      continue;
    }
    if ((i - idx) > 0)
      token->append(buf + idx, i - idx);
    idx = i + 1;
    return token.release();
  }
};

void TokenParser::unget(rcstoken *token)
{
  if (backget)
  {
    throw RCSParseError("Ungetting a token while already having "
                        "an ungetted token.");
  }
  backget = token;
}

/*--------- tparseParser class -----------*/
void tparseParser::parse_rcs_admin()
{
  while (1)
  {
    auto_ptr<rcstoken> token(tokenstream->get(FALSE));

    if (isdigit((*token)[0]))
    {
      tokenstream->unget(token.release());
      return;
    }
    if (*token == "head")
    {
      token.reset(tokenstream->get(FALSE));
      sink->set_head_revision(*token);

      tokenstream->match(';');
      continue;
    }
    if (*token == "branch")
    {
      token.reset(tokenstream->get(FALSE));
      if (*token != ';')
      {
        sink->set_principal_branch(*token);

        tokenstream->match(';');
      }
      continue;
    }
    if (*token == "symbols")
    {
      while (1)
      {
        auto_ptr<rcstoken> rev;
        token.reset(tokenstream->get(FALSE));
        if (*token == ';')
          break;

        tokenstream->match(':');
        rev.reset(tokenstream->get(FALSE));
        sink->define_tag(*token, *rev);
      }
      continue;
    }
    if (*token == "comment")
    {
      token.reset(tokenstream->get(FALSE));
      sink->set_comment((*token));

      tokenstream->match(';');
      continue;
    }
    if (*token == "locks" ||
        *token == "strict" ||
        *token == "expand" ||
        *token == "access")
    {
      while (1)
      {
        token.reset(tokenstream->get(FALSE));
        if (*token == ';')
          break;
      }
      continue;
    }
  }
};

void tparseParser::parse_rcs_tree()
{
  while (1)
  {
    auto_ptr<rcstoken> revision, date, author, hstate, next;
    long timestamp;
    tokenlist branches;
    struct tm tm;

    revision.reset(tokenstream->get(FALSE));
    if (*revision == "desc")
    {
      tokenstream->unget(revision.release());
      return;
    }

    // Parse date
    tokenstream->match("date");
    date.reset(tokenstream->get(FALSE));
    tokenstream->match(";");

    memset ((void *) &tm, 0, sizeof(struct tm));
    if (strptime((*date).data, "%y.%m.%d.%H.%M.%S", &tm) == NULL)
      strptime((*date).data, "%Y.%m.%d.%H.%M.%S", &tm);
    timestamp = mktime(&tm);


    tokenstream->match("author");
    author.reset(tokenstream->get(FALSE));
    tokenstream->match(';');

    tokenstream->match("state");
    hstate.reset(new rcstoken());
    while (1)
    {
      auto_ptr<rcstoken> token;
      token.reset(tokenstream->get(FALSE));
      if (*token == ';')
        break;

      if ((*hstate).length)
        (*hstate) += ' ';
      (*hstate) += *token;
    }

    tokenstream->match("branches");
    while (1)
      {
        auto_ptr<rcstoken> token;
        token.reset(tokenstream->get(FALSE));
        if (*token == ';')
          break;

        branches.push_front((*token));
      }

    tokenstream->match("next");
    next.reset(tokenstream->get(FALSE));
    if (*next == ';')
      /* generate null token */
      next.reset(new rcstoken());
    else
      tokenstream->match(';');

    /*
     * 	there are some files with extra tags in them. for example:
     *	owner	640;
     *	group	15;
     *	permissions	644;
     *	hardlinks	@configure.in@;
     *	this is "newphrase" in RCSFILE(5). we just want to skip over these.
     */
    while (1)
      {
        auto_ptr<rcstoken> token;
        token.reset(tokenstream->get(FALSE));

        if ((*token == "desc") || isdigit((*token)[0]) )
          {
            tokenstream->unget(token.release());
            break;
          };

        while (*token != ";")
          token.reset(tokenstream->get(FALSE));
      }

    sink->define_revision(*revision, timestamp, *author,
                          *hstate, branches, *next);
  }
  return;
}

void tparseParser::parse_rcs_description()
{
  auto_ptr<rcstoken> token;
  tokenstream->match("desc");

  token.reset(tokenstream->get(FALSE));
  sink->set_description(*token);
}

void tparseParser::parse_rcs_deltatext()
{
  auto_ptr<rcstoken> revision, log, text;

  while (1)
  {
    revision.reset(tokenstream->get(TRUE));
    if ((*revision).null_token())
      break;

    tokenstream->match("log");
    log.reset(tokenstream->get(FALSE));

    tokenstream->match("text");
    text.reset(tokenstream->get(FALSE));

    sink->set_revision_info(*revision, *log, *text);
  }
  return;
}