viewvc-4intranet/tests/timelog.py


import time
import string
import profile

import rcsparse
import viewcvs

def lines_changed(delta):
  """Count the lines added and deleted by an RCS delta text.

  The delta is read as a sequence of commands.  Each command occupies one
  newline-terminated line: an operation character, a line number, a single
  space and a line count.  An operation of 'd' deletes `count` lines; any
  other operation is treated as 'a' (add) and is followed by `count`
  newline-terminated lines of new text, which are skipped over.

  Returns a tuple (added, deleted) of line counts.

  Raises ValueError if a command line is malformed (no space, no
  terminating newline, non-numeric fields) or if the text following an
  add command has fewer newline-terminated lines than its count says.
  """
  added = deleted = 0
  idx = 0
  size = len(delta)
  while idx < size:
    op = delta[idx]
    sep = delta.find(' ', idx + 1)
    eol = delta.find('\n', sep + 1)
    if sep < 0 or eol < 0:
      # without this check a missing newline would reset idx to 0 below
      # and the loop would never terminate
      raise ValueError('malformed command in the delta in the RCS file')
    # the line number itself is not needed, but parsing it rejects
    # command lines that are not of the expected shape
    int(delta[idx + 1:sep])
    count = int(delta[sep + 1:eol])
    idx = eol + 1
    if op == 'd':
      deleted = deleted + count
    else: # 'a' for adding text
      added = added + count
      # skip new text
      remaining = count
      while remaining > 0:
        nl = delta.find('\n', idx)
        if nl < 0:
          # raised explicitly (not asserted) so the check survives -O
          raise ValueError('missing a newline in the delta in the RCS file')
        idx = nl + 1
        remaining = remaining - 1
  return added, deleted

class FetchSink(rcsparse.Sink):
  """Sink that gathers head, branch, tags and log entries from a parse.

  After the parse, the collected data is available as the attributes
  .head, .branch, .tags and .revs.  If `which_rev` was given, .revs is
  reduced to the single entry for that revision.
  """

  def __init__(self, which_rev=None):
    # revision to keep in .revs once parsing is done (None/empty: keep all)
    self.which = which_rev
    self.head = ''
    self.branch = ''
    # tag name -> revision
    self.tags = { }
    # revision -> (timestamp, author, state)
    self.meta = { }
    # revision -> the revision that named it as its `next` or as a branch
    self.base = { }
    # LogEntry objects in the order they were seen
    self.revs = [ ]
    # revision -> LogEntry, for random access
    self.entries = { }

  def set_head_revision(self, revision):
    """Remember `revision` as the head revision."""
    self.head = revision

  def set_principal_branch(self, branch_name):
    """Remember `branch_name` as the principal branch."""
    self.branch = branch_name

  def define_tag(self, name, revision):
    """Record that tag `name` refers to `revision`."""
    self.tags[name] = revision

  def define_revision(self, revision, timestamp, author, state,
                      branches, next):
    """Store a revision's metadata and note which revisions hang off it."""
    self.meta[revision] = (timestamp, author, state)
    # `next` first, then each branch, all pointing back at this revision
    for derived in [next] + list(branches):
      self.base[derived] = revision

  def set_revision_info(self, revision, log, text):
    """Build the LogEntry for `revision` and attribute its line changes.

    `text` is run through lines_changed() for every revision except the
    head.  For a trunk revision (exactly one '.' in its number) the
    counts are swapped and stored on the entry of its base revision; for
    a branch revision they are stored on the revision's own entry.
    """
    timestamp, author, state = self.meta[revision]
    # time.timezone is the local non-DST offset in seconds west of UTC
    # NOTE(review): presumably done to match viewcvs.fetch_log -- confirm
    when = int(timestamp) - time.timezone
    entry = viewcvs.LogEntry(revision, when, author, state, None, log)

    # .revs is "order seen" and .entries is for random access
    self.revs.append(entry)
    self.entries[revision] = entry

    if revision == self.head:
      return

    added, deleted = lines_changed(text)
    if revision.count('.') != 1:
      # on a branch. forward delta.
      entry.changed = '+%d -%d' % (added, deleted)
    else:
      # on the trunk. reverse delta.
      self.entries[self.base[revision]].changed = '+%d -%d' % (deleted, added)

  def parse_completed(self):
    """Narrow .revs down to the requested revision, if one was given."""
    wanted = self.which
    if wanted:
      self.revs = [ self.entries[wanted] ]

def fetch_log2(full_name, which_rev=None):
  """Parse the file `full_name` with rcsparse and return its log data.

  `which_rev`, if given, is passed to FetchSink to restrict the returned
  entries to that revision.

  Returns a tuple (head, branch, tags, revs) taken from the FetchSink
  after the parse.
  """
  sink = FetchSink(which_rev)
  rcs_file = open(full_name)
  try:
    rcsparse.Parser().parse(rcs_file, sink)
  finally:
    # close explicitly rather than relying on garbage collection; this
    # function is called in tight timing/profiling loops
    rcs_file.close()
  return sink.head, sink.branch, sink.tags, sink.revs

def compare_fetch(full_name, which_rev=None):
  # d1 and d2 are:
  #   ( HEAD revision, branch name, TAGS { name : revision }, [ LogEntry ] )
  d1 = viewcvs.fetch_log(full_name, which_rev)
  d2 = fetch_log2(full_name, which_rev)
  if d1[:3] != d2[:3]:
    print 'd1:', d1[:3]
    print 'd2:', d2[:3]
    return
  if len(d1[3]) != len(d2[3]):
    print 'len(d1[3])=%d  len(d2[3])=%d' % (len(d1[3]), len(d2[3]))
    return
  def sort_func(e, f):
    return cmp(e.rev, f.rev)
  d1[3].sort(sort_func)
  d2[3].sort(sort_func)
  import pprint
  for i in range(len(d1[3])):
    if vars(d1[3][i]) != vars(d2[3][i]):
      pprint.pprint((i, vars(d1[3][i]), vars(d2[3][i])))

def compare_many(files):
  for file in files:
    print file, '...'
    compare_fetch(file)

def time_stream(stream_class, filename, n=10):
  """Print the average wall-clock time to drain one token stream.

  `stream_class` is called with an open file on `filename` and the
  resulting object's get() method is called until it returns None.
  This is repeated `n` times and the elapsed time divided by `n` is
  printed.  `n` must be at least 1 (the total is divided by it).
  """
  t = time.time()
  for i in range(n):
    fp = open(filename)
    try:
      ts = stream_class(fp)
      while ts.get() is not None:
        pass
    finally:
      # do not leave one open file behind per iteration
      fp.close()
  t = time.time() - t
  # single-argument print: same output with or without the parentheses
  print(t / n)

def _time_calls(func, full_name, which_rev, n):
  """Call func(full_name, which_rev) n times; return the sorted durations."""
  durations = [ ]
  for i in range(n):
    start = time.time()
    func(full_name, which_rev)
    durations.append(time.time() - start)
  durations.sort()
  return durations

def time_fetch(full_name, which_rev=None, n=1):
  """Time viewcvs.fetch_log() against fetch_log2() and print a summary.

  Each function is called `n` times on `full_name` / `which_rev` (all
  fetch_log() calls first, then all fetch_log2() calls).  The sorted
  timings are trimmed to the slice [int(n*.05) : int(n*.95)+1] before
  the mean, minimum and maximum of what remains are printed; t1 is
  fetch_log(), t2 is fetch_log2().  `n` must be at least 1.
  """
  times1 = _time_calls(viewcvs.fetch_log, full_name, which_rev, n)
  times2 = _time_calls(fetch_log2, full_name, which_rev, n)

  # trim the exceptional cases off the ends before reporting
  lo = int(n*.05)
  hi = int(n*.95)+1
  times1 = times1[lo:hi]
  times2 = times2[lo:hi]

  t1 = sum(times1) / len(times1)
  t2 = sum(times2) / len(times2)
  print("t1=%.4f (%.4f .. %.4f)    t2=%.4f (%.4f .. %.4f)" %
        (t1, times1[0], times1[-1], t2, times2[0], times2[-1]))

def profile_stream(stream_class, filename, n=20):
  """Profile draining a token stream `n` times and print the statistics.

  For each repetition `stream_class` is called with an open file on
  `filename` and the resulting object's get() method is called until it
  returns None.  All repetitions run under a single profile.Profile,
  whose statistics are printed at the end.
  """
  p = profile.Profile()
  def many_calls(filename, n):
    for i in range(n):
      fp = open(filename)
      try:
        ts = stream_class(fp)
        while ts.get() is not None:
          pass
      finally:
        # do not leave one open file behind per iteration
        fp.close()
  p.runcall(many_calls, filename, n)
  p.print_stats()

def profile_fetch(full_name, which_rev=None, n=10):
  """Profile `n` consecutive fetch_log2() calls and print the statistics."""
  def many_calls(full_name, which_rev, n):
    # the whole batch runs inside one function so that a single
    # runcall() covers every repetition
    for _ in xrange(n):
      fetch_log2(full_name, which_rev)

  profiler = profile.Profile()
  profiler.runcall(many_calls, full_name, which_rev, n)
  profiler.print_stats()

def varysize(full_name, which_rev=None,
             sizes=(32740, 65500, 100000, 200000, 500000), runs=5):
  """Time fetch_log2() under several token-stream chunk sizes.

  For each value in `sizes`, rcsparse._TokenStream.CHUNK_SIZE is set to
  that value, fetch_log2(full_name, which_rev) is called `runs` times,
  and the chunk size and total elapsed time are printed on one line.

  The defaults reproduce the sizes and repetition count that used to be
  hard-coded here.  Smaller sizes (2020, 4070, 8170, 8192 and 16384)
  were present only as commented-out calls; pass them via `sizes` to
  try them again.

  Note that CHUNK_SIZE is left at the last value in `sizes` on return.
  """
  for size in sizes:
    rcsparse._TokenStream.CHUNK_SIZE = size
    start = time.time()
    for i in range(runs):
      fetch_log2(full_name, which_rev)
    # same text as printing the two values separated by a space
    print('%s %s' % (size, time.time() - start))
save a little test program for comparing rcsparse vs rlog output parsing git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@184 8cb11bc2-c004-0410-86c3-e597b4017df7 2001-05-13 04:16:42 +04:00
			`import time`
			`import string`
Speed up the log extraction by avoiding regular expressions (there are simple equivalents which are faster). Add a test function for profiling. git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@185 8cb11bc2-c004-0410-86c3-e597b4017df7 2001-05-13 06:50:24 +04:00			`import profile`
save a little test program for comparing rcsparse vs rlog output parsing git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@184 8cb11bc2-c004-0410-86c3-e597b4017df7 2001-05-13 04:16:42 +04:00
			`import rcsparse`
			`import viewcvs`

			`def lines_changed(delta):`
			`idx = 0`
			`added = deleted = 0`
			`while idx < len(delta):`
			`op = delta[idx]`
More performance tweaks. git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@187 8cb11bc2-c004-0410-86c3-e597b4017df7 2001-05-14 09:53:35 +04:00			`i = string.find(delta, ' ', idx + 1)`
			`j = string.find(delta, '\n', i + 1)`
Speed up the log extraction by avoiding regular expressions (there are simple equivalents which are faster). Add a test function for profiling. git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@185 8cb11bc2-c004-0410-86c3-e597b4017df7 2001-05-13 06:50:24 +04:00			`line = int(delta[idx+1:i])`
			`count = int(delta[i+1:j])`
			`idx = j + 1`
save a little test program for comparing rcsparse vs rlog output parsing git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@184 8cb11bc2-c004-0410-86c3-e597b4017df7 2001-05-13 04:16:42 +04:00			`if op == 'd':`
			`deleted = deleted + count`
			`else: # 'a' for adding text`
			`added = added + count`
			`# skip new text`
			`while count > 0:`
			`nl = string.find(delta, '\n', idx)`
			`assert nl > 0, 'missing a newline in the delta in the RCS file'`
			`idx = nl + 1`
			`count = count - 1`
			`return added, deleted`

			`class FetchSink(rcsparse.Sink):`
			`def __init__(self, which_rev=None):`
			`self.head = self.branch = ''`
			`self.tags = { }`
			`self.meta = { }`
			`self.revs = [ ]`
			`self.base = { }`
			`self.entries = { }`
			`self.which = which_rev`

			`def set_head_revision(self, revision):`
			`self.head = revision`

			`def set_principal_branch(self, branch_name):`
			`self.branch = branch_name`

			`def define_tag(self, name, revision):`
			`self.tags[name] = revision`

			`def define_revision(self, revision, timestamp, author, state,`
			`branches, next):`
			`self.meta[revision] = (timestamp, author, state)`
			`self.base[next] = revision`
			`for b in branches:`
			`self.base[b] = revision`

			`def set_revision_info(self, revision, log, text):`
			`timestamp, author, state = self.meta[revision]`
			`entry = viewcvs.LogEntry(revision, int(timestamp) - time.timezone, author,`
			`state, None, log)`

			`# .revs is "order seen" and .entries is for random access`
			`self.revs.append(entry)`
			`self.entries[revision] = entry`

			`if revision != self.head:`
			`added, deleted = lines_changed(text)`
Speed up the log extraction by avoiding regular expressions (there are simple equivalents which are faster). Add a test function for profiling. git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@185 8cb11bc2-c004-0410-86c3-e597b4017df7 2001-05-13 06:50:24 +04:00			`if string.count(revision, '.') == 1:`
save a little test program for comparing rcsparse vs rlog output parsing git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@184 8cb11bc2-c004-0410-86c3-e597b4017df7 2001-05-13 04:16:42 +04:00			`# on the trunk. reverse delta.`
			`changed = '+%d -%d' % (deleted, added)`
			`self.entries[self.base[revision]].changed = changed`
Speed up the log extraction by avoiding regular expressions (there are simple equivalents which are faster). Add a test function for profiling. git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@185 8cb11bc2-c004-0410-86c3-e597b4017df7 2001-05-13 06:50:24 +04:00			`else:`
			`# on a branch. forward delta.`
			`changed = '+%d -%d' % (added, deleted)`
			`self.entries[revision].changed = changed`
save a little test program for comparing rcsparse vs rlog output parsing git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@184 8cb11bc2-c004-0410-86c3-e597b4017df7 2001-05-13 04:16:42 +04:00
			`def parse_completed(self):`
			`if self.which:`
			`self.revs = [ self.entries[self.which] ]`

			`def fetch_log2(full_name, which_rev=None):`
			`sink = FetchSink(which_rev)`
			`rcsparse.Parser().parse(open(full_name), sink)`
			`return sink.head, sink.branch, sink.tags, sink.revs`

			`def compare_fetch(full_name, which_rev=None):`
Check in the mxTextTools-based parser that I implemented back in November. The timelog.fetch_log2() timing dropped from .53 seconds for my test input to .34 seconds. Overall stream speed dropped from .25 seconds to .10 seconds. This also introduces an mget() method on the token stream so that the parser can get multiple tokens in one shot, rather than needing to call many times. In timelog, I've added a new compare_many() for big verifications, time_stream to time individual token streams, profile_stream for profiling streams, and revised the time_fetch function to do multiple fetches and to trim the exceptional cases off the ends before reporting. git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@456 8cb11bc2-c004-0410-86c3-e597b4017df7 2002-01-29 13:08:46 +03:00			`# d1 and d2 are:`
			`# ( HEAD revision, branch name, TAGS { name : revision }, [ LogEntry ] )`
save a little test program for comparing rcsparse vs rlog output parsing git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@184 8cb11bc2-c004-0410-86c3-e597b4017df7 2001-05-13 04:16:42 +04:00			`d1 = viewcvs.fetch_log(full_name, which_rev)`
			`d2 = fetch_log2(full_name, which_rev)`
			`if d1[:3] != d2[:3]:`
			`print 'd1:', d1[:3]`
			`print 'd2:', d2[:3]`
			`return`
			`if len(d1[3]) != len(d2[3]):`
			`print 'len(d1[3])=%d len(d2[3])=%d' % (len(d1[3]), len(d2[3]))`
			`return`
			`def sort_func(e, f):`
			`return cmp(e.rev, f.rev)`
			`d1[3].sort(sort_func)`
			`d2[3].sort(sort_func)`
			`import pprint`
			`for i in range(len(d1[3])):`
			`if vars(d1[3][i]) != vars(d2[3][i]):`
			`pprint.pprint((i, vars(d1[3][i]), vars(d2[3][i])))`

Check in the mxTextTools-based parser that I implemented back in November. The timelog.fetch_log2() timing dropped from .53 seconds for my test input to .34 seconds. Overall stream speed dropped from .25 seconds to .10 seconds. This also introduces an mget() method on the token stream so that the parser can get multiple tokens in one shot, rather than needing to call many times. In timelog, I've added a new compare_many() for big verifications, time_stream to time individual token streams, profile_stream for profiling streams, and revised the time_fetch function to do multiple fetches and to trim the exceptional cases off the ends before reporting. git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@456 8cb11bc2-c004-0410-86c3-e597b4017df7 2002-01-29 13:08:46 +03:00			`def compare_many(files):`
			`for file in files:`
			`print file, '...'`
			`compare_fetch(file)`

			`def time_stream(stream_class, filename, n=10):`
			`d1 = d2 = d3 = d4 = 0`
save a little test program for comparing rcsparse vs rlog output parsing git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@184 8cb11bc2-c004-0410-86c3-e597b4017df7 2001-05-13 04:16:42 +04:00			`t = time.time()`
Check in the mxTextTools-based parser that I implemented back in November. The timelog.fetch_log2() timing dropped from .53 seconds for my test input to .34 seconds. Overall stream speed dropped from .25 seconds to .10 seconds. This also introduces an mget() method on the token stream so that the parser can get multiple tokens in one shot, rather than needing to call many times. In timelog, I've added a new compare_many() for big verifications, time_stream to time individual token streams, profile_stream for profiling streams, and revised the time_fetch function to do multiple fetches and to trim the exceptional cases off the ends before reporting. git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@456 8cb11bc2-c004-0410-86c3-e597b4017df7 2002-01-29 13:08:46 +03:00			`for i in range(n):`
			`ts = stream_class(open(filename))`
			`while ts.get() is not None:`
			`pass`
			`t = time.time() - t`
			`print t/n`

			`def time_fetch(full_name, which_rev=None, n=1):`
			`times1 = [ None ] * n`
			`times2 = [ None ] * n`
			`for i in range(n):`
			`t = time.time()`
			`viewcvs.fetch_log(full_name, which_rev)`
			`times1[i] = time.time() - t`
			`for i in range(n):`
			`t = time.time()`
			`fetch_log2(full_name, which_rev)`
			`times2[i] = time.time() - t`
			`times1.sort()`
			`times2.sort()`
			`i1 = int(n*.05)`
			`i2 = int(n*.95)+1`
			`times1 = times1[i1:i2]`
			`times2 = times2[i1:i2]`
			`t1 = reduce(lambda x,y: x+y, times1, 0) / len(times1)`
			`t2 = reduce(lambda x,y: x+y, times2, 0) / len(times2)`
			`print "t1=%.4f (%.4f .. %.4f) t2=%.4f (%.4f .. %.4f)" % \`
			`(t1, times1[0], times1[-1], t2, times2[0], times2[-1])`

			`def profile_stream(stream_class, filename, n=20):`
			`p = profile.Profile()`
			`def many_calls(filename, n):`
			`for i in xrange(n):`
			`ts = stream_class(open(filename))`
			`while ts.get() is not None:`
			`pass`
			`p.runcall(many_calls, filename, n)`
			`p.print_stats()`
Speed up the log extraction by avoiding regular expressions (there are simple equivalents which are faster). Add a test function for profiling. git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@185 8cb11bc2-c004-0410-86c3-e597b4017df7 2001-05-13 06:50:24 +04:00
Check in the mxTextTools-based parser that I implemented back in November. The timelog.fetch_log2() timing dropped from .53 seconds for my test input to .34 seconds. Overall stream speed dropped from .25 seconds to .10 seconds. This also introduces an mget() method on the token stream so that the parser can get multiple tokens in one shot, rather than needing to call many times. In timelog, I've added a new compare_many() for big verifications, time_stream to time individual token streams, profile_stream for profiling streams, and revised the time_fetch function to do multiple fetches and to trim the exceptional cases off the ends before reporting. git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@456 8cb11bc2-c004-0410-86c3-e597b4017df7 2002-01-29 13:08:46 +03:00			`def profile_fetch(full_name, which_rev=None, n=10):`
Speed up the log extraction by avoiding regular expressions (there are simple equivalents which are faster). Add a test function for profiling. git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@185 8cb11bc2-c004-0410-86c3-e597b4017df7 2001-05-13 06:50:24 +04:00			`p = profile.Profile()`
Check in the mxTextTools-based parser that I implemented back in November. The timelog.fetch_log2() timing dropped from .53 seconds for my test input to .34 seconds. Overall stream speed dropped from .25 seconds to .10 seconds. This also introduces an mget() method on the token stream so that the parser can get multiple tokens in one shot, rather than needing to call many times. In timelog, I've added a new compare_many() for big verifications, time_stream to time individual token streams, profile_stream for profiling streams, and revised the time_fetch function to do multiple fetches and to trim the exceptional cases off the ends before reporting. git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@456 8cb11bc2-c004-0410-86c3-e597b4017df7 2002-01-29 13:08:46 +03:00			`def many_calls(full_name, which_rev, n):`
			`for i in xrange(n):`
			`fetch_log2(full_name, which_rev)`
			`p.runcall(many_calls, full_name, which_rev, n)`
Speed up the log extraction by avoiding regular expressions (there are simple equivalents which are faster). Add a test function for profiling. git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@185 8cb11bc2-c004-0410-86c3-e597b4017df7 2001-05-13 06:50:24 +04:00			`p.print_stats()`
change web site references from www.lyra.org/viewcvs/ to the new viewcvs.sourceforge.net/ site. git-svn-id: http://viewvc.tigris.org/svn/viewvc/trunk@193 8cb11bc2-c004-0410-86c3-e597b4017df7 2001-05-30 12:49:19 +04:00
			`def varysize(full_name, which_rev=None):`
			`def one_run(n, *args):`
			`rcsparse._TokenStream.CHUNK_SIZE = n`
			`t = time.time()`
			`for i in xrange(5):`
			`apply(fetch_log2, args)`
			`print n, time.time() - t`

			`#one_run(2020, full_name, which_rev)`
			`#one_run(4070, full_name, which_rev)`
			`#one_run(8170, full_name, which_rev)`
			`#one_run(8192, full_name, which_rev)`
			`#one_run(16384, full_name, which_rev)`
			`one_run(32740, full_name, which_rev)`
			`one_run(65500, full_name, which_rev)`
			`one_run(100000, full_name, which_rev)`
			`one_run(200000, full_name, which_rev)`
			`one_run(500000, full_name, which_rev)`