viewvc-4intranet/lib/accept.py

# -*-python-*-
#
# Copyright (C) 1999-2013 The ViewCVS Group. All Rights Reserved.
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewVC
# distribution or at http://viewvc.org/license-1.html.
#
# For more information, visit http://viewvc.org/
#
# -----------------------------------------------------------------------
#
# accept.py: parse/handle the various Accept headers from the client
#
# -----------------------------------------------------------------------

import re


def language(hdr):
  "Parse an Accept-Language header."

  # parse the header, storing results in a _LanguageSelector object
  return _parse(hdr, _LanguageSelector())

# -----------------------------------------------------------------------

_re_token = re.compile(r'\s*([^\s;,"]+|"[^"]*")+\s*')
_re_param = re.compile(r';\s*([^;,"]+|"[^"]*")+\s*')
_re_split_param = re.compile(r'([^\s=])\s*=\s*(.*)')

def _parse(hdr, result):
  # quick exit for empty or not-supplied header
  if not hdr:
    return result

  pos = 0
  while pos < len(hdr):
    name = _re_token.match(hdr, pos)
    if not name:
      raise AcceptLanguageParseError()
    a = result.item_class(name.group(1).lower())
    pos = name.end()
    while 1:
      # are we looking at a parameter?
      match = _re_param.match(hdr, pos)
      if not match:
        break
      param = match.group(1)
      pos = match.end()

      # split up the pieces of the parameter
      match = _re_split_param.match(param)
      if not match:
        # the "=" was probably missing
        continue

      pname = match.group(1).lower()
      if pname == 'q' or pname == 'qs':
        try:
          a.quality = float(match.group(2))
        except ValueError:
          # bad float literal
          pass
      elif pname == 'level':
        try:
          a.level = float(match.group(2))
        except ValueError:
          # bad float literal
          pass
      elif pname == 'charset':
        a.charset = match.group(2).lower()

    result.append(a)
    if hdr[pos:pos+1] == ',':
      pos = pos + 1

  return result

class _AcceptItem:
  def __init__(self, name):
    self.name = name
    self.quality = 1.0
    self.level = 0.0
    self.charset = ''

  def __str__(self):
    s = self.name
    if self.quality != 1.0:
      s = '%s;q=%.3f' % (s, self.quality)
    if self.level != 0.0:
      s = '%s;level=%.3f' % (s, self.level)
    if self.charset:
      s = '%s;charset=%s' % (s, self.charset)
    return s

class _LanguageRange(_AcceptItem):
  def matches(self, tag):
    "Match the tag against self. Returns the qvalue, or None if non-matching."
    if tag == self.name:
      return self.quality

    # are we a prefix of the available language-tag
    name = self.name + '-'
    if tag[:len(name)] == name:
      return self.quality
    return None

class _LanguageSelector:
  """Instances select an available language based on the user's request.

  Languages found in the user's request are added to this object with the
  append() method (they should be instances of _LanguageRange). After the
  languages have been added, then the caller can use select_from() to
  determine which user-request language(s) best matches the set of
  available languages.

  Strictly speaking, this class is pretty close for more than just
  language matching. It has been implemented to enable q-value based
  matching between requests and availability. Some minor tweaks may be
  necessary, but simply using a new 'item_class' should be sufficient
  to allow the _parse() function to construct a selector which holds
  the appropriate item implementations (e.g. _LanguageRange is the
  concrete _AcceptItem class that handles matching of language tags).
  """

  item_class = _LanguageRange

  def __init__(self):
    self.requested = [ ]

  def select_from(self, avail):
    """Select one of the available choices based on the request.

    Note: if there isn't a match, then the first available choice is
    considered the default. Also, if a number of matches are equally
    relevant, then the first-requested will be used.

    avail is a list of language-tag strings of available languages
    """

    # tuples of (qvalue, language-tag)
    matches = [ ]

    # try matching all pairs of desired vs available, recording the
    # resulting qvalues. we also need to record the longest language-range
    # that matches since the most specific range "wins"
    for tag in avail:
      longest = 0
      final = 0.0

      # check this tag against the requests from the user
      for want in self.requested:
        qvalue = want.matches(tag)
        #print 'have %s. want %s. qvalue=%s' % (tag, want.name, qvalue)
        if qvalue is not None and len(want.name) > longest:
          # we have a match and it is longer than any we may have had.
          # the final qvalue should be from this tag.
          final = qvalue
          longest = len(want.name)

      # a non-zero qvalue is a potential match
      if final:
        matches.append((final, tag))

    # if there are no matches, then return the default language tag
    if not matches:
      return avail[0]

    # get the highest qvalue and its corresponding tag
    matches.sort()
    qvalue, tag = matches[-1]

    # if the qvalue is zero, then we have no valid matches. return the
    # default language tag.
    if not qvalue:
      return avail[0]

    # if there are two or more matches, and the second-highest has a
    # qvalue equal to the best, then we have multiple "best" options.
    # select the one that occurs first in self.requested
    if len(matches) >= 2 and matches[-2][0] == qvalue:
      # remove non-best matches
      while matches[0][0] != qvalue:
        del matches[0]
      #print "non-deterministic choice", matches

      # sequence through self.requested, in order
      for want in self.requested:
        # try to find this one in our best matches
        for qvalue, tag in matches:
          if want.matches(tag):
            # this requested item is one of the "best" options
            ### note: this request item could match *other* "best" options,
            ### so returning *this* one is rather non-deterministic.
            ### theoretically, we could go further here, and do another
            ### search based on the ordering in 'avail'. however, note
            ### that this generally means that we are picking from multiple
            ### *SUB* languages, so I'm all right with the non-determinism
            ### at this point. stupid client should send a qvalue if they
            ### want to refine.
            return tag

      # NOTREACHED

    # return the best match
    return tag

  def append(self, item):
    self.requested.append(item)

class AcceptLanguageParseError(Exception):
  pass

def _test():
  s = language('en')
  assert s.select_from(['en']) == 'en'
  assert s.select_from(['en', 'de']) == 'en'
  assert s.select_from(['de', 'en']) == 'en'

  # Netscape 4.x and early version of Mozilla may not send a q value
  s = language('en, ja')
  assert s.select_from(['en', 'ja']) == 'en'

  s = language('fr, de;q=0.9, en-gb;q=0.7, en;q=0.6, en-gb-foo;q=0.8')
  assert s.select_from(['en']) == 'en'
  assert s.select_from(['en-gb-foo']) == 'en-gb-foo'
  assert s.select_from(['de', 'fr']) == 'fr'
  assert s.select_from(['de', 'en-gb']) == 'de'
  assert s.select_from(['en-gb', 'en-gb-foo']) == 'en-gb-foo'
  assert s.select_from(['en-bar']) == 'en-bar'
  assert s.select_from(['en-gb-bar', 'en-gb-foo']) == 'en-gb-foo'

  # non-deterministic. en-gb;q=0.7 matches both avail tags.
  #assert s.select_from(['en-gb-bar', 'en-gb']) == 'en-gb'