Source code for bibolamazi.core.butils

# -*- coding: utf-8 -*-
################################################################################
#                                                                              #
#   This file is part of the Bibolamazi Project.                               #
#   Copyright (C) 2013 by Philippe Faist                                       #
#   philippe.faist@bluewin.ch                                                  #
#                                                                              #
#   Bibolamazi is free software: you can redistribute it and/or modify         #
#   it under the terms of the GNU General Public License as published by       #
#   the Free Software Foundation, either version 3 of the License, or          #
#   (at your option) any later version.                                        #
#                                                                              #
#   Bibolamazi is distributed in the hope that it will be useful,              #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of             #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              #
#   GNU General Public License for more details.                               #
#                                                                              #
#   You should have received a copy of the GNU General Public License          #
#   along with Bibolamazi.  If not, see <http://www.gnu.org/licenses/>.        #
#                                                                              #
################################################################################

"""
Various utilities for use within all of the Bibolamazi Project.
"""


import re
import types
import math
import datetime
import logging

from pylatexenc import latex2text

import bibolamazi.init
from . import version

logger = logging.getLogger(__name__)



def get_version():
    """
    Give back the Bibolamazi version string.

    The value is read from the ``version_str`` attribute of the imported
    ``version`` module and handed back as is, without any parsing.
    """
    version_string = version.version_str
    return version_string
# Cache for get_version_split(); filled on the first call.
_theversionsplit = None

def get_version_split():
    """
    Return a 4-tuple `(maj, min, rel, suffix)` resulting from parsing the
    version string :py:data:`version.version_str`.

    The result is computed once and then cached in the module-level variable
    `_theversionsplit`.

    Each element is the *string* captured by the regular expression (not an
    integer); components which are absent from the version string are `None`.

    TODO: the elements are currently strings; consider returning integers for
    the numeric components.

    Raises :py:exc:`ValueError` if the version string does not start with a
    number and therefore cannot be parsed.
    """
    # Without this declaration the assignment below would make the name local
    # to the function, and the `is None` test would raise UnboundLocalError.
    global _theversionsplit

    if _theversionsplit is None:
        m = re.match(r'^(\d+)(?:\.(\d+)(?:\.(\d+)(.+)?)?)?', version.version_str)
        if m is None:
            raise ValueError(
                "Cannot parse version string: %r" % (version.version_str,)
            )
        # groups() yields exactly (group(1), group(2), group(3), group(4)),
        # with None for the components that did not participate in the match.
        _theversionsplit = m.groups()

    return _theversionsplit
def get_copyrightyear():
    """
    Give back the copyright year of the project.

    The value is read from the ``copyright_year`` attribute of the imported
    ``version`` module and handed back as is.
    """
    year = version.copyright_year
    return year
# ------------------------------------------------------------------------------
class BibolamaziError(Exception):
    """
    Base class of the bibolamazi exceptions.

    The exception message is `msg`.  When `where` is given, a line break, a
    tab and the marker ``@: `` followed by `where` are appended to the
    message.  The raw `where` value is kept on the instance as the attribute
    `where`.

    See also :py:class:`~core.bibfilter.BibFilterError` and
    :py:class:`~core.bibusercache.BibUserCacheError`.
    """
    def __init__(self, msg, where=None):
        self.where = where

        if where is None:
            message = msg
        else:
            # location information goes on its own, tab-indented line
            message = msg + ("\n\t@: " + where)

        super().__init__(message)
# ------------------------------------------------------------------------------
def getbool(x):
    """
    Interpret `x` as a boolean value.

    First, `int(x)` is attempted; if that conversion works, the result is
    `True` for a nonzero integer and `False` for zero.  This covers booleans,
    integers and strings holding an integer.

    Otherwise, if `x` is a string, it is compared (case-insensitively, and
    ignoring leading/trailing whitespace) with the spellings 't', 'true',
    '1', 'y', 'yes', 'on' for `True`, and 'f', 'false', '0', 'n', 'no',
    'off' for `False`.

    A :py:exc:`ValueError` is raised if none of the above applies.
    """
    try:
        as_int = int(x)
    except (TypeError, ValueError):
        as_int = None

    if as_int is not None:
        return as_int != 0

    if isinstance(x, str):
        spellings = (
            (r'^\s*(t(?:rue)?|1|y(?:es)?|on)\s*$', True),
            (r'^\s*(f(?:alse)?|0|n(?:o)?|off)\s*$', False),
        )
        for pattern, result in spellings:
            if re.match(pattern, x, re.IGNORECASE):
                return result

    raise ValueError("Can't parse boolean value: %r" % x)
def resolve_type(typename, in_module=None):
    """
    Look up the type object designated by the string `typename`.

    If `in_module` is given and its `__dict__` has an entry called
    `typename`, that entry is returned.  Otherwise `typename` has to be one
    of 'str', 'bool', 'int', 'float', 'bytes' or 'complex'; the matching
    attribute of the :py:mod:`types` module is returned when it exists
    (`StringType`, `BooleanType`, ...), and the corresponding built-in type
    otherwise.

    Raises :py:exc:`ValueError` for any other `typename`.
    """
    if in_module is not None:
        logger.longdebug("Resolving type %s in module %s", typename, in_module)
        module_namespace = in_module.__dict__
        if typename in module_namespace:
            return module_namespace.get(typename)

    logger.longdebug("Resolving type %s (no module)", typename)

    # (type name, attribute to try on the `types` module, built-in fallback)
    # Python 3's `types` module has none of these attributes, in which case
    # the built-in type is used.
    known_types = (
        ('str', 'StringType', str),
        ('bool', 'BooleanType', bool),
        ('int', 'IntType', int),
        ('float', 'FloatType', float),
        ('bytes', 'StringType', bytes),
        ('complex', 'ComplexType', complex),
    )
    for name, legacy_attr, builtin_type in known_types:
        if typename == name:
            return getattr(types, legacy_attr, builtin_type)

    raise ValueError("Unknown type name: %s"%(typename))
_rx_quotearg_oknames = re.compile(r'^[-\w./:~%#]+$')

def quotearg(x):
    r"""
    Quote the string `x` if it contains any special character.

    If `x` consists only of non-special characters, it is returned as is.
    The non-special characters are: word characters (alphanumerical chars
    and underscore), hyphen, dot, slash, colon, tilde, percent, hash.

    Otherwise, the value `x` is put in double-quotes, and each double-quote
    and backslash inside `x` is escaped with a backslash.

    An empty (or otherwise false) `x` yields the empty string.

    For example:

    >>> print(quotearg('kosher_name_clean'))
    kosher_name_clean
    >>> print(quotearg('dirty name with spaces'))
    "dirty name with spaces"
    >>> print(quotearg(r'''really\dirty"name::with/tons&#$of special chars!!!'''))
    "really\\dirty\"name::with/tons&#$of special chars!!!"
    """
    if not x:
        return ""

    # fullmatch() rather than match(): with match(), the trailing `$` of the
    # pattern also matches just before a final newline, so a value such as
    # "name\n" would be returned unquoted.
    if _rx_quotearg_oknames.fullmatch(x):
        # only very sympathetic chars
        return x

    return '"' + re.sub(r'(["\\])', r'\\\1', x) + '"'
def guess_encoding_decode(dat, encoding=None):
    """
    Decode `dat` into a `str`.

    - If `dat` is already a `str`, it is returned unchanged.
    - If `encoding` is given (and non-empty), `dat.decode(encoding)` is
      returned.
    - Otherwise UTF-8 is tried first; if that raises
      :py:exc:`UnicodeDecodeError`, `dat` is decoded as latin1.
    """
    if isinstance(dat, str):
        # already unicode
        return dat

    if encoding:
        return dat.decode(encoding)

    try:
        decoded = dat.decode('utf-8')
    except UnicodeDecodeError:
        utf8_ok = False
    else:
        utf8_ok = True

    if utf8_ok:
        return decoded

    # this should always succeed
    return dat.decode('latin1')
def call_with_args(fn, *args, **kwargs):
    """
    Utility to call a function `fn` with `*args` and `**kwargs`.

    `fn(*args)` must be an acceptable function call; beyond that, additional
    keyword arguments which the function accepts will be provided from
    `**kwargs`.

    This function is meant to be essentially `fn(*args, **kwargs)`, but
    without raising an error if there are arguments in `kwargs` which the
    function doesn't accept (in which case, those arguments are ignored).

    `fn` may be any callable (function, bound method, class, or object with a
    `__call__` method).  If the signature of `fn` cannot be introspected, all
    of `kwargs` are passed on unfiltered.

    Returns whatever `fn` returns.
    """
    # local import: the module's top-level imports do not include `inspect`
    import inspect

    try:
        parameters = list(inspect.signature(fn).parameters.values())
    except (TypeError, ValueError):
        # no introspectable signature (e.g. some built-ins): nothing to
        # filter against, so behave like a plain call
        return fn(*args, **kwargs)

    # A `**kwargs`-style parameter means that fn accepts any keyword argument.
    if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in parameters):
        return fn(*args, **kwargs)

    by_keyword = (
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    )
    accepted = {p.name for p in parameters if p.kind in by_keyword}

    filtered_kwargs = {k: v for (k, v) in kwargs.items() if k in accepted}
    return fn(*args, **filtered_kwargs)
# One "<number><unit>" item.  The unit is restricted to letters so that compact
# input such as "1h30m" is split into "1h" and "30m"; matching the unit with
# `\w+` would also swallow the digits of the following item.
_rx_timedelta_part = re.compile(r'(?P<value>\d+(?:\.\d*)?|\d*\.\d+)(?P<unit>[a-z]+)',
                                flags=re.IGNORECASE)

def parse_timedelta(in_s):
    """
    Parse a string such as ``"2w 3days"``, ``"1.5d"`` or ``"1h30m"`` into a
    :py:class:`datetime.timedelta`.

    The string is scanned for items of the form ``<number><unit>`` (the unit
    immediately following the number); all items found are added up.  The
    unit is case-insensitive and may be any prefix of 'weeks', 'days',
    'hours', 'minutes' or 'seconds' (e.g. 'w', 'day', 'min', 's').  Text that
    does not form such an item is ignored.

    The fractional part of a value is carried over to the next smaller
    unit(s), down to milliseconds; whatever is left below one millisecond is
    dropped.

    Note: only positive timedelta accepted.

    Raises :py:exc:`ValueError` if a unit is not recognized.
    """
    # all-lowercase, please
    # unit -> (number of sub-units in one unit, name of that sub-unit)
    keys = {
        "weeks": (7, 'days'),
        "days": (24, 'hours'),
        "hours": (60, 'minutes'),
        "minutes": (60, 'seconds'),
        "seconds": (1000, 'milliseconds'),
    }

    # accumulated keyword arguments for datetime.timedelta()
    kwargs = {}
    for unitname, (_, subunitname) in keys.items():
        kwargs[unitname] = 0.0
        kwargs[subunitname] = 0.0

    for m in _rx_timedelta_part.finditer(in_s):
        unit = m.group('unit').lower()
        keyoks = [k for k in keys if k.startswith(unit)]
        if not keyoks:
            raise ValueError("Unknown unit for timedelta: %s" %(unit))
        if len(keyoks) > 1:
            raise ValueError("Ambiguous unit for timedelta: %s" %(unit)) # should never happen
        key = keyoks[0]

        value = float(m.group('value'))
        whole = math.floor(value)
        kwargs[key] += whole

        # push the fractional remainder down through the smaller units
        x = value - whole
        while key in keys:
            factor, subkey = keys[key]
            x *= factor
            whole = math.floor(x)
            kwargs[subkey] += whole
            x -= whole
            key = subkey

    return datetime.timedelta(**kwargs)
def warn_deprecated(classname, oldname, newname, modulename=None, explanation=None):
    """
    Log a warning that `oldname` is deprecated in favor of `newname`.

    The warning goes to the logger named `modulename` if `modulename` is
    given, and to this module's logger otherwise.  In the message, both names
    are prefixed by ``modulename:`` and ``classname.`` whenever those are
    given; `explanation`, if any, is appended; and the message ends with one
    entry of the current call stack (the first of the entries returned by
    `traceback.format_stack(limit=3)`).
    """
    import traceback

    warnlogger = logger if modulename is None else logging.getLogger(modulename)

    template = (
        "%(modulenamecolon)s%(classnamedot)s%(oldname)s is deprecated. Please use "
        "%(modulenamecolon)s%(classnamedot)s%(newname)s instead. %(explanationspace)s"
        "at:\n"
        "%(stack)s"
    )

    fields = {}
    fields['classnamedot'] = classname + '.' if classname else ''
    fields['modulenamecolon'] = modulename + ':' if modulename else ''
    fields['oldname'] = oldname
    fields['newname'] = newname
    fields['explanationspace'] = explanation + ' ' if explanation else ''
    # must be called from this very function so that the reported stack entry
    # stays the same
    fields['stack'] = traceback.format_stack(limit=3)[0]

    warnlogger.warning(template % fields)
# ------------------------------------------------------------------------------

# LaTeX-to-text conversion setup.
#
# Start from pylatexenc's default latex2text context database and put a
# category of our own in front of it (`prepend=True`) which overrides how a few
# LaTeX "specials" are rendered as text:
#
#   - `~` becomes a plain space, and both `` and '' become a double quote (");
#   - `--`, `---`, "!`" and "?`" are kept exactly as typed.
#
# NOTE(review): an earlier, removed replacement table carried the remark that
# the tabular alignment symbol '&' must NOT be replaced, because it often
# appears unescaped in names such as "Taylor & Francis".  No override for '&'
# is registered here -- confirm that the default context behaves as desired.
latex2text_latex_context = latex2text.get_default_latex_context_db()

latex2text_latex_context.add_context_category(
    'override-nonascii-specials',
    prepend=True,
    macros=[],
    environments=[],
    specials=[
        latex2text.SpecialsTextSpec(specials_chars, text_replacement)
        for (specials_chars, text_replacement) in (
            ('~', u" "),
            ('``', u"\""),
            ("''", u"\""),
            ("--", u"--"),
            ("---", u"---"),
            ("!`", u"!`"),
            ("?`", u"?`"),
        )
    ],
)

# Module-wide converter used by latex_to_text().
_l2t = latex2text.LatexNodes2Text(
    latex_context=latex2text_latex_context,
    strict_latex_spaces=True,
)
def latex_to_text(x):
    """
    Convert the LaTeX code `x` to text.

    This calls `latex_to_text()` on the module-level `LatexNodes2Text`
    converter `_l2t`, with `tolerant_parsing=True`, and returns its result.
    """
    text = _l2t.latex_to_text(x, tolerant_parsing=True)
    return text