# Source code for brainvisa.data.patterns

#  This software and supporting documentation are distributed by
#      Institut Federatif de Recherche 49
#      CEA/NeuroSpin, Batiment 145,
#      91191 Gif-sur-Yvette cedex
#      France
#
# This software is governed by the CeCILL license version 2 under
# French law and abiding by the rules of distribution of free software.
# You can  use, modify and/or redistribute the software under the
# terms of the CeCILL license version 2 as circulated by CEA, CNRS
# and INRIA at the following URL "http://www.cecill.info".
#
# As a counterpart to the access to the source code and  rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty  and the software's author,  the holder of the
# economic rights,  and the successive licensors  have only  limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading,  using,  modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean  that it is complicated to manipulate,  and  that  also
# therefore means  that it is reserved for developers  and  experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and,  more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license version 2 and that you accept its terms.

from __future__ import print_function
from __future__ import absolute_import
import re
import types
import six

# Under Python 3, bind six.text_type to the builtin str.
# NOTE(review): six presumably already defines text_type as str on Python 3,
# which would make this assignment a no-op -- confirm before removing it.
if six.PY3:
    six.text_type = str

#------------------------------------------------------------------------------


class DictPattern(object):
    """
    A DictPattern contains a pattern that is matched against a string and a
    set of attributes contained in a dictionary. When the match succeeds, it
    returns a dictionary containing attributes values extracted from the
    input string. Such patterns are used to define Brainvisa ontology rules
    which associate filenames and data types.

    The input pattern is a string that is split in three kinds of tokens:

    * An attribute name enclosed in ``<`` and ``>``
    * A named regular expression enclosed in ``{`` and ``}``. The name may
      be followed by ``|`` and a regular expression (``{name|regexp}``);
      without it the expression defaults to ``.+``.
    * A string literal which is everything not enclosed in braces nor in
      ``<`` and ``>``.

    When ``DictPattern.match( s, dict )`` is called, all attribute names from
    the pattern are replaced by the corresponding value in dict. If the dict
    does not contain the attribute, the match fails. Then, the string is
    matched against the pattern. If the pattern contains named regular
    expressions, the value corresponding to each expression is put in the
    resulting dictionary. If the match succeeds, a dictionary (possibly
    empty) is returned. Otherwise, ``None`` is returned.

    In the string literal of the pattern, special characters can be found:

    * ``*`` matches any string and the matched value is associated to a
      ``filename_variable`` key in the results dictionary.
    * ``#`` matches any number and the matched value is associated to a
      ``name_serie`` key in the results dictionary.

    :Match examples:

    ::

      p = DictPattern( '<subject>_t1' )
      p.match( 's_t1', { 'subject': 's' } ) == {}
      p.match( 's_t1', { 'subject': 'x' } ) == None
      p.match( 's_t2', { 'subject': 's' } ) == None

      p = DictPattern( '{subject}_t1' )
      p.match( 's_t1', { 'subject': 's' } ) == {'subject': 's'}
      p.match( 'tutu_t1', { 'subject': 's' } ) == {'subject': 'tutu'}
      p.match( 's_t2', { 'subject': 's' } ) == None

      p = DictPattern( '*_#' )
      p.match( 'toto_titi', {} ) == None
      p.match( 'toto_42', {} ) == {'name_serie':'42',
                                   'filename_variable':'toto'}

      p = DictPattern( 'begin*_<subject>_*end' )
      p.match( 'beginxxx_anatole_yyyend', { 'subject': 'anatole' } ) == None
      p.match( 'beginxxx_anatole_xxxend', { 'subject': 'anatole' } ) == \
          { 'filename_variable': 'xxx' }
      p.match( 'beginxxx_anatole_xxxend', { 'subject': 'raymond' } ) == None

      p = DictPattern( 'begin#_<subject>_#end' )
      p.match( 'beginxxx_anatole_xxxend', { 'subject': 'anatole' } ) == None
      p.match( 'begin123_anatole_456end', { 'subject': 'anatole' } ) == None
      p.match( 'begin123_anatole_123end', { 'subject': 'anatole' } ) == \
          { 'name_serie': '123' }
      p.match( 'begin123_anatole_123end', { 'subject': 'raymond' } ) == None

    :Unmatch example:

    ::

      DictPattern.unmatch( matchResult, dict )

    This allows to build the string that would produce matchResult if
    ``DictPattern.match( s, dict )`` is successfully used. When every
    attribute and named expression can be resolved, we have::

      DictPattern.unmatch( DictPattern.match( s, dict ), dict ) == s
    """

    class Constant(object):
        """Base class of the tokens whose text does not come from a regular
        expression group: literals, attributes and match-result lookups."""

        def __repr__(self):
            return str(self)

    class Litteral(Constant):
        """A fixed piece of text of the pattern."""

        def __init__(self, s):
            self.__litteral = s

        def __call__(self, dict=None, matchResult=None):
            # Both arguments are ignored: a literal always yields its text.
            return self.__litteral

        def escaped(self):
            """Return an equivalent token whose text is regex-escaped."""
            return DictPattern.EscapedLitteral(self.__litteral)

        def __str__(self):
            return repr(self.__litteral)

    class Attribute(Constant):
        """A ``<name>`` token, resolved from the attributes dictionary (and,
        as a fallback, from a match result)."""

        def __init__(self, s):
            self.__key = s

        def __call__(self, dict, matchResult=None):
            """Return the value of the attribute.

            Lookup order: ``dict[key]``; for a dotted key, the nested path
            ``dict[k1][k2]...``; then the same lookups in ``matchResult``,
            where an empty (falsy) value is treated as missing.

            :raises KeyError: if the attribute cannot be resolved (always the
                error of the first, direct lookup).
            """
            try:
                return dict[self.__key]
            except KeyError as ke:
                path = self.__key.split('.')
                if len(path) > 1:
                    try:
                        result = dict
                        for k in path:
                            result = result[k]
                        return result
                    except KeyError:
                        if not matchResult:
                            raise ke
                        try:
                            result = matchResult
                            for k in path:
                                result = result[k]
                        except Exception:
                            # Any failure while walking the match result is
                            # reported as the original missing key.
                            raise ke
                        if not result:
                            raise ke
                        return result
                else:
                    if not matchResult:
                        raise ke
                    try:
                        result = matchResult[self.__key]
                    except KeyError:
                        raise ke
                    if not result:
                        raise ke
                    return result

        def escaped(self):
            """Return an equivalent token whose value is regex-escaped."""
            return DictPattern.EscapedAttribute(self.__key)

        def __str__(self):
            return '<Attribute ' + repr(self.__key) + '>'

    class EscapedLitteral(Litteral):
        """A literal whose text is escaped for use inside a regular
        expression."""

        def __init__(self, s):
            DictPattern.Litteral.__init__(self, re.escape(s))

    class EscapedAttribute(Attribute):
        """An attribute whose value is escaped for use inside a regular
        expression."""

        def __call__(self, dict, matchResult=None):
            return re.escape(
                DictPattern.Attribute.__call__(self, dict, matchResult))

        def __str__(self):
            return '<EscapedAttribute ' + repr(self._Attribute__key) + '>'

    class MatchResult(Constant):
        """A ``{name}``, ``*`` or ``#`` token seen from the unmatch side: its
        text is read back from a match result."""

        def __init__(self, s):
            self.__key = s
            self.__splittedKey = s.split('.')

        def __call__(self, dict, matchResult):
            """Return the value stored for this named expression.

            :raises KeyError: if ``matchResult`` is None or if no non-empty
                value is found in ``matchResult`` nor in ``dict``.
            """
            if matchResult is None:
                raise KeyError(self.__key)
            # search for matchResult["att1.att2..."]
            result = matchResult.get(self.__key, None)
            if type(result) is list and result:
                # case of name_serie: use the first value of the list
                result = result[0]
            if result is not None:
                return result
            # if not found, search for matchResult["att1"]["att2"]...
            try:
                result = matchResult
                for k in self.__splittedKey:
                    result = result[k]
                if result:
                    return result
            except KeyError:
                pass
            # if not found, search in dict
            result = dict.get(self.__key, None)
            if result is not None:
                return result
            try:
                result = dict
                for k in self.__splittedKey:
                    result = result[k]
                    if not result:
                        raise KeyError(self.__key)
                # The key path is never empty, so result is truthy here.
                return result
            except KeyError:
                # Always report the full dotted key, not a path component.
                raise KeyError(self.__key)

        def __str__(self):
            return '<MatchResult ' + repr(self.__key) + '>'

    class RegexpMatch(object):
        """Regular expression built from the middle part of a pattern (what
        remains once constant prefix and suffix have been removed)."""

        def __init__(self, matchList):
            """
            :param matchList: sequence of ``(name, regexp)`` tuples and of
                :class:`Litteral` / :class:`Attribute` tokens.
            """
            # The expression can be compiled once and for all unless it
            # embeds attribute values, which are only known at match time.
            precompile = not any(isinstance(i, DictPattern.Attribute)
                                 for i in matchList)
            attributeToGroupname = {}
            # Maps each regex group name to the key path under which the
            # matched value is stored in the result dictionary.
            self.groupnameToAttributes = {}
            parts = []
            for i in matchList:
                if type(i) is tuple:
                    attribute, regexp = i
                    groupname = attributeToGroupname.get(attribute)
                    if groupname:
                        # Name already seen: it must match the same text.
                        fragment = '(?P=' + groupname + ')'
                    else:
                        if attribute.find('.') == -1:
                            groupname = attribute
                            path = (attribute, )
                        else:
                            # Dots are not allowed in group names.
                            path = attribute.split('.')
                            groupname = '__dict_' + '_'.join(path) + '__'
                        attributeToGroupname[attribute] = groupname
                        self.groupnameToAttributes[groupname] = path
                        fragment = '(?P<' + groupname + '>' + regexp + ')'
                    if precompile:
                        parts.append(fragment)
                    else:
                        parts.append(DictPattern.Litteral(fragment))
                elif precompile:
                    parts.append(re.escape(i()))
                else:
                    parts.append(i.escaped())
            if precompile:
                self.__regexp = ''.join(parts)
                self.__match = re.compile(self.__regexp)
            else:
                self.__regexp = None
                self.__match = parts

        def match(self, s, dict):
            """Match ``s`` and return the dictionary of named groups (nested
            for dotted names), or None if ``s`` does not match.

            :raises KeyError: if an attribute of the expression is missing
                from ``dict``.
            """
            # NOTE(review): re.match only anchors the start of the string, so
            # trailing characters after the last group are accepted --
            # confirm whether a full match is intended.
            if self.__regexp is None:
                m = re.compile(
                    ''.join([i(dict) for i in self.__match])).match(s)
            else:
                m = self.__match.match(s)
            if m is None:
                return None
            result = {}
            for groupname, value in m.groupdict().items():
                attributes = self.groupnameToAttributes[groupname]
                d = result
                for k in attributes[:-1]:
                    d = d.setdefault(k, {})
                d[attributes[-1]] = value
            return result

        def __str__(self):
            if self.__regexp is None:
                return '<RegexpMatch ' + repr(self.__match) + '>'
            else:
                return '<RegexpMatch ' + repr(self.__regexp) + '>'

        def __repr__(self):
            return str(self)

    # Splits "litteral{name}rest", "litteral<name>rest", and detects reserved
    # characters left in what should be a plain litteral.
    _splitNamedPatternRegex = re.compile(r'([^<>{]*){([^{}<>]*)}(.*)')
    _splitAttributeRegex = re.compile(r'([^{}<]*)<([^{}<>]*)>(.*)')
    _checkRegex = re.compile(r'.*[{}<>].*')

    def __init__(self, pattern):
        """
        :param string pattern: the pattern (see the class documentation).
        :raises ValueError: if a litteral part of the pattern contains one
            of the reserved characters ``{``, ``}``, ``<`` or ``>``.
        """
        self.pattern = pattern
        matchList = []
        unmatchList = []

        def addLitteral(text):
            if DictPattern._checkRegex.match(text):
                raise ValueError('Invalid pattern')
            ma, un = DictPattern._replaceStarAndSharp(text)
            matchList.extend(ma)
            unmatchList.extend(un)

        remaining = pattern
        while remaining:
            m = DictPattern._splitAttributeRegex.match(remaining)
            if m:
                if m.group(1):
                    addLitteral(m.group(1))
                r = DictPattern.Attribute(m.group(2))
                matchList.append(r)
                unmatchList.append(r)
                remaining = m.group(3)
                continue
            m = DictPattern._splitNamedPatternRegex.match(remaining)
            if m:
                if m.group(1):
                    addLitteral(m.group(1))
                patternName = m.group(2)
                i = patternName.find('|')
                if i >= 0:
                    # "{name|regexp}": text after the bar is the expression.
                    regexp = patternName[i + 1:]
                    patternName = patternName[:i]
                else:
                    regexp = '.+'
                matchList.append((patternName, regexp))
                unmatchList.append(DictPattern.MatchResult(patternName))
                remaining = m.group(3)
                continue
            # No more token: the rest is a litteral.
            addLitteral(remaining)
            break

        self.unmatchList = unmatchList

        # Leading and trailing constant tokens are compared with plain
        # string operations; only what is left in the middle needs a regular
        # expression. A side made only of litterals is stored as a string.
        onlyLitteral = True
        self.matchPrefix = []
        while matchList and isinstance(matchList[0], DictPattern.Constant):
            i = matchList.pop(0)
            self.matchPrefix.append(i)
            if not isinstance(i, DictPattern.Litteral):
                onlyLitteral = False
        if onlyLitteral:
            self.matchPrefix = ''.join([i({}) for i in self.matchPrefix])

        onlyLitteral = True
        self.matchSufix = []
        while matchList and isinstance(matchList[-1], DictPattern.Constant):
            i = matchList.pop()
            self.matchSufix.insert(0, i)
            if not isinstance(i, DictPattern.Litteral):
                onlyLitteral = False
        if onlyLitteral:
            self.matchSufix = ''.join([i({}) for i in self.matchSufix])

        if matchList:
            self.matchInfix = DictPattern.RegexpMatch(matchList)
        else:
            self.matchInfix = None

    @staticmethod
    def _replaceSharp(s):
        """Split a litteral on ``#`` and return the ``(match, unmatch)``
        token lists, each ``#`` becoming a ``name_serie`` number."""
        pieces = s.split('#')
        match = [DictPattern.Litteral(pieces[0])]
        unmatch = [DictPattern.Litteral(pieces[0])]
        for piece in pieces[1:]:
            litteral = DictPattern.Litteral(piece)
            match += [('name_serie', '[0-9]+'), litteral]
            unmatch += [DictPattern.MatchResult('name_serie'), litteral]
        return match, unmatch

    @staticmethod
    def _replaceStarAndSharp(s):
        """Split a litteral on ``*`` (``filename_variable``) then on ``#``
        and return the ``(match, unmatch)`` token lists."""
        pieces = s.split('*')
        match, unmatch = DictPattern._replaceSharp(pieces[0])
        for piece in pieces[1:]:
            match.append(('filename_variable', '.+'))
            unmatch.append(DictPattern.MatchResult('filename_variable'))
            m, u = DictPattern._replaceSharp(piece)
            match += m
            unmatch += u
        return match, unmatch

    def __getstate__(self):
        # Only the pattern string is pickled; tokens are rebuilt on load.
        return self.pattern

    def __setstate__(self, state):
        self.__init__(state)

    def match(self, s, dict):
        """
        Checks if the given string matches the pattern.

        :param string s: the string which should match the pattern
        :param dict: a dictionary containing the values to set to the
            attributes named in the pattern.
        :returns: a dictionary containing the value of each named expression
            of the pattern found in the string, None if the string doesn't
            match the pattern (or if an attribute is missing from dict).
        """
        try:
            if self.matchPrefix:
                if isinstance(self.matchPrefix, list):
                    prefix = ''.join([i(dict) for i in self.matchPrefix])
                else:
                    prefix = self.matchPrefix
                if not s.startswith(prefix):
                    return None
                s = s[len(prefix):]
            if self.matchSufix:
                if isinstance(self.matchSufix, list):
                    sufix = ''.join([i(dict) for i in self.matchSufix])
                else:
                    sufix = self.matchSufix
                if not s.endswith(sufix):
                    return None
                # A suffix built from attributes may be empty; s[:-0] would
                # then wrongly drop the whole string.
                if sufix:
                    s = s[:-len(sufix)]
            if self.matchInfix is not None:
                return self.matchInfix.match(s, dict)
            if s:
                return None
            return {}
        except KeyError:
            return None

    def unmatch(self, matchResult, dic):
        """
        The opposite of :py:meth:`match` method: the matching string is
        found from a match result and a dictionary of attributes values.

        :param matchResult: dictionary which associates a value to each
            named expression of the pattern.
        :param dic: dictionary which associates a value to each attribute
            name of the pattern.
        :rtype: string
        :returns: the rebuilt matching string, or None if a value is
            missing.
        """
        try:
            return ''.join([six.text_type(i(dic, matchResult))
                            for i in self.unmatchList])
        except KeyError:
            return None

    def multipleUnmatch(self, dict, _debug=None):
        """
        Call :py:meth:`unmatch` for every combination of the values of the
        attributes (and named expressions) whose value in ``dict`` is a
        list. ``name_serie`` is never expanded.

        :param dict: dictionary of values, used both as match result and as
            attributes dictionary.
        :param _debug: optional file-like object receiving debug traces.
        :returns: a list of ``(string, values)`` tuples where ``values`` is
            the combination of list items used to build ``string`` (an empty
            dictionary when no value is a list). Combinations for which
            unmatch gives no (or an empty) string are skipped.
        """
        if _debug is not None:
            print('!multipleUnmatch!', self, dict, file=_debug)
        # Retrieve attributes() and namedRegex()
        attributes = []
        for i in self.unmatchList:
            if isinstance(i, DictPattern.Attribute):
                if i._Attribute__key not in attributes:
                    attributes.append(i._Attribute__key)
            elif isinstance(i, DictPattern.MatchResult):
                if i._MatchResult__key not in attributes:
                    attributes.append(i._MatchResult__key)
        if _debug is not None:
            print('!multipleUnmatch! attributes =', attributes, file=_debug)
        # Check attributes values that are list
        multipleValues = []
        for a in attributes:
            if a == 'name_serie':
                continue
            v = dict.get(a)
            if isinstance(v, list):
                if multipleValues:
                    # Combine every existing combination with every value.
                    newMultipleValues = []
                    for d in multipleValues:
                        l = [{a: i} for i in v]
                        for d2 in l:
                            d2.update(d)
                        newMultipleValues.extend(l)
                    multipleValues = newMultipleValues
                else:
                    multipleValues = [{a: i} for i in v]
        if _debug is not None:
            print('!multipleUnmatch! multipleValues =', multipleValues,
                  file=_debug)
        if multipleValues:
            result = []
            d = dict.copy()
            for d2 in multipleValues:
                d.update(d2)
                r = self.unmatch(d, d)
                if _debug is not None:
                    print('!multipleUnmatch! call unmatch', d, '-->', r,
                          file=_debug)
                if r:
                    result.append((r, d2))
            if _debug is not None:
                print('!multipleUnmatch! -->', result, file=_debug)
            return result
        else:
            r = self.unmatch(dict, dict)
            if r:
                if _debug is not None:
                    print('!multipleUnmatch! -->', [(r, {})], file=_debug)
                return [(r, {})]
        if _debug is not None:
            print('!multipleUnmatch! -->', [], file=_debug)
        return []

    def attributes(self):
        """Iterate over the distinct ``<attribute>`` names of the pattern,
        in order of first appearance."""
        sent = []
        for i in self.unmatchList:
            if isinstance(i, DictPattern.Attribute):
                if i._Attribute__key not in sent:
                    sent.append(i._Attribute__key)
                    yield i._Attribute__key

    def namedRegex(self):
        """Iterate over the distinct named expressions of the pattern
        (including ``filename_variable`` and ``name_serie``), in order of
        first appearance."""
        sent = []
        for i in self.unmatchList:
            if isinstance(i, DictPattern.MatchResult):
                if i._MatchResult__key not in sent:
                    sent.append(i._MatchResult__key)
                    yield i._MatchResult__key

    def __str__(self):
        return '<DictPattern ' + repr(self.pattern) + '>'

    def __repr__(self):
        return self.__str__()

    def __eq__(self, other):
        # A DictPattern compares equal to its pattern string as well.
        return self.pattern == getattr(other, 'pattern', other)

    def __ne__(self, other):
        return self.pattern != getattr(other, 'pattern', other)

    def __hash__(self):
        return hash(self.pattern)