# Source code for brainvisa.data.fileSystemOntology

# -*- coding: utf-8 -*-
#  This software and supporting documentation are distributed by
#      Institut Federatif de Recherche 49
#      CEA/NeuroSpin, Batiment 145,
#      91191 Gif-sur-Yvette cedex
#      France
#
# This software is governed by the CeCILL license version 2 under
# French law and abiding by the rules of distribution of free software.
# You can  use, modify and/or redistribute the software under the
# terms of the CeCILL license version 2 as circulated by CEA, CNRS
# and INRIA at the following URL "http://www.cecill.info".
#
# As a counterpart to the access to the source code and  rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty  and the software's author,  the holder of the
# economic rights,  and the successive licensors  have only  limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading,  using,  modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean  that it is complicated to manipulate,  and  that  also
# therefore means  that it is reserved for developers  and  experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and,  more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license version 2 and that you accept its terms.
"""
This module contains classes defining Brainvisa **ontology rules** to manage a database directory.

In the Brainvisa ontology, in addition to the definition of types and formats, it is possible to define rules associating a data type with a location and a filename in a database directory. The classes in this module make it possible to define such rules.

These rules describe the organization of data in the database filesystem.
Thanks to this description, the name and location of a file make it possible to guess its type and some information about it,
for example the center, subject or acquisition associated with this data.
It also makes it possible to write data in the database using the same rules,
so the information can be retrieved when the data is reloaded later.

These ontology files, called *hierarchy* files in Brainvisa, are located in the ``brainvisa/hierarchies`` directory and in the ``hierarchies`` directory of each toolbox. BrainVISA can use several hierarchies; the files of each hierarchy are grouped in a directory named after the hierarchy.

The main class in this module is :py:class:`FileSystemOntology`.
It represents a Brainvisa **hierarchy**, a set of rules that associate data types and data organization on the filesystem.

A rule is represented by the class :py:class:`ScannerRule`.
Several classes inheriting from :py:class:`ScannerRuleBuilder` are used to associate ontology attributes to a rule.

:Inheritance diagram:

.. inheritance-diagram:: brainvisa.data.fileSystemOntology

:Classes:


"""
from __future__ import print_function
from __future__ import absolute_import
import types
import sys
import os
import time
from brainvisa.configuration import neuroConfig
from brainvisa.processing.neuroException import *
from brainvisa import shelltools
from soma.sorted_dictionary import SortedDictionary
from brainvisa.data import neuroDiskItems
from brainvisa.multipleExecfile import MultipleExecfile
import six

#----------------------------------------------------------------------------


class AttrValueFunction(object):
    """
    Attribute value computed from an expression kept as source text.

    The expression is stored as a string and turned into a callable with
    ``eval``. Only the string and the dependency list are pickled; the
    callable is rebuilt from the string when the state is restored.

    NOTE: ``function_string`` is evaluated as Python code, so it must only
    come from trusted sources.

    :Attributes:

    .. py:attribute:: function_string

      Source text of the expression. It must evaluate to a callable taking
      three positional arguments (see :py:meth:`getValue`).

    .. py:attribute:: function

      The callable obtained by evaluating :py:attr:`function_string`.

    .. py:attribute:: dependencies

      The object given as *dependencies* to the constructor, stored as is.
    """

    # Message template used by getAttrValue() when a value cannot be parsed.
    _msgBadValue = '<em>%s</em> is not a valid attribute value'

    # Expression associated to each one-character "%x" escape.
    percent = {
        'f': 'i.fileName()',
        'F': 'i.fullName()',
        'd': 'i.parent.fileName()',
        'D': 'i.parent.fullPath()',
        '*': 's',
        '#': 'n',
    }

    def __init__(self, function_string, dependencies):
        """
        :param string function_string: source text of a callable expression.
        :param dependencies: stored in :py:attr:`dependencies`.
        """
        self.function_string = function_string
        self.function = eval(function_string)
        self.dependencies = dependencies

    def __getstate__(self):
        """
        Returns the picklable state: ``(function_string, dependencies)``.
        """
        return (self.function_string, self.dependencies)

    def __setstate__(self, state):
        """
        Restores the attributes from *state* and re-evaluates the expression.
        """
        source_text, dependencies = state
        self.function_string = source_text
        self.dependencies = dependencies
        self.function = eval(self.function_string)

    def getValue(self, diskItem, matchResult):
        """
        Calls the function with *diskItem* and the ``'filename_variable'``
        and ``'name_serie'`` entries of *matchResult* (``None`` when an
        entry is missing), and returns its result.
        """
        return self.function(
            diskItem,
            matchResult.get('filename_variable'),
            matchResult.get('name_serie'),
        )


#----------------------------------------------------------------------------
_attrValueFunctions = {}


def getAttrValue(value):
    """
    Converts an attribute value template to an :py:class:`AttrValueFunction`.

    The template is scanned for ``%`` escapes:

    * ``%%`` stands for a literal ``%``;
    * ``%<name>`` is replaced by ``i.get('name', "")`` and *name* is added
      to the dependencies of the resulting function;
    * ``%x``, where *x* is a key of :py:attr:`AttrValueFunction.percent`,
      is replaced by the associated expression.

    The pieces (literal text and expressions) are concatenated with ``+``
    in a ``lambda i,s,n: ...`` expression. Literal text and attribute names
    are written with ``repr()`` so that quotes and backslashes they contain
    are kept verbatim in the generated code.

    Results are cached in the module-level ``_attrValueFunctions``
    dictionary, so the same template always gives the same function object.

    :param string value: the template to convert.
    :returns: an :py:class:`AttrValueFunction` if the template contains at
      least one ``%<name>`` or ``%x`` escape; otherwise *value* itself is
      returned unchanged (``%%`` sequences are then left as they are).
    :raises ValueError: if the template ends with a single ``%``, if a
      ``%<`` has no closing ``>``, or if an escape character is unknown.
    """
    cached = _attrValueFunctions.get(value)
    if cached is not None:
        return cached

    def badValue():
        # Same error object for the three malformed-template cases.
        return ValueError(
            HTMLMessage(_t_(AttrValueFunction._msgBadValue) % value))

    dependencies = []
    fragments = []  # pieces of Python source, joined with '+' at the end
    literal = ''    # literal text waiting to be flushed into fragments
    i = 0
    end = len(value)
    while i < end:
        c = value[i]
        if c != '%':
            literal += c
            i += 1
            continue
        # c is '%': look at the escape character that follows
        i += 1
        if i >= end:
            raise badValue()
        c = value[i]
        if c == '%':
            literal += c
        else:
            if c == '<':
                closing = value.find('>', i + 1)
                if closing == -1:
                    raise badValue()
                attributeName = value[i + 1:closing]
                expression = 'i.get(' + repr(attributeName) + ',"")'
                if attributeName not in dependencies:
                    dependencies.append(attributeName)
                i = closing
            else:
                expression = AttrValueFunction.percent.get(c)
                if not expression:
                    raise badValue()
            if literal:
                # repr() escapes quotes and backslashes of the literal
                fragments.append(repr(literal))
                literal = ''
            fragments.append(expression)
        i += 1

    if not fragments:
        # No expression in the template: it is a plain constant value
        return value
    if literal:
        fragments.append(repr(literal))
    function = AttrValueFunction(
        'lambda i,s,n: ' + '+'.join(fragments), dependencies)
    _attrValueFunctions[value] = function
    return function


#----------------------------------------------------------------------------
class ScannerRule(object):
    """
    This class represents a hierarchy rule. It associates a filename pattern
    and ontology attributes.

    :Attributes:

    .. py:attribute:: pattern

      The filename pattern for this rule. It is an instance of
      :py:class:`brainvisa.data.neuroDiskItems.BackwardCompatiblePattern`.

    .. py:attribute:: globalAttributes

      List of global attributes names and values (tuples). Global attributes
      are added with a :py:class:`SetAttr` builder.

    .. py:attribute:: localAttributes

      List of local attributes names and values (tuples). Local attributes
      are added with a :py:class:`SetWeakAttr` builder.

    .. py:attribute:: defaultAttributesValues

      Dictionary associating attribute names and default values. It can be
      added with a :py:class:`SetDefaultAttributeValue` builder.

    .. py:attribute:: type

      Data type associated to this rule. Instance of
      :py:class:`brainvisa.data.neuroDiskItems.DiskItemType`. It can be
      added with a :py:class:`SetType` builder.

    .. py:attribute:: formats

      List of file formats associated to this rule, each format is an
      instance of :py:class:`brainvisa.data.neuroDiskItems.Format`. It can
      be added with a :py:class:`SetFormats` builder.

    .. py:attribute:: scanner

      When the rule pattern matches a directory, it can contain other
      elements. In this case, this attribute is a
      :py:class:`DirectoryScanner` and it contains other rules describing
      the content of the directory.

    .. py:attribute:: itemName

      A name associated to this rule. It can be added with a
      :py:class:`SetName` builder.

    .. py:attribute:: priority

      This attribute can be added with a :py:class:`SetPriority` builder.

    .. py:attribute:: priorityOffset

      This attribute can be added with a :py:class:`SetPriorityOffset`
      builder.

    .. py:attribute:: fileNameAttribute

      This attribute can be added with a :py:class:`SetFileNameAttribute`
      or a :py:class:`SetFileNameStrongAttribute` builder.

    .. py:attribute:: fileNameAttributeIsWeak

      This attribute can be added with a :py:class:`SetFileNameAttribute`
      or a :py:class:`SetFileNameStrongAttribute` builder.

    .. py:attribute:: fileNameAttributeDefault

      Default value for the filename attribute.

    .. py:attribute:: nonMandatoryKeyAttributes

      This attribute can be added with a
      :py:class:`SetNonMandatoryKeyAttribute` builder.
    """

    def __init__(self, pattern):
        """
        :param pattern: filename pattern of the rule; its ``namedRegex()``
          method is used to choose the default filename attribute.
        """
        self.pattern = pattern
        self.type = None
        self.formats = None
        # NOTE: formatNamesInSet and fileNameAttributeDefault are not
        # initialized here; they only exist once a builder has set them.
        self.globalAttributes = []
        self.localAttributes = []
        self.defaultAttributesValues = {}
        self.scanner = None
        self.itemName = None
        self.priority = None
        self.priorityOffset = 0
        self.nonMandatoryKeyAttributes = set()
        self.declared_attributes = set()
        # The filename attribute is the first named group of the pattern
        # that is not 'name_serie', or 'filename_variable' if there is none.
        self.fileNameAttribute = next(
            (name for name in pattern.namedRegex() if name != 'name_serie'),
            'filename_variable')
        self.fileNameAttributeIsWeak = 1

    def __getstate__(self):
        """
        Returns a copy of the instance dictionary in which the type and the
        formats are replaced by their ``id`` and the private formats list is
        removed.
        """
        state = dict(self.__dict__)
        if self.type is not None:
            state['type'] = self.type.id
        if self.formats is None:
            state['formats'] = None
        else:
            state['formats'] = [fmt.id for fmt in self.formats]
        # Do not store self.__formats
        del state['_ScannerRule__formats']
        return state

    def __setstate__(self, state):
        """
        Restores the instance from *state*: type and formats are looked up
        from their identifiers with :py:mod:`neuroDiskItems`, the remaining
        entries are copied to the instance dictionary. The ``'type'`` and
        ``'formats'`` entries are removed from *state*.
        """
        typeId = state['type']
        self.type = neuroDiskItems.getDiskItemType(typeId) if typeId else None
        formatIds = state['formats']
        if formatIds:
            self.formats = [neuroDiskItems.getFormat(fid)
                            for fid in formatIds]
        else:
            self.formats = None
        del state['type'], state['formats']
        self.__dict__.update(state)

    def _getFormats(self):
        """Getter of the :py:attr:`formats` property."""
        return self.__formats

    def _setFormats(self, formats):
        """
        Setter of the :py:attr:`formats` property. It also updates
        ``_formatsNameSet``, the set of the formats names.
        """
        self.__formats = formats
        if formats is None:
            self._formatsNameSet = set()
            return
        # Ugly trick that will last until 'Graph' format is completely
        # replaced by 'Graph and data'
        self._formatsNameSet = set(
            'Graph and data' if fmt.name == 'Graph' else fmt.name
            for fmt in formats)

    formats = property(_getFormats, _setFormats)
#----------------------------------------------------------------------------
class ScannerRuleBuilder(object):
    """
    Base class for rule builders.
    It defines a virtual method :py:meth:`build` that every derived class
    overrides.
    """

    def build(self, scannerRule):
        """
        Does nothing in the base class.

        :param scannerRule: related :py:class:`ScannerRule`.
        """
        return None
#----------------------------------------------------------------------------
class SetType(ScannerRuleBuilder):
    """
    This builder sets the :py:attr:`ScannerRule.type` attribute of the
    current rule.
    """

    def __init__(self, diskItemType):
        """
        :param string diskItemType: data type, resolved with
          ``neuroDiskItems.getDiskItemType``.
        """
        self.type = neuroDiskItems.getDiskItemType(diskItemType)

    def build(self, scannerRule):
        """
        Sets its data type to the given scannerRule. If the type has
        associated formats and the rule has no formats yet, the
        :py:attr:`ScannerRule.formats` is also updated: the formats of the
        type are converted with ``neuroDiskItems.changeToFormatSeries`` when
        the rule pattern has a ``name_serie`` group.
        """
        itemType = self.type
        scannerRule.type = itemType
        # Default formats are only set when the type has formats and the
        # rule does not already have its own formats list.
        if not itemType.formats or scannerRule.formats is not None:
            return
        if 'name_serie' in scannerRule.pattern.namedRegex():
            scannerRule.formats = [neuroDiskItems.changeToFormatSeries(fmt)
                                   for fmt in itemType.formats]
        else:
            scannerRule.formats = itemType.formats
        # NOTE(review): nesting of this assignment was reconstructed from a
        # flattened source; it is assumed to belong to the
        # "formats is None" branch - confirm against the upstream file.
        scannerRule.formatNamesInSet = set(
            fmt.name for fmt in itemType.formats)

    def __str__(self):
        return "SetType('" + self.type.name + "')"
#----------------------------------------------------------------------------
class SetName(ScannerRuleBuilder):
    """
    This builder sets the :py:attr:`ScannerRule.itemName` attribute of the
    current rule.
    """

    def __init__(self, name):
        """
        :param string name: a name that will be associated to any diskitem
          that matches the rule. It is converted with
          :py:func:`getAttrValue`, so it may contain ``%`` escapes.
        """
        itemName = getAttrValue(name)
        self.name = itemName

    def build(self, scannerRule):
        """
        Stores the name in ``scannerRule.itemName``.
        """
        scannerRule.itemName = self.name
#----------------------------------------------------------------------------
class SetAttr(ScannerRuleBuilder):
    """
    This builder adds attributes to the
    :py:attr:`ScannerRule.globalAttributes` list of the current rule.
    """

    def __init__(self, *params):
        """
        :param params: list of attribute name followed by its value.
        :raises TypeError: if an attribute name is not a ``str``.
        :raises ValueError: if the last attribute name has no value.
        """
        self.attributes = []
        # Pair each even-indexed parameter (name) with the following one
        # (value); a trailing unpaired name is reported after the loop.
        for attr, value in zip(params[0::2], params[1::2]):
            # Check attribute name
            if type(attr) is not str:
                raise TypeError(
                    HTMLMessage(_t_('<em><code>%s</code></em> is not a valid attribute name') % str(attr)))
            # String values may contain "%" escapes
            if type(value) is str:
                value = getAttrValue(value)
            self.attributes.append((attr, value))
        if len(params) % 2 != 0:
            raise ValueError(
                HTMLMessage(_t_('missing value for attribute <em>%s</em>') % str(params[-1])))

    def build(self, scannerRule):
        """
        Appends the attributes to ``scannerRule.globalAttributes``.
        """
        scannerRule.globalAttributes += self.attributes
#----------------------------------------------------------------------------
class SetWeakAttr(SetAttr):
    """
    This builder adds attributes to the
    :py:attr:`ScannerRule.localAttributes` list of the current rule.
    Parameters are the same as for :py:class:`SetAttr`.
    """

    def build(self, scannerRule):
        """
        Appends the attributes to ``scannerRule.localAttributes``.
        """
        scannerRule.localAttributes += self.attributes
#----------------------------------------------------------------------------
class SetDefaultAttributeValue(ScannerRuleBuilder):
    """
    This builder adds a new attribute value in the
    :py:attr:`ScannerRule.defaultAttributesValues` map of the current rule.
    """

    def __init__(self, attribute, value):
        """
        :param string attribute: name of the attribute
        :param string value: default value of the attribute.
        """
        self.attribute, self.value = attribute, value

    def build(self, scannerRule):
        """
        Stores the default value under the attribute name in
        ``scannerRule.defaultAttributesValues``.
        """
        defaults = scannerRule.defaultAttributesValues
        defaults[self.attribute] = self.value
#----------------------------------------------------------------------------
class SetNonMandatoryKeyAttribute(ScannerRuleBuilder):
    """
    This builder adds new attributes names in the
    :py:attr:`ScannerRule.nonMandatoryKeyAttributes` set of the current
    rule.
    """

    def __init__(self, *attributes):
        """
        :param attributes: list of attributes names that are not mandatory
          key attributes.
        """
        self.attributes = attributes

    def build(self, scannerRule):
        """
        Adds the attributes names to
        ``scannerRule.nonMandatoryKeyAttributes``.
        """
        nonMandatory = scannerRule.nonMandatoryKeyAttributes
        nonMandatory.update(self.attributes)
#----------------------------------------------------------------------------
class SetContent(ScannerRuleBuilder):
    """
    This builder assumes that the current rule pattern matches a directory.
    As a directory can contain other files, a new
    :py:class:`DirectoryScanner` is created and set as the
    :py:attr:`ScannerRule.scanner` attribute of the current rule. This
    directory scanner contains the rules defined to describe the content of
    this directory.
    """

    def __init__(self, *params):
        """
        :param params: list of rules describing the content of a directory:
          several filename patterns followed by associated rule builders.
        """
        contentRules = []
        currentRule = None
        for param in params:
            if currentRule is not None \
                    and isinstance(param, ScannerRuleBuilder):
                # A builder applies to the most recently declared pattern
                param.build(currentRule)
            else:
                # Anything else (and always the first parameter) is a
                # filename pattern that starts a new rule
                currentRule = ScannerRule(
                    neuroDiskItems.BackwardCompatiblePattern(param))
                contentRules.append(currentRule)
        self.scanner = DirectoryScanner(contentRules)

    def build(self, scannerRule):
        """
        Marks the pattern of *scannerRule* as a directory pattern and
        attaches the content scanner to the rule.
        """
        # Only directories can have a content
        scannerRule.pattern.fileType = neuroDiskItems.Directory
        scannerRule.scanner = self.scanner
#----------------------------------------------------------------------------
class SetPriority(ScannerRuleBuilder):
    """
    This builder sets the :py:attr:`ScannerRule.priority` attribute of the
    current rule.
    """

    def __init__(self, priority):
        """
        :param priority: value stored in :py:attr:`ScannerRule.priority`.
        """
        self.priority = priority

    def build(self, scannerRule):
        """
        Copies the priority to *scannerRule*.
        """
        setattr(scannerRule, 'priority', self.priority)
#----------------------------------------------------------------------------
class SetPriorityOffset(ScannerRuleBuilder):
    """
    This builder sets the :py:attr:`ScannerRule.priorityOffset` attribute of
    the current rule.
    """

    def __init__(self, priorityOffset):
        """
        :param priorityOffset: value stored in
          :py:attr:`ScannerRule.priorityOffset`.
        """
        self.priorityOffset = priorityOffset

    def build(self, scannerRule):
        """
        Copies the priority offset to *scannerRule*.
        """
        setattr(scannerRule, 'priorityOffset', self.priorityOffset)
#----------------------------------------------------------------------------
class SetFormats(ScannerRuleBuilder):
    """
    This builder sets the :py:attr:`ScannerRule.formats` attribute of the
    current rule.
    """

    def __init__(self, formats):
        """
        :param formats: list of formats names that will be associated to
          the current pattern. They are resolved with
          ``neuroDiskItems.getFormats``.
        """
        self.formats = neuroDiskItems.getFormats(formats)

    def build(self, scannerRule):
        """
        Sets the formats of *scannerRule*. When ``scannerRule.pattern.number``
        is true, each format is first converted with
        ``neuroDiskItems.changeToFormatSeries``. The names of the resulting
        formats are stored in ``scannerRule.formatNamesInSet``.
        """
        ruleFormats = self.formats
        if scannerRule.pattern.number:
            ruleFormats = [neuroDiskItems.changeToFormatSeries(fmt)
                           for fmt in self.formats]
        scannerRule.formats = ruleFormats
        scannerRule.formatNamesInSet = set(
            fmt.name for fmt in scannerRule.formats)
#----------------------------------------------------------------------------
class SetFileNameAttribute(ScannerRuleBuilder):
    """
    This builder sets the :py:attr:`ScannerRule.fileNameAttribute` and
    :py:attr:`ScannerRule.fileNameAttributeDefault` attributes of the
    current rule.
    The :py:attr:`ScannerRule.fileNameAttributeIsWeak` attribute is set to 1.
    """

    def __init__(self, attribute, defaultValue=None):
        """
        :param string attribute: name of the attribute
        :param string defaultValue: a default value for this attribute.
        """
        self.attribute, self.default = str(attribute), defaultValue

    def build(self, scannerRule):
        """
        Declares the filename attribute of *scannerRule* as a weak one.
        """
        scannerRule.fileNameAttribute = self.attribute
        scannerRule.fileNameAttributeIsWeak = 1
        scannerRule.fileNameAttributeDefault = self.default
#----------------------------------------------------------------------------
class SetFileNameStrongAttribute(ScannerRuleBuilder):
    """
    This builder sets the :py:attr:`ScannerRule.fileNameAttribute` and
    :py:attr:`ScannerRule.fileNameAttributeDefault` attributes of the
    current rule.
    The :py:attr:`ScannerRule.fileNameAttributeIsWeak` attribute is set to 0.
    """

    def __init__(self, attribute, defaultValue=None):
        """
        :param string attribute: name of the attribute
        :param string defaultValue: a default value for this attribute.
        """
        self.attribute, self.default = str(attribute), defaultValue

    def build(self, scannerRule):
        """
        Declares the filename attribute of *scannerRule* as a strong one.
        """
        scannerRule.fileNameAttribute = self.attribute
        scannerRule.fileNameAttributeIsWeak = 0
        scannerRule.fileNameAttributeDefault = self.default
#----------------------------------------------------------------------------
class DeclareAttributes(ScannerRuleBuilder):
    """
    This builder adds attributes names to the ``declared_attributes`` set of
    the current rule.
    """

    def __init__(self, *attributes):
        """
        Lists attributes which should be added to the database columns, with
        no default value.

        :param string attributes: name of the attributes
        """
        self.attributes = attributes

    def build(self, scannerRule):
        """
        Adds the attributes names to ``scannerRule.declared_attributes``.
        """
        declared = scannerRule.declared_attributes
        declared.update(self.attributes)
#----------------------------------------------------------------------------
class DirectoryScanner(object):
    """
    This object contains a list of :py:class:`ScannerRule` describing the
    content of a directory.

    :Attributes:

    .. py:attribute:: rules

      The list of :py:class:`ScannerRule` describing the content of the
      directory.

    .. py:attribute:: possibleTypes

      Dictionary containing the data types associated to its rules as keys.
      Values are always 1.

    :Methods:

    """
    # NOTE(review): this class was re-indented from a flattened copy of the
    # source. The statements are unchanged but block nesting had to be
    # reconstructed; places where it could not be deduced with certainty are
    # marked below and should be checked against the upstream file.

    def __init__(self, scannerRules):
        """
        :param scannerRules: list of :py:class:`ScannerRule`; it is stored
          as is in :py:attr:`rules`.
        """
        self.rules = scannerRules
        # Set possible types: each rule type with all its ancestors, plus
        # the possible types of the sub-directory scanners
        self.possibleTypes = {}
        for rule in self.rules:
            t = rule.type
            while t:
                self.possibleTypes[t] = 1
                t = t.parent
            if rule.scanner:
                self.possibleTypes.update(rule.scanner.possibleTypes)

    def __getstate__(self):
        """
        Returns the picklable state: the rules and the identifiers (``id``)
        of the possible types.
        """
        return {
            'rules': self.rules,
            'possibleTypes': [x.id for x in self.possibleTypes.keys()]
        }

    def __setstate__(self, state):
        """
        Restores the rules and rebuilds :py:attr:`possibleTypes` by looking
        up each stored identifier with ``neuroDiskItems.getDiskItemType``.
        """
        self.rules = state['rules']
        self.possibleTypes = {}
        for t in state['possibleTypes']:
            self.possibleTypes[neuroDiskItems.getDiskItemType(t)] = 1

    def scan(self, directory):
        """
        Scans a directory and returns the list of diskitem that it contain.
        The diskitems that match ontology rules get corresponding ontology
        attributes.

        :param directory: iterable of items to identify. Items whose name
          ends with ``.minf`` are skipped.
        :returns: the result of ``setFormatAndTypeAttributes()`` for each
          identified item followed by each unidentified item that was kept.
        """
        # When set, debug is used as the output file of the trace messages
        debug = neuroConfig.debugHierarchyScanning
        # (name, type, format) -> identified item
        groups = {}
        # (format, name) -> item, used to merge the files of one item
        formatGroups = {}
        # (format, name) -> first unidentified item with that format/name
        unknownGroups = {}
        known = []
        unknown = []
        for item in directory:
            if item.name[-5:] == '.minf':
                if debug:
                    print('-> Skiping', item, file=debug)
                continue
            if debug:
                print('-> Examining', item, file=debug)
                print(' time:', time.ctime(), file=debug)
                print(' attributes:', file=debug)
                for n, v in item.attributes().items():
                    print(' ', n, '=', repr(v), file=debug)
            # Cache of format.match() results for this item; the value 1
            # is used as "not checked yet" marker.
            checkedFormats = {}
            identified = 0
            for rule in self.rules:
                # Check item format and rule matching
                matchDict = None
                if rule.formats is None:
                    # No format list check only pattern
                    position = 0
                    if debug:
                        print(
                            ' rule (without formats list)', rule.pattern.pattern, file=debug)
                    matchDict = rule.pattern.match(item)
                    if matchDict is not None:
                        if debug:
                            print(' -> matched', file=debug)
                        known.append(item)
                else:
                    # Only formats in self.formats are allowed for this rule
                    for format in rule.formats:
                        # Check if format as already been checked
                        matchFormat = checkedFormats.get(format, 1)
                        if matchFormat == 1:
                            # Check if file/directory name match format
                            matchFormat = format.match(item, returnPosition=1)
                            checkedFormats[format] = matchFormat
                        if matchFormat:
                            position = matchFormat[1]
                            matchFormat = matchFormat[0]
                            # Remove format prefix and sufix
                            oldName = item.name
                            item.name = format.formatedName(item, matchFormat)
                            # Only one item is created for each ( item.name, item.format ) couple.
                            # Items are grouped with format.group() method
                            formatId = (format, item.name)
                            groupedItem = formatGroups.get(formatId)
                            if groupedItem is not None:
                                if debug:
                                    print(
                                        ' -> grouped with ', groupedItem, 'at position', position, file=debug)
                                item = format.group(
                                    groupedItem, item, position=position)
                                formatGroups[formatId] = item
                                groupId = (item.name, item.type, item.format)
                                groups[groupId] = item
                                # item is merged into groupedItem (wich is identified). Therefore further
                                # identification for item is useless.
                                identified = 1
                                break
                            # Check if file/directory name without format prefix and sufix
                            # match the current rule pattern
                            if debug:
                                print(
                                    ' rule', rule.pattern.pattern, file=debug)
                            matchDict = rule.pattern.match(item)
                            if matchDict is not None:
                                if debug:
                                    print(' -> matched', file=debug)
                                format.setFormat(item, (rule, matchDict))
                                # Set the definite name of the item
                                if rule.itemName:
                                    if isinstance(rule.itemName, AttrValueFunction):
                                        item.name = rule.itemName.getValue(
                                            item, matchDict)
                                    else:
                                        item.name = rule.itemName
                                    if debug:
                                        print(
                                            ' item name set to', repr(item.name), file=debug)
                                # Only one item is created for each ( item.name, item.type, item.format ) triplet.
                                # Items are grouped with format.group() method
                                groupId = (item.name, rule.type, item.format)
                                groupedItem = groups.get(groupId)
                                if groupedItem is not None:
                                    if debug:
                                        print(
                                            ' -> grouped with ', groupedItem, 'at position', position, file=debug)
                                    item = format.group(
                                        groupedItem, item, position=position, matchRule=matchDict)
                                    groups[groupId] = item
                                    # item is merged into groupedItem (wich is identified). Therefore further
                                    # identification for item is useless.
                                    identified = 1
                                else:
                                    formatGroups[formatId] = item
                                    # A new ( item.name, item.type, item.format
                                    # ) triplet is created
                                    groups[groupId] = item
                                # No more format checked
                                break
                            else:
                                # The rule pattern is not matched for that
                                # format, the item name is restored
                                item.name = oldName
                    if not identified and item.format is None:
                        # No format in self.formats allow self.pattern to match,
                        # the rule is rejected
                        continue
                # If all has been done for this item, go to the next item
                if identified:
                    break
                # Check if the current rule matched
                if matchDict is not None:
                    # Set item type (and possibly some attributes)
                    if rule.type:
                        rule.type.setType(item, matchDict, position)
                    # Set attributes: the value matched for the filename
                    # attribute is stored as a local (weak) or global
                    # attribute depending on the rule
                    star = matchDict.get(rule.fileNameAttribute, '')
                    if debug:
                        if rule.fileNameAttributeIsWeak:
                            debug.write(' Setting local attribute: ')
                        else:
                            debug.write(' Setting global attribute: ')
                        print(
                            rule.fileNameAttribute, '=', repr(star), file=debug)
                    if rule.fileNameAttributeIsWeak:
                        item._setLocal(rule.fileNameAttribute, star)
                    else:
                        item._setGlobal(rule.fileNameAttribute, star)
                    # Attribute values that are AttrValueFunction instances
                    # are computed for this item, others are used as is
                    for (attr, value) in rule.globalAttributes:
                        if isinstance(value, AttrValueFunction):
                            item._setGlobal(
                                attr, value.getValue(item, matchDict))
                        else:
                            item._setGlobal(attr, value)
                    for (attr, value) in rule.localAttributes:
                        if isinstance(value, AttrValueFunction):
                            item._setLocal(
                                attr, value.getValue(item, matchDict))
                        else:
                            item._setLocal(attr, value)
                    # Set scanner
                    item.scanner = rule.scanner
                    # Set priority
                    item.setPriority(
                        rule.priority, priorityOffset=rule.priorityOffset)
                    if debug:
                        print(
                            ' identified as', item.name, 'by rule', rule.pattern, file=debug)
                        if rule.scanner is None:
                            print(' rule has no scanner', file=debug)
                        else:
                            print(' rule scanner:', ', '.join(
                                [i.pattern.pattern for i in rule.scanner.rules]), file=debug)
                    # A matching rule has been found, do not inspect other
                    # rules
                    identified = 1
                    break
            # After for rule in ...
            # if item is not identified, try to find its format accoding to
            # filename and group files that are part of item format
            if not identified:
                if debug:
                    print(' -> Not identified:', item, file=debug)
                item.findFormat()
                if item.format:
                    formatId = (item.format, item.name)
                    if not unknownGroups.get(formatId, None):
                        # register a group for this format
                        unknownGroups[formatId] = item
                        # NOTE(review): nesting reconstructed; the append is
                        # assumed to happen only for the first item of a
                        # (format, name) group - confirm against upstream.
                        unknown.append(item)
        # Correct items if necessary
        known += list(groups.values())
        for item in known:
            if item.format:
                item.format.postProcessing(item)
            item._identified = True
        return [i.setFormatAndTypeAttributes() for i in known + unknown]

    def attributesDependencies(self, parentAttributes, result):
        """
        Fills *result* with, for each attribute set by the rules, the list
        of attributes it depends on, then recurses in the scanners of the
        rules.

        :param parentAttributes: attributes names coming from the parent
          level; they are added to the dependencies of each attribute of
          this level, together with their own dependencies found in
          *result*.
        :param dict result: maps an attribute name to its list of
          dependencies; it is modified in place.
        """
        # NOTE(review): allRulesAttributes is assigned but never read, and
        # the recursion below receives the attributes of the last rule of
        # the first loop only - possibly "+=" was intended; confirm.
        allRulesAttributes = []
        for rule in self.rules:
            currentAttributes = [x[0]
                                 for x in rule.globalAttributes + rule.localAttributes]
            if rule.fileNameAttribute != 'filename_variable':
                currentAttributes.append(rule.fileNameAttribute)
            allRulesAttributes = currentAttributes
            for attribute in currentAttributes:
                if attribute in ('name_serie', ):
                    continue
                dependencies = result.get(attribute, [])
                # NOTE(review): nesting of the two loops below was
                # reconstructed from a flattened source - confirm.
                for parent in parentAttributes:
                    if parent not in dependencies + [attribute]:
                        dependencies.append(parent)
                    for grandParent in result.get(parent, []):
                        if grandParent not in dependencies + [attribute]:
                            dependencies.append(grandParent)
                if dependencies:
                    result[attribute] = dependencies
        for rule in self.rules:
            if rule.scanner is not None:
                rule.scanner.attributesDependencies(currentAttributes, result)

    def attributesValues(self, result):
        """
        Collects in *result* the values that the rules of this scanner, and
        of the scanners of its rules, give to each attribute.

        :param dict result: maps an attribute name to the list of its
          distinct values; it is modified in place. The value recorded for
          the filename attribute of a rule is ``getAttrValue('%*')``;
          ``'filename_variable'`` is ignored.
        """
        for rule in self.rules:
            for name, value in [(rule.fileNameAttribute, getAttrValue('%*'))] + rule.globalAttributes + rule.localAttributes:
                if name == 'filename_variable':
                    continue
                values = result.get(name, [])
                if value not in values:
                    values.append(value)
                result[name] = values
            if rule.scanner is not None:
                rule.scanner.attributesValues(result)
#----------------------------------------------------------------------------
[docs]class FileSystemOntology(object): """ This class represents a Brainvisa hierarchy, that is to say a set of rules associating data types and filenames. The right way to use this class is to use the :py:meth:`get` method to get an instance of this class for a specific hierarchy in order to create only one instance for each hierarchy. :Attributes: .. py:attribute:: name The name of the hierarchy. It is the name of the directory containing the hierachy files, under the *hierarchies* directory. .. py:attribute:: source List of source paths for this hierarchy. Indeed, the hierarchy files can be in several directories: in the main Brainvisa directory and in each toolbox directory. .. py:attribute:: content Content of the hierarchy as it is described in the hierarchy files. .. py:attribute:: typeToPatterns Map associating each data type (:py:class:`brainvisa.data.neuroDiskItems.FileType`) with a list of rules (:py:class:`ScannerRule`). .. py:attribute:: lastModification Date of last modification of the hierarchy files. This enables to detect ontology changes and to offer the user to update his databases. :Methods: """ __instances = {} def __init__(self, source, directories): # FileSystemOntology constructor is private. Use :py:meth:`get` # instead. 
self.cacheName = None # if os.path.isdir( source ): # Source is a directory => new (version 3.1 and later) multiple files # definition of FSO # reader = self.__Reader() # reader.read( self, source ) if os.path.isfile(source): # Source is a file => old (prior to version 3.1) single file FSO (that was # called hierarchy) oldFSOContent = {} beforeError = _t_('in file system ontology <em>%s</em>') % str(p) with open(source, 'rb') as f: code = compile(f.read(), f.name, 'exec') six.exec_(code, globals(), oldFSOContent) self.content = oldFSOContent.get('hierarchy') self.cacheName = oldFSOContent.get('cache') self.lastModification = os.stat(source).st_mtime # hierarchy is in several directories elif len(directories) > 0 and os.path.isdir(directories[0]): reader = self.__Reader() reader.read(self, directories) else: raise RuntimeError( HTMLMessage(_t_('<em>%s</em> is not a valid file system ontology') % (str(source),))) self.source = directories # source dir, name = os.path.split(source) if dir == "": # in neuroConfig.fileSystemOntologiesPath: self.name = name else: self.name = source if self.cacheName is None: self.cacheName = self.name + '.fsd' # For each data type, build all the possible patterns in extenso self._declared_attributes = set() self.typeToPatterns = SortedDictionary() stack = [(r,) for r in [i for i in self.content if isinstance(i, SetContent)][0].scanner.rules] while stack: rules = stack.pop(0) rule = rules[-1] if rule.type is not None: ruleInExtenso = ScannerRule(neuroDiskItems.DictPattern( '/'.join((r.pattern.pattern for r in rules)))) ruleInExtenso.type = rule.type ruleInExtenso.formats = rule.formats ruleInExtenso.priority = rule.priority ruleInExtenso._declared_attributes_location = {} for r in rules: ruleInExtenso.globalAttributes += r.globalAttributes ruleInExtenso.defaultAttributesValues.update( r.defaultAttributesValues) ruleInExtenso.globalAttributes += r.globalAttributes d = None for i in r.localAttributes: for j in ruleInExtenso.localAttributes: 
if i[0] == j[0]: d = j break if d: ruleInExtenso.localAttributes.remove(d) ruleInExtenso.localAttributes += r.localAttributes ruleInExtenso.priorityOffset += r.priorityOffset ruleInExtenso.nonMandatoryKeyAttributes.update( r.nonMandatoryKeyAttributes) ruleInExtenso.declared_attributes.update( r.declared_attributes) # Prepend "../" to all _declared_attributes_location ruleInExtenso._declared_attributes_location = \ dict((att, os.path.join('..', path)) for att, path in six.iteritems(ruleInExtenso._declared_attributes_location)) for att in r.declared_attributes: self._declared_attributes.add(att) ruleInExtenso._declared_attributes_location[ att] = 'fso_attributes.json' ruleInExtenso.itemName = rule.itemName self.typeToPatterns.setdefault( rule.type, []).append(ruleInExtenso) if rule.scanner: stack = [rules + (r,) for r in rule.scanner.rules] + stack
[docs] def getOntologiesNames(): """ Lists all the ontologies names found in fileSystemOntologiesPath. """ ontologies = set() for fsoPath in neuroConfig.fileSystemOntologiesPath: _, dirnames, _ = next(os.walk(fsoPath)) for ontology in dirnames: ontologies.add(ontology) return ontologies
getOntologiesNames = staticmethod(getOntologiesNames)
[docs] def get(source): ''' Satic factory for creation of FileSystemOntology instances. The source can be: * The name of one of the FSO directories located in one of the "hierarchies" directories of neuroConfig.fileSystemOntologiesPath (for example 'brainvisa-3.2.0' is the main FSO). * The name of any FSO directory. * The name of an old-style (prior to version 3.1) hierarchy file. ''' # Get the source file if source is None: source = 'brainvisa-3.2.0' # Keep backward compatibility with old <mainPath>/*Hierarchy.py files source = os.path.normpath(source) if source == os.path.normpath(os.path.join(neuroConfig.mainPath, 'shfjHierarchy.py')): source = 'brainvisa-3.0' elif source == os.path.normpath(os.path.join(neuroConfig.mainPath, 'sharedHierarchy.py')): source = 'shared' elif source == os.path.normpath(os.path.join(neuroConfig.mainPath, 'shfjFlatHierarchy.py')): source = 'flat' # as hierarchy can be located in several directories, the # fileSystemOntology will be created with a list of directories directories = [] if not os.path.isabs(source) or os.path.exists(source): source = os.path.basename(source) # remove path if any for fsoPath in neuroConfig.fileSystemOntologiesPath: s = os.path.normpath(os.path.join(fsoPath, source)) if os.path.exists(s): # source = s # break directories.append(s) if len(directories) == 0: directories.append(source) # Check if the FSO instance have already been created result = FileSystemOntology.__instances.get(source) if result is None: result = FileSystemOntology(source, directories) # source FileSystemOntology.__instances[source] = result return result
get = staticmethod(get)


def clear():
    """
    Forgets every cached FileSystemOntology instance.
    """
    FileSystemOntology.__instances = {}


clear = staticmethod(clear)


def getTypeChildren(self, types):
    """
    Iterates over the given type names and their direct children.

    The mapping from a type name to the set made of this name and the
    names of the types whose parent it is gets built on first call from
    ``neuroDiskItems.diskItemTypes`` and is then kept on the instance.

    :param types: iterable of type names.
    :returns: an iterator over type names; a name can be yielded several
        times, and names that are not registered yield nothing.
    """
    if getattr(self, '_childrenByTypeName', None) is None:
        childrenByTypeName = {}
        for diskItemType in six.itervalues(neuroDiskItems.diskItemTypes):
            childrenByTypeName.setdefault(
                diskItemType.name, set((diskItemType.name,)))
            if diskItemType.parent:
                # The values are sets: use add() (sets have no append())
                childrenByTypeName.setdefault(
                    diskItemType.parent.name,
                    set((diskItemType.parent.name,))).add(diskItemType.name)
        self._childrenByTypeName = childrenByTypeName
    # from_iterable flattens the per-type sets into a single stream of names
    return chain.from_iterable(
        self._childrenByTypeName.get(name, ()) for name in types)


def getTypesFormats(self, types):
    """
    Returns the names of the formats used by the rules of the given types
    and of their direct children.

    The mapping from a type name to its format names is built on first
    call from ``self.typeToPatterns`` and is then kept on the instance.

    :param types: iterable of type names.
    :returns: a set of format names.
    """
    if getattr(self, '_formatsByTypeName', None) is None:
        self._formatsByTypeName = {}
        for diskItemType, rules in six.iteritems(self.typeToPatterns):
            typeFormats = self._formatsByTypeName.setdefault(
                diskItemType.name, [])
            for rule in rules:
                if rule.formats:
                    for format in rule.formats:
                        if format.name not in typeFormats:
                            typeFormats.append(format.name)
    result = set()
    # getTypeChildren() expects an iterable of names: hand it the whole
    # collection rather than one name at a time (a single string would be
    # iterated character by character).
    for typeName in self.getTypeChildren(types):
        formats = self._formatsByTypeName.get(typeName)
        if formats:
            result.update(formats)
    return result


#--------------------------------------------------------------------------
class __Reader(MultipleExecfile):

    """
    A reader for hierarchy files.
    It enables to use the functions *insert*, *insertFirst* and
    *insertLast* in these files. These functions are associated to the
    methods :py:meth:`insert`, :py:meth:`insertFirst` and
    :py:meth:`insertLast` of this class.
    """

    def __init__(self):
        MultipleExecfile.__init__(self)
        self.fileExtensions.append('.py')
        # Names made available to the code of the hierarchy files
        self.localDict['insert'] = self.insert
        self.localDict['insertFirst'] = self.insertFirst
        self.localDict['insertLast'] = self.insertLast

    def read(self, fso, directories):
        """
        Reads the hierarchy files of an ontology.
        Sets the value of the *hierarchy* variable that should be defined
        in the hierarchy files as the content of the fso, and sets
        ``fso.lastModification``.

        Errors returned by the execution of the files are displayed with
        showException; an error while getting the *hierarchy* variable is
        re-raised with a message naming the ontology and the files.

        :param fso: :py:class:`FileSystemOntology`
        :param directories: paths to the hierarchy files of this ontology.
        """
        self.includePath.update(directories)
        files = []
        # "directory" is used in the error messages below; give it a value
        # even when "directories" is empty so that building a message
        # cannot itself fail.
        directory = ''
        for directory in directories:
            files.extend(shelltools.filesFromShPatterns(
                os.path.join(directory, '*.py')))
        files.sort()
        exc = self.execute(continue_on_error=True, *files)
        if exc:
            for e in exc:
                # Re-raise each collected error so that showException can
                # display it with its traceback.
                try:
                    six.reraise(e[0], e[1], e[2])
                except Exception:
                    showException(
                        beforeError="Error while reading ontology "
                        + directory + ": ")
        try:
            fso.content = self.localDict['hierarchy']
        except Exception as e:
            msg = 'in filesystem ontology "' + directory + '": ' \
                + six.text_type(e) + ', files=' + str(files)
            if hasattr(e, 'message'):
                e.message = msg
            e.args = (msg, ) + e.args[1:]
            raise
        # A single max() over all the dates also works when no file was
        # found.
        fso.lastModification = max(
            [neuroDiskItems.typesLastModification]
            + [os.stat(f).st_mtime for f in files])

    def insert(self, path, *content):
        """
        Inserts rules in a :py:class:`DirectoryScanner` whose pattern
        matches *path*.
        """
        self._insert(False, False, path, *content)

    def insertLast(self, path, *content):
        """
        Appends rules in a :py:class:`DirectoryScanner` whose pattern
        matches *path*.
        """
        self._insert(False, True, path, *content)

    def insertFirst(self, path, *content):
        """
        Inserts rules in a :py:class:`DirectoryScanner` whose pattern
        matches *path* at the beginning of the list of rules.
        """
        self._insert(True, False, path, *content)

    def _insert(self, first, last, path, *content):
        """
        Merges the rules built from *content* into the scanner found at
        *path* in the current *hierarchy*.

        :param first: if true, new rules are inserted at the beginning of
            the list of rules of the scanner.
        :param last: if true (and *first* is false), new rules are
            appended at the end of the list of rules.
        :param path: '/' separated rule patterns leading to the scanner to
            modify; missing components that do not contain '*' are
            created.
        :param content: rule definition, given to SetContent.
        :raises RuntimeError: if a missing component of *path* contains
            '*', or if a rule that has a scanner redefines an existing
            rule that has none.
        """
        contentScanner = SetContent(*content).scanner
        # The root scanner is the one of the first SetContent of the
        # hierarchy.
        # NOTE(review): "scanner" stays unbound if the hierarchy contains
        # no SetContent - confirm this cannot happen.
        for ruleBuilder in self.localDict['hierarchy']:
            if isinstance(ruleBuilder, SetContent):
                scanner = ruleBuilder.scanner
                break
        parentScanners = [scanner]
        if path:
            if path[-1] == '/':
                path = path[:-1]
            for pattern in path.split('/'):
                found = None
                for rule in scanner.rules:
                    if rule.pattern.pattern == pattern:
                        found = rule.scanner
                        break
                if found is None:
                    if pattern.find('*') == -1:
                        # Create a rule for that directory
                        found = SetContent(pattern, SetContent()).scanner
                        scanner.rules += found.rules
                        found = found.rules[0].scanner
                    else:
                        raise RuntimeError(
                            HTMLMessage(_t_('invalid hierarchy path: <em>%s</em>') % (path,)))
                scanner = found
                parentScanners.append(scanner)
        # Concatenate rules (updating hierarchy towards the leaves)
        stack = [(scanner, contentScanner.rules)]
        while stack:
            scanner, rules = stack.pop()
            posCount = 0
            for rule in rules:
                found = False
                if rule.scanner is not None:
                    for scannerRule in scanner.rules:
                        if scannerRule.pattern.pattern == rule.pattern.pattern:
                            found = True
                            if scannerRule.scanner is None:
                                raise RuntimeError(_t_('Invalid redefinition of rule %(rule)s in file %(file)s') % {
                                                   'rule': rule.pattern.pattern, 'file': self.localDict['__name__']})
                            # Same pattern on both sides: merge the
                            # sub-rules later instead of adding the rule.
                            stack.append(
                                (scannerRule.scanner, rule.scanner.rules))
                            scannerRule.scanner.possibleTypes.update(
                                rule.scanner.possibleTypes)
                            break
                if not found:
                    # _lastpos is the index where rules added by insert()
                    # go; it starts at the end of the rules the scanner
                    # had at its first modification.
                    if not hasattr(scanner, '_lastpos'):
                        scanner._lastpos = len(scanner.rules)
                    if first:
                        # posCount keeps the rules of a same call in order
                        scanner.rules.insert(posCount, rule)
                        posCount += 1
                        scanner._lastpos += 1
                    elif last:
                        scanner.rules.append(rule)
                    else:
                        scanner.rules.insert(scanner._lastpos, rule)
                        scanner._lastpos += 1
        # Update parent possible types (updating hierarchy towards the
        # root)
        for s in parentScanners:
            s.possibleTypes.update(contentScanner.possibleTypes)
def printOntology(self, file=sys.stdout):
    """
    Writes ontology information.

    For each type, writes a ``newType( ... )`` entry with the groups of
    key attributes found in its rules and the name of its parent type,
    then the list of all the groups of key attributes as comments.

    :param file: file object the text is written to.
    """
    allKeys = set()
    tab = ' '
    anyType = neuroDiskItems.getDiskItemType('Any type')
    # ontology: type name -> [groups of key attributes, parent type name]
    ontology = {anyType.name: [(), None]}
    for type, rules in six.iteritems(self.typeToPatterns):
        keys = []
        for rule in rules:
            ruleAttributes = set(rule.pattern.namedRegex())
            ruleAttributes.update(rule.pattern.attributes())
            # A local attribute given several different values in the
            # same rule is also counted as an attribute of the rule.
            localAttributesValues = {}
            for n, v in rule.localAttributes:
                ev = localAttributesValues.get(n)
                if ev is None:
                    localAttributesValues[n] = v
                elif ev != v:
                    ruleAttributes.add(n)
            # Keep only the largest groups: skip a group included in an
            # existing one, replace a non-empty existing group that is
            # included in the new one.
            for keyAttributes in keys:
                if ruleAttributes.issubset(keyAttributes):
                    break
                elif keyAttributes and ruleAttributes.issuperset(keyAttributes):
                    keys.remove(keyAttributes)
                    keys.append(ruleAttributes)
                    break
            else:
                keys.append(ruleAttributes)
        allKeys.update([tuple(i) for i in keys])
        if type.parent:
            ontology[type.name] = [keys, type.parent.name]
        else:
            ontology[type.name] = [keys, None]
    # Types that have no rule are listed too, without any attribute
    for type in six.itervalues(neuroDiskItems.diskItemTypes):
        if type.name not in ontology:
            if type.parent:
                ontology[type.name] = [(), type.parent.name]
            else:
                ontology[type.name] = [(), None]

    def keepAttribute(ontology, type, attribute):
        # Tells whether a group of attributes must be kept for a type,
        # that is if none of the ancestors of the type already has it.
        entry = ontology.get(type)
        if entry is None:
            # Unknown type: there is no ancestor to compare with
            return True
        parent = entry[1]
        if parent:
            parentEntry = ontology.get(parent)
            if parentEntry is None:
                # The parent is not described: nothing can be inherited
                return True
            if attribute in parentEntry[0]:
                return False
            return keepAttribute(ontology, parent, attribute)
        return True

    for k, v in six.iteritems(ontology):
        v[0] = [i for i in v[0] if keepAttribute(ontology, k, i)]

    print('\n\n#' + '=' * 79, file=file)
    print('# Ontology:', self.name, file=file)
    print('#' + '=' * 79, file=file)
    for typeName, (attributes, parent) in six.iteritems(ontology):
        print(file=file)
        print('#' + '-' * 79, file=file)
        print('newType( ' + repr(typeName) + ',', file=file)
        for a in attributes:
            print(tab + repr(tuple(a)) + ',', file=file)
        if parent is not None:
            print(tab + 'parent=' + repr(parent) + ',', file=file)
        print('),', file=file)
    print(file=file)
    print('# all FSO keys:', file=file)
    for attributes in allKeys:
        print('#' + tab + ', '.join(
            [repr(i) for i in attributes]), file=file)
def printFSO(self, file=sys.stdout):
    """
    Writes the rules of this file system ontology.

    For each type of ``self.typeToPatterns``, writes a ``newRules( ... )``
    entry listing the pattern of each rule, followed by the options of the
    rule (formats, priority, priorityOffset, attributes) when it has some.

    :param file: file object the text is written to.
    """
    tab = ' '
    indent2 = tab * 2
    indent3 = tab * 3
    separator = '#' + '=' * 79
    print('\n\n' + separator, file=file)
    print('# File System Ontology:', self.name, file=file)
    print(separator, file=file)
    for diskItemType, typeRules in six.iteritems(self.typeToPatterns):
        print(file=file)
        print('#' + '-' * 79, file=file)
        print('newRules(', repr(diskItemType.name), ',', file=file)
        for rule in typeRules:
            file.write(tab + repr(rule.pattern.pattern) + ",")
            ownFormats = rule.formats is not diskItemType.formats
            if not (rule.priority or rule.priorityOffset
                    or rule.localAttributes or ownFormats):
                # Nothing but the pattern: just end the line
                print(file=file)
                continue
            print(' {', file=file)
            if ownFormats:
                print(indent2 + "'formats':",
                      repr(rule.formats) + ',', file=file)
            if rule.priority:
                print(indent2 + "'priority':",
                      repr(rule.priority) + ',', file=file)
            if rule.priorityOffset:
                print(indent2 + "'priorityOffset':",
                      repr(rule.priorityOffset) + ',', file=file)
            if rule.localAttributes:
                # Attributes coming from the pattern itself are not
                # written again.
                patternAttributes = tuple(rule.pattern.namedRegex())
                print(indent2 + "'attributes': {", file=file)
                for name, value in rule.localAttributes:
                    if name not in patternAttributes:
                        print(indent3 + repr(name) + ':',
                              repr(value) + ',', file=file)
                print(indent2 + '},', file=file)
            print(tab + '},', file=file)
        print('),', file=file)
    print(file=file)
def printHierarchy(self, file=sys.stdout):
    '''
    Print the ontology in a hierarchical way, with indentation.

    The rules of ``self.content[1].scanner`` are written one per line,
    each one followed by the rules of its own scanner, one level deeper.

    :param file: file object the text is written to.
    '''
    pending = [(rule, 0) for rule in self.content[1].scanner.rules]
    while pending:
        rule, depth = pending.pop(0)
        typeName = rule.type.name if rule.type else None
        print('* ' + ' ' * depth, typeName, ':', rule.pattern, file=file)
        if isinstance(rule, ScannerRule) and rule.scanner:
            # Children go to the front of the list so that they are
            # written right after their parent.
            pending[:0] = [(child, depth + 1)
                           for child in rule.scanner.rules]
def printFormats(self, file=sys.stdout):
    """
    Prints information about formats.

    Writes one ``newFormat( ... )`` line for each format of
    ``neuroDiskItems.formats``, an empty line, then one
    ``newFormatList( ... )`` line for each list of
    ``neuroDiskItems.formatLists``.

    :param file: file object the text is written to.
    """
    for fmt in six.itervalues(neuroDiskItems.formats):
        line = 'newFormat( ' + repr(fmt.name) + ', ( '
        for filePattern in fmt.getPatterns().patterns:
            line += "'"
            if filePattern.fileType is neuroDiskItems.Directory:
                line += 'd|'
            text = filePattern.pattern
            dotIndex = text.find('.')
            if dotIndex == -1:
                # No '.' in the pattern: the whole line built so far is
                # turned into a comment.
                line = '# ' + line + '???'
            else:
                line += text[dotIndex + 1:]
            line += "', "
        line += ') )'
        print(line, file=file)
    print(file=file)
    for formatList in six.itervalues(neuroDiskItems.formatLists):
        memberNames = tuple((f.name for f in formatList))
        print('newFormatList( ' + repr(formatList.name) + ', '
              + repr(memberNames) + ' )', file=file)