Source code for soma.aims.diff

#! /usr/bin/env python3
# -*- coding: utf-8 -*-
#
#  This software and supporting documentation are distributed by
#      Institut Federatif de Recherche 49
#      CEA/NeuroSpin, Batiment 145,
#      91191 Gif-sur-Yvette cedex
#      France
#
# Copyright CEA (2018-2023).
# Copyright Forschungszentrum Jülich GmbH (2017).
#
# Author: Yann Leprince <yann.leprince@ylep.fr>.
#
# This software is governed by the CeCILL licence under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/or redistribute the software under the terms of the CeCILL
# licence as circulated by CEA, CNRS and INRIA at the following URL:
# <http://www.cecill.info/>.
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the licence, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of scientific
# software, that may mean that it is complicated to manipulate, and that
# also therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL licence and that you accept its terms.

import numpy

from soma import aims

from . import graph_comparison


BUCKET_TYPES = tuple(getattr(aims, t) for t in dir(aims)
                     if t.startswith('BucketMap_')
                     or t.startswith('rc_ptr_BucketMap_'))
VOLUME_TYPES = tuple(getattr(aims, t) for t in dir(aims)
                     if t.startswith('Volume_')
                     or t.startswith('rc_ptr_Volume_'))
# Texture_?
TEXTURE_TYPES = tuple(getattr(aims, t) for t in dir(aims)
                      if t.startswith('TimeTexture_')
                      or t.startswith('rc_ptr_TimeTexture_'))
MESH_TYPES = tuple(getattr(aims, t) for t in dir(aims)
                   if t.startswith('AimsTimeSurface_')
                   or t.startswith('rc_ptr_AimsTimeSurface_'))

TESTABLE_TYPES = (
    (aims.Graph, aims.AffineTransformation3d)
    + BUCKET_TYPES
    + VOLUME_TYPES
    + TEXTURE_TYPES
    + MESH_TYPES
)

# Header fields containing variable data (e.g. UUIDs) are ignored
IGNORED_FIELDS = [
    'destination_referential',
    'referential',
    'source_referential',
    'uuid',
]


_TESTABLE_TYPES_STR = ['Graph', 'AffineTransformation3d',
                       'Bucket', 'Volume', 'Texture', 'Mesh']
_TESTABLE_EXTS = None

[docs] def testable_exts(): """Set of filename extensions that aims.diff is able to compare.""" global _TESTABLE_EXTS if _TESTABLE_EXTS is None: _TESTABLE_EXTS = set() for fmt in aims.supported_io_formats(_TESTABLE_TYPES_STR, 'r'): _TESTABLE_EXTS |= ( set(aims.Finder.extensions(fmt)) | set(aims.soma.DataSourceInfoLoader.extensions(fmt)) ) _TESTABLE_EXTS = frozenset(_TESTABLE_EXTS) return _TESTABLE_EXTS
def get_data_kind(data_type): # Names of data kinds are the human-readable AIMS names (see the list of # types for --input in AimsFileConvert --help). if data_type in BUCKET_TYPES: return 'Bucket' elif data_type in VOLUME_TYPES: return 'Volume' elif data_type in TEXTURE_TYPES: return 'Texture' elif data_type in MESH_TYPES: return 'Mesh' def diff_headers(reference_header, test_header): difference_found = False for key, reference_value in reference_header.items(): if key in IGNORED_FIELDS: continue try: test_value = test_header[key] except KeyError: print("Header field {0} is missing from the test header" "(reference value: {1}" .format(key, reference_value)) difference_found = True continue except TypeError as e: print("TypeError when reading {0}: {1}".format(key, e)) continue if test_value != reference_value: # TODO special case for NaN print("Header field {0} differs:\n" " Reference value: {1}\n" " Test value: {2}" .format(key, reference_value, test_value)) difference_found = True for key, test_value in test_header.items(): if key in IGNORED_FIELDS: continue if key not in reference_header: print("Header field {0} is present in the test header, " "not in the reference header (value: {1})" .format(key, test_value)) difference_found = True return difference_found def diff_volumes(reference_volume, test_volume, min_value=None, max_value=None): if reference_volume == test_volume: return False reference_arr = numpy.asarray(reference_volume) test_arr = numpy.asarray(test_volume) return diff_arrays(reference_arr, test_arr, min_value=min_value, max_value=max_value, text_bits={ 'array': 'volume', 'Arrays': 'Volumes', 'indent': '', }) DEFAULT_TEXT_BITS = { 'array': 'volume', 'Arrays': 'Volumes', 'indent': '', } def diff_arrays(reference_arr, test_arr, text_bits=DEFAULT_TEXT_BITS, min_value=None, max_value=None): if reference_arr.shape != test_arr.shape: print("{indent}{Arrays} have differing shapes:\n" "{indent} Shape of reference: {0}\n" "{indent} Shape of test: {1}" .format(reference_arr.shape, test_arr.shape, **text_bits)) return True # Only arrays with numeric dtypes can be compared. Return right away if # one array has a non-numeric dtype (dtypes with unsupported conversion to # NumPy, like Point3df before AIMS 4.7, are converted to Object). if (reference_arr.dtype.fields is not None and 'v' in reference_arr.dtype.fields): reference_arr = reference_arr['v'] if not numpy.issubdtype(reference_arr.dtype, numpy.number): print("{indent}A detailed data comparison cannot be done, because the " "NumPy data type for the reference {array} ({0}) is not numeric." .format(reference_arr.dtype, **text_bits)) return True if (test_arr.dtype.fields is not None and 'v' in test_arr.dtype.fields): test_arr = test_arr['v'] if not numpy.issubdtype(test_arr.dtype, numpy.number): print("{indent}A detailed data comparison cannot be done, because the " "NumPy data type for the test {array} ({0}) is not numeric." .format(test_arr.dtype, **text_bits)) return True try: common_dtype = numpy.promote_types(reference_arr.dtype, test_arr.dtype) except TypeError: common_dtype = numpy.void if common_dtype == numpy.void: print("{indent}{Arrays} have incompatible data types:\n" "{indent} Data type of reference: {0}\n" "{indent} Data type of test: {1}" .format(reference_arr.dtype, test_arr.dtype, **text_bits)) return True # broadcast + array_equal is the same as array_equiv, except we can pass # the equal_nan parameter. asarray is used to "copy" the array structure, # i.e. to avoid changing the shape of the original objects. reference_arr = numpy.asarray(reference_arr) test_arr = numpy.asarray(test_arr) numpy.broadcast_arrays(reference_arr, test_arr) if numpy.array_equal(reference_arr, test_arr, equal_nan=True): return False print("{indent}{Arrays} have different data:" .format(**text_bits)) # Do this test before converting to floating-point (no loss of precision) bool_diff = (reference_arr != test_arr) # Do the numerical comparisons using a floating-point type comparison_dtype = numpy.promote_types(common_dtype, numpy.float32) reference_arr = reference_arr.astype(comparison_dtype, casting='safe') test_arr = test_arr.astype(comparison_dtype, casting='safe') with numpy.errstate(invalid="ignore"): if min_value is not None: reference_arr[reference_arr < min_value] = numpy.nan test_arr[test_arr < min_value] = numpy.nan if max_value is not None: reference_arr[reference_arr > max_value] = numpy.nan test_arr[test_arr > max_value] = numpy.nan has_nans = False reference_nans = numpy.isnan(reference_arr) if numpy.any(reference_nans): bool_diff[reference_nans] = False has_nans = True del reference_nans test_nans = numpy.isnan(test_arr) if numpy.any(test_nans): bool_diff[test_nans] = False has_nans = True del test_nans total_diff_count = numpy.count_nonzero(bool_diff) print("{indent} {0} value(s) differ ({2:.02%} of {1})" .format(total_diff_count, '×'.join(str(n) for n in bool_diff.shape), total_diff_count / bool_diff.size, **text_bits)) if has_nans: print( "{indent} Breakdown of differing values:" "{indent} reference\n" "{indent} t │ NaN │ non-NaN │\n" "{indent} e ─────────┼───────────────┼───────────────┤\n" "{indent} s NaN │ {0:13d}{1:13d}\n" "{indent} t non-NaN │ {2:13d}{3:13d}\n".format( numpy.count_nonzero(numpy.logical_and( numpy.isnan(test_arr), numpy.isnan(reference_arr) )), numpy.count_nonzero(numpy.logical_and( numpy.isnan(test_arr), numpy.logical_not(numpy.isnan(reference_arr)) )), numpy.count_nonzero(numpy.logical_and( numpy.isnan(reference_arr), numpy.logical_not(numpy.isnan(test_arr)) )), numpy.count_nonzero(bool_diff), **text_bits )) del bool_diff # Print most different values and their coordinates MAX_DIFF_TO_PRINT = 5 diff_arr = numpy.abs(test_arr - reference_arr) # Do not show differences where one of the inputs is NaN. Note that we do # not test for diff_arr to be NaN, because NaN could be the result of Inf - # Inf, which is a useful difference to show. diff_arr[numpy.isnan(test_arr)] = 0 diff_arr[numpy.isnan(reference_arr)] = 0 max_diff_indices = numpy.argsort(diff_arr, axis=None) print( "{indent} " "Summary of the most different values (in absolute difference):\n" "{indent} " "│ Reference value │ Test value │ Abs. difference │ Coordinates\n" "{indent} " "├─────────────────┼─────────────────┼─────────────────┼─────────────" .format(**text_bits) ) for i in range(1, MAX_DIFF_TO_PRINT + 1): current_diff_index = max_diff_indices[-i] if diff_arr.flat[current_diff_index] == 0: break # only display values that actually differ print( "{indent}{0: ^17.17g}{1: ^17.17g}{2: ^17.17g}{3}" .format(reference_arr.flat[current_diff_index], test_arr.flat[current_diff_index], diff_arr.flat[current_diff_index], numpy.unravel_index(current_diff_index, diff_arr.shape), **text_bits) ) print("{indent} Min-max of reference: {0} -- {1}" .format(numpy.nanmin(reference_arr), numpy.nanmax(reference_arr), **text_bits)) print("{indent} Min-max of test: {0} -- {1}" .format(numpy.nanmin(test_arr), numpy.nanmax(test_arr), **text_bits)) abs_diff = numpy.abs(test_arr - reference_arr) print("{indent} Average absolute difference: {0}" .format(numpy.nanmean(abs_diff), **text_bits)) print("{indent} Min-max absolute difference: {0} -- {1}" .format(numpy.nanmin(abs_diff), numpy.nanmax(abs_diff), **text_bits)) print("{indent} Median absolute difference: {0}" .format(numpy.nanmedian(abs_diff), **text_bits)) print("{indent} Mean absolute difference: {0}" .format(numpy.nanmean(abs_diff), **text_bits)) print("{indent} RMS absolute difference: {0}" .format(numpy.sqrt(numpy.nanmean(numpy.square(abs_diff))), **text_bits)) with numpy.errstate(divide="ignore", invalid="ignore"): avg_rel_diff = 2 * numpy.nanmean(abs_diff / (numpy.abs(reference_arr) + numpy.abs(test_arr))) print("{indent} Average relative difference: {0:%}" .format(avg_rel_diff, **text_bits)) return True def diff_bucketmaps(reference_bucketmap, test_bucketmap, min_value=None, max_value=None): difference_found = False reference_keys = set(reference_bucketmap.keys()) test_keys = set(test_bucketmap.keys()) if reference_keys != test_keys: print("BucketMaps do not have the same set of keys " "(i.e. time points):\n" " Reference keys: {0}\n" " Test keys: {1}\n" .format(reference_keys, test_keys)) difference_found = True for key in set(reference_keys) & set(test_keys): difference_found = diff_single_bucket( reference_bucketmap, test_bucketmap, key, key, min_value=min_value, max_value=max_value) or difference_found return difference_found def diff_single_bucket(reference_bucketmap, test_bucketmap, reference_key, test_key, min_value=None, max_value=None): difference_found = False difference_found = diff_mesh_vectors( reference_bucketmap[reference_key].keys(), test_bucketmap[test_key].keys(), 'reference[{0}].keys()'.format(reference_key), 'test[{0}].keys()'.format(test_key), min_value=min_value, max_value=max_value, ) or difference_found if isinstance(reference_bucketmap, aims.BucketMap_VOID.Bucket): difference_found = diff_mesh_vectors( reference_bucketmap[reference_key], test_bucketmap[test_key], 'reference[{0}] bucket values'.format(reference_key), 'test[{0}] bucket values'.format(test_key), min_value=min_value, max_value=max_value, ) or difference_found return difference_found def diff_meshes(reference_mesh, test_mesh): difference_found = False reference_keys = set(reference_mesh.keys()) test_keys = set(test_mesh.keys()) if reference_keys != test_keys: print("Meshes do not have the same set of keys (i.e. time points):\n" " Reference keys: {0}\n" " Test keys: {1}\n" .format(reference_keys, test_keys)) difference_found = True for key in set(reference_keys) & set(test_keys): difference_found = diff_single_mesh(reference_mesh, test_mesh, key, key) or difference_found return difference_found def diff_textures(reference_texture, test_texture, min_value=None, max_value=None): difference_found = False reference_keys = set(reference_texture.keys()) test_keys = set(test_texture.keys()) if reference_keys != test_keys: print("Textures do not have the same set of keys (i.e. time points):\n" " Reference keys: {0}\n" " Test keys: {1}\n" .format(reference_keys, test_keys)) difference_found = True for key in set(reference_keys) & set(test_keys): difference_found = diff_single_texture( reference_texture, test_texture, key, key, min_value=min_value, max_value=max_value) or difference_found return difference_found def diff_single_mesh(reference_mesh, test_mesh, reference_key, test_key): difference_found = False difference_found = diff_mesh_vectors( reference_mesh.vertex(reference_key), test_mesh.vertex(test_key), 'reference.vertex({0})'.format(reference_key), 'test.vertex({0})'.format(test_key) ) or difference_found difference_found = diff_mesh_vectors( reference_mesh.polygon(reference_key), test_mesh.polygon(test_key), 'reference.polygon({0})'.format(reference_key), 'test.polygon({0})'.format(test_key) ) or difference_found difference_found = diff_mesh_vectors( reference_mesh.normal(reference_key), test_mesh.normal(test_key), 'reference.normal({0})'.format(reference_key), 'test.normal({0})'.format(test_key) ) or difference_found difference_found = diff_mesh_vectors( reference_mesh.texture(reference_key), test_mesh.texture(test_key), 'reference.texture({0})'.format(reference_key), 'test.texture({0})'.format(test_key) ) or difference_found return difference_found def diff_mesh_vectors(reference_vec, test_vec, reference_name, test_name, min_value=None, max_value=None): if len(reference_vec) != len(test_vec): print("Vectors {0} and {2} differ in size:\n" " Size of {0}: {1}\n" " Size of {2}: {3}" .format(reference_name, len(reference_vec), test_name, len(test_vec))) return True if reference_vec == test_vec: #if len(reference_vec) != 0: ## print the message only for non-empty vectors #print("Vectors {0} and {1} are identical." #.format(reference_name, test_name)) return False # Vectors may compare unequal but still be equivalent (AIMS bug # https://github.com/brainvisa/aims-free/issues/42) so we do the comparison # with NumPy before printing the message reference_arr = numpy.asarray(reference_vec) test_arr = numpy.asarray(test_vec) if reference_name == test_name: vector_name = reference_name else: vector_name = reference_name + '/' + test_name return diff_arrays(reference_arr, test_arr, min_value=min_value, max_value=max_value, text_bits={ 'array': 'vector {0}'.format(vector_name), 'Arrays': 'Vectors {0}'.format(vector_name), 'indent': '', }) def diff_single_texture(reference_texture, test_texture, reference_key, test_key, min_value=None, max_value=None): difference_found = False difference_found = diff_mesh_vectors( reference_texture[reference_key], test_texture[test_key], 'reference[{0}]'.format(reference_key), 'test[{0}]'.format(test_key), min_value=min_value, max_value=max_value, ) or difference_found return difference_found def diff_trm(reference_trm, test_trm): if reference_trm == test_trm: return False print("Transformations differ:\n" "======================:\n" "Reference transformation:\n{0}\n" "Test transformation:\n{1}\n" .format(reference_trm, test_trm)) return True
[docs] def diff_objects(reference_data, test_data, min_value=None, max_value=None, graph_max_label_diff=0): """Print the differences between two files read by AIMS.""" kind = get_data_kind(type(reference_data)) if get_data_kind(type(test_data)) != kind: print("Files contain different kinds of data:\n" " Kind of reference: {0}\n" " Kind of test: {1}" .format(kind, get_data_kind(type(test_data)) or repr(type(test_data)))) return True # From now on the data are guaranteed to be of the same kind (not # necessarily the same datatype) # Handle rc_ptr if hasattr(reference_data, "isNull"): if reference_data.isNull(): # Null == Null, non-null != null if hasattr(reference_data, "isNull"): return reference_data.isNull() else: return False else: reference_data = reference_data.get() if hasattr(test_data, "isNull"): if not test_data.isNull(): test_data = test_data.get() else: return False if (hasattr(reference_data, "header") and hasattr(test_data, "header")): difference_found = diff_headers(reference_data.header(), test_data.header()) #if not difference_found: #print("Headers are identical") data_difference_found = None if type(reference_data) in VOLUME_TYPES: data_difference_found = diff_volumes(reference_data, test_data, min_value=min_value, max_value=max_value) elif type(reference_data) in MESH_TYPES: data_difference_found = diff_meshes(reference_data, test_data) elif type(reference_data) in TEXTURE_TYPES: data_difference_found = diff_textures(reference_data, test_data, min_value=min_value, max_value=max_value) elif type(reference_data) in BUCKET_TYPES: data_difference_found = diff_bucketmaps(reference_data, test_data, min_value=min_value, max_value=max_value) elif type(reference_data) is aims.AffineTransformation3d: data_difference_found = diff_trm(reference_data, test_data) elif type(reference_data) is aims.Graph: difference_found = not graph_comparison.same_graphs( reference_data, test_data, verbose=True, max_label_diff=graph_max_label_diff) else: raise NotImplementedError("Comparison of {} is not implemented" .format(type(reference_data))) #if data_difference_found is not None and not data_difference_found: #print("Data are identical") difference_found = difference_found or data_difference_found return difference_found
[docs] def diff_files(reference_file_name, test_file_name, min_value=None, max_value=None): """Print the differences between two files read by AIMS.""" reference_data = aims.read(reference_file_name) test_data = aims.read(test_file_name) return diff_objects(reference_data, test_data, min_value=min_value, max_value=max_value)