# Source code for data_slicer.dataloading

"""
Tools for putting different data formats into suitable numpy arrays.
"""
import os
import pickle
from argparse import Namespace
from warnings import catch_warnings, simplefilter

import numpy as np

class Dataloader() :
    """ 
    Base dataloader class (interface) from which others inherit some 
    methods (specifically the ``__repr__()`` function). 

    Subclasses set the class attribute *name* and override 
    :meth:`load_data`.
    """
    # Short label of the loader; used by ``__repr__`` and ``print_m``.
    name = 'Base'

    def __init__(self, *args, **kwargs) :
        # Nothing to set up; all arguments are accepted and ignored.
        pass

    def __repr__(self, *args, **kwargs) :
        return '<class Dataloader_{}>'.format(self.name)

    def print_m(self, *messages) :
        """ Print message to console, adding the dataloader name. """
        prefix = '[Dataloader {}]'.format(self.name)
        print(prefix, *messages)
    
    def load_data(self, *args, **kwargs) :
        """ Method stub to be overwritten by subclasses. Return data in the 
        form of an argparse.Namespace object *D* with the following structure: 

        ======  ============================================================
        D.data  np.array containing the data.
        D.axes  list of length len(D.data.shape). Contains a 1d np.array 
                representing each axis (or *None*)
        ======  ============================================================

        This base implementation always raises *NotImplementedError*.
        """
        message = ('{} is an abstract base class. Use an '
                   'appropriate subclass instead.').format(type(self))
        raise NotImplementedError(message)

class Dataloader_Pickle(Dataloader) :
    """ Confer documentation of 
    :func:`~data_slicer.dataloading.Dataloader_Pickle.load_data()`. 
    """
    name = 'Pickle'

    def load_data(self, filename) :
        """ Load data that has been saved using python's `pickle` module. 
        The data is assumed to be one of the following:

        * a naked np.array,
        * a dictionary containing the key *data* and either the key *axes* 
          or all of the keys *xaxis*, *yaxis* and *zaxis*,
        * an argparse.Namespace instance containing *data* and, 
          optionally, *axes*.

        **Returns**

        An argparse.Namespace with the attributes *data* and *axes*. For a 
        naked array *axes* is a list of three *None*; for a Namespace 
        lacking *axes* the attribute is set to *None* and the unpickled 
        Namespace itself is returned.

        **Raises**

        ==========  ========================================================
        ValueError  a pickled dictionary contains neither *axes* nor the 
                    full set *xaxis*, *yaxis*, *zaxis*.
        KeyError    a pickled dictionary has no *data* key.
        TypeError   the pickled object is a Namespace without *data* or is 
                    of an unsupported type.
        ==========  ========================================================
        """
        # NOTE(security): unpickling can execute arbitrary code. Only use 
        # this loader on files from a trusted source.
        with open(filename, 'rb') as f :
            filedata = pickle.load(f)
        
        if isinstance(filedata, np.ndarray) :
            # A naked array carries no axis information
            return Namespace(data=filedata, axes=3*[None])

        if isinstance(filedata, dict) :
            data = filedata['data']
            if 'axes' in filedata :
                axes = filedata['axes']
            else :
                # Only fall back to the other allowed definition of the 
                # axes when *axes* is absent. (Previously this fallback was 
                # always attempted, so dictionaries providing only *axes* 
                # were wrongly rejected.)
                try :
                    axes = [filedata[i+'axis'] for i in 'xyz']
                except KeyError :
                    # Both methods did not work - raise an error
                    raise ValueError(('The pickled dictionary either needs '
                                      'to contain the key *axes* or all of '
                                      'the keys *xaxis*, *yaxis* and '
                                      '*zaxis*.'))
            return Namespace(data=data, axes=axes)

        if isinstance(filedata, Namespace) :
            # Ensure all needed fields are present
            fields = vars(filedata)
            if 'data' not in fields :
                raise TypeError('Invalid Namespace object.')
            if 'axes' not in fields :
                filedata.axes = None
            return filedata

        raise TypeError('Filetype not understood.')

class Dataloader_3dtxt(Dataloader) :
    """ Confer documentation of 
    :func:`~data_slicer.dataloading.Dataloader_3dtxt.load_data()`. 
    """
    name = '3d txt'

    def load_data(self, filename) :
        """ Load data of shape (nx, ny, nz) that is stored in a .txt file in 
        the format (z varies fastest, x slowest)::

            #Z        Y        X        I(X, Y, Z)
            z0        y0       x0       I(0, 0, 0)
            z1        y0       x0       I(0, 0, 1)
            z2        y0       x0       I(0, 0, 2)
            ...
            z(nz-1)   y0       x0       I(0, 0, nz-1)
            z0        y1       x0       I(0, 1, 0)
            z1        y1       x0       I(0, 1, 1)
            ...
            z(nz-1)   y(ny-1)  x0       I(0, ny-1, nz-1)
            z0        y0       x1       I(1, 0, 0)
            z1        y0       x1       I(1, 0, 1)
            ...
            z(nz-1)   y(ny-1)  x(nx-1)  I(nx-1, ny-1, nz-1)

        **Returns**

        An argparse.Namespace with *data* (array of shape (nx, ny, nz)) and 
        *axes* (the list [xaxis, yaxis, zaxis]).

        **Raises**

        ==========  ========================================================
        ValueError  the number of rows does not equal nx*ny*nz, where each 
                    length is the number of distinct values in the 
                    respective column.
        ==========  ========================================================
        """
        # ndmin=2 keeps the array 2d even if the file holds just one row, 
        # so the column indexing below does not fail for a 1x1x1 dataset.
        data = np.loadtxt(filename, ndmin=2)

        # Get the length of the x, y and z axes by counting the number of 
        # unique elements in the respective column.
        nz = len(np.unique(data[:,0]))
        ny = len(np.unique(data[:,1]))
        nx = len(np.unique(data[:,2]))

        n_rows = data.shape[0]
        if nx*ny*nz != n_rows :
            raise ValueError(('Number of rows ({}) does not match the '
                              'inferred shape ({}, {}, {}).').format(
                              n_rows, nx, ny, nz))

        # z varies fastest: the first *nz* rows hold one full z axis
        zaxis = data[:nz,0]
        # y changes every nz'th row; only go up to ny*nz
        yaxis = data[:ny*nz:nz,1]
        # x changes every ny*nz'th row
        xaxis = data[::ny*nz,2]
 
        res = data[:,3].reshape(nx, ny, nz)

        # Return an argparse.Namespace
        return Namespace(data=res, axes=[xaxis, yaxis, zaxis])

# Dataloader classes that the module-level load_data() function iterates 
# over, in this order.
registered_loaders = [Dataloader_Pickle, Dataloader_3dtxt]

# Function to try all dataloaders in registered_loaders
def load_data(filename, exclude=None, suppress_warnings=False) :
    """ Try to load some dataset *filename* by iterating through 
    `registered_loaders` and applying the respective dataloader's 
    load_data method. If it works: great. If not, try with the next 
    dataloader. 
    Collects and prints all raised exceptions in case that no dataloader 
    succeeded.

    **Parameters**

    =================  ========================================================
    filename           str; path to the file to load.
    exclude            str or list of str or None; *name* attribute(s) of 
                       the dataloaders that should be skipped.
    suppress_warnings  boolean; if True, warnings issued while trying the 
                       dataloaders are ignored.
    =================  ========================================================

    **Returns**

    Whatever the first succeeding dataloader's load_data method returns.

    **Raises**

    =================  ========================================================
    FileNotFoundError  *filename* does not exist.
    Exception          none of the (non-excluded) dataloaders succeeded.
    =================  ========================================================
    """ 
    # Sanity check: does the given path even exist in the filesystem?
    if not os.path.exists(filename) :
        raise FileNotFoundError(filename) 

    # Normalize *exclude*: a single string is packed into a list, None 
    # means that nothing is excluded.
    if exclude is None :
        exclude = []
    elif isinstance(exclude, str) :
        exclude = [exclude]
    
    # Keep track of all exceptions in case no loader succeeds
    exceptions = dict()

    # catch_warnings restores the previous warning filters on exit, so the 
    # 'ignore' filter only lasts for the duration of the loading attempts.
    with catch_warnings() :
        if suppress_warnings :
            simplefilter('ignore')
        for dataloader in registered_loaders :
            # Instantiate a dataloader object
            dl = dataloader()

            if dl.name in exclude : 
                continue

            # Try loading the data
            try :
                namespace = dl.load_data(filename)
            except Exception as e :
                # Remember the exception and try the next dl
                exceptions[dl] = e
                continue

            # Reaching this point means this dataloader succeeded
            print('Loaded data with {}.'.format(dl))
            return namespace

    # Reaching this point means something went wrong. Print all exceptions.
    for dl, e in exceptions.items() :
        print(dl)
        print('Exception {}: {}'.format(type(e), e))

    raise Exception('Could not load data {}.'.format(filename))

# Convenience for creating txt files
[docs]def three_d_to_txt(outfilename, data, axes=3*[None], force=False) :
    """ Create a txt file that can be read by 
    :class:`~data_slicer.dataloading.Dataloader_txt`.

    **Parameters**

    ===========  ==============================================================
    outfilename  str; filename and/or path to the file to write into.
    data         np.array; 3d array of the data.
    axes         list; [xaxis, yaxis, zaxis]. Any of these can be *None*, in 
                 which case incremental integers will be used.
    force        boolean; Whether or not to overwrite an existing file. If 
                 *False* and a file of *outfilename* exists, an Exception is 
                 raised.
    ===========  ==============================================================
    """
    x, y, z = data.shape
    # Set all undefined axes to pixels
    for i,ax in enumerate(axes) :
        if ax is None :
            axes[i] = range(data.shape[i])

    # Choose the opening mode
    if force :
        mode = 'w'
    else :
        mode = 'x'

    # Write the file
    with open(outfilename, mode) as f :
        # Prepare the format string
        fmt = 4*'{:>15} '
        fmt += '\n'
        # Write a header line
        f.write('#' + fmt.format('z', 'y', 'x', 'data'))
        # Append a space for alignment
        fmt = ' ' + fmt
        for i in range(x) :
            for j in range(y) :
                for k in range(z) :
                    line = fmt.format(axes[2][k], axes[1][j], axes[0][i], 
                                      data[i,j,k])
                    f.write(line)

# Function to create a python pickle file from a data namespace
def dump(D, filename, force=False) :
    """ Wrapper for :func:`pickle.dump`. If a file of the given name 
    already exists and *force* is not True, the user is asked on the 
    console whether it should be overwritten; anything but *y* or *yes* 
    (case insensitive) leaves the file untouched.

    **Parameters**

    ========  ==================================================================
    D         python object to be stored.
    filename  str; name of the output file to create.
    force     boolean; if True, overwrite existing files without asking.
    ========  ==================================================================
    """
    # Ask before overwriting an existing file
    if not force and os.path.isfile(filename) :
        question = 'File <{}> exists. Overwrite it? (y/N)'.format(filename)
        # Surrounding whitespace in the typed answer is ignored
        answer = input(question).strip().lower()
        # If the answer is anything but a clear affirmative, stop here
        if answer not in ('y', 'yes') :
            print('Not overwriting existing file.')
            return

    with open(filename, 'wb') as f :
        pickle.dump(D, f)

    print('Wrote to file <{}>.'.format(filename))

def load_pickle(filename) :
    """ Shorthand for loading python objects stored in pickle files.

    **Parameters**

    ========  ==================================================================
    filename  str; name of file to load.
    ========  ==================================================================

    **Returns**

    The object that was stored in the file.
    """
    # NOTE(security): unpickling can execute arbitrary code. Only load 
    # files from a trusted source.
    with open(filename, 'rb') as f :
        return pickle.load(f)

if __name__=="__main__" :
    # Small round trip demo: write a 2x2x3 array to a txt file, load it 
    # again through the generic loader and print both for comparison.
    demo_axes = [[11, 12], [21, 22], [31, 32, 33]]
    demo_data = np.array([[[0, 0, 0], [1, 2, 3]], 
                          [[1, 1, 1], [11, 22, 33]]])
    demo_file = 'foofoo.txt'
    three_d_to_txt(demo_file, demo_data, axes=demo_axes, force=True)
    loaded = load_data(demo_file)
    for item in (demo_data, loaded.data, loaded.axes) :
        print(item)