Source code for fissa.core

"""Main user interface for FISSA.

Authors:
    - Sander W Keemink (swkeemink@scimail.eu)
    - Scott C Lowe
"""

from __future__ import print_function
from past.builtins import basestring

import collections
import glob
import os.path
import sys
import warnings

import numpy as np
from scipy.io import savemat

from . import datahandler
from . import deltaf
from . import neuropil as npil
from . import roitools

try:
    from multiprocessing import Pool
    has_multiprocessing = True
except ImportError:
    warnings.warn('Multiprocessing library is not installed, using single ' +
                  'core instead. To use multiprocessing, install it with: ' +
                  'pip install multiprocessing')
    has_multiprocessing = False


def extract_func(inputs):
    """Extract data using multiprocessing.

    Parameters
    ----------
    inputs : list
        list of inputs

        0. image array
        1. the rois
        2. number of neuropil regions
        3. how much larger the neuropil region should be than the central ROI

    Returns
    -------
    dict
        Data across cells.
    dict
        Polygons for each ROI.
    numpy.ndarray
        Mean image of the trial.
    """
    image = inputs[0]
    rois = inputs[1]
    nNpil = inputs[2]
    expansion = inputs[3]

    # get data as arrays and rois as masks
    curdata = datahandler.image2array(image)
    base_masks = datahandler.rois2masks(rois, curdata)

    # get the mean image
    mean = datahandler.getmean(curdata)

    # predefine dictionaries
    data = collections.OrderedDict()
    roi_polys = collections.OrderedDict()

    # get neuropil masks and extract signals
    for cell in range(len(base_masks)):
        # neuropil masks
        npil_masks = roitools.getmasks_npil(base_masks[cell], nNpil=nNpil,
                                            expansion=expansion)
        # add all current masks together
        masks = [base_masks[cell]] + npil_masks

        # extract traces
        data[cell] = datahandler.extracttraces(curdata, masks)

        # store ROI outlines
        roi_polys[cell] = [''] * len(masks)
        for i in range(len(masks)):
            roi_polys[cell][i] = roitools.find_roi_edge(masks[i])

    return data, roi_polys, mean
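
# Illustrative sketch of a single-trial call to extract_func (not part of the
# original module; the TIFF and ZIP paths below are hypothetical). The input
# list mirrors the one built by Experiment.separation_prep():
#
#     data, roi_polys, mean = extract_func(['trial0.tif', 'rois.zip', 4, 1])
#     # data[cell] is an array of traces (row 0 is the ROI, rows 1..4 the
#     # neuropil regions); roi_polys[cell][region] holds the region outlines.
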
def separate_func(inputs):
    """Separate signals using multiprocessing.

    Parameters
    ----------
    inputs : list
        list of inputs

        0. Array with signals to separate
        1. Alpha input to npil.separate
        2. Method
        3. Current ROI number

    Returns
    -------
    numpy.ndarray
        The raw separated traces.
    numpy.ndarray
        The separated traces matched to the primary signal.
    numpy.ndarray
        Mixing matrix.
    dict
        Metadata for the convergence result.
    """
    X = inputs[0]
    alpha = inputs[1]
    method = inputs[2]

    Xsep, Xmatch, Xmixmat, convergence = npil.separate(
        X, method, maxiter=20000, tol=1e-4, maxtries=1, alpha=alpha
    )
    ROInum = inputs[3]
    print('Finished ROI number ' + str(ROInum))
    return Xsep, Xmatch, Xmixmat, convergence
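
# Illustrative sketch of a direct call to separate_func (not part of the
# original module; `X` is a hypothetical (regions x timepoints) array of
# traces for one cell, e.g. the concatenation built in Experiment.separate()):
#
#     Xsep, Xmatch, Xmixmat, convergence = separate_func([X, 0.1, 'nmf', 0])
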
class Experiment():
    """Does all the steps for FISSA."""

    def __init__(self, images, rois, folder, nRegions=4, expansion=1,
                 alpha=0.1, ncores_preparation=None, ncores_separation=None,
                 method='nmf', lowmemory_mode=False, datahandler_custom=None):
        """Initialisation. Set the parameters for your FISSA instance.

        Parameters
        ----------
        images : str or list
            The raw recording data. Should be one of:

            - the path to a directory containing TIFF files (string),
            - an explicit list of TIFF files (list of strings),
            - a list of array_like data already loaded into memory, each
              shaped `(frames, y-coords, x-coords)`.

            Note that each TIFF/array is considered a single trial.
        rois : str or list
            The ROI definitions. Should be one of:

            - the path to a directory containing ImageJ ZIP files (string),
            - the path of a single ImageJ ZIP file (string),
            - a list of ImageJ ZIP files (list of strings),
            - a list of arrays, each encoding ROI polygons,
            - a list of lists of binary arrays, each representing a ROI mask.

            This can either be a single roiset for all trials, or a
            different roiset for each trial.
        folder : str
            Output path to a directory in which the extracted data will
            be stored.
        nRegions : int, optional
            Number of neuropil regions to draw. Use a higher number for
            densely labelled tissue. Default is 4.
        expansion : float, optional
            Expansion factor for the neuropil region, relative to the
            ROI area. Default is 1. The total neuropil area will be
            `nRegions * expansion * area(ROI)`.
        alpha : float, optional
            Sparsity regularization weight for the NMF algorithm. Set to
            zero to remove regularization. Default is 0.1.
            (Not used for the ICA method.)
        ncores_preparation : int, optional (default: None)
            Sets the number of subprocesses to be used during the data
            preparation steps (ROI and subregion definitions, data
            extraction from TIFFs, etc.).
            If set to `None` (default), there will be as many subprocesses
            as there are threads or cores on the machine. Note that this
            behaviour can, especially for the data preparation step, be
            very memory-intensive.
        ncores_separation : int, optional (default: None)
            Same as `ncores_preparation`, but for the separation step.
            Note that this step requires less memory per subprocess, and
            hence can often be set higher than `ncores_preparation`.
        method : {'nmf', 'ica'}, optional
            Which blind source-separation method to use. Either `'nmf'`
            for non-negative matrix factorization, or `'ica'` for
            independent component analysis. Default (recommended) is
            `'nmf'`.
        lowmemory_mode : bool, optional
            If `True`, FISSA will load TIFF files into memory
            frame-by-frame instead of holding the entire TIFF in memory
            at once. This option reduces the memory load, and may be
            necessary for very large inputs. Default is `False`.
        datahandler_custom : object, optional
            A custom datahandler for handling ROIs and calcium data can
            be given here. See datahandler.py (the default handler) for
            an example.

        """
        if isinstance(images, basestring):
            self.images = sorted(glob.glob(os.path.join(images, '*.tif*')))
        elif isinstance(images, list):
            self.images = images
        else:
            raise ValueError('images should either be string or list')

        if isinstance(rois, basestring):
            if rois[-3:] == 'zip':
                self.rois = [rois] * len(self.images)
            else:
                self.rois = sorted(glob.glob(os.path.join(rois, '*.zip')))
        elif isinstance(rois, list):
            self.rois = rois
            if len(rois) == 1:  # if only one roiset is specified
                self.rois *= len(self.images)
        else:
            raise ValueError('rois should either be string or list')

        global datahandler
        if lowmemory_mode:
            from . import datahandler_framebyframe as datahandler
        if datahandler_custom is not None:
            datahandler = datahandler_custom

        # define class variables
        self.folder = folder
        self.raw = None
        self.info = None
        self.mixmat = None
        self.sep = None
        self.result = None
        self.deltaf_raw = None
        self.deltaf_result = None

        self.nRegions = nRegions
        self.expansion = expansion
        self.alpha = alpha
        self.nTrials = len(self.images)  # number of trials
        self.means = []
        self.ncores_preparation = ncores_preparation
        self.ncores_separation = ncores_separation
        self.method = method

        # check if any data already exists
        if not os.path.exists(folder):
            os.makedirs(folder)
        if os.path.isfile(os.path.join(folder, 'preparation.npy')):
            if os.path.isfile(os.path.join(folder, 'separated.npy')):
                self.separate()
            else:
                self.separation_prep()
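
    # Illustrative construction sketch (not part of the original class). The
    # variables below are hypothetical: `movies` is a list of
    # (frames, y, x) arrays and `roi_masks` is a list of binary mask arrays
    # forming a single roiset reused for every trial:
    #
    #     exp = Experiment(movies, [roi_masks], 'fissa_output',
    #                      nRegions=4, expansion=1, alpha=0.1, method='nmf')
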
    def separation_prep(self, redo=False):
        """Prepare and extract the data to be separated.

        For each trial, performs the following steps:

        - Load in data as arrays
        - Load in ROIs as masks
        - Grow and separate ROIs to define neuropil regions
        - Using neuropil and original ROI regions, extract traces from data

        After running this you can access the raw data (i.e.
        pre-separation) as `self.raw` and `self.roi_polys`. `self.raw` is
        a list of lists of arrays. `self.raw[cell][trial]` gives you the
        traces of a specific cell and trial, across the cell and neuropil
        regions. `self.roi_polys` is a list of lists of arrays.
        `self.roi_polys[cell][trial][region][0]` gives you the polygon
        for the region for a specific cell, trial and region. `region=0`
        is the cell, and `region>0` gives the different neuropil regions.
        For separable masks, it is possible multiple outlines are found,
        which can be accessed as
        `self.roi_polys[cell][trial][region][i]`, where `i` is the
        outline index.

        Parameters
        ----------
        redo : bool, optional
            If `False`, we load previously prepared data when possible.
            If `True`, we re-run the preparation, even if it has
            previously been run. Default is `False`.

        """
        # define filename where data will be present
        fname = os.path.join(self.folder, 'preparation.npy')

        # try to load data from filename
        if not redo:
            try:
                nCell, raw, roi_polys = np.load(fname)
                print('Reloading previously prepared data...')
            except BaseException:
                redo = True

        if redo:
            print('Doing region growing and data extraction....')
            # define inputs
            inputs = [0] * self.nTrials
            for trial in range(self.nTrials):
                inputs[trial] = [self.images[trial], self.rois[trial],
                                 self.nRegions, self.expansion]

            # Do the extraction
            if has_multiprocessing and sys.version_info < (3, 0):
                # define pool
                pool = Pool(self.ncores_preparation)

                # run extraction
                results = pool.map(extract_func, inputs)
                pool.close()
                pool.join()
            elif has_multiprocessing:
                with Pool(self.ncores_preparation) as pool:
                    # run extraction
                    results = pool.map(extract_func, inputs)
            else:
                results = [0] * self.nTrials
                for trial in range(self.nTrials):
                    results[trial] = extract_func(inputs[trial])

            # get number of cells
            nCell = len(results[0][1])

            # predefine data structures
            raw = [[None for t in range(self.nTrials)]
                   for c in range(nCell)]
            roi_polys = np.copy(raw)

            # store results
            for trial in range(self.nTrials):
                self.means += [results[trial][2]]
                for cell in range(nCell):
                    raw[cell][trial] = results[trial][0][cell]
                    roi_polys[cell][trial] = results[trial][1][cell]

            # save
            np.save(fname, (nCell, raw, roi_polys))

        # store relevant info
        self.nCell = nCell  # number of cells
        self.raw = raw
        self.roi_polys = roi_polys

        # Wipe outputs of separate(), as they no longer match self.raw
        self.info = None
        self.mixmat = None
        self.sep = None
        self.result = None
        # Wipe outputs of calc_deltaf(), as they no longer match self.raw
        self.deltaf_raw = None
        self.deltaf_result = None
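
    # Illustrative sketch of inspecting the preparation output (not part of
    # the original class; `exp` is a hypothetical Experiment instance):
    #
    #     exp.separation_prep()
    #     trace = exp.raw[0][0]      # cell 0, trial 0
    #     roi_trace = trace[0, :]    # row 0 is the central ROI
    #     npil_trace = trace[1, :]   # rows 1..nRegions are neuropil regions
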
    def separate(self, redo_prep=False, redo_sep=False):
        """Separate all the trials with the FISSA algorithm.

        After running `separate`, data can be found as follows:

        self.sep
            Raw separation output, without being matched. Signal `i` for
            a specific cell and trial can be found as
            `self.sep[cell][trial][i, :]`.
        self.result
            Final output, in order of presence in the cell ROI. Signal
            `i` for a specific cell and trial can be found at
            `self.result[cell][trial][i, :]`. Note that the ordering is
            such that `i = 0` is the signal most strongly present in the
            ROI, and subsequent entries are in diminishing order.
        self.mixmat
            The mixing matrix (how to go between `self.sep` and
            `self.raw` from the `separation_prep()` function).
        self.info
            Information about the separation routine, iterations needed,
            etc.

        Parameters
        ----------
        redo_prep : bool, optional
            Whether to redo the preparation. Default is `False`. Note
            that if this is `True`, we set `redo_sep = True` as well.
        redo_sep : bool, optional
            Whether to redo the separation. Default is `False`. Note
            that this parameter is ignored if `redo_prep` is set to
            `True`.

        """
        # Do data preparation
        self.separation_prep(redo_prep)
        if redo_prep:
            redo_sep = True

        # Define filename to store data in
        fname = os.path.join(self.folder, 'separated.npy')
        if not redo_sep:
            try:
                info, mixmat, sep, result = np.load(fname)
                print('Reloading previously separated data...')
            except BaseException:
                redo_sep = True

        # separate data, if necessary
        if redo_sep:
            print('Doing signal separation....')
            # predefine data structures
            sep = [[None for t in range(self.nTrials)]
                   for c in range(self.nCell)]
            result = np.copy(sep)
            mixmat = np.copy(sep)
            info = np.copy(sep)

            # loop over cells to define function inputs
            inputs = [0] * self.nCell
            for cell in range(self.nCell):
                # initiate concatenated data
                X = np.concatenate(self.raw[cell], axis=1)

                # check for below 0 values
                if X.min() < 0:
                    warnings.warn('Found values below zero in signal, ' +
                                  'setting minimum to 0.')
                    X -= X.min()

                # update inputs
                inputs[cell] = [X, self.alpha, self.method, cell]

            # Do the extraction
            if has_multiprocessing and sys.version_info < (3, 0):
                # define pool
                pool = Pool(self.ncores_separation)

                # run separation
                results = pool.map(separate_func, inputs)
                pool.close()
                pool.join()
            elif has_multiprocessing:
                with Pool(self.ncores_separation) as pool:
                    # run separation
                    results = pool.map(separate_func, inputs)
            else:
                results = [0] * self.nCell
                for cell in range(self.nCell):
                    results[cell] = separate_func(inputs[cell])

            # read results
            for cell in range(self.nCell):
                curTrial = 0
                Xsep, Xmatch, Xmixmat, convergence = results[cell]
                for trial in range(self.nTrials):
                    nextTrial = curTrial + self.raw[cell][trial].shape[1]
                    sep[cell][trial] = Xsep[:, curTrial:nextTrial]
                    result[cell][trial] = Xmatch[:, curTrial:nextTrial]
                    curTrial = nextTrial

                    # store other info
                    mixmat[cell][trial] = Xmixmat
                    info[cell][trial] = convergence

            # save
            np.save(fname, (info, mixmat, sep, result))

        # store
        self.info = info
        self.mixmat = mixmat
        self.sep = sep
        self.result = result
        # Wipe deltaf_result, as it no longer matches self.raw
        self.deltaf_result = None
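
    # Illustrative sketch of running the separation and reading the output
    # (not part of the original class; `exp` is a hypothetical Experiment):
    #
    #     exp.separate()
    #     cell_signal = exp.result[0][0][0, :]     # strongest signal, cell 0, trial 0
    #     contamination = exp.result[0][0][1, :]   # next-strongest (contaminating) signal
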
    def calc_deltaf(self, freq, use_raw_f0=True, across_trials=True):
        """Calculate deltaf/f0 for raw and result traces.

        The results can be accessed as `self.deltaf_raw` and
        `self.deltaf_result`. Note that `self.deltaf_raw` contains only
        the central ROI trace, not the traces of all subregions.

        Parameters
        ----------
        freq : float
            Imaging frequency, in Hz.
        use_raw_f0 : bool, optional
            If `True` (default), use an f0 estimate from the raw ROI
            trace for both raw and result traces. If `False`, use
            individual f0 estimates for each of the traces.
        across_trials : bool, optional
            If `True`, we estimate a single baseline f0 value across all
            trials. If `False`, each trial will have its own baseline f0,
            and the df/f0 values will be relative to the trial-specific
            f0. Default is `True`.

        """
        deltaf_raw = [[None for t in range(self.nTrials)]
                      for c in range(self.nCell)]
        deltaf_result = np.copy(deltaf_raw)

        # loop over cells
        for cell in range(self.nCell):
            # if deltaf should be calculated across all trials
            if across_trials:
                # get concatenated traces
                raw_conc = np.concatenate(self.raw[cell], axis=1)[0, :]
                result_conc = np.concatenate(self.result[cell], axis=1)

                # calculate deltaf/f0
                raw_f0 = deltaf.findBaselineF0(raw_conc, freq)
                raw_conc = (raw_conc - raw_f0) / raw_f0
                result_f0 = deltaf.findBaselineF0(
                    result_conc, freq, 1
                ).T[:, None]
                if use_raw_f0:
                    result_conc = (result_conc - result_f0) / raw_f0
                else:
                    result_conc = (result_conc - result_f0) / result_f0

                # store deltaf/f0s
                curTrial = 0
                for trial in range(self.nTrials):
                    nextTrial = curTrial + self.raw[cell][trial].shape[1]
                    signal = raw_conc[curTrial:nextTrial]
                    deltaf_raw[cell][trial] = signal
                    signal = result_conc[:, curTrial:nextTrial]
                    deltaf_result[cell][trial] = signal
                    curTrial = nextTrial
            else:
                # loop across trials
                for trial in range(self.nTrials):
                    # get current signals
                    raw_sig = self.raw[cell][trial][0, :]
                    result_sig = self.result[cell][trial]

                    # calculate deltaf/f0
                    raw_f0 = deltaf.findBaselineF0(raw_sig, freq)
                    result_f0 = deltaf.findBaselineF0(
                        result_sig, freq, 1
                    ).T[:, None]
                    result_f0[result_f0 < 0] = 0
                    raw_sig = (raw_sig - raw_f0) / raw_f0
                    if use_raw_f0:
                        result_sig = (result_sig - result_f0) / raw_f0
                    else:
                        result_sig = (result_sig - result_f0) / result_f0

                    # store deltaf/f0s
                    deltaf_raw[cell][trial] = raw_sig
                    deltaf_result[cell][trial] = result_sig

        self.deltaf_raw = deltaf_raw
        self.deltaf_result = deltaf_result
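
    # Illustrative sketch of a df/f0 calculation at a hypothetical imaging
    # rate of 10 Hz (not part of the original class):
    #
    #     exp.calc_deltaf(freq=10)
    #     df_cell = exp.deltaf_result[0][0][0, :]  # df/f0 of cell 0, trial 0
    #     df_raw = exp.deltaf_raw[0][0]            # df/f0 of the raw ROI trace
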
    def save_to_matlab(self):
        """Save the results to a MATLAB file.

        Can be found in `folder/matlab.mat`.

        This will give you a matlab.mat file which, if loaded in MATLAB,
        gives the following structs: ROIs, result, raw.

        If df/f0 was calculated, these will also be stored as `df_result`
        and `df_raw`, which will have the same format as result and raw.

        These can be interfaced with as follows, for cell 0, trial 0:

        - `ROIs.cell0.trial0{1}` polygon for the ROI
        - `ROIs.cell0.trial0{2}` polygon for first neuropil region
        - `result.cell0.trial0(1,:)` final extracted cell signal
        - `result.cell0.trial0(2,:)` contaminating signal
        - `raw.cell0.trial0(1,:)` raw measured cell signal
        - `raw.cell0.trial0(2,:)` raw signal from first neuropil region
        """
        # define filename
        fname = os.path.join(self.folder, 'matlab.mat')

        # initialize dictionary to save
        M = collections.OrderedDict()

        def reformat_dict_for_matlab(orig_dict):
            new_dict = collections.OrderedDict()

            # loop over cells and trials
            for cell in range(self.nCell):
                # get current cell label
                c_lab = 'cell' + str(cell)
                # update dictionary
                new_dict[c_lab] = collections.OrderedDict()

                for trial in range(self.nTrials):
                    # get current trial label
                    t_lab = 'trial' + str(trial)
                    # update dictionary
                    new_dict[c_lab][t_lab] = orig_dict[cell][trial]

            return new_dict

        M['ROIs'] = reformat_dict_for_matlab(self.roi_polys)
        M['raw'] = reformat_dict_for_matlab(self.raw)
        M['result'] = reformat_dict_for_matlab(self.result)
        if getattr(self, 'deltaf_raw', None) is not None:
            M['df_raw'] = reformat_dict_for_matlab(self.deltaf_raw)
        if getattr(self, 'deltaf_result', None) is not None:
            M['df_result'] = reformat_dict_for_matlab(self.deltaf_result)

        savemat(fname, M)
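

# Illustrative end-to-end sketch of the intended workflow (not part of the
# original module; the data/ROI paths and the 10 Hz imaging frequency are
# hypothetical assumptions):
if __name__ == '__main__':
    experiment = Experiment('exampleData/20150529',
                            'exampleData/20150429.zip',
                            'fissa_output')
    experiment.separate()            # runs separation_prep() first if needed
    experiment.calc_deltaf(freq=10)  # df/f0 at an assumed 10 Hz frame rate
    experiment.save_to_matlab()      # writes fissa_output/matlab.mat
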