"""Main user interface for FISSA.
Authors:
- Sander W Keemink (swkeemink@scimail.eu)
- Scott C Lowe
"""
from __future__ import print_function
from past.builtins import basestring
import collections
import glob
import os.path
import sys
import warnings
import numpy as np
from scipy.io import savemat
from . import datahandler
from . import deltaf
from . import neuropil as npil
from . import roitools
try:
from multiprocessing import Pool
has_multiprocessing = True
except BaseException:
warnings.warn('Multiprocessing library is not installed, using single ' +
'core instead. To use multiprocessing install it by: ' +
'pip install multiprocessing')
has_multiprocessing = False
[docs]def separate_func(inputs):
"""Extraction function for multiprocessing.
Parameters
----------
inputs : list
list of inputs
0. Array with signals to separate
1. Alpha input to npil.separate
2. Method
3. Current ROI number
Returns
-------
numpy.ndarray
The raw separated traces.
numpy.ndarray
The separated traces matched to the primary signal.
numpy.ndarray
Mixing matrix.
dict
Metadata for the convergence result.
"""
X = inputs[0]
alpha = inputs[1]
method = inputs[2]
Xsep, Xmatch, Xmixmat, convergence = npil.separate(
X, method, maxiter=20000, tol=1e-4, maxtries=1, alpha=alpha
)
ROInum = inputs[3]
print('Finished ROI number ' + str(ROInum))
return Xsep, Xmatch, Xmixmat, convergence
[docs]class Experiment():
"""Does all the steps for FISSA."""
[docs] def __init__(self, images, rois, folder, nRegions=4,
expansion=1, alpha=0.1, ncores_preparation=None,
ncores_separation=None, method='nmf',
lowmemory_mode=False, datahandler_custom=None):
"""Initialisation. Set the parameters for your Fissa instance.
Parameters
----------
images : str or list
The raw recording data.
Should be one of:
- the path to a directory containing TIFF files (string),
- an explicit list of TIFF files (list of strings),
- a list of array_like data already loaded into memory, each shaped
`(frames, y-coords, x-coords)`.
Note that each TIFF/array is considered a single trial.
rois : str or list
The roi definitions.
Should be one of:
- the path to a directory containing ImageJ ZIP files (string),
- the path of a single ImageJ ZIP file (string),
- a list of ImageJ ZIP files (list of strings),
- a list of arrays, each encoding a ROI polygons,
- a list of lists of binary arrays, each representing a ROI mask.
This can either be a single roiset for all trials, or a different
roiset for each trial.
folder : str
Output path to a directory in which the extracted data will
be stored.
nRegions : int, optional
Number of neuropil regions to draw. Use a higher number for
densely labelled tissue. Default is 4.
expansion : float, optional
Expansion factor for the neuropil region, relative to the
ROI area. Default is 1. The total neuropil area will be
`nRegions * expansion * area(ROI)`.
alpha : float, optional
Sparsity regularizaton weight for NMF algorithm. Set to zero to
remove regularization. Default is 0.1. (Not used for ICA method.)
ncores_preparation : int, optional (default: None)
Sets the number of subprocesses to be used during the data
preparation steps (ROI and subregions definitions, data
extraction from tifs, etc.).
If set to `None` (default), there will be as many subprocesses
as there are threads or cores on the machine. Note that this
behaviour can, especially for the data preparation step,
be very memory-intensive.
ncores_separation : int, optional (default: None)
Same as `ncores_preparation`, but for the separation step.
Note that this step requires less memory per subprocess, and
hence can often be set higher than `ncores_preparation`.
method : {'nmf', 'ica'}, optional
Which blind source-separation method to use. Either `'nmf'`
for non-negative matrix factorization, or `'ica'` for
independent component analysis. Default (recommended) is
`'nmf'`.
lowmemory_mode : bool, optional
If `True`, FISSA will load TIFF files into memory frame-by-frame
instead of holding the entire TIFF in memory at once. This
option reduces the memory load, and may be necessary for very
large inputs. Default is `False`.
datahandler_custom : object, optional
A custom datahandler for handling ROIs and calcium data can
be given here. See datahandler.py (the default handler) for
an example.
"""
if isinstance(images, basestring):
self.images = sorted(glob.glob(os.path.join(images, '*.tif*')))
elif isinstance(images, list):
self.images = images
else:
raise ValueError('images should either be string or list')
if isinstance(rois, basestring):
if rois[-3:] == 'zip':
self.rois = [rois] * len(self.images)
else:
self.rois = sorted(glob.glob(os.path.join(rois, '*.zip')))
elif isinstance(rois, list):
self.rois = rois
if len(rois) == 1: # if only one roiset is specified
self.rois *= len(self.images)
else:
raise ValueError('rois should either be string or list')
global datahandler
if lowmemory_mode:
from . import datahandler_framebyframe as datahandler
if datahandler_custom is not None:
datahandler = datahandler_custom
# define class variables
self.folder = folder
self.raw = None
self.info = None
self.mixmat = None
self.sep = None
self.result = None
self.deltaf_raw = None
self.deltaf_result = None
self.nRegions = nRegions
self.expansion = expansion
self.alpha = alpha
self.nTrials = len(self.images) # number of trials
self.means = []
self.ncores_preparation = ncores_preparation
self.ncores_separation = ncores_separation
self.method = method
# check if any data already exists
if not os.path.exists(folder):
os.makedirs(folder)
if os.path.isfile(os.path.join(folder, 'preparation.npy')):
if os.path.isfile(os.path.join(folder, 'separated.npy')):
self.separate()
else:
self.separation_prep()
[docs] def separation_prep(self, redo=False):
"""Prepare and extract the data to be separated.
For each trial, performs the following steps:
- Load in data as arrays
- Load in ROIs as masks
- Grow and seaparate ROIs to define neuropil regions
- Using neuropil and original ROI regions, extract traces from data
After running this you can access the raw data (i.e. pre-separation)
as `self.raw` and `self.rois`. self.raw is a list of arrays.
`self.raw[cell][trial]` gives you the traces of a specific cell and
trial, across cell and neuropil regions. `self.roi_polys` is a list of
lists of arrays. `self.roi_polys[cell][trial][region][0]` gives you the
polygon for the region for a specific cell, trial and region. `region=0`
is the cell, and `region>0` gives the different neuropil regions.
For separateable masks, it is possible multiple outlines are found,
which can be accessed as `self.roi_polys[cell][trial][region][i]`,
where `i` is the outline index.
Parameters
----------
redo : bool, optional
If `False`, we load previously prepared data when possible.
If `True`, we re-run the preparation, even if it has previously
been run. Default is `False`.
"""
# define filename where data will be present
fname = os.path.join(self.folder, 'preparation.npy')
# try to load data from filename
if not redo:
try:
nCell, raw, roi_polys = np.load(fname)
print('Reloading previously prepared data...')
except BaseException:
redo = True
if redo:
print('Doing region growing and data extraction....')
# define inputs
inputs = [0] * self.nTrials
for trial in range(self.nTrials):
inputs[trial] = [self.images[trial], self.rois[trial],
self.nRegions, self.expansion]
# Do the extraction
if has_multiprocessing and sys.version_info < (3, 0):
# define pool
pool = Pool(self.ncores_preparation)
# run extraction
results = pool.map(extract_func, inputs)
pool.close()
pool.join()
elif has_multiprocessing:
with Pool(self.ncores_preparation) as pool:
# run extraction
results = pool.map(extract_func, inputs)
else:
results = [0] * self.nTrials
for trial in range(self.nTrials):
results[trial] = extract_func(inputs[trial])
# get number of cells
nCell = len(results[0][1])
# predefine data structures
raw = [[None for t in range(self.nTrials)] for c in range(nCell)]
roi_polys = np.copy(raw)
# store results
for trial in range(self.nTrials):
self.means += [results[trial][2]]
for cell in range(nCell):
raw[cell][trial] = results[trial][0][cell]
roi_polys[cell][trial] = results[trial][1][cell]
# save
np.save(fname, (nCell, raw, roi_polys))
# store relevant info
self.nCell = nCell # number of cells
self.raw = raw
self.roi_polys = roi_polys
# Wipe outputs of separate(), as they no longer match self.raw
self.info = None
self.mixmat = None
self.sep = None
self.result = None
# Wipe outputs of calc_deltaf(), as they no longer match self.raw
self.deltaf_raw = None
self.deltaf_result = None
[docs] def separate(self, redo_prep=False, redo_sep=False):
"""Separate all the trials with FISSA algorithm.
After running `separate`, data can be found as follows:
self.sep
Raw separation output, without being matched. Signal `i` for
a specific cell and trial can be found as
`self.sep[cell][trial][i,:]`.
self.result
Final output, in order of presence in cell ROI.
Signal `i` for a specific cell and trial can be found at
`self.result[cell][trial][i, :]`.
Note that the ordering is such that `i = 0` is the signal
most strongly present in the ROI, and subsequent entries
are in diminishing order.
self.mixmat
The mixing matrix (how to go between `self.separated` and
`self.raw` from the `separation_prep()` function).
self.info
Information about separation routine, iterations needed, etc.
Parameters
----------
redo_prep : bool, optional
Whether to redo the preparation. Default is `False.` Note that
if this is true, we set `redo_sep = True` as well.
redo_sep : bool, optional
Whether to redo the separation. Default is `False`. Note that
this parameter is ignored if `redo_prep` is set to `True`.
"""
# Do data preparation
self.separation_prep(redo_prep)
if redo_prep:
redo_sep = True
# Define filename to store data in
fname = os.path.join(self.folder, 'separated.npy')
if not redo_sep:
try:
info, mixmat, sep, result = np.load(fname)
print('Reloading previously separated data...')
except BaseException:
redo_sep = True
# separate data, if necessary
if redo_sep:
print('Doing signal separation....')
# predefine data structures
sep = [[None for t in range(self.nTrials)]
for c in range(self.nCell)]
result = np.copy(sep)
mixmat = np.copy(sep)
info = np.copy(sep)
# loop over cells to define function inputs
inputs = [0] * self.nCell
for cell in range(self.nCell):
# initiate concatenated data
X = np.concatenate(self.raw[cell], axis=1)
# check for below 0 values
if X.min() < 0:
warnings.warn('Found values below zero in signal, ' +
'setting minimum to 0.')
X -= X.min()
# update inputs
inputs[cell] = [X, self.alpha, self.method, cell]
# Do the extraction
if has_multiprocessing and sys.version_info < (3, 0):
# define pool
pool = Pool(self.ncores_separation)
# run separation
results = pool.map(separate_func, inputs)
pool.close()
pool.join()
elif has_multiprocessing:
with Pool(self.ncores_separation) as pool:
# run separation
results = pool.map(separate_func, inputs)
else:
results = [0] * self.nCell
for cell in range(self.nCell):
results[cell] = separate_func(inputs[cell])
# read results
for cell in range(self.nCell):
curTrial = 0
Xsep, Xmatch, Xmixmat, convergence = results[cell]
for trial in range(self.nTrials):
nextTrial = curTrial + self.raw[cell][trial].shape[1]
sep[cell][trial] = Xsep[:, curTrial:nextTrial]
result[cell][trial] = Xmatch[:, curTrial:nextTrial]
curTrial = nextTrial
# store other info
mixmat[cell][trial] = Xmixmat
info[cell][trial] = convergence
# save
np.save(fname, (info, mixmat, sep, result))
# store
self.info = info
self.mixmat = mixmat
self.sep = sep
self.result = result
# Wipe deltaf_result, as it no longer matches self.raw
self.deltaf_result = None
[docs] def calc_deltaf(self, freq, use_raw_f0=True, across_trials=True):
"""Calculate deltaf/f0 for raw and result traces.
The results can be accessed as self.deltaf_raw and self.deltaf_result.
self.deltaf_raw is only the ROI trace instead of the traces across all
subregions.
Parameters
----------
freq : float
Imaging frequency, in Hz.
use_raw_f0 : bool, optional
If `True` (default), use an f0 estimate from the raw ROI trace
for both raw and result traces. If `False`, use individual f0
estimates for each of the traces.
across_trials : bool, optional
If `True`, we estimate a single baseline f0 value across all
trials. If `False`, each trial will have their own baseline f0,
and df/f0 value will be relative to the trial-specific f0.
Default is `True`.
"""
deltaf_raw = [[None for t in range(self.nTrials)]
for c in range(self.nCell)]
deltaf_result = np.copy(deltaf_raw)
# loop over cells
for cell in range(self.nCell):
# if deltaf should be calculated across all trials
if across_trials:
# get concatenated traces
raw_conc = np.concatenate(self.raw[cell], axis=1)[0, :]
result_conc = np.concatenate(self.result[cell], axis=1)
# calculate deltaf/f0
raw_f0 = deltaf.findBaselineF0(raw_conc, freq)
raw_conc = (raw_conc - raw_f0) / raw_f0
result_f0 = deltaf.findBaselineF0(
result_conc, freq, 1
).T[:, None]
if use_raw_f0:
result_conc = (result_conc - result_f0) / raw_f0
else:
result_conc = (result_conc - result_f0) / result_f0
# store deltaf/f0s
curTrial = 0
for trial in range(self.nTrials):
nextTrial = curTrial + self.raw[cell][trial].shape[1]
signal = raw_conc[curTrial:nextTrial]
deltaf_raw[cell][trial] = signal
signal = result_conc[:, curTrial:nextTrial]
deltaf_result[cell][trial] = signal
curTrial = nextTrial
else:
# loop across trials
for trial in range(self.nTrials):
# get current signals
raw_sig = self.raw[cell][trial][0, :]
result_sig = self.result[cell][trial]
# calculate deltaf/fo
raw_f0 = deltaf.findBaselineF0(raw_sig, freq)
result_f0 = deltaf.findBaselineF0(
result_sig, freq, 1
).T[:, None]
result_f0[result_f0 < 0] = 0
raw_sig = (raw_sig - raw_f0) / raw_f0
if use_raw_f0:
result_sig = (result_sig - result_f0) / raw_f0
else:
result_sig = (result_sig - result_f0) / result_f0
# store deltaf/f0s
deltaf_raw[cell][trial] = raw_sig
deltaf_result[cell][trial] = result_sig
self.deltaf_raw = deltaf_raw
self.deltaf_result = deltaf_result
[docs] def save_to_matlab(self):
"""Save the results to a matlab file.
Can be found in `folder/matlab.mat`.
This will give you a filename.mat file which if loaded in Matlab gives
the following structs: ROIs, result, raw.
If df/f0 was calculated, these will also be stored as `df_result`
and `df_raw`, which will have the same format as result and raw.
These can be interfaced with as follows, for cell 0, trial 0:
- `ROIs.cell0.trial0{1}` polygon for the ROI
- `ROIs.cell0.trial0{2}` polygon for first neuropil region
- `result.cell0.trial0(1,:)` final extracted cell signal
- `result.cell0.trial0(2,:)` contaminating signal
- `raw.cell0.trial0(1,:)` raw measured celll signal
- `raw.cell0.trial0(2,:)` raw signal from first neuropil region
"""
# define filename
fname = os.path.join(self.folder, 'matlab.mat')
# initialize dictionary to save
M = collections.OrderedDict()
def reformat_dict_for_matlab(orig_dict):
new_dict = collections.OrderedDict()
# loop over cells and trial
for cell in range(self.nCell):
# get current cell label
c_lab = 'cell' + str(cell)
# update dictionary
new_dict[c_lab] = collections.OrderedDict()
for trial in range(self.nTrials):
# get current trial label
t_lab = 'trial' + str(trial)
# update dictionary
new_dict[c_lab][t_lab] = orig_dict[cell][trial]
return new_dict
M['ROIs'] = reformat_dict_for_matlab(self.roi_polys)
M['raw'] = reformat_dict_for_matlab(self.raw)
M['result'] = reformat_dict_for_matlab(self.result)
if getattr(self, 'deltaf_raw', None) is not None:
M['df_raw'] = reformat_dict_for_matlab(self.deltaf_raw)
if getattr(self, 'deltaf_result', None) is not None:
M['df_result'] = reformat_dict_for_matlab(self.deltaf_result)
savemat(fname, M)