def get_allen_natural_movie_data(session_id, brain_area, resize_width, save_path,
                                 cache_dir='../../ecephys_cache_dir/'):
    # Load cache file
    print('Loading cache dir, will download if it does not already exist')
    manifest_path = os.path.join(cache_dir, 'manifest.json')
    cache = EcephysProjectCache.from_warehouse(manifest=manifest_path)

    # Load up a particular recording session
    print('Loading session data')
    sessions_table = cache.get_session_table()
    session = cache.get_session_data(session_id)

    # Load stimulus tables (trial start/end times and stimulus conditions)
    print('Loading stimulus tables, may take a few minutes')
    stimulus_tables = [
        session.get_stimulus_table("natural_movie_one"),
        session.get_stimulus_table("natural_movie_three")
    ]

    # Load movie frames/data
    print('Loading movie frames, may take a few minutes')
    raw_movie_frames = [
        cache.get_natural_movie_template(1),
        cache.get_natural_movie_template(3)
    ]

    # Get the ids of units in the given brain area
    unit_ids = session.units[
        session.units["ecephys_structure_acronym"] == brain_area].index.to_numpy()

    # Get raw spike times (not chunked into clips yet)
    print('Getting spike times for each movie')
    presentation_ids_1 = session.stimulus_presentations.loc[(
        session.stimulus_presentations['stimulus_name'] == 'natural_movie_one'
    )].index.values
    spike_times_1 = session.presentationwise_spike_times(
        stimulus_presentation_ids=presentation_ids_1, unit_ids=unit_ids)
    presentation_ids_3 = session.stimulus_presentations.loc[(
        session.stimulus_presentations['stimulus_name'] == 'natural_movie_three'
    )].index.values
    spike_times_3 = session.presentationwise_spike_times(
        stimulus_presentation_ids=presentation_ids_3, unit_ids=unit_ids)

    neural_data = []
    power = []
    split_half = []
    spike_rate = []
    noise_ceiling = []

    # Loop through each unit to get processed data
    for idx, unit_id in enumerate(unit_ids):
        print('Starting unit', idx + 1, '/', len(unit_ids))

        # First get spike counts per frame for each film (one or three) and block (0 or 1)
        _, spikes_1_0, spikes_r_1_0 = get_spike_counts_by_frame(
            session, spike_times_1, 'natural_movie_one', 0, brain_area, unit_id)
        _, spikes_1_1, spikes_r_1_1 = get_spike_counts_by_frame(
            session, spike_times_1, 'natural_movie_one', 1, brain_area, unit_id)
        _, spikes_3_0, spikes_r_3_0 = get_spike_counts_by_frame(
            session, spike_times_3, 'natural_movie_three', 0, brain_area, unit_id)
        _, spikes_3_1, spikes_r_3_1 = get_spike_counts_by_frame(
            session, spike_times_3, 'natural_movie_three', 1, brain_area, unit_id)

        # Join the two blocks of each movie together
        spikes_r_1 = np.concatenate([spikes_r_1_0, spikes_r_1_1], axis=0)
        spikes_r_3 = np.concatenate([spikes_r_3_0, spikes_r_3_1], axis=0)

        # Get signal and noise power for each movie, then average across movies
        sp_1, np_1, _ = sahani_quick(spikes_r_1)
        sp_3, np_3, _ = sahani_quick(spikes_r_3)
        sp_ = (sp_1 + sp_3) / 2
        np_ = (np_1 + np_3) / 2

        # Average noise/signal power ratio across the two movies
        sp_np_ratio = ((np_1 / sp_1) + (np_3 / sp_3)) / 2

        # Get split-half correlation
        r_1 = split_half_r(spikes_r_1)
        r_3 = split_half_r(spikes_r_3)
        r = (r_1 + r_3) / 2

        # Get noise ceiling (noise ceiling, explained variance, other noise terms)
        nc_1, ev_1, on_1 = get_noise_ceiling(spikes_r_1)
        nc_3, ev_3, on_3 = get_noise_ceiling(spikes_r_3)
        noise_ceiling_mean = (nc_1 + nc_3) / 2, (ev_1 + ev_3) / 2, (on_1 + on_3) / 2

        # Average spike counts across both blocks of each movie
        spikes_1 = np.mean(spikes_r_1, axis=0)
        spikes_3 = np.mean(spikes_r_3, axis=0)

        # Smooth both with a Gaussian kernel
        spikes_1_smoothed = ndimage.gaussian_filter1d(spikes_1, sigma=1 / 2)
        spikes_3_smoothed = ndimage.gaussian_filter1d(spikes_3, sigma=1 / 2)

        # Get spike rate (spikes/frame)
        spike_rate_1 = np.mean(spikes_1)
        spike_rate_3 = np.mean(spikes_3)
        spike_rate_mean = (spike_rate_1 + spike_rate_3) / 2

        # Now divide these neural responses into 50-frame chunks
        # along with corresponding 50-frame chunks of (resized) visual stimuli/movies
        stimuli, unit_responses = get_chunked_data(
            session, raw_movie_frames, [spikes_1_smoothed, spikes_3_smoothed],
            resize_width, True)
        stimuli_nonbp, unit_responses = get_chunked_data(
            session, raw_movie_frames, [spikes_1_smoothed, spikes_3_smoothed],
            resize_width, False)

        neural_data.append(unit_responses)
        power.append([sp_, np_, sp_np_ratio])
        split_half.append(r)
        spike_rate.append(spike_rate_mean)
        noise_ceiling.append(noise_ceiling_mean)

    neural_data = np.array(neural_data)
    power = np.array(power)
    split_half = np.array(split_half)
    spike_rate = np.array(spike_rate)
    noise_ceiling = np.array(noise_ceiling)

    np.save(save_path, {
        "stimuli": stimuli,
        "stimuli_nonbp": stimuli_nonbp,
        "neural_data": neural_data,
        "power": power,
        "split_half": split_half,
        "spike_rate": spike_rate,
        "noise_ceiling": noise_ceiling
    }, allow_pickle=True)
    print('Saved data at', save_path)

    return stimuli, neural_data, power
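

# Example usage (a minimal sketch, not part of the original script): the session id
# below appears elsewhere in this collection, but the brain area, resize width, and
# save path are illustrative placeholders.
if __name__ == '__main__':
    stimuli, neural_data, power = get_allen_natural_movie_data(
        session_id=791319847,        # placeholder Brain Observatory session id
        brain_area='VISp',           # placeholder cortical area
        resize_width=32,             # placeholder width of resized movie frames
        save_path='natural_movie_VISp_data.npy')
    print('Processed', neural_data.shape[0], 'units')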
import os

import numpy as np
import pandas as pd
from pandas import ExcelWriter
from allensdk.brain_observatory.ecephys.ecephys_session import EcephysSession
from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache
from keras.layers import Dense, Dropout, LSTM, BatchNormalization, Flatten, TimeDistributed
from keras.models import Sequential
from keras.utils import to_categorical
import tensorflow as tf
from keras.optimizers import Adam
import decoding_functions
from openpyxl import Workbook, load_workbook

data_dir = '/Users/bioel/PycharmProjects/untitled4/ecephys_cache_dir'
manifest_path = os.path.join(data_dir, 'manifest.json')
cache = EcephysProjectCache.from_warehouse(manifest=manifest_path)
session_table = cache.get_session_table()
all_sessions_all = session_table.loc[session_table.session_type == 'brain_observatory_1.1'].index

# Toggle these slices in case some sessions elicit errors:
all_sessions_pre = all_sessions_all[0:28]
all_sessions_post = all_sessions_all[29:]
#######################################################
all_sessions = np.append(all_sessions_pre, all_sessions_post)

wb = Workbook()
SVM_acc = np.empty(len(all_sessions))
SNN_acc = np.empty(len(all_sessions))
DNN_acc = np.empty(len(all_sessions))
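
# A minimal sketch (not from the original script) of how the per-session loop and the
# Workbook above could be tied together. The decoding itself lives in the local
# decoding_functions module, whose interface is not shown here, so it is left as a
# placeholder comment; the output filename is also a placeholder.
ws = wb.active
ws.append(['session_id', 'SVM_acc', 'SNN_acc', 'DNN_acc'])
for i, sess_id in enumerate(all_sessions):
    # ... run the SVM / shallow-NN / deep-NN decoders for this session here and
    # store their accuracies in SVM_acc[i], SNN_acc[i], DNN_acc[i] ...
    ws.append([int(sess_id), float(SVM_acc[i]), float(SNN_acc[i]), float(DNN_acc[i])])
wb.save('decoding_accuracies.xlsx')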
def getSummaryData(dataDirectory):
    print('Getting cache...')
    manifest_path = os.path.join(dataDirectory, "manifest.json")
    cache = EcephysProjectCache.from_warehouse(manifest=manifest_path)
    sessions = cache.get_session_table()
    return cache, sessions
def get_allen_sessions(
        data_directory='/mnt/Data/Datasets/allen/ecephys_cache_dir/'):
    manifest_path = os.path.join(data_directory, "manifest.json")
    cache = EcephysProjectCache.from_warehouse(manifest=manifest_path)
    sessions = cache.get_session_table()
    return sessions, cache
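

# Minimal usage sketch (not in the original module): list the available sessions.
# The default data_directory above is machine-specific and will usually need changing.
if __name__ == '__main__':
    sessions, cache = get_allen_sessions()
    print(sessions.head())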
import numpy as np
import pandas as pd
import pprint
from mpl_toolkits import mplot3d
import matplotlib.pyplot as plt
from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache

pp = pprint.PrettyPrinter(depth=10).pprint
op = pprint.PrettyPrinter(depth=11).pprint

# Cut NaN regions
cutNaNRegions = True

# Get session and cache
print('Getting session data...')
cache = EcephysProjectCache.from_warehouse(
    manifest="./example_ecephys_project_cache/manifest.json")
sessions = cache.get_session_table()
session = cache.get_session_data(session_id=791319847)

# Print probe properties
print('Showing possible probes...')
channelInfo = cache.get_channels()
probeIDs = session.probes.index.values
for probe_id in probeIDs:
    probeChannels = channelInfo.loc[channelInfo.ecephys_probe_id == probe_id].index
    structure_acronyms, intervals = session.channel_structure_intervals(probeChannels)
    print('Probe: %s : %s' % (probe_id, structure_acronyms))

# Select probe and get LFP data
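# A minimal sketch of the step announced above (an assumption, not code from the
# original script): pick one probe and pull its LFP, using the same session.get_lfp()
# call used elsewhere in this collection. The probe choice here is arbitrary.
selected_probe_id = probeIDs[0]           # arbitrary choice for illustration
lfp = session.get_lfp(selected_probe_id)  # xarray.DataArray indexed by time and channel
print('LFP array for probe %s has shape %s' % (selected_probe_id, str(lfp.shape)))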
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import platform
import scipy.stats as stats
import seaborn as sns
import signal
from allensdk.brain_observatory.ecephys.ecephys_project_cache import EcephysProjectCache
from allensdk.brain_observatory.ecephys import ecephys_session

# manifest_path is expected to be defined before this call (not shown in this fragment)
cache = EcephysProjectCache.fixed(manifest=manifest_path)

"""
Neuropixels dataset from the Allen Institute
"""

# Load data: pick the data root based on the platform this is running on.
platstring = platform.platform()
if "Darwin" in platstring:
    # OS X
    data_root = "/Volumes/Brain2019/"
elif "Windows" in platstring:
    # Windows (replace with the drive letter of the USB drive)
    data_root = "E:/"
elif "amzn1" in platstring:
    # Then on AWS
    data_root = "/data/"
else:
    # then your own linux platform
def get_all_timeseries_to_df(sessionIDs=[], regions=[], datatype="both"):
    """
    Purpose: retrieve LFP and/or spike time data from across sessions, restricted by region.

    Inputs:
        sessionIDs: a list of session IDs from the sessions object. If left empty [],
            the function goes through all sessions, checks whether a given session contains
            the regions specified in the 'regions' argument, and adds it to the list of
            session IDs.
        regions: a list of regions (as labeled in the manual_structure_acronym column:
            ['None', 'TH', 'DG', 'CA', 'VISmma', 'MB', 'VISpm', 'VISp', 'VISl', 'VISrl',
             'VISam', 'VIS', 'VISal', 'VISmmp'])
        datatype: "lfp", "spikes", or "both". Defines whether to retrieve just the LFP data
            for the sessionIDs and regions, just the spike data, or both. Defaults to "both".

    Returns: a pandas DataFrame for each datatype - if datatype is "both", provide a
        variable name for EACH DataFrame.
        The LFP DataFrame contains: one row for each channel ID, the timeseries array as a
        list, the channel's vertical, horizontal, and structure labels, the probe ID that
        the channel belongs to, the mouse genotype, and the session ID.
        The spikes DataFrame contains: one row for each unit ID, the unit's timeseries array
        as a list, the channel on which the unit was recorded, all of that channel's QC data,
        the channel's vertical, horizontal, and structure labels, the probe ID that the
        channel belongs to, the mouse genotype, and the session ID.

    Example:
        v1_lfps, v1_spikes = get_all_timeseries_to_df(sessionIDs=sessions.index[0:3],
                                                      regions=['VISp'], datatype="both")
        returns the LFP and spike data for every channel recorded in VISp in the first
        3 sessions listed in the sessions DataFrame.
    """
    # Get cache and info about all the sessions
    cache = EcephysProjectCache.fixed(manifest=manifest_path)
    sessions = cache.get_sessions()
    allchannelsinfo = cache.get_channels()
    allunitsinfo = cache.get_units()

    # If no session ID is passed in, find all sessions that contain the regions
    # and append those session IDs to the sessionIDs list
    if len(sessionIDs) == 0:
        sessionIDs = []
        for i in np.arange(len(sessions.structure_acronyms)):
            sessionid = sessions.structure_acronyms.index[i]
            if any(elem in sessions.structure_acronyms[sessionid] for elem in regions):
                sessionIDs.append(sessionid)

    # Double-check that the regions specified actually appear in the sessionIDs specified
    for sessionID in sessionIDs:
        for elem in regions:
            if elem not in sessions.structure_acronyms[sessionID]:
                print("Session {} does not contain region {}.".format(sessionID, elem))

    all_lfps_df = pd.DataFrame()
    all_spikes_df = pd.DataFrame()

    # Grab channel and unit info for each region
    for sessionID in sessionIDs:
        session_info = cache.get_session_data(sessionID)
        session_channels = session_info.channels
        session_probes = session_info.probes
        session_units = session_info.units
        for region in regions:
            region_lfps = {}
            region_spikes = {}
            region_lfps_df = pd.DataFrame()
            region_spikes_df = pd.DataFrame()
            region_channelinfo = session_channels[
                session_channels.manual_structure_acronym == region]
            region_units = session_units[
                session_units.peak_channel_id.isin(region_channelinfo.index)]

            # Depending on the datatype, grab the LFPs and/or spike times
            if datatype == "both":
                session_spike_times = session_info.spike_times
                for probeid in session_channels.probe_id[
                        session_channels.manual_structure_acronym == region].unique():
                    print('retrieving probe {} from session {} cache'.format(probeid, sessionID))
                    probe_lfp = session_info.get_lfp(probeid)
                    region_channels_lfp = probe_lfp.loc[
                        dict(channel=probe_lfp.channel.isin(region_channelinfo.index))]
                    for chan in region_channels_lfp["channel"].values:
                        print('appending channel {} from probe {} for area {}'.format(
                            chan, probeid, region))
                        # Select this channel's LFP trace from the xarray DataArray
                        region_lfps[chan] = region_channels_lfp.sel(channel=chan).values
                for unit in region_units.index:
                    print('appending unit {} from session {} for area {}'.format(
                        unit, sessionID, region))
                    region_spikes[unit] = session_spike_times[unit]
            elif datatype == "lfp":
                for probeid in session_channels.probe_id[
                        session_channels.manual_structure_acronym == region].unique():
                    print('retrieving probe {} from session {} cache'.format(probeid, sessionID))
                    probe_lfp = session_info.get_lfp(probeid)
                    region_channels_lfp = probe_lfp.loc[
                        dict(channel=probe_lfp.channel.isin(region_channelinfo.index))]
                    for chan in region_channels_lfp["channel"].values:
                        print('appending channel {} from probe {} for area {}'.format(
                            chan, probeid, region))
                        region_lfps[chan] = region_channels_lfp.sel(channel=chan).values
            elif datatype == "spikes":
                session_spike_times = session_info.spike_times
                for unit in region_units.index:
                    print('appending unit {} from session {} for area {}'.format(
                        unit, sessionID, region))
                    region_spikes[unit] = session_spike_times[unit]

            # Build up the dataset after each region is added
            # LFP dataset
            print('putting all the lfps from region {} and session {} into the larger '
                  'dataset if lfps were requested'.format(region, sessionID))
            region_lfps_df['channel_id'] = list(region_lfps.keys())
            region_lfps_df['lfp_timeseries'] = list(region_lfps.values())
            region_lfps_df = pd.merge(
                region_lfps_df,
                region_channelinfo.loc[:, 'manual_structure_acronym':'probe_id'],
                left_on='channel_id', right_on=region_channelinfo.index)
            region_lfps_df['sessionID'] = sessionID
            region_lfps_df['genotype'] = sessions.genotype[sessions.index == sessionID].unique()[0]

            # Spike dataset
            print('putting all the spikes from region {} and session {} into the larger '
                  'dataset if spikes were requested'.format(region, sessionID))
            region_spikes_df['unit_id'] = list(region_spikes.keys())
            region_spikes_df['spike_timeseries'] = list(region_spikes.values())
            region_spikes_df = pd.merge(
                region_spikes_df, region_units,
                left_on='unit_id', right_on=region_units.index)
            region_spikes_df['sessionID'] = sessionID
            region_spikes_df['genotype'] = sessions.genotype[sessions.index == sessionID].unique()[0]

            # Append to the overarching datasets that aren't tied to a specific session ID or region
            all_lfps_df = all_lfps_df.append(region_lfps_df)
            all_spikes_df = all_spikes_df.append(region_spikes_df)

    if datatype == "both":
        return all_lfps_df, all_spikes_df
    elif datatype == "lfp":
        return all_lfps_df
    elif datatype == "spikes":
        return all_spikes_df
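

if __name__ == "__main__":
    # Minimal usage sketch mirroring the docstring example above. It assumes the
    # module-level manifest_path points at a valid ecephys manifest.json; passing an
    # empty sessionIDs list lets the function find all sessions containing VISp.
    v1_lfps, v1_spikes = get_all_timeseries_to_df(sessionIDs=[], regions=['VISp'],
                                                  datatype="both")
    print(v1_lfps.head())
    print(v1_spikes.head())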