def getAllFeatures(train, mapper):
    print "this is getAllFeatures"
    # every record has a cluster value calculated by lda
    w2c_f, w2c_w = 10, 14
    lda_dict_1 = util.read_dict(util.features_prefix + 'id_lda_256.pkl')
    lda_dict_2 = util.read_dict(util.features_prefix + 'id_lda_512.pkl')
    k_mean_dict_1 = util.read_dict(util.features_prefix + 'c_k_all_64.pkl')
    k_mean_dict_2 = util.read_dict(util.features_prefix + 'c_k_all_128.pkl')
    sentence_dict_path = util.txt_prefix + 'id_sentences.pkl'
    word2vec_path = util.txt_prefix + str(w2c_f) + 'features_1minwords_' + str(w2c_w) + 'context.pkl'
    sentence_dic = util.read_dict(sentence_dict_path)
    model = Word2Vec.load(word2vec_path)

    train_X = train[features]
    train_X = mapper.transform(train_X)  # .values
    new_train_X = []
    for i in xrange(len(train_X)):
        id = train_X[i][0]
        lda_1 = lda_dict_1[id]
        lda_2 = lda_dict_2[id]
        s = sentence_dic.get(id)
        f = np.concatenate(([train_X[i][1:].astype(np.float32)],
                            [sentence_to_matrix_vec(s, model, w2c_f, k_mean_dict_1, k_mean_dict_2)]), axis=1)[0]
        f = np.concatenate(([f], [[lda_1, lda_2]]), axis=1)[0]
        new_train_X.append(f)
    new_train_X = np.array(new_train_X)
    return new_train_X
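A minimal usage sketch, assuming the pickled dictionaries above exist under util.features_prefix and that mapper is a fitted sklearn-pandas DataFrameMapper; train_df and its path are placeholders:

# Hypothetical usage; train_df, mapper and the global `features` list come
# from the surrounding training pipeline, not from this snippet.
import pandas as pd

train_df = pd.read_pickle(util.features_prefix + 'train.pkl')  # placeholder path
new_X = getAllFeatures(train_df, mapper)
print(new_X.shape)  # mapped columns + sentence_to_matrix_vec features + 2 LDA cluster ids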
Example #3
def FIcompare(folder, cells, currents = [], freqs = [],\
    firing_rate_data = 'firing_rate_data.txt'):
    '''
    f = FIcompare(folder, cells, currents = [], freqs = [],\
        firing_rate_data = 'firing_rate_data.txt'):
        Plot current clamp firing traces recorded with the given input currents
        and with firing frequencies in a given range.
    parameters:
        folder (string) - directory of the folder with raw data
        cells (array_like) - indices of neurons to plot
        currents (array_like) - list of input currents; empty means no current filter
        freqs (list) - two scalars, the range of firing rates to include;
            empty means no frequency filter
        firing_rate_data (string) - directory of the firing rate data file
    return:
        f (list) - list of figure windows
    '''
    data = util.read_dict(firing_rate_data, 'int')
    f = []
    for cell in cells:
        for trial, stim, fr in zip(*data[cell][1]):
            if (len(currents) == 0 or stim in currents) and \
                (len(freqs) == 0 or (freqs[0] <= fr and fr < freqs[1])):
                trace, sr, st = util.load_wave(folder + util.gen_name(cell, trial))
                f.append(plot.plot_trace_v(trace, sr))
                f[-1].setWindowTitle('Cell {0:d}, Trial {1:d}, I = {2:.2e}'.\
                    format(cell, trial, st[2]))
    return f
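For example, to plot traces of cells 1 and 3 recorded at 50 pA with firing rates between 10 and 30 Hz (the folder and indices are placeholders; currents are in amps, matching the stimulus units used elsewhere in this module):

figs = FIcompare('data/', [1, 3], currents=[50e-12], freqs=[10, 30])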
Example #4
def __init__(self, folder, data_file, fi_file):
    '''
    Get basic information about the data.
    parameters:
        folder (String) - directory to folder of the raw data files
        data_file (String) - directory to the data file with the cell type info
        fi_file (String) - directory to the data file with the firing rate data
    '''
    self.folder = folder  # raw data folder directory
    self.data = pd.read_csv(data_file)  # cell type info data
    self.fi_data = util.read_dict(fi_file, 'int')  # firing rate and stim current data
    self.trial_data = []  # chosen data to plot
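A sketch of constructing the enclosing class (its name is not shown in this snippet, so the CellData name below is hypothetical, as are the file paths):

# Hypothetical class name and paths; only the attributes set above are real.
cd = CellData('raw/', 'cell_types.csv', 'firing_rate_data.txt')
print(cd.data.head())   # cell type table read by pandas
print(len(cd.fi_data))  # firing rate dict loaded via util.read_dict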
Example #5
def read_puff_out(md_dir):
    """
  Yields a dictionary representing the properties of 
  PushApartByVel at each frame of a pulsed simulation from the
  specified md.puff.out file.
  """
    # get time in ps, typical MD step is 0.001 ps = 1 fs
    config = os.path.join(md_dir, 'md.puff.config')
    parms = util.read_dict(config)
    dt = 0.001 * parms['n_step_per_pulse']
    time = 0.0
    for line in open(os.path.join(md_dir, 'md.puff.out')):
        entry = eval(line)  # each line of md.puff.out is a Python dict literal
        entry['time'] = time
        yield entry
        time += dt
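A short sketch of consuming the generator (the directory name is a placeholder, and which property keys exist depends on what PushApartByVel writes into md.puff.out):

times = []
for entry in read_puff_out('md_run'):  # placeholder directory
    times.append(entry['time'])  # 'time' is added by read_puff_out itself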
Example #6
def guess_n_frame_per_ps(basename):
    """
    Returns the n_frame_per_ps of a trajectory by reading any
    .config files that would have been generated using simulate.py.
    """
    config = basename + ".config"
    try:
        params = util.read_dict(config)
        # assuming a 1 fs time step
        n_step_per_ps = 1000
        # raises KeyError (caught below) if the config lacks this entry
        n_step_per_snapshot = params['n_step_per_snapshot']
        n_frame_per_ps = n_step_per_ps / n_step_per_snapshot
    except Exception:
        # no readable .config file: fall back to a default
        n_frame_per_ps = 50
    return n_frame_per_ps
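Given a trajectory basename, the helper looks for <basename>.config and silently falls back to 50 if the file is missing or unreadable (the basename here is a placeholder):

n = guess_n_frame_per_ps('md')  # reads 'md.config' if present
print('%d frames per ps' % n)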
Example #7
def IV_curve(folder, cells, data_file, type_file='', out='IV_curve.png'):
    # Plot I-V curves for the given cells, optionally colored by cell type group.
    f = plt.figure()
    ax = f.add_subplot(111)
    data = util.read_dict(folder + data_file, 'int')
    if len(type_file):
        type_data = pd.read_csv(folder + type_file)
        groups = type_data['group']
        ncolors = len(np.unique(groups))
        cm = plt.get_cmap('gist_rainbow')
        cl = np.array([cm(1 * i / ncolors) for i in range(ncolors)])
    for cell in cells:
        if len(type_file):
            color = cl[np.nonzero(np.unique(groups) == \
                groups[np.nonzero(type_data['No'] == cell)[0][0]])[0][0]]
        else:
            color = 'k'
        ax.plot(data[cell][0], data[cell][1], color=color)
    f.savefig(folder + out, dpi=200, transparent=True)
    return 0
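A hedged example call; the folder, file names, and cell indices are placeholders for files in the layout this module expects:

IV_curve('data/', [1, 2, 3], 'iv_data.txt', type_file='cell_types.csv',
         out='IV_curve.png')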
Example #8
def FI_slope(data_file, cells):
    '''
    slope = FI_slope(data_file, cells):
        Calculate firing slope using averaged firing rate data.
    parameters:
        data_file (String) - directory to the firing rate data file
        cells (array_like) - indices of cells to be analyzed
    return:
        slope (array_like) - FI curve slope of all the cells
    '''
    data = util.read_dict(data_file, 'int')
    slope = []
    for cell in cells:
        stims = np.array(data[cell][0][0])
        rates = np.array(data[cell][0][1])
        firing_ind = np.nonzero(rates)[0]  # indices of points with firing rate above zero
        p = np.polyfit(stims[firing_ind], rates[firing_ind], 1)
        slope.append(p[0])
    return slope
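For example, fitting slopes for three cells (the file name and indices are placeholders; if the stimulus currents are stored in amps, as the FI_curve scaling below suggests, the raw slopes are in Hz/A):

slopes = FI_slope('firing_rate_data.txt', [1, 2, 3])
print(['%.2f Hz/pA' % (s * 1e-12) for s in slopes])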
Example #9
def FI_curve(data_file, type_file = None, ave = False, stims = [], cl = [], \
    cells = [], sigtp = 0, out = 'tmp.png'):
    '''
    FI_curve(data_file, type_file = None, ave = False, stims = [], cl = [],
        cells = [], sigtp = 0, out = 'tmp.png')
        Plot FI curves; for each cell, average replicated traces with the same current
        stimulation.
    parameters:
        data_file (string) - directory of data file with firing rate data, refer to
            ap.firing_rate
        type_file (string) - directory of cell type csv file with cell indices in column
            No and type values (0, 1, 2, ...) in column group. If not provided, cell
            types are not differentiated.
        ave (boolean) - whether to average cells of the same type
        stims (array_like) - stimulation current steps. If not provided, use the steps of
            the first cell from the data file, assuming all the cells have the same steps
            as the first one.
        cl (array_like) - colors of the different types. If type is specified but colors
            are not provided, generate colors from the gist_rainbow color map.
        cells (array_like) - ids of cells to be analyzed; default is an empty list, which
            analyzes all the cells in the type_file.
        sigtp (float) - significance test p-value; default is 0, meaning no test
        out (string) - directory of output figure file
    '''

    data = util.read_dict(data_file, 'int')
    type_data = pd.read_csv(type_file) if type_file is not None else None
    if not len(stims):
        stims = list(data.values())[0][0][0]
    else:
        stims = list(stims)
    ind = 0
    if len(cells):
        keys = cells
    elif type_file is not None:
        keys = type_data['No']
    else:
        keys = data.keys()
    crates = np.empty((len(keys), len(stims)))
    crates[:] = np.nan
    _cells = []
    for key in keys:
        values = data[key]
        _cells.append(key)
        _stim = np.array(values[0][0])
        for s in stims:
            s_ind = np.nonzero(abs(_stim - s) < 1e-14)[0]
            if len(s_ind):
                crates[ind][stims.index(s)] = values[0][1][s_ind[0]]
        ind = ind + 1

    if len(cells):
        cells = np.array(cells)
        crates = crates[[_cells.index(d) for d in cells], :]
    else:
        cells = np.array(_cells)
    stims = np.array(stims) * 1e12

    if type_file is not None:
        types = type_data.loc[np.nonzero(
            np.array(type_data['No']) == cells.reshape(-1, 1) *
            np.ones((1, len(type_data.index))))[1], 'group']
        if sigtp != 0 and len(np.unique(types)) == 2:
            ps = []
            for i in range(len(stims)):
                p = util.permutationTest(*[crates[types == d, i].flatten() \
                        for d in np.unique(types)])
                ps.append(p)
            print(ps)
            ps = np.array(ps) < sigtp

    if type_file is not None:
        print('type', types)
    print('cells', cells)
    f = plt.figure()
    ax = f.add_subplot(111)
    if ave:
        if type_file is None:
            mrates = np.nanmean(crates, 0)
            se = np.nanstd(crates, 0) / np.sqrt(crates.shape[0])  # standard error of the mean
            ax.errorbar(stims, mrates, se, ecolor='k', label='Average')
        else:
            if not len(cl):
                ncolors = len(np.unique(types))
                cm = plt.get_cmap('gist_rainbow')
                cl = [cm(1 * i / ncolors) for i in range(ncolors)]
            for t, color in zip(np.unique(types), cl):
                print('t', t)
                print('color', color)
                _crates = crates[types == t, :]
                print('cells', cells[types == t])
                mrates = np.nanmean(_crates, 0)
                se = np.nanstd(_crates, 0) / np.sqrt(_crates.shape[0])  # standard error
                ax.errorbar(stims, mrates, se, color = color, \
                    label = t, lw = 2)
    else:
        if type_file is None:
            # no type info available here, so color each cell individually
            if not len(cl):
                ncolors = len(cells)
                cm = plt.get_cmap('gist_rainbow')
                cl = [cm(1 * i / ncolors) for i in range(ncolors)]
            for crate, color, c in zip(crates, cl, cells):
                ax.plot(stims, crate, color=color, label=str(c))
        else:
            if not len(cl):
                ncolors = len(np.unique(types))
                cm = plt.get_cmap('gist_rainbow')
                cl = [cm(1 * i / ncolors) for i in range(ncolors)]
            for i, t in enumerate(np.unique(types)):
                _crates = crates[types == t, :]
                for crate in _crates:
                    ax.plot(stims, crate, c=cl[i])
    ax.legend(loc=2)
    ax.set_xlabel('Current (pA)')
    ax.set_ylabel('Firing rate (Hz)')
    mpl.rcParams['font.size'] = 30

    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    ax.set_xlim([stims[0], stims[-1] + 5])
    f.savefig(out, dpi=96, bbox_inches='tight', transparent=True)
    plt.close(f)
    del f
    return 0
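A representative call, averaging by cell type with a permutation test at p < 0.05; the paths are placeholders:

FI_curve('firing_rate_data.txt', type_file='cell_types.csv', ave=True,
         sigtp=0.05, out='FI_curve.png')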
Example #10
  "genion": "",
  "genbox": "",

  "vmd": "",
  "psfgen": "",
  "namd2": "",
  "flipdcd": "",

  "mod9v8": ""
}
home_dir = os.path.expanduser('~')
binaries_fname = os.path.join(home_dir, '.pdbremix.config')
if not os.path.isfile(binaries_fname):
  util.write_dict(binaries_fname, binaries)
else:
  binaries = util.read_dict(binaries_fname)


def binary(bin, arg_str='', out_name=None, in_fname=None):
  """
  Runs an external binary, handles arguments, writes out
  equivalent .sh file, log file, and can pipe in in_fname.
  """
  if bin in binaries and binaries[bin]:
    bin = binaries[bin]
  else:
    util.check_program(bin)
  if arg_str:
    util.run_with_output_file(
        '%s %s' % (bin, arg_str), out_name, in_fname)
  return '"%s"' % bin
import util
from util import read_dict, write_dic
import jieba
import re
import os
from gensim.models import Word2Vec
from gensim.models.ldamodel import LdaModel
from gensim import corpora
from sklearn.cluster import KMeans

if __name__ == "__main__":
    sentence_dict_path = util.txt_prefix + 'id_sentences.pkl'
    if os.path.exists(sentence_dict_path) is False:
        print sentence_dict_path, ' does not exist'
        exit()
    if os.path.exists(util.txt_prefix + 'id_texts.pkl') is False:
        id_sentence = read_dict(sentence_dict_path)
        print len(id_sentence)
        id_text = {}
        for i in id_sentence.keys():
            sentence = id_sentence[i]
            temp = ' '.join(sentence)
            temp = re.sub('-|\\)|\\(|(|/|)', ' ', temp).replace(')', '')
            cut_str = jieba.cut(temp)
            text = " ".join(cut_str)
            text = re.sub(r'\s{2,}', ' ', text)
            id_text.setdefault(i, (text.replace('(', '')).split(' '))
        write_dic(id_text, util.txt_prefix + 'id_texts.pkl')
    id_text = read_dict(util.txt_prefix + 'id_texts.pkl')
    texts = id_text.values()
    features, words = 60, 14
    if os.path.exists(util.txt_prefix + str(features) + 'features_1minwords_' + str(words) + 'context.pkl') is False: