Пример #1
0
def KL_divergence(resultrootdir,
                  normal,
                  mutant,
                  layerlist,
                  num_node=const.num_node):
    """
    compute the KL divergence between the GMMs of two worms, node by node

    For every layer, the pickled models ``node-<n>.pkl`` of the normal and
    the mutant worm are loaded from
    ``<resultrootdir>/<normal>_vs_<mutant>/gmm_model/<worm>/<layer>/`` and
    compared with ``gmm_kl_bysampling``.  The resulting ``[node, kldiv]``
    rows are saved to ``<resultrootdir>/<normal>_vs_<mutant>/<layer>_kldiv.csv``.
    Nodes for which either model file is missing are left out of the file.

    Parameters
    =======================================
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer
    num_node : int, optional
        the number of node in a hidden layer
    """
    pair_dir = os.path.join(resultrootdir, normal + '_vs_' + mutant)
    gmm_root = os.path.join(pair_dir, 'gmm_model')

    for layer in layerlist:
        print(layer)

        normal_dir = os.path.join(gmm_root, normal, layer)
        mutant_dir = os.path.join(gmm_root, mutant, layer)

        rows = []
        for node in range(num_node):
            print('node-' + str(node))

            pkl_name = 'node-' + str(node) + '.pkl'
            normal_pkl = os.path.join(normal_dir, pkl_name)
            mutant_pkl = os.path.join(mutant_dir, pkl_name)

            # a node is only comparable when both worms have a fitted model
            if not (os.path.exists(normal_pkl)
                    and os.path.exists(mutant_pkl)):
                continue

            normal_gmm = joblib.load(normal_pkl)
            mutant_gmm = joblib.load(mutant_pkl)
            rows.append([node, gmm_kl_bysampling(normal_gmm, mutant_gmm)])

        np.savetxt(os.path.join(pair_dir, layer + '_kldiv.csv'),
                   np.array(rows),
                   header=io_utils.delimited_list(['node', 'kldiv'], ' '))
Пример #2
0
def mean(resultrootdir, normal, mutant, layerlist, savebinary,
         normal_timesteps, mutant_timesteps):
    """
    difference of the mean node score between the two worms

    For every layer the node scores of each worm are averaged along axis 1
    of every score array (the time axis) and then over all trajectories.
    Both E_mutant - E_normal and E_normal - E_mutant are saved, together
    with the two means, to
    ``<resultrootdir>/<normal>_vs_<mutant>/<layer>_mean.csv``.

    Note that the last two column names contain '/' although the stored
    values are differences, not ratios.

    Parameters
    =======================================
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer
    savebinary : bool
        whether scores are saved by binary
    normal_timesteps
        forwarded unchanged to ``io_utils.get_nodescores`` for the normal worm
    mutant_timesteps
        forwarded unchanged to ``io_utils.get_nodescores`` for the mutant worm
    """
    pair_dir = os.path.join(resultrootdir, normal + '_vs_' + mutant)
    column_names = [
        'node',
        'mean_' + normal,
        'mean_' + mutant,
        'mean_' + mutant + '/' + normal,
        'mean_' + normal + '/' + mutant,
    ]

    for layer in layerlist:
        print(layer)

        worm_means = []
        for worm, timesteps in ((normal, normal_timesteps),
                                (mutant, mutant_timesteps)):
            trajectories = io_utils.get_nodescores(
                os.path.join(pair_dir, worm), layer, savebinary, timesteps)
            # one mean per node and trajectory (time axis collapsed) ...
            per_trajectory = np.array(
                [np.mean(score, axis=1) for score in trajectories])
            # ... then collapsed over all trajectories
            worm_means.append(np.mean(per_trajectory, axis=0))
        normal_mean, mutant_mean = worm_means

        mutant_minus_normal = mutant_mean - normal_mean
        normal_minus_mutant = normal_mean - mutant_mean

        node_ids = np.arange(len(normal_mean))
        table = np.stack((node_ids, normal_mean, mutant_mean,
                          mutant_minus_normal, normal_minus_mutant)).T

        np.savetxt(os.path.join(pair_dir, layer + '_mean.csv'),
                   table,
                   header=io_utils.delimited_list(column_names, ' '))
Пример #3
0
def allfeature(excelfile,
               output_dir,
               start_index=0,
               prefix='',
               minrow=3,
               maxrow=np.inf,
               labelcol=None,
               skip=0.):
    """
    derive motion features for every sheet and save one CSV file per sheet

    A row ``i`` (``i >= 2``) contributes one sample when ``x``, ``y`` and
    ``time`` are present for the row itself and for the two preceding rows,
    and when its time is not smaller than ``skip``.  Files are written to
    ``<output_dir>/<const.allfeature>/<prefix><n>.csv`` where ``n`` starts at
    ``start_index`` and increases by one for every written sheet.

    Column order of a written file: ``time``, ``label`` (only when
    ``labelcol`` is given), ``speed``, ``acceleration``, ``acc_angle``,
    ``rel_angle``, ``angle``, then ``<name>`` / ``d_<name>`` for every other
    sheet column, then ``additional_methods_name[2:]``, then the
    ``moving_avg_*`` and ``moving_var_*`` versions of the feature columns.

    Parameters
    =======================================
    excelfile : iterable of sheet
        every sheet must provide ``basename``, ``name``, ``nrows``,
        ``header`` and ``colist``; the entries of ``colist`` must provide
        ``x``, ``y``, ``time``, ``p`` and ``dic``
    output_dir : str
        directory in which the feature directory is created
    start_index : int, optional
        number used in the name of the first written file
    prefix : str, optional
        prefix of the written file names
    minrow : int, optional
        sheets with fewer rows are ignored
    maxrow : number, optional
        rows with an index larger than this are ignored
    labelcol : int, optional
        index (into ``sheet.header``) of the label column
    skip : float, optional
        rows whose time is smaller than this are ignored

    Returns
    =======================================
    int
        the number of written files
    """
    # Progress markers kept at module level; presumably read by error
    # reporting code elsewhere (not visible here).
    global current_sheet_for_error
    global current_line_for_error

    featuredir = os.path.join(output_dir, const.allfeature)
    os.makedirs(featuredir, exist_ok=True)

    count = 0

    for sheet in excelfile:
        current_sheet_for_error = sheet.basename + ' ' + sheet.name

        if sheet.nrows < minrow:
            continue

        timelist = np.array([])
        labellist = np.array([])
        speedlist = np.array([])
        accelerationlist = np.array([])
        accelerationlist_angle = np.array([])
        relative_anglelist = np.array([])
        anglelist = np.array([])
        other_featurelist = {}
        d_other_featurelist = {}
        for h, name in enumerate(sheet.header):
            if name == 'time':
                continue
            if h == labelcol:
                continue
            other_featurelist[name] = np.array([])
            d_other_featurelist[name] = np.array([])

        # [distance from initial point, angle from initial point,
        #  travel distance]
        additional_methodslist = [np.array([]), np.array([]), np.array([])]

        i0 = -1  # index of the first valid row of this sheet
        for i in range(sheet.nrows):
            current_line_for_error = i
            if i < 2:
                continue
            if i > maxrow:
                break

            cur = sheet.colist[i]
            prev = sheet.colist[i - 1]
            prev2 = sheet.colist[i - 2]

            # Pass if any value of the three-row window is empty.  This has
            # to be tested before the comparison with ``skip`` because
            # ``None < skip`` raises TypeError on Python 3.
            if any(value is None
                   for row in (cur, prev, prev2)
                   for value in (row.x, row.y, row.time)):
                continue
            if cur.time < skip:
                continue

            if i0 == -1:
                i0 = i

            timelist = np.append(timelist, cur.time)
            timediff = float(cur.time - prev.time)

            # previous displacement: the reference for the relative angle
            v_ref = [prev.x - prev2.x, prev.y - prev2.y]
            # current displacement
            v_t = [cur.x - prev.x, cur.y - prev.y]

            angle_t = angle(v_t, v_ref)
            relative_anglelist = np.append(relative_anglelist,
                                           angle_t / timediff)
            speedlist = np.append(speedlist, length(v_t) / timediff)

            acc, accangle = acceleration(prev2.p, prev.p, cur.p,
                                         prev.time - prev2.time, timediff)
            accelerationlist = np.append(accelerationlist, acc)
            accelerationlist_angle = np.append(accelerationlist_angle,
                                               accangle)

            anglelist = np.append(anglelist,
                                  np.arctan2(v_t[1], v_t[0]) / timediff)

            origin = sheet.colist[i0]
            # distance from initial point
            additional_methodslist[0] = np.append(
                additional_methodslist[0],
                length([cur.x - origin.x, cur.y - origin.y]))
            # angle from initial point
            additional_methodslist[1] = np.append(
                additional_methodslist[1],
                np.arctan2(cur.y - origin.y, cur.x - origin.x))
            # travel distance (starts at 0 for the first valid row)
            if len(additional_methodslist[2]) == 0:
                additional_methodslist[2] = np.append(
                    additional_methodslist[2], 0)
            else:
                additional_methodslist[2] = np.append(
                    additional_methodslist[2],
                    length(v_t) + additional_methodslist[2][-1])

            for h, name in enumerate(sheet.header):
                if name == 'time':
                    continue
                if h == labelcol:
                    labellist = np.append(labellist, cur.dic[name])
                    continue
                other_featurelist[name] = np.append(other_featurelist[name],
                                                    cur.dic[name])
                # rate of change with respect to the previous row
                d_other_featurelist[name] = np.append(
                    d_other_featurelist[name],
                    (cur.dic[name] - prev.dic[name]) / timediff)

        # The return value is not used; presumably the array is normalized
        # in place - TODO confirm against angle_normalization.
        angle_normalization(relative_anglelist)

        savefilename = prefix + str(count + start_index) + '.csv'

        featurelist = [
            speedlist, accelerationlist, accelerationlist_angle,
            relative_anglelist, anglelist
        ]
        headers = ['speed', 'acceleration', 'acc_angle', 'rel_angle', 'angle']
        for h, name in enumerate(sheet.header):
            if name == 'time':
                continue
            if h == labelcol:
                continue
            featurelist += [other_featurelist[name], d_other_featurelist[name]]
            headers += [name, 'd_' + name]

        # Moving statistics are taken before time / label / additional
        # columns are added, so they only cover the feature columns.
        avg_featurelist = [
            moving_average(feat, num_points=min(10, len(feat)))
            for feat in featurelist
        ]
        var_featurelist = [
            moving_variance(feat, num_points=min(10, len(feat)))
            for feat in featurelist
        ]

        avg_headers = ['moving_avg_' + header for header in headers]
        var_headers = ['moving_var_' + header for header in headers]

        if labelcol is not None:
            headers = ['label'] + headers
            featurelist = [labellist] + featurelist

        # NOTE(review): three additional arrays are written, while the header
        # uses additional_methods_name[2:]; the length of that name list is
        # not visible here - confirm that both agree.
        headers = ['time'] + headers + list(
            additional_methods_name[2:]) + avg_headers + var_headers
        featurelist = [timelist] + featurelist + list(
            np.array(
                additional_methodslist)) + avg_featurelist + var_featurelist

        np.savetxt(os.path.join(featuredir, savefilename),
                   np.array(featurelist).transpose(),
                   delimiter=',',
                   header=io_utils.delimited_list(headers))
        count += 1

    return count
Пример #4
0
def train(modelrootdir, timesteps, input_dim, nclass, X_normal, X_mutant, max_epoch, batch_size, normal, mutant,
          start_epoch=0, tag='', save_interval=50, alltime=False, X_normal_test=None, X_mutant_test=None,
          test_interval=1, num_node=const.num_node, num_lstmlayer=const.num_layer, use_dropout=True, drop_prob=0.5):
    """
    train the normal-vs-mutant classifier on randomly drawn balanced batches

    The model is loaded from ``lstm.model_path(modelrootdir, tag=tag)`` when
    ``start_epoch > 0`` and that file exists; otherwise a new model is built.
    Every batch consists of ``int(batch_size / 2)`` samples of each class,
    with indices drawn by ``rnd.randint``.  Loss and accuracy of every batch
    are appended to ``loss_history_<timestamp>.csv`` below
    ``<modelrootdir>/<tag>``.  When both test sets are given, the model is
    evaluated with ``test.return_alleval`` every ``test_interval`` epochs and
    the result is written to ``test_history_<timestamp>.csv``.

    The model is saved every ``save_interval`` epochs (file name prefixed
    with the epoch) and once more when the function is left.  If anything
    goes wrong - including an interruption by the user - the traceback is
    printed, the model is saved if one exists, ``[fail]`` is printed and the
    process exits with status 1.

    Parameters
    =======================================
    modelrootdir : str
        root directory for model files
    timesteps : int
        passed to the model builder and to ``io_utils.hotvec_time``
    input_dim : int
        passed to the model builder
    nclass : int
        the number of classes of the one-hot targets
    X_normal, X_mutant : array
        training samples of the two classes; they are indexed with integer
        arrays, so presumably numpy arrays - confirm against the caller
    max_epoch : int
        training runs for epochs ``start_epoch`` .. ``max_epoch - 1``
    batch_size : int
        the number of samples in a batch
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    start_epoch : int, optional
        first epoch; a value above 0 resumes from the saved model
    tag : str, optional
        sub directory / tag of the model
    save_interval : int, optional
        interval (epochs) of saving the model
    alltime : bool, optional
        no model builder is configured for ``True``; building a new model
        with this flag fails
    X_normal_test, X_mutant_test : array, optional
        test samples; the test history is only written if both are given
    test_interval : int, optional
        interval (epochs) of the evaluation on the test samples
    num_node : int, optional
        the number of node in a hidden layer
    num_lstmlayer : int, optional
        the number of LSTM layers
    use_dropout : bool, optional
        passed to the model builder
    drop_prob : float, optional
        passed to the model builder

    Returns
    =======================================
    model
        the trained model
    """
    # Bound before the try block so that the cleanup in ``finally`` can
    # always inspect them, even if an exception is raised very early.
    model = None
    tlf = None
    except_flg = False

    try:
        # <editor-fold desc="building or loading model">
        model_file = lstm.model_path(modelrootdir, tag=tag)

        # save last model as *_old
        if os.path.exists(model_file):
            shutil.copy2(model_file,
                         lstm.model_path(modelrootdir, tag=tag, sufix='_old'))

        if start_epoch > 0 and os.path.exists(model_file):
            print('loading model...')
            model = load_model(model_file)
            model.summary()
        elif alltime:
            # The builders formerly used here
            # (lstm.buildAttentionModelDeepCNNLSTM,
            #  lstm.buildAttentionModelDeepMultiCNNLSTM) are disabled.
            raise NotImplementedError(
                'no model builder is configured for alltime=True')
        else:
            # Alternatives tried before: lstm.buildAttentionModelDeepMultiCNNLSTM,
            # lstm.buildAttentionModelDeepCNNLSTM,
            # lstm.buildAttentionModelDeepMultiLSTM and
            # lstm.buildAttentionModelDeepMultiCNN (same arguments).
            model = lstm.buildAttentionModelMultiViewCNNLSTM(
                timesteps, input_dim, use_dropout=use_dropout, drop_prob=drop_prob,
                hidden_unit=num_node, num_lstmlayer=num_lstmlayer)

        modeldir = os.path.join(modelrootdir, tag)
        os.makedirs(modeldir, exist_ok=True)
        # </editor-fold>

        # loss history of training
        lossfile_time = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
        lossfile = os.path.join(modeldir, 'loss_history_' + lossfile_time + '.csv')
        with open(lossfile, 'w') as lf:
            lf.write(io_utils.delimited_list(['time', 'epoch', 'batch', 'loss', 'accuracy']))
            lf.write('\n')

        # accuracy history of test
        has_test = not (X_normal_test is None or X_mutant_test is None)
        if has_test:
            test_lossfile = os.path.join(modeldir, 'test_history_' + lossfile_time + '.csv')
            tlf = open(test_lossfile, 'w')  # closed in ``finally``
            header = ['time', 'epoch', 'accuracy',
                      normal + '_precision', normal + '_recall', normal + '_f-measure',
                      mutant + '_precision', mutant + '_recall', mutant + '_f-measure',
                      'avg' + '_precision', 'avg' + '_recall', 'avg' + '_f-measure']
            # one column per test sample
            header += [normal + '_' + str(x) for x in range(len(X_normal_test))]
            header += [mutant + '_' + str(x) for x in range(len(X_mutant_test))]

            tlf.write(io_utils.delimited_list(header))
            tlf.write('\n')

        # a rank-3 model output needs one target per time step
        if model.output.get_shape().ndims == 3:
            Y_normal = io_utils.hotvec_time(nclass, 0, len(X_normal), timesteps)
            Y_mutant = io_utils.hotvec_time(nclass, 1, len(X_mutant), timesteps)
        else:
            Y_normal = io_utils.hotvec(nclass, 0, len(X_normal)).reshape([-1, nclass])
            Y_mutant = io_utils.hotvec(nclass, 1, len(X_mutant)).reshape([-1, nclass])
        print('normal: ' + str(len(X_normal)) + 'samples')
        print('mutant: ' + str(len(X_mutant)) + 'samples')
        loop_per_epoch = (len(X_normal) + len(X_mutant)) / batch_size
        half_batch = int(batch_size / 2)

        starttime = time.time()

        for epoch in range(start_epoch, max_epoch):
            losslist = np.empty([0])
            acclist = np.empty([0])

            # <editor-fold desc="train on one batch">
            for batch in range(int(loop_per_epoch)):
                index_normal = rnd.randint(0, len(X_normal), half_batch)
                index_mutant = rnd.randint(0, len(X_mutant), half_batch)

                X_train = np.concatenate((X_normal[index_normal], X_mutant[index_mutant]))
                Y_train = np.concatenate((Y_normal[index_normal], Y_mutant[index_mutant]))

                ret = model.train_on_batch(X_train, Y_train)

                losslist = np.append(losslist, ret[0])
                acclist = np.append(acclist, ret[1])

                # reopened for every batch so that the history on disk is
                # complete even if the training stops unexpectedly
                with open(lossfile, 'a') as lf:
                    lf.write(io_utils.delimited_list(
                        [str(datetime.datetime.now()), str(epoch), str(batch), str(ret[0]), str(ret[1])]))
                    lf.write('\n')

                sys.stdout.write('\repoch' + str(epoch) + ' ' + io_utils.progressbar(loop_per_epoch, batch))
                sys.stdout.write(' loss:' + ('%0.5f' % ret[0]) + ' acc:' + ('%0.5f' % ret[1]))
                sys.stdout.write(' ' + str(int(time.time() - starttime)) + '[s]')
                sys.stdout.flush()
            # </editor-fold>

            # summary line of the epoch (mean over its batches)
            sys.stdout.write('\repoch' + str(epoch) + ' ' + io_utils.progressbar(loop_per_epoch, loop_per_epoch))
            sys.stdout.write(' loss:' + ('%0.5f' % np.mean(losslist)) + ' acc:' + ('%0.5f' % np.mean(acclist)))
            sys.stdout.write(' ' + str(int(time.time() - starttime)) + '[s]')
            sys.stdout.write('\n')
            sys.stdout.flush()

            if epoch % save_interval == save_interval - 1:
                model.save(lstm.model_path(modelrootdir, tag=tag, prefix=str(epoch)))
            if epoch % test_interval == test_interval - 1 and has_test:
                accuracy, normal_eval, mutant_eval, avg_eval, est_normal, est_mutant = test.return_alleval(
                    model, X_normal_test, X_mutant_test, batch_size=batch_size, normal=normal, mutant=mutant)

                # per-sample correctness: class 0 is normal, class 1 is mutant
                normal_resultlist = list(map(str, est_normal == 0))
                mutant_resultlist = list(map(str, est_mutant == 1))
                line = [str(datetime.datetime.now()), str(epoch)] + list(map(
                    str, [accuracy] + normal_eval + mutant_eval + avg_eval
                )) + normal_resultlist + mutant_resultlist

                tlf.write(io_utils.delimited_list(line))
                tlf.write('\n')

                print('test accuracy: ' + str(accuracy))

    except BaseException:
        # Deliberately catches everything (also KeyboardInterrupt), so that
        # an interrupted run still saves the model and exits with status 1.
        traceback.print_exc()
        except_flg = True

    finally:
        # no model exists if loading or building it failed
        if model is not None:
            model.save(lstm.model_path(modelrootdir, tag=tag))
        if tlf is not None:
            tlf.close()

        if except_flg:
            print('[fail]')
            sys.exit(1)

    return model
Пример #5
0
def aggregate_criteria(
    resultrootdir,
    normal,
    mutant,
    layerlist,
    num_node=const.num_node,
    criteria_list=['var', 'mean',
                   'histdist']):  #'kldiv', 'entropy',  maekawa 20181106
    """
    aggregate criteria to one file

    For every layer the files ``<layer>_<criterion>.csv`` below
    ``<resultrootdir>/<normal>_vs_<mutant>`` are joined column-wise (their
    first column, the node id, is dropped).  In addition, agreement-weighted
    scores are computed from column 4 of the ``var`` file and the columns of
    the ``histdist`` file for the agreement levels 0.5, 0.6, ..., 0.9.

    Two files are written into the same directory: ``aggregated.csv`` with
    all columns and ``ranking.csv`` with the Intersect based score only.
    The header of both files is the one built for the last layer.

    Parameters
    =======================================
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer
    num_node : int, optional
        the number of node in a hidden layer
    criteria_list : list of str, optional
        aggregated criteria; the list is only read, never modified

    Raises
    =======================================
    ValueError
        if the ``var`` or the ``histdist`` criterion could not be loaded for
        a layer (not in ``criteria_list`` or file missing)
    """
    tag = normal + '_vs_' + mutant
    pair_dir = os.path.join(resultrootdir, tag)
    ratio_name = normal + '/' + mutant

    layers = []
    ranking = []

    # Defined here as well, so that an empty layerlist yields header-only
    # files instead of a NameError.
    header = ['layer', 'node']
    rankheader = ['layer', 'node']

    for layer in layerlist:
        print(layer)
        onelayer = np.arange(num_node).reshape(-1, 1)
        rankonelayer = np.arange(num_node).reshape(-1, 1)

        header = ['layer', 'node']
        rankheader = ['layer', 'node']

        # Reset for every layer: data of a previous layer must never be
        # reused when a file of this layer is missing.
        varnmlist = None
        histdistlist = None

        for criterion in criteria_list:
            filename = os.path.join(pair_dir,
                                    layer + '_' + criterion + '.csv')

            if not os.path.exists(filename):
                print('cannot find ' + filename)
                continue

            array = np.loadtxt(filename, delimiter=' ')
            onelayer = np.concatenate((onelayer, array[:, 1:]), axis=1)

            # np.loadtxt skips the '# ...' header line, so it is read
            # separately to recover the column names
            with open(filename, 'r') as f:
                firstline = f.readline()
            firstline = firstline.replace('# ', '').replace('\n', '').replace(
                '\r', '').split()
            header += firstline[1:]

            if criterion == 'var':
                # presumably the normal-vs-mutant column, mirroring the
                # column layout written by ``mean`` - TODO confirm
                varnmlist = array[:, 4]
            if criterion == 'histdist':
                histdistlist = array[:, 1:]

        if varnmlist is None or histdistlist is None:
            raise ValueError(
                "criteria 'var' and 'histdist' are both required to compute "
                'the agreement-weighted scores, but at least one is missing '
                'for layer ' + str(layer))

        # A mutant/normal variant of the scores below (weighted by column 3
        # of the var file) was computed here formerly and is disabled.
        for agreement in np.arange(0.5, 1., 0.1):
            level = f"{agreement:.1f}"

            # 0: Correlation -[-1, 1]+  (mapped to [0, 1] first)
            Correlation = varnmlist * (1. - np.abs(
                (histdistlist[:, 0] + 1.) / 2. - agreement))  #Maekawa 20181106
            # 2: Intersect -[0, 1]+
            Intersect = varnmlist * (
                1. - np.abs(histdistlist[:, 2] - agreement))  #Maekawa 20181106
            # 3: Bhattacharyya -[1, 0]+  (reversed to [0, 1] first)
            Bhattacharyya = varnmlist * (
                1. - np.abs(-histdistlist[:, 3] + 1. - agreement)
            )  #Maekawa 20181106

            onelayer = np.concatenate(
                (onelayer, np.array([Correlation, Intersect,
                                     Bhattacharyya]).T),
                axis=1)
            header += [
                ratio_name + '_Correlation_' + level,
                ratio_name + '_Intersect_' + level,
                ratio_name + '_Bhattacharyya_' + level
            ]

            # only the Intersect based score is used for the ranking
            rankonelayer = np.concatenate(
                (rankonelayer, np.array([Intersect]).T), axis=1)
            rankheader += [ratio_name + '_' + level]

        layers.append(onelayer)
        ranking.append(rankonelayer)

    _write_layer_table(os.path.join(pair_dir, 'aggregated.csv'), header,
                       layerlist, layers)
    _write_layer_table(os.path.join(pair_dir, 'ranking.csv'), rankheader,
                       layerlist, ranking)


def _write_layer_table(path, header, layerlist, tables):
    """Write ``header`` and then one line per node of every layer to ``path``."""
    with open(path, 'w') as f:
        io_utils.writeline(f, io_utils.delimited_list(header))

        for layer, table in zip(layerlist, tables):
            for node in table:
                line = [layer] + list(node)
                line[1] = int(line[1])  # node ids are changed from float to int
                io_utils.writeline(f, io_utils.delimited_list(line))
Пример #6
0
def correlation(datasetrootdir, resultrootdir, normal, mutant, layerlist,
                savebinary):
    """
    correlation between activation and existing feature

    The features of all trajectories of a worm are concatenated along
    axis 1, and so are the node scores of every layer.  The correlation
    coefficients between every node and every feature are computed for the
    normal worm, for the mutant worm and for both worms together, and are
    written to ``correlation_<normal>.csv``, ``correlation_<mutant>.csv``
    and ``correlation.csv`` below ``<resultrootdir>/<normal>_vs_<mutant>``.
    Every line holds the layer, the node and one coefficient per feature.

    Parameters
    =======================================
    datasetrootdir : str
        root directory of dataset
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer
    savebinary : bool
        whether scores are saved by binary
    """
    tag = normal + '_vs_' + mutant
    pair_dir = os.path.join(resultrootdir, tag)

    # the header returned for the mutant is the one used for the output
    normalfeaturelist, header = io_utils.get_features(
        os.path.join(datasetrootdir, normal, const.allfeature))
    mutantfeaturelist, header = io_utils.get_features(
        os.path.join(datasetrootdir, mutant, const.allfeature))

    normal_timesteps = [x.shape[1] for x in normalfeaturelist]
    mutant_timesteps = [x.shape[1] for x in mutantfeaturelist]

    normalfeature = np.concatenate(normalfeaturelist, axis=1)
    mutantfeature = np.concatenate(mutantfeaturelist, axis=1)
    # does not depend on the layer, so it is built only once
    concatenated_feature = np.concatenate((normalfeature, mutantfeature),
                                          axis=1)

    normalcorlist = []
    mutantcorlist = []
    conccorlist = []
    for layer in layerlist:
        print(layer)
        normalscorelist = io_utils.get_nodescores(
            os.path.join(pair_dir, normal), layer, savebinary,
            normal_timesteps)
        mutantscorelist = io_utils.get_nodescores(
            os.path.join(pair_dir, mutant), layer, savebinary,
            mutant_timesteps)

        # correlation between raw features and activations
        normalscore = np.concatenate(normalscorelist, axis=1)
        mutantscore = np.concatenate(mutantscorelist, axis=1)
        concatenated_score = np.concatenate((normalscore, mutantscore), axis=1)

        normalcorlist.append(_cross_correlation(normalfeature, normalscore))
        mutantcorlist.append(_cross_correlation(mutantfeature, mutantscore))
        conccorlist.append(
            _cross_correlation(concatenated_feature, concatenated_score))

    header = ['layer', 'node'] + header

    _write_correlation_table(
        os.path.join(pair_dir, 'correlation_' + normal + '.csv'), header,
        layerlist, normalcorlist)
    _write_correlation_table(
        os.path.join(pair_dir, 'correlation_' + mutant + '.csv'), header,
        layerlist, mutantcorlist)
    _write_correlation_table(os.path.join(pair_dir, 'correlation.csv'),
                             header, layerlist, conccorlist)


def _cross_correlation(feature, score):
    """Return the score-rows x feature-rows block of ``np.corrcoef``."""
    # np.corrcoef stacks the rows of both arguments; the lower left block of
    # the full matrix relates every row of score to every row of feature.
    return np.corrcoef(feature, score)[len(feature):, :len(feature)]


def _write_correlation_table(path, header, layerlist, cor_per_layer):
    """Write ``header`` and then one line per node of every layer to ``path``."""
    with open(path, 'w') as f:
        io_utils.writeline(f, io_utils.delimited_list(header))

        for layer, cor in zip(layerlist, cor_per_layer):
            for node, row in enumerate(cor):
                line = [layer, str(node)] + list(row)
                io_utils.writeline(f, io_utils.delimited_list(line))
Пример #7
0
def _collect_attended_values(featurelist, scorelist, feat_idx, attn_th):
    """
    gather the values of one feature at the time steps whose attention
    score exceeds the threshold, over all trajectories

    Parameters
    =======================================
    featurelist : list of array
        per-trajectory feature arrays, indexed as [feature][time]
    scorelist : list of array
        per-trajectory node scores; only row 0 is used for masking
    feat_idx : int
        index of the feature to collect
    attn_th : float
        attention threshold; time steps with score > attn_th are kept

    Returns
    =======================================
    list of float
        attended feature values, trajectories concatenated in order
    """
    attended = []
    for features, scores in zip(featurelist, scorelist):  # each trajectory
        feature_row = features[feat_idx]
        mask = scores[0] > attn_th
        # the mask is cut to the feature length before boolean indexing,
        # because the score row may be longer than the feature row
        attended.extend(feature_row[mask[:len(feature_row)]].tolist())
    return attended


def compare_attended(datasetrootdir, resultrootdir, normal, mutant, layerlist,
                     savebinary):
    """
    compare existing features in attended segments

    For every layer whose name starts with "attention" and does not contain
    "last", the feature values at strongly attended time steps are
    histogrammed (100 bins over the common value range) for the normal and
    the mutant worm.  The normalized histograms are written to
    <resultrootdir>/<tag>/attended_feature_hist/<layer>/<idx>-<feature>.csv
    and 1 - histogram.compareHist(hist1, hist2, 2) of every feature is
    written as one row per layer to
    <resultrootdir>/<tag>/attended_feature_diff.csv.

    Parameters
    =======================================
    datasetrootdir : str
        root directory of dataset
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer
    savebinary : bool
        whether scores are saved by binary
    """
    tag = normal + '_vs_' + mutant

    normalfeaturelist, header = io_utils.get_features(
        os.path.join(datasetrootdir, normal, const.allfeature))
    # the header of the mutant file is the one that is used below
    # (presumably both files share the same feature columns -- TODO confirm)
    mutantfeaturelist, header = io_utils.get_features(
        os.path.join(datasetrootdir, mutant, const.allfeature))

    normal_timesteps = [x.shape[1] for x in normalfeaturelist]
    mutant_timesteps = [x.shape[1] for x in mutantfeaturelist]

    feature_hist_file = os.path.join(resultrootdir, tag,
                                     "attended_feature_diff.csv")
    # the context manager closes the summary file even when a layer fails
    with open(feature_hist_file, 'w') as f:
        io_utils.writeline(f, io_utils.delimited_list(['layer'] + header))
        for layer in layerlist:
            if not layer.startswith("attention") or "last" in layer:
                continue

            one_line = [layer]
            hist_dir = os.path.join(resultrootdir, tag,
                                    "attended_feature_hist", layer)
            if not os.path.exists(hist_dir):
                os.makedirs(hist_dir)
            print(layer)
            normalscorelist = io_utils.get_nodescores(
                os.path.join(resultrootdir, tag, normal), layer, savebinary,
                normal_timesteps)
            mutantscorelist = io_utils.get_nodescores(
                os.path.join(resultrootdir, tag, mutant), layer, savebinary,
                mutant_timesteps)

            normalscore_all = np.concatenate(normalscorelist, axis=1)
            mutantscore_all = np.concatenate(mutantscorelist, axis=1)
            # NOTE(review): the upper bound is the SMALLER of the two maxima
            # (min, not max); kept as in the original -- confirm this is
            # intended.
            attn_max = min(np.nanmax(normalscore_all),
                           np.nanmax(mutantscore_all))
            attn_min = min(np.nanmin(normalscore_all),
                           np.nanmin(mutantscore_all))
            # threshold is the midpoint between attn_min and attn_max
            attn_th = (attn_max - attn_min) * 0.5 + attn_min
            print("attn_max", attn_max)
            print("attn_min", attn_min)
            print("attn_th", attn_th)

            for feat_idx, feature in enumerate(header):
                print(feature)
                norm_attended_features = _collect_attended_values(
                    normalfeaturelist, normalscorelist, feat_idx, attn_th)
                mutant_attended_features = _collect_attended_values(
                    mutantfeaturelist, mutantscorelist, feat_idx, attn_th)

                # both histograms share one value range so that their bins
                # are directly comparable
                feat_max = max(np.max(norm_attended_features),
                               np.max(mutant_attended_features))
                feat_min = min(np.min(norm_attended_features),
                               np.min(mutant_attended_features))
                hist1 = histogram.calcHist(norm_attended_features, 100,
                                           [feat_min, feat_max])
                hist1 = hist1 / np.sum(hist1)
                hist2 = histogram.calcHist(mutant_attended_features, 100,
                                           [feat_min, feat_max])
                hist2 = hist2 / np.sum(hist2)
                bins = np.linspace(feat_min, feat_max, 101)
                # method code 2 is presumably histogram intersection
                # (overlap) -- verify against histogram.compareHist
                inverse_overlap = 1.0 - histogram.compareHist(hist1, hist2, 2)
                one_line.append(str(inverse_overlap))
                print("inverse_overlap", inverse_overlap)

                # replace characters that are unsafe in a file name
                feature_fname = re.sub(r'[\\|/|:|?|.|"|<|>|\|]', '-', feature)
                np.savetxt(os.path.join(
                    hist_dir,
                    str(feat_idx) + '-' + feature_fname + '.csv'),
                           np.vstack((bins[:-1], bins[1:], hist1, hist2)).T,
                           header='start,end,' + normal + ',' + mutant,
                           delimiter=',')
            io_utils.writeline(f, io_utils.delimited_list(one_line))
Пример #8
0
def test(model,
         X_normal,
         X_mutant,
         batch_size,
         normal,
         mutant,
         F_normal_test=None,
         F_mutant_test=None,
         savedir=''):
    """
    Evaluate model

    The predicted class of every sample is the argmax of the model output.
    Class 0 is counted as correct for X_normal and class 1 as correct for
    X_mutant.  The scores are printed and, when savedir is given, written
    to 'result.txt'; per-file correctness is written to 'failure.txt' when
    both filename lists are given.

    Parameters
    =======================================
    model : keras.models
        The model predict test data.
    X_normal : 3-dimension array [numdata, time, numfeature]
        Dimension of input data.
    X_mutant : 3-dimension array [numdata, time, numfeature]
        Dimension of input data.
    batch_size : int
        The batch size for testing.
    normal : str
        ID of normal worm, used as a label in the report.
    mutant : str
        ID of mutant worm, used as a label in the report.
    F_normal_test : list of str, optional
        filenames of X_normal.
    F_mutant_test : list of str, optional
        filenames of X_mutant.
    savedir : str, optional
        The directory for saving result.
        If this parameter is '', the result is not stored.

    Returns
    =======================================
    est_normal : array
        predicted class index of every sample in X_normal
    est_mutant : array
        predicted class index of every sample in X_mutant
    """
    pd_normal = predict(model, X_normal, batch_size)
    pd_mutant = predict(model, X_mutant, batch_size)

    est_normal = np.argmax(pd_normal, axis=1)
    est_mutant = np.argmax(pd_mutant, axis=1)

    (accuracy, precision_normal, recall_normal, fmeasure_normal,
     precision_mutant, recall_mutant,
     fmeasure_mutant) = evaluate(est_normal, est_mutant)

    avg_precision = np.mean((precision_normal, precision_mutant))
    avg_recall = np.mean((recall_normal, recall_mutant))
    avg_fmeasure = np.mean((fmeasure_normal, fmeasure_mutant))

    print('accuracy : ' + str(accuracy))
    print('precision (' + normal + ') : ' + str(precision_normal) +
          ', precision (' + mutant + ') : ' + str(precision_mutant))
    print('recall (' + normal + ') : ' + str(recall_normal) + ', recall (' +
          mutant + ') : ' + str(recall_mutant))
    print('f-measure (' + normal + ') : ' + str(fmeasure_normal) +
          ', f-measure (' + mutant + ') : ' + str(fmeasure_mutant))
    print('Avg precision : ' + str(avg_precision))
    print('Avg recall : ' + str(avg_recall))
    print('Avg f-measure : ' + str(avg_fmeasure))

    if savedir == '':
        return est_normal, est_mutant

    if not os.path.exists(savedir):
        os.makedirs(savedir)

    # '\n' is used instead of os.linesep: the file is opened in text mode,
    # which already translates '\n' to the platform line ending (writing
    # os.linesep would yield '\r\r\n' on Windows).
    with open(os.path.join(savedir, 'result.txt'), 'w') as result_file:
        result_file.write('accuracy : ' + str(accuracy) + '\n')
        result_file.write('\t precision \t recall \t f-measure' + '\n')
        result_file.write(normal + ' \t ' + str(precision_normal) + ' \t ' +
                          str(recall_normal) + ' \t ' +
                          str(fmeasure_normal) + '\n')
        result_file.write(mutant + ' \t ' + str(precision_mutant) + ' \t ' +
                          str(recall_mutant) + ' \t ' +
                          str(fmeasure_mutant) + '\n')
        result_file.write('Avg \t ' + str(avg_precision) + ' \t ' +
                          str(avg_recall) + ' \t ' + str(avg_fmeasure))

    if F_normal_test is not None and F_mutant_test is not None:
        # one [filename, 'True'/'False'] pair per sample; a normal sample is
        # correct when predicted as class 0, a mutant sample as class 1
        n_file_result_list = [[F_normal_test[i], str(correct)]
                              for i, correct in enumerate(est_normal == 0)]
        m_file_result_list = [[F_mutant_test[i], str(correct)]
                              for i, correct in enumerate(est_mutant == 1)]

        # order each group by filename using the project's comparison
        by_filename = io_utils.cmp_to_key(
            lambda a, b: io_utils.compare_filename(a[0], b[0]))
        n_file_result_list.sort(key=by_filename)
        m_file_result_list.sort(key=by_filename)

        # normal files are listed first, then mutant files
        with open(os.path.join(savedir, 'failure.txt'), 'w') as failure_file:
            for sl in n_file_result_list + m_file_result_list:
                failure_file.write(io_utils.delimited_list(sl))
                failure_file.write('\n')

    return est_normal, est_mutant