Example #1
def ReadAndExtractAll(fname='../data/features_all_v2.5.pkl'):
    '''
    read all data, extract features, write to dill
    '''

    short_pid, short_data, short_label = ReadData.ReadData(
        '../../data1/short.csv')
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')
    QRS_pid, QRS_data, QRS_label = ReadData.ReadData('../../data1/QRSinfo.csv')
    center_waves = ReadData.read_mean_wave('../../data1/centerwave_raw.csv')

    all_pid = QRS_pid
    feature_list, all_feature = GetAllFeature(short_data, long_data, QRS_data,
                                              long_pid, short_pid,
                                              center_waves)
    all_label = QRS_label

    print('ReadAndExtractAll done')
    print('all_feature shape: ', np.array(all_feature).shape)
    print('feature_list shape: ', len(feature_list))
    all_feature = np.nan_to_num(all_feature)  # nan_to_num returns a new array, so keep the result

    with open(fname + '_feature_list.csv', 'w') as fout:
        for i in feature_list:
            fout.write(i + '\n')

    with open(fname, 'wb') as output:
        dill.dump(all_pid, output)
        dill.dump(all_feature, output)
        dill.dump(all_label, output)
    print('write done')
    return
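
Since dill serializes the three objects sequentially into one file, they have to be loaded back in the same order they were dumped. A minimal sketch of the reverse step, assuming the default path above:

import dill

with open('../data/features_all_v2.5.pkl', 'rb') as fin:
    all_pid = dill.load(fin)      # objects come back in dump order
    all_feature = dill.load(fin)
    all_label = dill.load(fin)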
Example #2
def ReadAndExtractAll(fname='../data/features_all_v2.2.pkl'):
    '''
    read all data, extract features, write to dill
    '''

    short_pid, short_data, short_label = ReadData.ReadData(
        '../../data1/short.csv')
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')
    QRS_pid, QRS_data, QRS_label = ReadData.ReadData('../../data1/QRSinfo.csv')
    center_waves = ReadData.read_mean_wave(
        '../../data1/center_wave_euclid_direct.csv')

    all_pid = QRS_pid
    all_feature = GetAllFeature(short_data, long_data, QRS_data, long_pid,
                                short_pid, center_waves)
    all_label = QRS_label

    print('ReadAndExtractAll done')
    print('all_feature shape: ', np.array(all_feature).shape)

    #    with open(fname, 'wb') as output:
    #        dill.dump(all_pid, output)
    #        dill.dump(all_feature, output)
    #        dill.dump(all_label, output)

    return
Example #3
def run():
    global data_path, problem_set, instance_num
    if request.method == 'POST':
        print(request.form, flush=True)
        return
    else:
        instance = request.args.get('instance')
        if instance == "":
            return render_template("index.html", x=x, y=y, z=z)
    J = []
    problem_set = instance[:1]
    instance_num = instance[1:]
    ReadData(os.path.join(APP_STATIC, data_path + problem_set + "_" + instance_num), J)
    ga = NSGA(500, 3, Job_set=J, common_due_date=120)
    ga.run()
    pareto = [item for sublist in ga.nondominated_sort() for item in sublist]
    input = [[] for _ in range(2)]
    output = [[]]
    weight = []
    for point in pareto:
        input[0].append(point.obj[0])  # weighted tardiness
        input[1].append(point.obj[1])  # total flow time
        output[0].append(point.obj[2])  # pieces
        weight.append(point.weights)
    res = DEA_analysis(input, output)
    eff = [r['Efficiency'] for r in res]
    result["weight"] = weight
    result["Flow_time"] = input[1]
    result["Tardiness"] = input[0]
    result["Piece"] = output[0]
    result["DEA_score"] = eff
    return render_template("index.html", result=result)
Example #4
def main():
    seq = 1
    data = ReadData(dsName='airsim', subType='mr', seq=seq)
    barNames = data.getNewImgNames(subtype='bar')
    pinNames = data.getNewImgNames(subtype='pin')
    dirBar = data.path + '/images_bar'
    dirPin = data.path + '/images_pin'

    if not os.path.exists(dirBar):
        os.makedirs(dirBar)
    if not os.path.exists(dirPin):
        os.makedirs(dirPin)

    N = data.imgs.shape[0]

    for i in range(0, N):
        img = data.imgs[i]
        img = np.reshape(img, (360, 720, 3))

        pin = cv2.fisheye.undistortImage(img, K, D=D_pincus, Knew=K_pincus)
        bar = cv2.fisheye.undistortImage(img, K, D=D_barrel, Knew=K_barrel)

        # cv2.imshow('input', img)
        # cv2.imshow('pin', pin)
        # cv2.imshow('bar', bar)
        # cv2.waitKey(1)
        cv2.imwrite(barNames[i], bar * 255.0)
        cv2.imwrite(pinNames[i], pin * 255.0)
        print(i / N)
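
K, D_pincus, K_pincus, D_barrel and K_barrel are module-level constants in the source project. Purely to illustrate the shapes cv2.fisheye.undistortImage expects, a sketch with placeholder values, not the project's real calibration:

import numpy as np

# Hypothetical calibration: a 3x3 camera matrix and 4-element fisheye distortion
# vectors; the real values come from the project's camera calibration.
K = np.array([[360.0,   0.0, 360.0],
              [  0.0, 360.0, 180.0],
              [  0.0,   0.0,   1.0]])
D_barrel = np.array([0.1, 0.01, 0.0, 0.0])    # placeholder barrel distortion
D_pincus = np.array([-0.1, -0.01, 0.0, 0.0])  # placeholder pincushion distortion
K_barrel = K.copy()
K_pincus = K.copy()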
Example #5
def read_data():
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')

    #    mat1 = [truncate_long(ts, 9000) for ts in long_data]
    #    mat2 = [truncate_long(ts, 6000) for ts in long_data]
    mat3 = [truncate_long(ts, 3000) for ts in long_data]

    #    mat4 = [sample_long(ts, 10) for ts in mat1]
    #    mat5 = [sample_long(ts, 10) for ts in mat2]
    #    mat6 = [sample_long(ts, 10) for ts in mat3]

    label_onehot = ReadData.Label2OneHot(long_label)

    #    plt.plot(mat1[0])
    #    plt.plot(mat4[0])

    mat = mat3

    all_feature = np.array(mat, dtype=np.float32)
    all_label = np.array(label_onehot, dtype=np.float32)

    kf = StratifiedKFold(n_splits=5, shuffle=True)
    for train_index, test_index in kf.split(all_feature, long_label):
        train_data = all_feature[train_index]
        train_label = all_label[train_index]
        test_data = all_feature[test_index]
        test_label = all_label[test_index]
        break

    train_data = np.expand_dims(np.array(train_data, dtype=np.float32), axis=2)
    test_data = np.expand_dims(np.array(test_data, dtype=np.float32), axis=2)

    return train_data, train_label, test_data, test_label
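
truncate_long (and sample_long in the commented-out lines) are helpers defined elsewhere in the project. A minimal sketch of what truncate_long plausibly does, assuming it clips each record to a fixed length and zero-pads shorter ones:

import numpy as np

def truncate_long(ts, length):
    # hypothetical helper: keep the first `length` samples, zero-pad if shorter
    ts = np.asarray(ts, dtype=np.float32)
    if len(ts) >= length:
        return ts[:length]
    return np.pad(ts, (0, length - len(ts)), mode='constant')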
Example #6
def expand_three_part():
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')

    kf = StratifiedKFold(n_splits=5, shuffle=True)
    for train_index, other_index in kf.split(np.array(long_data),
                                             np.array(long_label)):
        train_data = np.array(long_data)[train_index]
        train_label = np.array(long_label)[train_index]
        train_pid = np.array(long_pid)[train_index]
        other_data = np.array(long_data)[other_index]
        other_label = np.array(long_label)[other_index]
        other_pid = np.array(long_pid)[other_index]

        kf_1 = StratifiedKFold(n_splits=2, shuffle=True)
        for val_index, test_index in kf_1.split(np.array(other_data),
                                                np.array(other_label)):
            val_data = np.array(other_data)[val_index]
            val_label = np.array(other_label)[val_index]
            val_pid = np.array(other_pid)[val_index]
            test_data = np.array(other_data)[test_index]
            test_label = np.array(other_label)[test_index]
            test_pid = np.array(other_pid)[test_index]

            break
        break

    train_data_out, train_label_out, train_data_pid_out = slide_and_cut(
        list(train_data), list(train_label), list(train_pid))
    val_data_out, val_label_out, val_data_pid_out = slide_and_cut(
        list(val_data), list(val_label), list(val_pid))
    test_data_out, test_label_out, test_data_pid_out = slide_and_cut(
        list(test_data), list(test_label), list(test_pid))

    print(
        len(set(list(train_pid)) & set(list(val_pid))
            & set(list(test_pid))) == 0)

    # with open('../../data1/expanded_three_part_window_6000_stride_500_6.pkl', 'wb') as fout:
    #     pickle.dump(train_data_out, fout)
    #     pickle.dump(train_label_out, fout)
    #     pickle.dump(val_data_out, fout)
    #     pickle.dump(val_label_out, fout)
    #     pickle.dump(test_data_out, fout)
    #     pickle.dump(test_label_out, fout)
    #     pickle.dump(test_data_pid_out, fout)

    ### use np.save to save larger than 4 GB data
    fout = open('../../data1/expanded_three_part_window_6000_stride_299.bin',
                'wb')
    np.save(fout, train_data_out)
    np.save(fout, train_label_out)
    np.save(fout, val_data_out)
    np.save(fout, val_label_out)
    np.save(fout, test_data_out)
    np.save(fout, test_label_out)
    np.save(fout, test_data_pid_out)
    fout.close()
    print('save done')
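
Since all arrays were written sequentially to one file handle, reading them back uses np.load on the same handle in the same order. A sketch, assuming the file written above:

import numpy as np

fin = open('../../data1/expanded_three_part_window_6000_stride_299.bin', 'rb')
# arrays come back in the order they were saved; pass allow_pickle=True
# to np.load if any of them were stored as object arrays
train_data_out = np.load(fin)
train_label_out = np.load(fin)
val_data_out = np.load(fin)
val_label_out = np.load(fin)
test_data_out = np.load(fin)
test_label_out = np.load(fin)
test_data_pid_out = np.load(fin)
fin.close()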
Example #7
def expand_all():
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')
    data_out, label_out, pid_out = slide_and_cut(long_data, long_label,
                                                 long_pid)

    ### use np.save to save larger than 4 GB data
    fout = open('../../data1/expanded_all_window_6000_stride_500.bin', 'wb')
    np.save(fout, data_out)
    np.save(fout, label_out)
    fout.close()
    print('save done')
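
slide_and_cut is defined elsewhere in the project; going by the output file names (window 6000, stride 500), a rough sketch of the sliding-window expansion it appears to perform:

def slide_and_cut(data, labels, pids, window=6000, stride=500):
    # hypothetical re-implementation: cut every long record into overlapping
    # fixed-length windows, copying its label and pid to each window
    out_data, out_labels, out_pids = [], [], []
    for ts, label, pid in zip(data, labels, pids):
        for start in range(0, len(ts) - window + 1, stride):
            out_data.append(ts[start:start + window])
            out_labels.append(label)
            out_pids.append(pid)
    return out_data, out_labels, out_pids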
Example #8
def read_data():
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')
    all_pid = np.array(long_pid)
    all_feature = np.array(long_data)
    all_label = np.array(long_label)
    print('read data done')
    data_out, label_out, pid_out = slide_and_cut(all_feature, all_label,
                                                 all_pid)

    pid_map = {}
    for i in range(len(all_pid)):
        pid_map[all_pid[i]] = i

    return data_out, label_out, pid_out, pid_map
Example #9
def read_data():
    X = ReadData.read_centerwave('../../data1/centerwave_resampled.csv')
    _, _, Y = ReadData.ReadData('../../data1/QRSinfo.csv')
    all_feature = np.array(X)
    print(all_feature.shape)
    all_label = np.array(Y)
    all_label_num = np.array(ReadData.Label2OneHot(Y))
    kf = StratifiedKFold(n_splits=5, shuffle=True)
    i_fold = 1
    print('all feature shape: {0}'.format(all_feature.shape))
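    # note: the loop below never breaks, so train/test end up holding the last fold's split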
    for train_index, test_index in kf.split(all_feature, all_label):
        train_data = all_feature[train_index]
        train_label = all_label_num[train_index]
        test_data = all_feature[test_index]
        test_label = all_label_num[test_index]
    print('read data done')
    return all_feature, all_label_num, train_data, train_label, test_data, test_label
Example #10
def read_data():
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/centerwave.csv')
    
    mat1 = [truncate_long(ts, 9000) for ts in long_data]
    mat2 = [truncate_long(ts, 6000) for ts in long_data]
    mat3 = [truncate_long(ts, 3000) for ts in long_data]
    
    mat4 = [sample_long(ts, 10) for ts in mat1]
    mat5 = [sample_long(ts, 10) for ts in mat2]
    mat6 = [sample_long(ts, 10) for ts in mat3]
    
    label_onehot = ReadData.Label2OneHot(long_label)
    
#    plt.plot(mat1[0])
#    plt.plot(mat4[0])

    mat1 = np.expand_dims(np.array(mat1), axis=2)
    label_onehot = np.array(label_onehot)
    
    return mat1, label_onehot
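
sample_long is another project helper; a minimal sketch, assuming it simply downsamples a series by keeping every k-th point:

def sample_long(ts, rate):
    # hypothetical helper: downsample by a fixed factor
    return ts[::rate]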
Example #11
def main():
    import ReadData
    filename = r'train_data.txt'
    print('......... reading data ...........')
    UsersItems, Items = ReadData.ReadData(filename)
    print('......... splitting data ...........')
    train, test = ReadData.divideData(UsersItems)

    print('............. training the recommender ............')
    flag = 0    # 0: run train/test evaluation, 1: generate final results
    near_num = 200
    top_num = 1
    hiddenStates_num = 10
    max_iter = 30
    mPLSA = CpLSA()
    if flag == 0:
        mPLSA.transformData(train)
        mPLSA.process(hiddenStates_num, max_iter)
        simUsers = mPLSA.calSimUsers(near_num)

    elif flag == 1:
        mPLSA.transformData(UsersItems)
        mPLSA.process(hiddenStates_num, max_iter)
Example #12
    def initHelper(self, dsName='airsim', subType='mr', seq=[1, 3, 5]):
        self.dsName = dsName
        self.numChannel = 3 if self.dsName != 'euroc' else 1
        self.subType = subType
        self.numDataset = len(seq)
        dataObj = [ReadData(dsName, subType, seq[i]) for i in range(0, self.numDataset)]

        # get number of data points
        self.numDataList = [dataObj[i].numData for i in range(0, self.numDataset)]
        self.numTotalData = np.sum(self.numDataList)
        self.numTotalImgData = np.sum([dataObj[i].numImgs for i in range(0, self.numDataset)])
        print(self.numDataList)
        print(self.numTotalData)

        # numeric data
        print('numeric data concat')
        self.dt = np.concatenate([dataObj[i].dt for i in range(0, self.numDataset)], axis=0)
        self.du = np.concatenate([dataObj[i].du for i in range(0, self.numDataset)], axis=0)
        self.dw = np.concatenate([dataObj[i].dw for i in range(0, self.numDataset)], axis=0)
        self.dw_gyro = np.concatenate([dataObj[i].dw_gyro for i in range(0, self.numDataset)], axis=0)
        self.dtrans = np.concatenate([dataObj[i].dtr for i in range(0, self.numDataset)], axis=0)
        self.dtr_gnd = np.concatenate([dataObj[i].dtr_gnd for i in range(0, self.numDataset)], axis=0)
        self.pos_gnd = np.concatenate([dataObj[i].pos_gnd for i in range(0, self.numDataset)], axis=0)
        self.rotM_bdy2gnd = np.concatenate([dataObj[i].rotM_bdy2gnd for i in range(0, self.numDataset)], axis=0)
        self.acc_gnd = np.concatenate([dataObj[i].acc_gnd for i in range(0, self.numDataset)], axis=0)
        print('done numeric data concat')

        # img data
        print('img data concat')
        self.numTotalImgs = sum([dataObj[i].numImgs for i in range(0, self.numDataset)])
        self.imgs = np.zeros((self.numTotalImgData, self.numChannel, 360, 720), dtype=np.float32)
        s, f = 0, 0
        for i in range(0, self.numDataset):
            temp = dataObj[i].numImgs
            f = s + temp
            self.imgs[s:f, :] = dataObj[i].imgs
            dataObj[i] = None
            s = f
        dataObj = None
        print('done img data concat')
Example #13
def read_seq():
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')

    seq_pid = []
    seq_data = []
    seq_label = []

    seq_len = 1000

    for i in range(len(long_pid)):
        ts = long_data[i]
        for j in range(len(ts) // seq_len):
            seq_data.append(ts[j * seq_len:(j + 1) * seq_len])
            seq_pid.append(long_pid[i])
            seq_label.append(long_label[i])

    long_label = seq_label
    seq_data = np.array(seq_data, dtype=np.float32)
    seq_data = normalize(seq_data, axis=0)

    seq_label = ReadData.Label2OneHot(seq_label)
    seq_label = np.array(seq_label, dtype=np.float32)

    all_feature = seq_data
    all_label = seq_label

    kf = StratifiedKFold(n_splits=5, shuffle=True)
    for train_index, test_index in kf.split(all_feature, long_label):
        train_data = all_feature[train_index]
        train_label = all_label[train_index]
        test_data = all_feature[test_index]
        test_label = all_label[test_index]
        break

    train_data = np.expand_dims(np.array(train_data, dtype=np.float32), axis=2)
    test_data = np.expand_dims(np.array(test_data, dtype=np.float32), axis=2)

    return train_data, train_label, test_data, test_label
Example #14
                c = "g"
            for j in F[i]:
                if c == 'r':
                    ax.scatter(j.obj[0], j.obj[1], j.obj[2], c=c, alpha=1)
                else:
                    ax.scatter(j.obj[0], j.obj[1], j.obj[2], c=c, alpha=alpha)
        ax.set_xlabel("Tardiness")
        ax.set_ylabel("Flow Time")
        ax.set_zlabel("Pieces")
        plt.show()


if __name__ == "__main__":
    path = './Experiment_Data1/data_1_200'
    J = []
    ReadData(path, J)
    ga = NSGA(3000, 5, Job_set=J, common_due_date=120)
    ga.run()
    pareto = ga.nondominated_sort()[0]

    input = [[] for _ in range(2)]
    output = [[]]

    for point in pareto:
        input[0].append(point.obj[0])
        input[1].append(point.obj[1])
        output[0].append(point.obj[2])
    ga.plot(0.5)
    res = DEA_analysis(input, output)
    for idx, r in enumerate(res):
        print(
Example #15
    return train_data, train_label, val_data, val_label, test_data, test_label, test_pid


if __name__ == '__main__':
    '''all_data, all_label, all_pid, pid_map = read_data()
    out_feature = get_resnet_feature(all_data, all_label, all_pid, pid_map)
    print('out_feature shape: ', out_feature.shape)
    with open('../data/feat_resnet.pkl', 'wb') as fout:
        dill.dump(out_feature, fout)
    '''
    '''
    #-----------------------------------------------test--------------------------------------------
    '''
    train_data, train_label, val_data, val_label, test_data, test_label, test_pid = read_data_from_pkl()
    long_pid, long_data, long_label = ReadData.ReadData('../../data1/long.csv')
    new_test = []
    new_pid = []
    new_label = []
    for j in range(len(long_pid)):
        for i in range(len(test_pid)):
            if long_pid[j] == test_pid[i]:
                new_test.append(long_data[j])
                new_pid.append(long_pid[j])
                new_label.append(long_label[j])

    out_label = ReadData.Label2OneHot(new_label)
    out_label = np.array(out_label, dtype=np.float32)

    new_test = np.array(new_test)
    new_pid = np.array(new_pid)
Example #16
__author__ = 'saghar hosseini ([email protected])'
import numpy as np
from ReadData import *
from projection import *
##########################################################################################
#                              Load Data
##########################################################################################
# path="C:/Users/sagha_000/Documents/SVN/My_SVN/TimeVaryingSocialNetworks/datasets/as-733/"
path = "F:/Saghar_SVN/TimeVaryingSocialNetworks/datasets/twitter-pol-dataset/graphs/"
dataset = ReadData(path)
edges = dataset.read_network_snapshot(1, hasHeader=True)
nodes_list = set(edges.keys())
output_path = 'F:/Saghar_SVN/TimeVaryingSocialNetworks/datasets/twitter-pol-dataset/Results/wo_OPD/'
############################################################################################
#                               Define Parameters
############################################################################################
numberOfSnapshots = 1175
numCommunity = 10
mu = 0.1
lambdah_C = 0.0
lambdah_B = 0.0
sampleFraction = 0.25
n = len(nodes_list)
K_B = 1.0
K_C = 1.0
#############################################################################################
#variables
learning_rate_C = {}
initial_state = dict()
state = dict()
visit = {}
Example #17
    ### pred
    ### alert: encase is naop, lr is anop
    pred_proba_ENCASE = clf_ENCASE.predict_prob(feature_ENCASE)[0]
    pred_proba_mimic = get_mimic_proba(long_data[0])
    pred_final = 1 / 2 * pred_proba_ENCASE + 1 / 2 * pred_proba_mimic
    print('{0}\n{1}\n{2}'.format(pred_proba_ENCASE, pred_proba_mimic,
                                 pred_final))
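    # argsort(pred_final)[-1] picks the index of the highest averaged probability (i.e. argmax)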

    pred_label = labels[np.argsort(pred_final)[-1]]

    return pred_label


if __name__ == '__main__':
    short_pid, short_data, short_label = ReadData.ReadData(
        '../../data_val/short.csv')
    long_pid, long_data, long_label = ReadData.ReadData(
        '../../data_val/long.csv')
    QRS_pid, QRS_data, QRS_label = ReadData.ReadData(
        '../../data_val/QRSinfo.csv')
    print('=' * 60)
    print('pred begin')

    # short_data = short_data[:100]
    # long_data = long_data[:3]
    # QRS_data = QRS_data[:3]
    # long_pid = long_pid[:3]
    # short_pid = short_pid[:100]

    with open('../model/v2.5_xgb5_all.pkl', 'rb') as fin:
        clf_ENCASE = dill.load(fin)
Example #18
        row.extend(CDF(ts))
        row.extend(CoeffOfVariation(ts))
        row.extend(MAD(ts))
        row.extend(QRSBasicStat(ts))
        row.extend(QRSBasicStatPointMedian(ts))
        row.extend(QRSBasicStatDeltaRR(ts))
        row.extend(QRSYuxi(ts))
        row.extend(Variability(ts))
        row.extend(minimum_ncce(ts))
        row.extend(bin_stat(ts))
        row.extend(qrs_autocorr(ts))

        ### no

        features.append(row)

        step += 1
        if step % 1000 == 0:
            print('extracting ...', step)
            # break

    print('extract QRS DONE')

    return feature_list, features


if __name__ == '__main__':
    QRS_pid, QRS_data, QRS_label = ReadData.ReadData('../../data1/QRSinfo.csv')
    tmp_features = get_qrs_feature(QRS_data[:1])
    print(len(tmp_features[1][0]))
Example #19

    features = []
    step = 0
    for ts in table:
        row = []

        row.extend(short_basic_stat(ts))
#        row.extend(short_zero_crossing(ts))
        
        features.append(row)
        
        step += 1
        if step % 100000 == 0:
            print('extracting ...')
#            break
        
    print('extract DONE')
    
    return feature_list, features



if __name__ == '__main__':
    short_pid, short_data, short_label = ReadData.ReadData('../../data1/short.csv')
    QRS_pid, QRS_data, QRS_label = ReadData.ReadData('../../data1/QRSinfo.csv')
    tmp_features = get_short_stat_wave_feature(short_data[:10], short_pid[:10], QRS_pid[0])
    print(len(tmp_features[1][0]))
    
    
    
Example #20
import dill
import ReadData  # needed for the ReadData.ReadData calls below
import features_all
##import challenge_encase_mimic

##############
#### load classifier
###############
#with open('model/v2.5_xgb5_all.pkl', 'rb') as my_in:
#    clf_final = dill.load(my_in)

##############
#### read and extract
###############
## short_pid1-12 and short_label1-12 are the same

long_pid0, long_data0, long_label0 = ReadData.ReadData('../data1/long0.csv')
long_pid1, long_data1, long_label1 = ReadData.ReadData('../data1/long1.csv')
long_pid2, long_data2, long_label2 = ReadData.ReadData('../data1/long2.csv')
long_pid3, long_data3, long_label3 = ReadData.ReadData('../data1/long3.csv')
long_pid4, long_data4, long_label4 = ReadData.ReadData('../data1/long4.csv')
long_pid5, long_data5, long_label5 = ReadData.ReadData('../data1/long5.csv')
long_pid6, long_data6, long_label6 = ReadData.ReadData('../data1/long6.csv')
long_pid7, long_data7, long_label7 = ReadData.ReadData('../data1/long7.csv')
long_pid8, long_data8, long_label8 = ReadData.ReadData('../data1/long8.csv')
long_pid9, long_data9, long_label9 = ReadData.ReadData('../data1/long9.csv')
long_pid10, long_data10, long_label10 = ReadData.ReadData(
    '../data1/long10.csv')
long_pid11, long_data11, long_label11 = ReadData.ReadData(
    '../data1/long11.csv')

short_pid0, short_data0, short_label0 = ReadData.ReadData(
Example #21
        estimate = np.rint(prob.item(0))
        estimate_avg = np.rint(prob_avg.item(0))
        if (i in edges.keys() and j in edges[i]) or j == i:
            error += abs(1.0 - estimate)
            error_avg += abs(1.0 - estimate_avg)
        else:
            error += abs(0.0 - estimate)
            error_avg += abs(0.0 - estimate_avg)
    return error, error_avg


#########################################################################################
if __name__ == '__main__':
    path = "F:/Saghar_SVN/TimeVaryingSocialNetworks/datasets/as-733/"
    # path="C:/Users/sagha_000/Documents/SVN/My_SVN/TimeVaryingSocialNetworks/datasets/as-733/"
    Config.dataset = ReadData(path)
    numberOfSnapshots = Config.numberOfSnapshots
    numCommunity = Config.numCommunity
    error = [0.0] * numberOfSnapshots
    error_avg = error[:]
    error_file = path + 'error_output.csv'
    test_array = []
    num_cores = multiprocessing.cpu_count()
    for t in range(numberOfSnapshots):
        Config.edges = Config.dataset.read_network_snapshot(t)
        filename = path + 'state_output' + str(t) + '.csv'
        Config.state = read_in_states(filename, has_header=True)
        Config.nodes_list = set(Config.edges.keys())
        Config.nodes_list.union(Config.state.keys())
        #make it parallel
        for i in Config.state.keys():
Example #22
def train(train_dir=None, val_dir=None, mode='train'):
    if FLAGS.model == 'lstm':
        model = cnn_lstm_otc_ocr.LSTMOCR(mode)
    else:
        print("no such model")
        sys.exit()

    # build the computation graph
    model.build_graph()
    #########################read  train   data###############################
    print('loading train data, please wait---------------------')
    train_feeder = utils.DataIterator(data_dir=FLAGS.train_dir, istrain=True)
    print('get image  data size: ', train_feeder.size)
    filename = train_feeder.image
    label = train_feeder.labels
    print(len(filename))
    train_data = ReadData.ReadData(filename, label)
    ##################################read  test   data######################################
    print('loading validation data, please wait---------------------')
    val_feeder = utils.DataIterator(data_dir=FLAGS.val_dir, istrain=False)
    filename1 = val_feeder.image
    label1 = val_feeder.labels
    test_data = ReadData.ReadData(filename1, label1)
    print('val get image: ', val_feeder.size)
    ################## compute the number of batches
    num_train_samples = train_feeder.size
    num_batches_per_epoch = int(num_train_samples /
                                FLAGS.batch_size)  # batches per epoch on the training set
    num_val_samples = val_feeder.size
    num_batches_per_epoch_val = int(num_val_samples /
                                    FLAGS.batch_size)  # batches per epoch on the validation set
    ###########################data################################################

    with tf.device('/cpu:0'):

        config = tf.ConfigProto(allow_soft_placement=True)

        #######################read  data###################################

        with tf.Session(config=config) as sess:
            # initialize the data iterators
            train_data.init_itetator(sess)
            test_data.init_itetator(sess)
            train_data = train_data.get_nex_batch()
            test_data = test_data.get_nex_batch()
            # initialize global variables
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=100)  # saver for model checkpoints
            train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train',
                                                 sess.graph)

            # restore a pre-trained model if requested
            if FLAGS.restore:
                ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
                if ckpt:
                    # the global_step will restore as well
                    saver.restore(sess, ckpt)
                    print('restore from the checkpoint{0}'.format(ckpt))
                else:
                    print("No checkpoint")

            print(
                '=============================begin training============================='
            )
            accuracy_res = []
            epoch_res = []
            tmp_max = 0
            tmp_epoch = 0

            for cur_epoch in range(FLAGS.num_epochs):

                train_cost = 0
                batch_time = time.time()
                for cur_batch in range(num_batches_per_epoch):
                    # get this batch's data ##############################
                    #read_data_start = time.time()
                    batch_inputs, batch_labels = sess.run(train_data)
                    #print('read data timr',time.time()-read_data_start)
                    process_data_start = time.time()
                    #print('233333333333333',type(batch_labels))

                    new_batch_labels = utils.sparse_tuple_from_label(
                        batch_labels.tolist())  # ok
                    batch_seq_len = np.asarray(
                        [FLAGS.max_stepsize for _ in batch_inputs],
                        dtype=np.int64)
                    #print('process data timr', time.time() - process_data_start)

                    #train_data_start = time.time()
                    #print('2444444',batch_inputs.shape())
                    feed = {
                        model.inputs: batch_inputs,
                        model.labels: new_batch_labels,
                        model.seq_len: batch_seq_len
                    }
                    # if summary is needed
                    # batch_cost,step,train_summary,_ = sess.run([cost,global_step,merged_summay,optimizer],feed)

                    summary_str, batch_cost, step, _ = \
                        sess.run([model.merged_summay, model.cost, model.global_step,
                                  model.train_op], feed)

                    # calculate the cost
                    train_cost += batch_cost * FLAGS.batch_size
                    #print  train_cost
                    #train_writer.add_summary(summary_str, step)
                    #print('train data timr', time.time() - train_data_start)
                    # save the checkpoint
                    if step % FLAGS.save_steps == 1:
                        if not os.path.isdir(FLAGS.checkpoint_dir):
                            os.mkdir(FLAGS.checkpoint_dir)
                        logger.info('save the checkpoint of {0}'.format(step))
                        saver.save(sess,
                                   os.path.join(FLAGS.checkpoint_dir,
                                                'ocr-model'),
                                   global_step=step)
                    if (cur_batch) % 100 == 1:
                        print('batch', cur_batch, ': time',
                              time.time() - batch_time, 'loss', batch_cost)
                        batch_time = time.time()
                    # train_err += the_err * FLAGS.batch_size
                    # do validation
                    if step % FLAGS.validation_steps == 0:
                        validation_start_time = time.time()
                        acc_batch_total = 0
                        lastbatch_err = 0
                        lr = 0
                        for j in range(num_batches_per_epoch_val):
                            batch_inputs, batch_labels = sess.run(test_data)
                            new_batch_labels = utils.sparse_tuple_from_label(
                                batch_labels.tolist())  # ok
                            batch_seq_len = np.asarray(
                                [FLAGS.max_stepsize for _ in batch_inputs],
                                dtype=np.int64)
                            val_feed = {
                                model.inputs: batch_inputs,
                                model.labels: new_batch_labels,
                                model.seq_len: batch_seq_len
                            }


                            dense_decoded, lr = \
                                sess.run([model.dense_decoded, model.lrn_rate],
                                         val_feed)

                            acc = utils.accuracy_calculation(
                                batch_labels.tolist(),
                                dense_decoded,
                                ignore_value=-1,
                                isPrint=True)
                            acc_batch_total += acc
                        accuracy = (acc_batch_total *
                                    FLAGS.batch_size) / num_val_samples
                        accuracy_res.append(accuracy)
                        epoch_res.append(cur_epoch)
                        if accuracy > tmp_max:
                            tmp_max = accuracy
                            tmp_epoch = cur_epoch
                        avg_train_cost = train_cost / (
                            (cur_batch + 1) * FLAGS.batch_size)

                        # train_err /= num_train_samples
                        now = datetime.datetime.now()
                        log = "{}/{} {}:{}:{} Epoch {}/{}, " \
                              "max_accuracy = {:.3f},max_Epoch {},accuracy = {:.3f},acc_batch_total = {:.3f},avg_train_cost = {:.3f}, " \
                              " time = {:.3f},lr={:.8f}"

                        print(
                            log.format(now.month, now.day, now.hour,
                                       now.minute, now.second, cur_epoch + 1,
                                       FLAGS.num_epochs, tmp_max, tmp_epoch,
                                       accuracy, acc_batch_total,
                                       avg_train_cost,
                                       time.time() - validation_start_time,
                                       lr))
Example #23
        F1_list.append(F1_test)
        wrong_stat.extend(MyEval.WrongStat(i_fold, pred, test_label, test_pid))
        i_fold += 1

        clf_final_list.append(clf_final)

    avg_f1 = np.mean(F1_list)
    print('\n\nAvg F1: ', avg_f1)
    fout.write(str(avg_f1) + '=============================\n')
    wrong_stat = pd.DataFrame(wrong_stat,
                              columns=['i_fold', 'pid', 'gt', 'pred'])
    # wrong_stat.to_csv('../../stat/wrong_stat_f1'+str(np.mean(F1_list))+'.csv')


if __name__ == "__main__":

    all_feature = ReadData.read_centerwave('../../data1/centerwave_raw.csv')
    all_pid, _, all_label = ReadData.ReadData('../../data1/QRSinfo.csv')
    # print(sorted([len(i) for i in all_feature])[:100])
    all_feature = [np.array(i) for i in all_feature]

    # all_pid = all_pid[:5]
    # all_label = all_label[:5]
    # all_feature = all_feature[:5]

    print('read data done')
    fout = open('../../logs/knn', 'w')
    for i in range(100):
        TestKNN(all_pid, all_feature, all_label, fout)
    fout.close()
Example #24
        clf_ENCASE = dill.load(fin)

    ### extract features
    feature_ENCASE = GetAllFeature_test(short_data, long_data, QRS_data,
                                        long_pid, short_pid)
    feature_ENCASE[0][-1] = feature_ENCASE[0][-1] + 0.0000001

    ### pred
    ### alert: encase is naop, lr is anop
    pred_proba_ENCASE = clf_ENCASE.predict_prob(feature_ENCASE)[0]
    pred_proba_mimic = get_mimic_proba(long_data[0])
    pred_final = 1 / 2 * pred_proba_ENCASE + 1 / 2 * pred_proba_mimic
    print('{0}\n{1}\n{2}'.format(pred_proba_ENCASE, pred_proba_mimic,
                                 pred_final))

    pred_label = labels[np.argsort(pred_final)[-1]]

    return pred_label


if __name__ == '__main__':
    short_pid, short_data, short_label = ReadData.ReadData('data1/short.csv')
    long_pid, long_data, long_label = ReadData.ReadData('data1/long.csv')
    QRS_pid, QRS_data, QRS_label = ReadData.ReadData('data1/QRSinfo.csv')
    print('=' * 60)
    print('pred begin')

    res = pred_one_sample(short_data[0:40], long_data[0:1], QRS_data[0:1],
                          long_pid[0:1], short_pid[0:40])
    print('pred done, the label of {0} is {1}'.format(long_pid[0], res))
Example #25
import ReadData
import Classifier
import GenTrainingData
import GenTestData

train_acc = []
test_acc = []
cm = []

for i in range(10):
    segsamplesize = 8
    firstk = 20
    secondk = 6
    numrdsamples = firstk * 150
    testtrainsplit = 0.2

    ##Read Files
    rd = ReadData.ReadData(testtrainsplit)
    rd.ReadFiles()
    rd.SeparateData()

    ##Generate Training data
    #Usage Argument : Training data, segment size, Outer k means, Inner K means, Random sample size for Outer K means
    trvq = GenTrainingData.GenTrainingData(rd.X_train, segsamplesize, firstk,
                                           secondk, numrdsamples)
    trvq.GenClusterData()
    trvq.GenClusters()
    trvq.VectorQuantization()

    ##Generate Training data
    #Usage Argument : Training data, firstcentroids, secondcentroids, segment size, Outer k means, Inner K means

    tevq = GenTestData.GenTestData(rd.X_test, trvq.firstcentroids,
Example #26
import numpy as np
import ReadData
import Sequence
from sklearn.metrics import roc_auc_score
import Distance


#name="human_muscle"
#name="fly_blastoderm"
name="human_HBB"

## get the dataset: the whole set, the positive set, and the negative set are all raw sequences without labels
#datasets,pos,neg=rd.getData2("fly_blastoderm")
## get kmer data for k from 2 to 6
print("---------", name, "------------")
rd = ReadData.ReadData()

datasets, pos, neg = rd.getData2(name)
## number of sequences in the positive set
possize = len(pos)
## concatenate the two lists
datasets = pos + neg


sq = Sequence.Sequence()

flag = True
## get the kmer set of the whole dataset as a dict
## get the dictionary sets
d2set, d2dic = sq.getSeqKerSet(datasets, 2)
Example #27
            ans[i, 0] = A[i, 0] / B[i, i]
    return ans


def K_effective(newmark, mass, stiffness):
    b1 = newmark.b1
    K = b1 * mass + stiffness
    return K


dt = 0.01
clock = Clock(dt, [0, 2])
newmark = Newmark(dt)
assembly = Assembly(newmark)

ReadData(assembly, '1FL.xlsx')
neq = assembly.eq_number
output_check(assembly, 'formal_1FL_check.txt')

with open('formal_1FL.txt', 'w') as p:
    p.write('%8s %7s %12s' % ('time', 'iter', 'ux_top'))

tol = 1e-4
# f = TimeSeries(0, 1, [0, 100, 100])
seismic = assembly.seismics[0]
# ef = dok_matrix([[f.at(clock.current_time)/2], [0], [0], [0], [0], [0]])

while not clock.is_end:
    ef = LM.force_ex(assembly, seismic.at(clock.current_time))
    P = assembly.get_internal_force('d', 'pos_origin')
    a = assembly.get_dof('a')