def get_file_symbol_info(self,set_of_files):
    '''
    Count the symbols found in a collection of inkml files.

    Input:
    set_of_files : iterable of inkml file paths; each one is loaded with
                   loadData().loadInkml and its symbols are read with
                   loadData().get_symbol

    Returns a tuple (dict_sym_to_file, count_sym, total_count_sym):
    dict_sym_to_file : symbol class -> list of file names, one entry per
                       occurrence (a file holding the same class several
                       times is listed several times)
    count_sym        : symbol class -> number of occurrences
    total_count_sym  : number of symbols over all files
    '''
    load_obj = loadData()
    dict_sym_to_file = {}
    count_sym = {}
    total_count_sym = 0
    for fileName in set_of_files:
        root_obj, trace_obj_dict = load_obj.loadInkml(fileName)
        for sym_obj in load_obj.get_symbol(root_obj, trace_obj_dict):
            sym_id = sym_obj.symbol_class
            # setdefault/get replace the duplicated "first time seen" branches
            dict_sym_to_file.setdefault(sym_id, []).append(fileName)
            count_sym[sym_id] = count_sym.get(sym_id, 0) + 1
            total_count_sym += 1

    return dict_sym_to_file, count_sym, total_count_sym
Пример #2
0
    def extract_data_without_pca_relationship(self, file_set,
                                              file_path_till_Traininkml):
        '''
        Collect one feature vector and one class label per symbol from a set
        of inkml files.

        Input:
        file_set : iterable of inkml file paths; surrounding single quotes are
                   stripped and the prefix
                   /home/sbp3624/PatternRecog/TrainINKML_v3/ is replaced by
                   file_path_till_Traininkml
        file_path_till_Traininkml : directory prefix substituted into each path

        Returns:
        (X_train, y_train) : parallel lists holding symbol.get_features() and
                             symbol.symbol_class for every symbol
        '''
        load_obj = loadData()
        X_train = []
        y_train = []
        for count_fName, fileName in enumerate(file_set, 1):
            # Format inside the call: the old `print("...") % (n)` only worked
            # as a Python 2 print statement and raises TypeError once print
            # is a function.
            print("File No=%d " % count_fName)
            fileName = fileName.strip("\'")
            fileName = fileName.replace(
                '/home/sbp3624/PatternRecog/TrainINKML_v3/',
                file_path_till_Traininkml)
            root_obj, trace_obj_dict = load_obj.loadInkml(fileName)

            for symbol in load_obj.get_symbol(root_obj, trace_obj_dict):
                X_train.append(symbol.get_features())
                y_train.append(symbol.symbol_class)

        return X_train, y_train
    def get_file_symbol_info(self, set_of_files):
        '''
        Count the symbols found in a collection of inkml files.

        Input:
        set_of_files : iterable of inkml file paths; each one is loaded with
                       loadData().loadInkml and its symbols are read with
                       loadData().get_symbol

        Returns a tuple (dict_sym_to_file, count_sym, total_count_sym):
        dict_sym_to_file : symbol class -> list of file names, one entry per
                           occurrence (a file holding the same class several
                           times is listed several times)
        count_sym        : symbol class -> number of occurrences
        total_count_sym  : number of symbols over all files
        '''
        load_obj = loadData()
        dict_sym_to_file = {}
        count_sym = {}
        total_count_sym = 0
        for fileName in set_of_files:
            root_obj, trace_obj_dict = load_obj.loadInkml(fileName)
            for sym_obj in load_obj.get_symbol(root_obj, trace_obj_dict):
                sym_id = sym_obj.symbol_class
                # setdefault/get replace the duplicated "first time seen" branches
                dict_sym_to_file.setdefault(sym_id, []).append(fileName)
                count_sym[sym_id] = count_sym.get(sym_id, 0) + 1
                total_count_sym += 1

        return dict_sym_to_file, count_sym, total_count_sym
 def extract_data_without_pca_relationship(self,file_set,file_path_till_Traininkml):
     '''
     Collect one feature vector and one class label per symbol from a set
     of inkml files.

     Input:
     file_set : iterable of inkml file paths; surrounding single quotes are
                stripped and the prefix
                /home/sbp3624/PatternRecog/TrainINKML_v3/ is replaced by
                file_path_till_Traininkml
     file_path_till_Traininkml : directory prefix substituted into each path

     Returns:
     (X_train, y_train) : parallel lists holding symbol.get_features() and
                          symbol.symbol_class for every symbol
     '''
     load_obj = loadData()
     X_train = []
     y_train = []
     for count_fName, fileName in enumerate(file_set, 1):
         # Format inside the call: the old `print ("...") % (n)` only worked
         # as a Python 2 print statement and raises TypeError once print is
         # a function.
         print("File No=%d " % count_fName)
         fileName = fileName.strip("\'")
         fileName = fileName.replace('/home/sbp3624/PatternRecog/TrainINKML_v3/', file_path_till_Traininkml)
         root_obj, trace_obj_dict = load_obj.loadInkml(fileName)

         for symbol in load_obj.get_symbol(root_obj, trace_obj_dict):
             X_train.append(symbol.get_features())
             y_train.append(symbol.symbol_class)

     return X_train, y_train
def main():
    maxProcesses = multiprocessing.cpu_count()
    lamb = 0
    lambdaMaxValue = 1
    lambdaUpdateStepSize = 0.05
    rho = .0001
    dataSize = 10
    type = 1
    (trainingSet, x_train, y_train, x_test, y_test, trainSize, testSize,
     sizeOptimizationVariable) = loadData(dataSize, type)
    G1 = generateGraph(60)
    nodes = G1.number_of_nodes()
    edges = G1.number_of_edges()
    print "Number of Nodes = ", nodes, " , Number of Edges = ", edges
    print "Diameter is ", nx.diameter(G1)
    # Initialize ADMM variables
    (A, sqn, sqp, x, u, z) = initializeADMM(G1, sizeOptimizationVariable)
    plot1 = list()
    plot2 = list()
    plot3 = list()
    while (lamb <= lambdaMaxValue or lamb == 0):
        print "For lambda = ", lamb
        start_time = time.time()
        (x, u, z,
         localVariables) = runADMM(G1, lamb, rho + math.sqrt(lamb), x, u, z,
                                   trainingSet, A, sqn, sqp, maxProcesses,
                                   sizeOptimizationVariable, trainSize,
                                   testSize, x_train, y_train, x_test, y_test)
        print("ADMM finished in %s seconds" % (time.time() - start_time))
        (parameters, trainRMSE) = RRLayer(x, x_train, y_train[:,
                                                              0].transpose(),
                                          sizeOptimizationVariable - 1)
        testRMSE = getAccuracy(x, parameters, x_test, y_test[:, 0].transpose(),
                               sizeOptimizationVariable - 1)
        plot1.append(trainRMSE)
        plot2.append(testRMSE)
        plot3.append(lamb)
        print "trainRMSE =", trainRMSE, "testRMSE =", testRMSE
        showImageMatrix(x, False, False, '', 'x matrix')
        showImageMatrix(u, False, False, '', 'u matrix')
        showImageMatrix(z, False, False, '', 'z matrix')
        showclusterPerformance(
            x, False, True, 'train:' + str(trainRMSE) + ' test: ' +
            str(testRMSE) + ' lamda: ' + str(lamb),
            "EXP/norm/" + str(lamb) + ".png")
        if (lamb == 0):
            lamb = 0.01
        else:
            lamb = lamb + lambdaUpdateStepSize
    showOverallAccuracy(plot1, plot2, plot3, False, True)
    def classification(self,file_path_till_Traininkml,classifier_obj,str_opt):
        '''
        Predict a class for every symbol of the files in one split and write
        the predictions through FileWrite.write_to_lg.
        Input:
        file_path_till_Traininkml : Path of inkml files
        classifier_obj : Classifier object; its predict() is called once per file
        str_opt: Test or Train - prefix of the header line in split_files.txt
                 whose following line holds the file list
        '''
        load_obj = loadData()
        lg_folder_name = "classification_" + str_opt
        file_write_obj = FileWrite(lg_folder_name)

        # The file list is the line right after the header starting with str_opt.
        with open('split_files.txt', 'r') as split_fh:
            header_seen = False
            for row in split_fh:
                if not header_seen:
                    header_seen = row.startswith(str_opt)
                    continue
                list_files = row.strip("Set([").strip("])\n").split(', ')
                break

        print('Classification started')
        for count, quoted_name in enumerate(list_files, 1):
            fileName = quoted_name.strip("'")
            print("count= %d" % count)
            fileName = fileName.replace("/home/sbp3624/PatternRecog/TrainINKML_v3/", file_path_till_Traininkml)
            print(fileName + "\n")
            root_obj, trace_obj_dict = load_obj.loadInkml(fileName)
            symbol_list = load_obj.get_symbol(root_obj, trace_obj_dict)

            # one feature row per symbol, plus the total number of traces
            X_test = []
            count_traces = 0
            for symbol in symbol_list:
                X_test.append(symbol.get_features())
                count_traces += len(symbol.symbol_list)
            predict_labels = classifier_obj.predict(np.asarray(X_test))
            # Write this to lg file.
            file_write_obj.write_to_lg(predict_labels, fileName, symbol_list, count_traces, lg_folder_name)
    def sym_segmentation(self,classifier_obj,file_path_till_traininkml,str_opt,rel_classifier_obj):
        '''
        Hand the traces of every inkml file of one split to
        MinimumSpanningTree.get_segmentation.
        Input
        classifier_obj - Classifier pretrained model
        file_path_till_traininkml - path to inkml file
        str_opt - Train or Test; prefix of the header line in split_files.txt
                  whose following line holds the file list
        rel_classifier_obj - Relationship classifier pretrained model.
        '''
        load_obj = loadData()
        m = MinimumSpanningTree()
        symbol_obj = Symbol()
        lg_folder_name = "parsing_" + str_opt
        file_write_obj = FileWrite(lg_folder_name)

        # The file list is the line right after the header starting with str_opt.
        with open('split_files.txt', 'r') as split_fh:
            header_seen = False
            for row in split_fh:
                if not header_seen:
                    header_seen = row.startswith(str_opt)
                    continue
                list_files = row.strip("Set([").strip("])\n").split(', ')
                break

        for count, quoted_name in enumerate(list_files, 1):
            fileName = quoted_name.strip("'")
            print("count= %d" % count)
            fileName = fileName.replace("/home/sbp3624/PatternRecog/TrainINKML_v3/", file_path_till_traininkml)
            root_obj, trace_obj_dict = load_obj.loadInkml(fileName)
            m.get_segmentation(trace_obj_dict, classifier_obj, symbol_obj, file_write_obj, fileName, lg_folder_name, rel_classifier_obj)
Пример #8
0
    def sym_segmentation(self, classifier_obj, file_path_till_traininkml,
                         str_opt, rel_classifier_obj):
        '''
        Hand the traces of every inkml file of one split to
        MinimumSpanningTree.get_segmentation.
        Input
        classifier_obj - Classifier pretrained model
        file_path_till_traininkml - path to inkml file
        str_opt - Train or Test; prefix of the header line in split_files.txt
                  whose following line holds the file list
        rel_classifier_obj - Relationship classifier pretrained model.
        '''
        load_obj = loadData()
        m = MinimumSpanningTree()
        symbol_obj = Symbol()
        lg_folder_name = "parsing_" + str_opt
        file_write_obj = FileWrite(lg_folder_name)

        found_header = False
        with open('split_files.txt', 'r') as split_file:
            for text in split_file:
                if found_header:
                    # the line after the header carries the whole file list
                    list_files = text.strip("Set([").strip("])\n").split(', ')
                    break
                found_header = text.startswith(str_opt)

        position = 0
        for entry in list_files:
            position += 1
            print("count= %d" % position)
            fileName = entry.strip("'").replace(
                "/home/sbp3624/PatternRecog/TrainINKML_v3/",
                file_path_till_traininkml)
            root_obj, trace_obj_dict = load_obj.loadInkml(fileName)
            m.get_segmentation(trace_obj_dict, classifier_obj, symbol_obj,
                               file_write_obj, fileName, lg_folder_name,
                               rel_classifier_obj)
Пример #9
0
    def sym_parsing(self, rel_classifier_obj, file_path_till_traininkml,
                    str_opt):
        '''
        Parse the symbols of every inkml file of one split and write the
        result with self.write_to_lg.

        For each file the symbols are loaded, Graph.LineOfSight builds the
        adjacency data, self.get_parse_layout turns it into the
        relation-to-symbols mapping (first element of its returned tuple) and
        that mapping is written to the "parsing_" + str_opt folder.
        '''
        load_obj = loadData()
        e = Graph()
        symbol_obj = Symbol()
        lg_folder_name = "parsing_" + str_opt
        file_write_obj = FileWrite(lg_folder_name)

        found_header = False
        with open('split_files.txt', 'r') as split_file:
            for text in split_file:
                if found_header:
                    # the line after the header carries the whole file list
                    list_files = text.strip("Set([").strip("])\n").split(', ')
                    break
                found_header = text.startswith(str_opt)

        position = 0
        for entry in list_files:
            position += 1
            print("count= %d" % position)
            fileName = entry.strip("'").replace(
                "/home/sbp3624/PatternRecog/TrainINKML_v3/",
                file_path_till_traininkml)
            root_obj, trace_obj_dict = load_obj.loadInkml(fileName)
            symbols = load_obj.get_symbol(root_obj, trace_obj_dict)
            line_of_sight = e.LineOfSight(symbols, rel_classifier_obj)
            adj_matrix, dict_mapping_Symbol_index, index_to_symbol = line_of_sight
            layout = self.get_parse_layout(adj_matrix,
                                           dict_mapping_Symbol_index,
                                           index_to_symbol,
                                           rel_classifier_obj)
            # get_parse_layout returns a tuple; only its first item is written
            self.write_to_lg(fileName, symbols, layout[0], lg_folder_name)
    def sym_parsing(self,rel_classifier_obj,file_path_till_traininkml,str_opt):
        '''
        Parse the symbols of every inkml file of one split and write the
        result with self.write_to_lg.

        For each file the symbols are loaded, Graph.LineOfSight builds the
        adjacency data, self.get_parse_layout turns it into the
        relation-to-symbols mapping (first element of its returned tuple) and
        that mapping is written to the "parsing_" + str_opt folder.
        '''
        load_obj = loadData()
        e = Graph()
        symbol_obj = Symbol()
        lg_folder_name = "parsing_" + str_opt
        file_write_obj = FileWrite(lg_folder_name)

        # The file list is the line right after the header starting with str_opt.
        with open('split_files.txt', 'r') as split_fh:
            header_seen = False
            for row in split_fh:
                if not header_seen:
                    header_seen = row.startswith(str_opt)
                    continue
                list_files = row.strip("Set([").strip("])\n").split(', ')
                break

        for count, quoted_name in enumerate(list_files, 1):
            fileName = quoted_name.strip("'")
            print("count= %d" % count)
            fileName = fileName.replace("/home/sbp3624/PatternRecog/TrainINKML_v3/", file_path_till_traininkml)
            root_obj, trace_obj_dict = load_obj.loadInkml(fileName)
            symbols = load_obj.get_symbol(root_obj, trace_obj_dict)
            adj_matrix, dict_mapping_Symbol_index, index_to_symbol = e.LineOfSight(symbols, rel_classifier_obj)
            parse_result = self.get_parse_layout(adj_matrix, dict_mapping_Symbol_index, index_to_symbol, rel_classifier_obj)
            # the function returns a tuple; its first item is the mapping
            dict_map_rel_to_syms = parse_result[0]
            self.write_to_lg(fileName, symbols, dict_map_rel_to_syms, lg_folder_name)
def main(argv):
    '''
    Shuffle the word data set and dump train / validation / test splits.

    argv[0] : xml path handed to loadData_word as its second argument
    argv[1] : data location handed to loadData_word as its first argument
    argv[2] : number of training samples
    argv[3] : number of validation samples; whatever remains is the test set

    Writes the files dict_data, training_data, validation_data, testing_data
    and chars_data into the current directory with cp.dump
    (presumably pickle/cPickle imported as cp - confirm at the file top).
    '''
    load_obj = loadData()
    file_xml_path = argv[0]
    file_location = argv[1]
    no_of_train = int(argv[2])
    no_of_val = int(argv[3])
    dict_data, chars, list_data = load_obj.loadData_word(
        file_location, file_xml_path)

    # Draw ids one at a time without replacement; list_data is emptied.
    list_data_random = []
    dict_data_random = {}
    while len(list_data) > 0:
        img_id = list_data[np.random.randint(len(list_data))]
        list_data_random.append(img_id)
        dict_data_random[img_id] = dict_data[img_id]
        list_data.remove(img_id)

    train_data = list_data_random[:no_of_train]
    val_data = list_data_random[no_of_train:no_of_train + no_of_val]
    test_data = list_data_random[no_of_train + no_of_val:]
    # Format inside the call so the lines work with print as a statement
    # (Python 2) and as a function.
    print('Total no of training samples created =%d' % len(train_data))
    print('Total no of validation samples created =%d' % len(val_data))
    print('Total no of testing samples created =%d' % len(test_data))

    outputs = (
        ('dict_data', dict_data_random),
        ('training_data', train_data),
        ('validation_data', val_data),
        ('testing_data', test_data),
        ('chars_data', chars),
    )
    for out_name, payload in outputs:
        # `with` closes each file even when a dump fails
        with open(out_name, 'wb') as out_file:
            cp.dump(payload, out_file)
    print('Finished Creating data')
Пример #12
0
def ATT():
    """Run intentional_attack and random_attack on freshly loaded data and plot both results."""
    intentional_aspl, intentional_lgsub = intentional_attack(loadData())
    random_aspl, random_lgsub = random_attack(loadData())
    draw_attect(intentional_aspl, intentional_lgsub, random_aspl,
                random_lgsub)
Пример #13
0
def Core():
    """Show the result of coreness() for the loaded data."""
    showMessage('Coreness:', coreness(loadData()))
Пример #14
0
def Clust():
    """Show the result of clustering() for the loaded data."""
    showMessage('clustering', clustering(loadData()))
Пример #15
0
def ASPl():
    """Show the result of Aspl() for the loaded data."""
    showMessage('Average Shortest Path Longth', Aspl(loadData()))
Пример #16
0
def distrib():
    """Show the result of distribution() for the loaded data."""
    showMessage('distribution', distribution(loadData()))
Пример #17
0
def degree():
    """Call showdegree on the loaded data, then report the three values returned by showother."""
    showdegree(loadData())
    node_total, edge_total, mean_degree = showother(loadData())
    showMessage('information', 'nodeNum:', node_total, 'edgeNum:',
                edge_total, 'averDegree:', mean_degree)
Пример #18
0
def drawG():
    """Draw the graph returned by drawgraph() for the loaded data and display it."""
    graph = drawgraph(loadData())
    nx.draw(graph, node_size=20)
    plt.show()
# Get the CNN output, the softmax output and the prediction
logits, softmax, prediction = combine_layers(s_image_holder, t_image_holder)

# Get the network loss
# NOTE(review): this rebinds the module-level name `loss` from the callable
# to its result, so `loss(...)` cannot be called a second time afterwards.
loss = loss(logits, label_holder)

# Define the optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# Get the accuracy (top-1: k is 1)
top_k_op = tf.nn.in_top_k(logits, label_holder, 1, name='top_k_op')

# Read in the data
# NOTE(review): `import loadData` binds a module, yet `loadData()` is called
# right below; a plain module is not callable - confirm whether
# `from loadData import loadData` was intended.
import loadData

train_img, train_label = loadData()
# Shuffled training batches of batch_size examples
train_img_batch, train_label_batch = tf.train.shuffle_batch(
    [train_img, train_label],
    batch_size=batch_size,
    capacity=2000,
    min_after_dequeue=128)

# Initialization
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# Merge the TensorBoard summaries and write them under ./logs/
merged = tf.summary.merge_all()
writer = tf.summary.FileWriter("./logs/", sess.graph)

# Start the thread queue
# This test does not need the optimizer, the loss function, etc.
'''
***********************************************************************************************************
***********************************************************************************************************
                                                VGGNet定义完毕
***********************************************************************************************************
***********************************************************************************************************

'''

# Build the network from the model at VGG_16_Model
prediction, softmax, logits = VGGNet(path=VGG_16_Model)

# Use tf.nn.in_top_k to compute the top-k accuracy of the output; top 1 is used here
top_k_op = tf.nn.in_top_k(logits, label_holder, 1, name='top_k_op')

# Load the validation data and batch it (not shuffled)
test_img, test_label = loadData(type='validation')
test_img_batch, test_label_batch = tf.train.batch([test_img, test_label],
                                                  batch_size=batch_size,
                                                  capacity=num_examples)

# Initialize the global parameters
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# Load the model parameters
# NOTE(review): the checkpoint path says VGGNet_19 while the network above is
# built from VGG_16_Model - confirm they belong together.
saver = tf.train.Saver()
model_path = "./VGGNet_19/model_parameters.ckpt"
saver.restore(sess, model_path)
print('CNN_model loaded')

# This step starts the thread queue for image data augmentation; 16 threads are used in total to speed it up
Пример #21
0
# Passed to createModel below as its first argument
num_units = 128

#callBacks = [EarlyStopping(monitor='loss', patience=10, min_delta = 0.01 , verbose=0, mode='min'),TensorBoard(log_dir='output/graph_10Per', histogram_freq=1), History()]

# Callbacks: early stopping on val_loss, TensorBoard logging to
# output/graph_10Per and a History object
callBacks = [
    EarlyStopping(monitor='val_loss',
                  patience=10,
                  min_delta=0.01,
                  verbose=1,
                  mode='min'),
    TensorBoard(log_dir='output/graph_10Per', histogram_freq=1),
    History()
]

#Load Dataset
X_train, Y_train, X_val, Y_val = loadData()

#Create Model
# input_dim, output_dim, x_seq_length and y_seq_length are not defined in
# this fragment; they must come from earlier in the script.
model = createModel(num_units, input_dim, output_dim, x_seq_length,
                    y_seq_length)

model.summary()

callBacks = [
    EarlyStopping(monitor='loss',
                  patience=10,
                  min_delta=0.01,
                  verbose=0,
                  mode='auto'),
    TensorBoard(log_dir='output/graph_10Per', histogram_freq=1),
    History()
Пример #22
0
    def get_relationship_data(self, file_path_till_traininkml, str_opt,
                              file_path_lg_train):
        '''
        The method extracts the data required for training relationship classifier.

        Input:
        file_path_till_traininkml : directory prefix that replaces
                                    /home/sbp3624/PatternRecog/TrainINKML_v3/
                                    in every listed file name
        str_opt : prefix of the header line in split_files.txt; the line
                  after that header holds the file list
        file_path_lg_train : prefix put in front of "<name>.lg", where <name>
                             is the inkml base name up to its first "."

        For every "R" line of an lg file the points of all traces of both
        related symbols are merged into one Trace, normalized, and turned
        into a feature vector through Symbol.get_features().

        Returns:
        (X_rel_train, y_rel_train) : parallel lists of feature vectors and
                                     relation labels
        '''
        load_obj = loadData()
        symbol_obj = Symbol()
        X_rel_train = []
        y_rel_train = []
        flag = False
        with open('split_files.txt', 'r') as f:
            for line in f:
                if flag:
                    list_files = line.strip("Set([").strip("])\n").split(', ')
                    break
                elif line.startswith(str_opt):
                    flag = True

        for count, fileName in enumerate(list_files, 1):
            fileName = fileName.strip("'")
            print("count= %d" % count)
            fileName = fileName.replace(
                "/home/sbp3624/PatternRecog/TrainINKML_v3/",
                file_path_till_traininkml)
            fileName_lg = basename(fileName)
            fileName_lg = (file_path_lg_train +
                           fileName_lg[:fileName_lg.find(".")] + ".lg")
            root_obj, trace_obj_dict = load_obj.loadInkml(fileName)
            # symbol id -> list of trace ids, filled from the "O" lines
            dict_sym = {}

            with open(fileName_lg, "r") as f_read:
                for line in f_read:
                    fields = line.strip("\n").replace(" ", "").split(",")
                    if fields[0].startswith("O"):
                        dict_sym[fields[1]] = fields[4:]
                    elif fields[0].startswith("R"):
                        # Build a NEW list. The previous `list_1 += ...`
                        # extended the list stored in dict_sym in place, so a
                        # symbol picked up the traces of every symbol it had
                        # been related to earlier in the file.
                        trace_ids = dict_sym[fields[1]] + dict_sym[fields[2]]
                        rel_label = fields[3]
                        total_points = []
                        for trace_id in trace_ids:
                            total_points += trace_obj_dict[int(
                                trace_id)].original_points
                        # First get the original points then normalize
                        trace_obj = Trace(points_float=total_points)
                        trace_obj.normalization()
                        # symbol_obj is a scratch object: its trace list is
                        # replaced for every relation
                        symbol_obj.symbol_list = [trace_obj]
                        X_rel_train.append(symbol_obj.get_features())
                        y_rel_train.append(rel_label)

        return X_rel_train, y_rel_train
    def get_relationship_data(self,file_path_till_traininkml,str_opt,file_path_lg_train):
        '''
        The method extracts the data required for training relationship classifier.

        Input:
        file_path_till_traininkml : directory prefix that replaces
                                    /home/sbp3624/PatternRecog/TrainINKML_v3/
                                    in every listed file name
        str_opt : prefix of the header line in split_files.txt; the line
                  after that header holds the file list
        file_path_lg_train : prefix put in front of "<name>.lg", where <name>
                             is the inkml base name up to its first "."

        For every "R" line of an lg file the points of all traces of both
        related symbols are merged into one Trace, normalized, and turned
        into a feature vector through Symbol.get_features().

        Returns:
        (X_rel_train, y_rel_train) : parallel lists of feature vectors and
                                     relation labels
        '''
        load_obj = loadData()
        symbol_obj = Symbol()
        X_rel_train = []
        y_rel_train = []
        flag = False
        with open('split_files.txt', 'r') as f:
            for line in f:
                if flag:
                    list_files = line.strip("Set([").strip("])\n").split(', ')
                    break
                elif line.startswith(str_opt):
                    flag = True

        for count, fileName in enumerate(list_files, 1):
            fileName = fileName.strip("'")
            print("count= %d" % count)
            fileName = fileName.replace("/home/sbp3624/PatternRecog/TrainINKML_v3/", file_path_till_traininkml)
            fileName_lg = basename(fileName)
            fileName_lg = file_path_lg_train + fileName_lg[:fileName_lg.find(".")] + ".lg"
            root_obj, trace_obj_dict = load_obj.loadInkml(fileName)
            # symbol id -> list of trace ids, filled from the "O" lines
            dict_sym = {}

            with open(fileName_lg, "r") as f_read:
                for line in f_read:
                    fields = line.strip("\n").replace(" ", "").split(",")
                    if fields[0].startswith("O"):
                        dict_sym[fields[1]] = fields[4:]
                    elif fields[0].startswith("R"):
                        # Build a NEW list. The previous `list_1+=...`
                        # extended the list stored in dict_sym in place, so a
                        # symbol picked up the traces of every symbol it had
                        # been related to earlier in the file.
                        trace_ids = dict_sym[fields[1]] + dict_sym[fields[2]]
                        rel_label = fields[3]
                        total_points = []
                        for trace_id in trace_ids:
                            total_points += trace_obj_dict[int(trace_id)].original_points
                        # First get the original points then normalize
                        trace_obj = Trace(points_float=total_points)
                        trace_obj.normalization()
                        # symbol_obj is a scratch object: its trace list is
                        # replaced for every relation
                        symbol_obj.symbol_list = [trace_obj]
                        X_rel_train.append(symbol_obj.get_features())
                        y_rel_train.append(rel_label)

        return X_rel_train, y_rel_train
    def get_symbol_distribution(self, path):
        '''
        Split the listed inkml files into a train and a test set and try to
        bring every symbol class close to a 2/3 : 1/3 distribution.

        Input:
        path : text file with one inkml file name per line; each name is
               prefixed with /home/sbp3624/PatternRecog/TrainINKML_v3/

        The first two thirds of the list start as the train set. For 30
        rounds, every symbol class present in both sets whose train share is
        outside [0.63, 0.67) moves one randomly chosen file holding that
        class to the other side.

        Side effects: appends the two sets to split_files.txt and the final
        per-class shares to file_prob_dist.txt.

        Returns:
        (train_file, test_file) : the two Set objects
        '''
        with open(path, "r") as file_read:
            list_all_files = [
                ("/home/sbp3624/PatternRecog/TrainINKML_v3/" + line).strip('\n')
                for line in file_read
            ]

        # floor division: same value as Python 2 `/` on ints
        train_count = 2 * len(list_all_files) // 3
        # Keep the Set type: str(Set) is written to split_files.txt and the
        # readers of that file in this module strip the "Set([" ... "])"
        # wrapper.
        train_file = Set()
        test_file = Set()
        for i, fileName in enumerate(list_all_files):
            if i < train_count:
                train_file.add(fileName)
            else:
                test_file.add(fileName)

        for no_iter in range(30):
            print("Start iteration = %d" % no_iter)
            dict_sym_to_file_train, count_train, _ = self.get_file_symbol_info(
                train_file)
            dict_sym_to_file_test, count_test, _ = self.get_file_symbol_info(
                test_file)

            for sym in count_test:
                if sym not in count_train:
                    continue
                prob_sym_train = float(count_train[sym]) / (
                    count_train[sym] + count_test[sym])
                prob_sym_test = 1 - prob_sym_train
                if (0.63 <= prob_sym_train < 0.67
                        and 0.3 <= prob_sym_test <= 0.4):
                    continue  # this class is already balanced

                if prob_sym_train > 0.67:
                    # swap file from train -> test
                    candidates = dict_sym_to_file_train[sym]
                    source, target = train_file, test_file
                else:
                    # swap file from test -> train
                    candidates = dict_sym_to_file_test[sym]
                    source, target = test_file, train_file
                # randomly choose a file that belongs to the symbol; the
                # per-round lists may name a file that was already moved, in
                # which case discard() is a no-op
                swap_file = candidates[np.random.randint(len(candidates))]
                source.discard(swap_file)
                target.add(swap_file)

        # Write the distribution of files in a txt
        with open("split_files.txt", "a") as file_write:
            file_write.write("Train Files \n")
            file_write.write(str(train_file))
            file_write.write("\n")
            file_write.write("Test Files \n")
            file_write.write(str(test_file))

        # get the distribution
        _, count_train, _ = self.get_file_symbol_info(train_file)
        _, count_test, _ = self.get_file_symbol_info(test_file)
        with open('file_prob_dist.txt', 'a') as file_prob_dist:
            for sym in count_test:
                if sym in count_train:
                    prob_sym_train = float(count_train[sym]) / (
                        count_train[sym] + count_test[sym])
                    prob_sym_test = 1 - prob_sym_train
                    file_prob_dist.write(sym + "    " + str(prob_sym_train) +
                                         "    " + str(prob_sym_test) + "\n")

        return train_file, test_file
Пример #25
0
from loadData import *
from displayData import *
from logisticRegression import *
from predict import *
from loadWeight import *
from sklearn.metrics import classification_report


# Part 1: load the data set
""" 第1部分 加载数据集 """
X, y = loadData('ex3data1.mat')


# Part 2: visualization
""" 第2部分 可视化 """
displayData(X)
plt.show()

# The same file is loaded a second time under new names
raw_X, raw_y = loadData('ex3data1.mat')


# Part 3: vectorized logistic regression
""" 第3部分 向量化逻辑回归 """
# Insert a column of ones in front of the columns of raw_X
X = np.insert(raw_X, 0, values=np.ones(raw_X.shape[0]), axis=1)
y_matrix = []

# One 0/1 indicator array per label k = 1..10
for k in range(1, 11):
    y_matrix.append((raw_y == k).astype(int))

# Move the last indicator (label 10) to the front
y_matrix = [y_matrix[-1]] + y_matrix[:-1]
y = np.array(y_matrix)

# Fit on the first indicator row only and print the shape of the result
t0 = logisticRegression(X, y[0])
print(t0.shape)
Пример #26
0
from tensorflow.contrib import opt
from sklearn.metrics import classification_report
from loadData import *
from displayData import *
from feedForward import *
from nnCostFunction import *
from computeNumericalGradient import *
from checkNNGradients import *
# Part 1: visualize the data set
""" 第1部分 可视化数据集 """
X, _ = loadData('ex4data1.mat')
displayData(X)
plt.show()
# Part 2: model representation
""" 第2部分 模型表示 """
# Load again, this time with transpose=False
X_raw, y_raw = loadData('ex4data1.mat', transpose=False)
# Insert a column of ones in front of the columns of X_raw
X = np.insert(X_raw, 0, np.ones(X_raw.shape[0]), axis=1)


def expand_y(y, num_labels=10):
    """One-hot encode 1-based integer class labels.

    Args:
        y: iterable of integer labels. A label ``k`` sets column ``k - 1``
            of its row to 1, so labels are expected in ``1..num_labels``.
        num_labels: width of every one-hot row. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        A numpy array with one row of length ``num_labels`` per label in
        ``y``.
    """
    rows = []
    for label in y:
        row = np.zeros(num_labels)
        # Labels are 1-based, array columns are 0-based.
        row[label - 1] = 1
        rows.append(row)
    return np.array(rows)


# Turn the raw label vector into the one-hot matrix built by expand_y.
y = expand_y(y_raw)


def load_weight(path):
    # Read the MATLAB file at `path` with sio.loadmat.
    # NOTE(review): the function stops right after the load in this file -
    # nothing is returned; the rest of the body appears to be missing.
    data = sio.loadmat(path)
        def get_symbol_distribution(self,path):
                '''
                Split the files listed in ``path`` into a train and a test set
                and rebalance the split by per-symbol counts.

                Every line of ``path`` is taken as a file name relative to the
                hard-coded TrainINKML_v3 directory. The first two thirds
                (rounded down) of the list start in the train set, the rest in
                the test set. Then, for 30 passes, the symbol counts of both
                sets are recomputed with ``get_file_symbol_info`` and, for each
                symbol present in both sets whose train share is outside the
                accepted band, one randomly chosen file containing that symbol
                is moved away from the over-represented side.

                Side effects: appends the final sets to ``split_files.txt`` and
                the per-symbol train/test shares to ``file_prob_dist.txt``.

                Returns the tuple ``(train_file, test_file)`` (two ``Set``s).
                '''
                path_prefix = "/home/sbp3624/PatternRecog/TrainINKML_v3/"
                with open(path,"r") as file_read:
                    list_all_files = [(path_prefix + line).strip('\n')
                                      for line in file_read]

                # Floor division keeps the original (Python 2) integer result
                # and makes the intent explicit.
                train_count = 2 * len(list_all_files) // 3
                train_file = Set(list_all_files[:train_count])
                test_file = Set(list_all_files[train_count:])

                for no_iter in range(30):
                    print("Start iteration = %d" % (no_iter))
                    files_by_sym_train, count_train, _ = self.get_file_symbol_info(train_file)
                    files_by_sym_test, count_test, _ = self.get_file_symbol_info(test_file)

                    for sym in count_test:
                        if sym not in count_train:
                            continue
                        prob_sym_train = float(count_train[sym]) / (count_train[sym] + count_test[sym])
                        prob_sym_test = 1 - prob_sym_train

                        # Symbol is already split close enough to 2/3 : 1/3.
                        if 0.63 <= prob_sym_train < 0.67 and 0.3 <= prob_sym_test <= 0.4:
                            continue

                        if prob_sym_train > 0.67:
                            # Too much of this symbol in train: move train -> test.
                            candidates = files_by_sym_train[sym]
                            source, target = train_file, test_file
                        else:
                            # Otherwise move one file test -> train.
                            candidates = files_by_sym_test[sym]
                            source, target = test_file, train_file

                        # The symbol -> files maps are only refreshed once per
                        # pass, so the chosen file may already have been moved;
                        # discard() tolerates that.
                        swap_file = candidates[np.random.randint(len(candidates))]
                        source.discard(swap_file)
                        target.add(swap_file)

                # Write the final split to a text file (append mode).
                with open("split_files.txt","a") as file_write:
                    file_write.write("Train Files \n")
                    file_write.write(str(train_file))
                    file_write.write("\n")
                    file_write.write("Test Files \n")
                    file_write.write(str(test_file))

                # Record the final per-symbol distribution.
                _, count_train, _ = self.get_file_symbol_info(train_file)
                _, count_test, _ = self.get_file_symbol_info(test_file)

                with open('file_prob_dist.txt','a') as file_prob_dist:
                    for sym in count_test:
                        if sym in count_train:
                            prob_sym_train = float(count_train[sym]) / (count_train[sym] + count_test[sym])
                            prob_sym_test = 1 - prob_sym_train
                            file_prob_dist.write(sym+"    "+str(prob_sym_train)+"    "+str(prob_sym_test)+"\n")

                return train_file,test_file
import matplotlib.pyplot as plt
from  matplotlib.ticker import FuncFormatter
import pandas as pd
import numpy as np
from pandas.api.types import CategoricalDtype
from loadData import *
from textwrap import wrap

# Location of the survey export.
path = './data/survey_responses.csv'

# Load the responses and pass them through the project's convertDataType helper.
df = convertDataType(loadData(path))

# Statement columns kept for respondents whose mode of transport is walking.
walking_columns = [
    'pedestrian_crossings',
    'congested_route',
    'walk_alone',
    'well_maintained_pavements',
    'litter_on_route',
    'lighting_at_night',
    'pavements_not_wide',
]

# Column name -> human-readable statement.
statement_labels = {
    'pedestrian_crossings': 'Not enough pedestrian crossings',
    'congested_route': 'Route is congested',
    'walk_alone': 'Often walk alone',
    'well_maintained_pavements': 'Pavements are well maintained',
    'litter_on_route': 'Too much litter',
    'lighting_at_night': 'Not enough lighting at night',
    'pavements_not_wide': 'Pavements are not wide enough',
}

# Keep only the walkers and only the statement columns.
df = df.loc[df['mode_of_transport'] == 'walking', walking_columns]

# Relabel the columns, then reshape to long form: one (Statement, Answer)
# row per response per statement.
df = df.rename(columns=statement_labels)
df = df.melt(var_name='Statement', value_name='Answer')
Пример #29
0
from computeSimilarity import *
from rabinFingerprint import *
from shingle import *
from minHash import *

from computeSimiliarity import *
from rabinFingerprint import *


import datetime



if __name__ == "__main__":
    starttime = datetime.datetime.now()         # save the time
    result = loadData("dtoEmails_1_0.txt")      # load data
    body = extractBody(result)                  # extract body
    shingleBody = go_shingle(go_tokens(body),2) # compute shingle
    hashShingle = hashFunction(shingleBody)     # create hash table

    # rabinFingerprint(shingleBody) # compute fingerprint

    minHashShingle = minHash(hashShingle,200)    # create min hash
    similarity = computeSimilarity(minHashShingle)

    # Write every pair whose similarity lies strictly between 0.5 and 1.
    # The context manager guarantees out.txt is flushed and closed, which
    # the previous bare open() never did.
    with open("out.txt","w") as f:
        for i in range(len(similarity)):
            row = similarity[i]
            for j in range(len(row)):
                score = row[j]
                if 0.5 < score < 1:
                    f.write("%3d %4d %0.3f\n" % (i, j, score))
    # use Lsh
    # similiarity = computeSimiliary(hashShingle[0:50]) # compute similarity
Пример #30
0
# Inputs that end the program from any menu level.
_QUIT_WORDS = ('q', 'quit', 'bye')
# Farewell printed whenever the program exits.
_GOODBYE = '\n till Next Time! Goodbye.'


def _basic_info_menu(basic_info, basicData, pokemon_name):
    """Run the "basic info" menu for the selected pokemon.

    Returns True when the user asked to quit the program and False when
    they typed 'back'.
    """
    while True:
        choice = input(
            '\nYou have selected basic info about %s'
            '\nPlease select a function we have here'
            '\nA. Show its overall CP ranking/percentile/mean'
            '\nB. Show its overall HP ranking/percentile/mean'
            '\nC. Show the same type Pokemons with it'
            '\nD. Show its CP ranking/percentile/mean among the same type pokemons'
            '\nE. Show its HP ranking/percentile/mean among the same type pokemons'
            '\nType \'back\' back to previous menu'
            '\nType \'quit\' to quit the program'
            '\n>  ' % pokemon_name).lower()
        if choice in _QUIT_WORDS:
            return True
        if choice == 'back':
            return False

        if choice in ('a', 'a.', '1'):
            print(
                '\n CP of %s is %d. It ranks %d among all pokemons. Overall the mean CP is %d'
                % (pokemon_name,
                   basic_info.pokemonMaxCP,
                   basic_info.overallRanking()[0],
                   basic_info.meanPower(basicData, 'cp')))
        elif choice in ('b', 'b.', '2'):
            print(
                '\n HP of %s is %d. It ranks %d among all pokemons. Overall the mean HP is %d'
                % (pokemon_name,
                   basic_info.pokemonMaxHP,
                   basic_info.overallRanking()[1],
                   basic_info.meanPower(basicData, 'hp')))
        elif choice in ('c', 'c.', '3'):
            print(
                '\n %s is a type %s pokemon. There are %d pokemons who are also type %s.'
                '\n They are %s' %
                (pokemon_name,
                 basic_info.pokemonType1,
                 basic_info.countSameTypePokemons()[0],
                 basic_info.pokemonType1,
                 basic_info.listSameTypePokemons()[0].to_string()))
            # A second entry is reported only when the same-type listing
            # has more than one element.
            if len(basic_info.listSameTypePokemons()) > 1:
                print(
                    '\n %s is also a type %s pokemon. There are %d pokemons who are also type %s.'
                    '\n They are %s' %
                    (pokemon_name,
                     basic_info.pokemonType2,
                     basic_info.countSameTypePokemons()[1],
                     basic_info.pokemonType2,
                     basic_info.listSameTypePokemons()[1].to_string()))
        elif choice in ('d', 'd.', '4'):
            print(
                '\n CP of %s is %d. Among %s pokemons, the mean CP is %d. And it ranks %d among them.'
                % (pokemon_name,
                   basic_info.pokemonMaxCP,
                   basic_info.pokemonType1,
                   basic_info.meanPower(
                       basic_info.listSameTypePokemons()[0], 'cp'),
                   basic_info.sameTypeRanking()[0][0]))
            if len(basic_info.listSameTypePokemons()) > 1:
                print(
                    '\n CP of %s is %d. Among %s pokemons, the mean CP is %d. And it ranks %d among them.'
                    % (pokemon_name,
                       basic_info.pokemonMaxCP,
                       basic_info.pokemonType2,
                       basic_info.meanPower(
                           basic_info.listSameTypePokemons()[1], 'cp'),
                       basic_info.sameTypeRanking()[1][0]))
        elif choice in ('e', 'e.', '5'):
            print(
                '\n HP of %s is %d. Among %s pokemons, the mean HP is %d. And it ranks %d among them.'
                % (pokemon_name,
                   basic_info.pokemonMaxHP,
                   basic_info.pokemonType1,
                   basic_info.meanPower(
                       basic_info.listSameTypePokemons()[0], 'hp'),
                   basic_info.sameTypeRanking()[0][1]))
            if len(basic_info.listSameTypePokemons()) > 1:
                print(
                    '\n HP of %s is %d. Among %s pokemons, the mean HP is %d. And it ranks %d among them.'
                    % (pokemon_name,
                       basic_info.pokemonMaxHP,
                       basic_info.pokemonType2,
                       basic_info.meanPower(
                           basic_info.listSameTypePokemons()[1], 'hp'),
                       basic_info.sameTypeRanking()[1][1]))
        else:
            print(
                '\n Sorry, Please follow the input instructions and enter a letter or back.'
            )


def _city_info_menu(largeData, pokemon_selection, pokemon_name,
                    city_selection):
    """Run the per-city menu for the selected pokemon and city.

    Returns True when the user asked to quit the program and False when
    they typed 'back'. Unrecognised input simply shows the menu again.
    """
    while True:
        choice = input(
            '\nYou have selected pokemon go info about %s and %s'
            '\nPlease select a function we have here'
            '\nA. Show whether people have seen it, in %s'
            '\nB. Show the pokemons which were often observed in %s in histogram'
            '\nC. Show the pokemons whom it was often observed together with, in %s'
            '\nD. Show distribution of its appearance time of the day in pie chart, in %s'
            '\nE. Show scatterplot of windspeed and temperature relationship, in %s'
            '\nType \'back\' back to previous menu'
            '\nType \'quit\' to quit the program'
            '\n>  ' %
            (pokemon_name, city_selection, city_selection, city_selection,
             city_selection, city_selection, city_selection)).lower()
        if choice in _QUIT_WORDS:
            return True
        if choice == 'back':
            return False

        if choice in ('a', 'a.', '1'):
            appeared_incity(largeData, pokemon_selection, city_selection)
        elif choice in ('b', 'b.', '2'):
            city_ID_freq(largeData, city_selection)
            print(
                '\n Histogram of Top 10 pokemons appeared in %s is saved in: '
                '\n Frequencies of pokemons appear in %s.png'
                % (city_selection, city_selection))
        elif choice in ('c', 'c.', '3'):
            # `== True` is kept as written: the helper's return type is not
            # visible here, so plain truthiness could behave differently.
            if hasItAppearedGlobally(
                    citywideDataframe(largeData, city_selection),
                    pokemon_selection) == True:
                ans = co_occurance(
                    citywideDataframe(largeData, city_selection),
                    pokemon_selection)
                print(
                    '\n In %s, Top 5 pokemons %s has appeared together with are: '
                    % (city_selection, pokemon_name))
                # NOTE(review): this prints the selected pokemon once per
                # entry of `ans` instead of the entries themselves - looks
                # unintended, but the structure of `ans` is not visible
                # here, so the behaviour is left as it was.
                for _ in range(len(ans)):
                    print('\n %d %s' % (pokemon_selection, pokemon_name))
            else:
                print('\n %s has not appeared in %s yet.'
                      % (pokemon_name, city_selection))
        elif choice in ('d', 'd.', '4'):
            if hasItAppearedGlobally(
                    citywideDataframe(largeData, city_selection),
                    pokemon_selection) == True:
                appeared_time(
                    citywideDataframe(largeData, city_selection),
                    pokemon_selection)
                print(
                    '\n Percentage of %s has appearance time of the day in %s is saved in: '
                    '\n pie chart of pokemon %d showing up periods.png'
                    % (pokemon_name, city_selection, pokemon_selection))
            else:
                print('\n %s has not appeared in %s yet.'
                      % (pokemon_name, city_selection))
        elif choice in ('e', 'e.', '5'):
            temp_windspeed_relation(largeData, city_selection)


def _go_info_menu(largeData, userChoice, cityList, pokemon_selection,
                  pokemon_name):
    """Run the "pokemon go info" menu, including the optional city menu.

    Returns True when the user asked to quit the program (directly, from
    the city selection, or from the city menu) and False when they typed
    'back'. Unrecognised input simply shows the menu again.
    """
    while True:
        choice = input(
            '\nYou have selected pokemon go info about %s'
            '\nPlease select a function we have here'
            '\nA. Show whether people have seen it in pokemon go'
            '\nB. Show the cities where it was often observed in histogram'
            '\nC. Show the pokemons whom it was often observed together with'
            '\nD. Show distribution of its appearance time of the day in pie chart'
            '\nE. Show World Map of all pokemon occurrence in basemap'
            '\nF. Show World Map of %s occurrence in basemap'
            '\nG. Show Box Plot of pokemons occurrence distance to gym'
            '\nH. Show Line Chart of all pokemons occurence freq vs hour of day'
            '\n\nType \'city\' to select a certain city for more details'
            '\nType \'back\' back to previous menu'
            '\nType \'quit\' to quit the program'
            '\n\n>  ' % (pokemon_name, pokemon_name)).lower()
        if choice in _QUIT_WORDS:
            return True
        if choice == 'back':
            return False

        if choice in ('a', 'a.', '1'):
            # `== True` is kept as written: the helper's return type is not
            # visible here, so plain truthiness could behave differently.
            if hasItAppearedGlobally(largeData, pokemon_selection) == True:
                print('\n %s has appeared in Pokemon Go world'
                      % pokemon_name)
            else:
                print('\n %s has not appeared in Pokemon Go world yet'
                      % pokemon_name)
        elif choice in ('b', 'b.', '2'):
            if hasItAppearedGlobally(largeData, pokemon_selection) == True:
                ID_city_freq(largeData, pokemon_selection)
                print(
                    '\n Histogram of Top 10 cities %s has appeared in is saved in: '
                    '\n Frequency of pokemon %dappears in different cities.png'
                    % (pokemon_name, pokemon_selection))
            else:
                print('\n %s has not appeared in Pokemon Go world yet.'
                      % pokemon_name)
        elif choice in ('c', 'c.', '3'):
            if hasItAppearedGlobally(largeData, pokemon_selection) == True:
                ans = co_occurance(largeData, pokemon_selection)
                # Fix: the header used to be printed without its format
                # argument, so a literal "%s" appeared in the output.
                print(
                    '\n Top 5 pokemons %s has appeared together with are: '
                    % pokemon_name)
                # NOTE(review): this prints the selected pokemon once per
                # entry of `ans` instead of the entries themselves - looks
                # unintended, but the structure of `ans` is not visible
                # here, so the behaviour is left as it was.
                for _ in range(len(ans)):
                    print('\n %d %s' % (pokemon_selection, pokemon_name))
            else:
                print('\n %s has not appeared in Pokemon Go world yet.'
                      % pokemon_name)
        elif choice in ('d', 'd.', '4'):
            if hasItAppearedGlobally(largeData, pokemon_selection) == True:
                appeared_time(largeData, pokemon_selection)
                print(
                    '\n Percentage of %s has appearance time of the day is saved in: '
                    '\n pie chart of pokemon %d showing up periods.png'
                    % (pokemon_name, pokemon_selection))
            else:
                print('\n %s has not appeared in Pokemon Go world yet.'
                      % pokemon_name)
        elif choice in ('e', 'e.', '5'):
            worldmap(largeData)
        elif choice in ('f', 'f.', '6'):
            if hasItAppearedGlobally(largeData, pokemon_selection) == True:
                worldmap(pokemonwideDataframe(largeData, pokemon_selection))
            else:
                print('\n %s has not appeared in Pokemon Go world yet.'
                      % pokemon_name)
        elif choice in ('g', 'g.', '7'):
            boxplotGym(largeData)
        elif choice in ('h', 'h.', '8'):
            appearline_according_hour(largeData)
        elif choice == 'city':
            city_selection = userChoice.select_city(cityList)
            # select_city signals "quit" with this exact sentinel string.
            if city_selection == 'Wish you luck in pokemon world, goodbye':
                return True
            if _city_info_menu(largeData, pokemon_selection, pokemon_name,
                               city_selection):
                return True


def _pokemon_menu(basic_info, basicData, largeData, userChoice, cityList,
                  pokemon_selection, pokemon_name):
    """Run the second-level menu (basic info vs. pokemon go info).

    Returns True when the user asked to quit the program and False when
    they typed 'back'.
    """
    while True:
        choice = input(
            '\nYou have selected No.%d %s.'
            '\nWhat do you want to know about it?'
            '\nPlease enter \'basic info\' for basic info,'
            '\nPlease enter \'go\' for pokemon go info,'
            '\nPlease enter \'back\' back to previous menu.'
            '\n>  ' % (pokemon_selection, pokemon_name)).lower()
        if choice in _QUIT_WORDS:
            return True
        if choice == 'back':
            return False

        if choice in ('basic info', 'basicinfo', 'basic information',
                      'information'):
            if _basic_info_menu(basic_info, basicData, pokemon_name):
                return True
        elif choice in ('go', 'pokemongo', 'pokemon go', 'game info'):
            if _go_info_menu(largeData, userChoice, cityList,
                             pokemon_selection, pokemon_name):
                return True
        else:
            print('Sorry, Please follow the input instructions.')


def main():
    """Interactive, text-menu entry point of the pokemon explorer.

    Loads the data set once, then loops on a top-level prompt: 'pokemon'
    lets the user pick a pokemon and browse the nested menus, while
    'q'/'quit'/'bye' ends the program from any menu level. A farewell
    line is printed on every exit path. Returns None.
    """
    Data = loadData('pokemonGo.csv', 'cleanedData.csv')
    userChoice = UserChoice()
    pokemonList = Data.nameList
    cityList = Data.cityList
    basicData = Data.basicData
    largeData = Data.largeData

    while True:
        # The first level input tells the program to start or quit.
        main_input = input(
            '\nPlease enter \'pokemon\' to start main program.\n'
            'Please enter \'quit\' at anytime to quit the program.'
            '\n>  ').lower()
        if main_input in _QUIT_WORDS:
            break
        elif main_input == 'pokemon':
            # Select a pokemon, then print basic statistics about it.
            pokemon_selection = userChoice.select_pokemon(pokemonList)
            # select_pokemon signals "quit" with this exact sentinel string.
            if pokemon_selection == 'Wish you luck in pokemon world. Goodbye':
                print(_GOODBYE)
                return
            basic_info = pokemonStats(basicData, pokemon_selection)
            pokemon_name = pokemonList[pokemon_selection]
            print(
                '\n You have selected No.%d %s. Its a %s and %s type pokemon.'
                'Its Combat Power is %d and its Health Power is %d.' %
                (pokemon_selection, pokemon_name,
                 basic_info.pokemonType1, basic_info.pokemonType2,
                 basic_info.pokemonMaxCP, basic_info.pokemonMaxHP))
            if _pokemon_menu(basic_info, basicData, largeData, userChoice,
                             cityList, pokemon_selection, pokemon_name):
                print(_GOODBYE)
                return
        #elif main_input.lower() in ['city']:
        #city_selection = select_city(cityList)
        else:
            print(
                '\n Please type again, I am not sure what you are talking about.'
            )
    print(_GOODBYE)
Пример #31
0
        lamdas = lamdas + [lamda]

    lamdas.sort()

    return lamdas


# calculate PBO
def PBO(lamdas):
    """Return the fraction of entries in ``lamdas`` that are strictly negative.

    Raises ZeroDivisionError when ``lamdas`` is empty.
    """
    negatives = sum(1 for value in lamdas if value < 0)
    return negatives / len(lamdas)


if __name__ == '__main__':
    df = loadData()
    #    df = pd.read_csv('MZ.00006%.csv')
    df.drop(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '18'],
            axis=1,
            inplace=True)
    column = 'date'
    lamdas = calculate(df, column, 16)
    dfForSave = pd.DataFrame(lamdas, columns=['lamda'])
    dfForSave.to_excel('lamda.xlsx')

    PBO = PBO(lamdas)
    print('PBO is ', PBO)

    if PBO < 0.5:
        print('it is very likely that this factor is useful')
    else: