Python loadData примеры использования

Пример #1

0

Показать файл

Файл: Distribution_symbols.py Проект: suhaspillai/Handwritten-Recognition-of-Math-Symbols

        def get_file_symbol_info(self,set_of_files):
            '''
            Tally the symbols found in a collection of inkml files.

            Returns a 3-tuple:
              * dict mapping symbol class -> list of file names, one entry
                appended per occurrence of that class
              * dict mapping symbol class -> number of occurrences
              * total number of symbols seen across all files
            '''
            # Value is not used below; the len() call is retained so the
            # argument is still required to be sized, as before.
            num_files = len(set_of_files)
            loader = loadData()
            files_by_symbol = {}
            occurrences = {}
            total = 0
            for path in set_of_files:
                root, traces = loader.loadInkml(path)
                for sym in loader.get_symbol(root, traces):
                    label = sym.symbol_class
                    files_by_symbol.setdefault(label, []).append(path)
                    occurrences[label] = occurrences.get(label, 0) + 1
                    total += 1

            return files_by_symbol, occurrences, total

Пример #2

0

Показать файл

    def extract_data_without_pca_relationship(self, file_set,
                                              file_path_till_Traininkml):
        '''
        Extract feature vectors and class labels from a set of inkml files.

        Input:
        file_set : iterable of file names; surrounding single quotes are
            stripped from each name
        file_path_till_Traininkml : path substituted for the hard-coded
            '/home/sbp3624/PatternRecog/TrainINKML_v3/' prefix of each name

        Returns:
        (X_train, y_train) : two parallel lists holding, per symbol, the
            result of symbol.get_features() and symbol.symbol_class
        '''
        load_obj = loadData()
        X_train = []
        y_train = []
        for file_no, fileName in enumerate(file_set, 1):
            # Format *inside* the call: `print(...) % x` only works as a
            # Python 2 print statement and raises TypeError on Python 3.
            print("File No=%d " % file_no)
            fileName = fileName.strip("'")
            fileName = fileName.replace(
                '/home/sbp3624/PatternRecog/TrainINKML_v3/',
                file_path_till_Traininkml)
            root_obj, trace_obj_dict = load_obj.loadInkml(fileName)
            symbols = load_obj.get_symbol(root_obj, trace_obj_dict)

            for symbol in symbols:
                X_train.append(symbol.get_features())
                y_train.append(symbol.symbol_class)

        return X_train, y_train

Пример #3

0

Показать файл

Файл: Distribution_symbols.py Проект: t-k-/Handwritten-Recognition-of-Math-Symbols

    def get_file_symbol_info(self, set_of_files):
        '''
        Count symbol classes over the given inkml files.

        Returns (files per symbol class, count per symbol class,
        overall symbol count). The per-class file list receives the file
        name once for every symbol of that class found in the file.
        '''
        # Result is unused; the call itself is preserved from the original.
        n_files = len(set_of_files)
        reader = loadData()
        sym_to_files = {}
        sym_counts = {}
        n_symbols = 0
        for inkml_path in set_of_files:
            root_node, trace_map = reader.loadInkml(inkml_path)

            for entry in reader.get_symbol(root_node, trace_map):
                key = entry.symbol_class
                try:
                    sym_to_files[key].append(inkml_path)
                except KeyError:
                    sym_to_files[key] = [inkml_path]

                try:
                    sym_counts[key] += 1
                except KeyError:
                    sym_counts[key] = 1
                n_symbols += 1

        return sym_to_files, sym_counts, n_symbols

Пример #4

0

Показать файл

Файл: Preprocess_Extract.py Проект: suhaspillai/Handwritten-Recognition-of-Math-Symbols

 def extract_data_without_pca_relationship(self,file_set,file_path_till_Traininkml):
     '''
     Extract feature vectors and class labels from a set of inkml files.

     Input:
     file_set : iterable of file names; surrounding single quotes are
         stripped from each name
     file_path_till_Traininkml : path substituted for the hard-coded
         '/home/sbp3624/PatternRecog/TrainINKML_v3/' prefix of each name

     Returns:
     (X_train, y_train) : two parallel lists holding, per symbol, the
         result of symbol.get_features() and symbol.symbol_class
     '''
     load_obj = loadData()
     X_train = []
     y_train = []
     for count_fName, fileName in enumerate(file_set, 1):
         # The format operator must be applied to the string, not to the
         # result of print(); the old form fails with TypeError on Python 3.
         print("File No=%d " % count_fName)
         fileName = fileName.strip("'")
         fileName = fileName.replace('/home/sbp3624/PatternRecog/TrainINKML_v3/',file_path_till_Traininkml)
         root_obj, trace_obj_dict = load_obj.loadInkml(fileName)
         symbols = load_obj.get_symbol(root_obj,trace_obj_dict)

         for symbol in symbols:
             X_train.append(symbol.get_features())
             y_train.append(symbol.symbol_class)

     return X_train,y_train

Пример #5

0

Показать файл

Файл: Network_Lasso_main.py Проект: iknow1988/networklasso_sut

def main():
    """Run ADMM over a sweep of lambda values and report train/test error.

    Loads the data set, builds a graph, initialises the ADMM variables and
    then, for each lambda value, runs ADMM, fits/evaluates via RRLayer and
    getAccuracy, and hands the results to the show* helpers.

    Lambda schedule: starts at 0, jumps to 0.01, then grows by
    lambdaUpdateStepSize (0.05) while it stays <= lambdaMaxValue (1).

    NOTE: uses Python 2 print statements.
    """
    maxProcesses = multiprocessing.cpu_count()
    lamb = 0
    lambdaMaxValue = 1
    lambdaUpdateStepSize = 0.05
    rho = .0001
    dataSize = 10
    # NOTE(review): `type` shadows the builtin for the rest of this function.
    type = 1
    (trainingSet, x_train, y_train, x_test, y_test, trainSize, testSize,
     sizeOptimizationVariable) = loadData(dataSize, type)
    G1 = generateGraph(60)
    nodes = G1.number_of_nodes()
    edges = G1.number_of_edges()
    print "Number of Nodes = ", nodes, " , Number of Edges = ", edges
    print "Diameter is ", nx.diameter(G1)
    # Initialize ADMM variables
    (A, sqn, sqp, x, u, z) = initializeADMM(G1, sizeOptimizationVariable)
    # Per-lambda history: plot1 = train RMSE, plot2 = test RMSE, plot3 = lambda
    plot1 = list()
    plot2 = list()
    plot3 = list()
    # `lamb == 0` is already covered by `lamb <= lambdaMaxValue` with the
    # constants above; it only matters if lambdaMaxValue were negative.
    while (lamb <= lambdaMaxValue or lamb == 0):
        print "For lambda = ", lamb
        start_time = time.time()
        # x, u, z are fed back in, so each lambda starts from the previous
        # lambda's result; the third argument is rho + sqrt(lambda).
        (x, u, z,
         localVariables) = runADMM(G1, lamb, rho + math.sqrt(lamb), x, u, z,
                                   trainingSet, A, sqn, sqp, maxProcesses,
                                   sizeOptimizationVariable, trainSize,
                                   testSize, x_train, y_train, x_test, y_test)
        print("ADMM finished in %s seconds" % (time.time() - start_time))
        # Only column 0 of y_train / y_test is used as the target.
        (parameters, trainRMSE) = RRLayer(x, x_train, y_train[:,
                                                              0].transpose(),
                                          sizeOptimizationVariable - 1)
        testRMSE = getAccuracy(x, parameters, x_test, y_test[:, 0].transpose(),
                               sizeOptimizationVariable - 1)
        plot1.append(trainRMSE)
        plot2.append(testRMSE)
        plot3.append(lamb)
        print "trainRMSE =", trainRMSE, "testRMSE =", testRMSE
        # presumably these display/save figures; the meaning of the two
        # boolean flags is defined by the helpers — TODO confirm
        showImageMatrix(x, False, False, '', 'x matrix')
        showImageMatrix(u, False, False, '', 'u matrix')
        showImageMatrix(z, False, False, '', 'z matrix')
        showclusterPerformance(
            x, False, True, 'train:' + str(trainRMSE) + ' test: ' +
            str(testRMSE) + ' lamda: ' + str(lamb),
            "EXP/norm/" + str(lamb) + ".png")
        # First step is smaller (0 -> 0.01); afterwards advance by the
        # regular step size.
        if (lamb == 0):
            lamb = 0.01
        else:
            lamb = lamb + lambdaUpdateStepSize
    showOverallAccuracy(plot1, plot2, plot3, False, True)

Пример #6

0

Показать файл

Файл: Classifier.py Проект: suhaspillai/Handwritten-Recognition-of-Math-Symbols

    def classification(self,file_path_till_Traininkml,classifier_obj,str_opt):
        '''
        Classify the symbols of every file in one split and write the
        predictions to lg files.

        Input:
        file_path_till_Traininkml : path substituted for the hard-coded
            '/home/sbp3624/PatternRecog/TrainINKML_v3/' prefix of each file
        classifier_obj : Classifier object; only its predict() is called
        str_opt : Test or Train; the line of 'split_files.txt' that follows
            the first line starting with this string holds the file list

        Raises:
        ValueError : if 'split_files.txt' has no line starting with str_opt
            that is followed by a file-list line
        '''
        load_obj = loadData()
        lg_folder_name = "classification_" + str_opt
        file_write_obj = FileWrite(lg_folder_name)

        list_files = None
        header_seen = False
        with open('split_files.txt', 'r') as f:
            for line in f:
                if header_seen:
                    # str.strip removes a *set of characters*, not a prefix;
                    # it peels the "Set([" ... "])\n" wrapper off the line.
                    files = line.strip("Set([").strip("])\n")
                    list_files = files.split(', ')
                    break
                if line.startswith(str_opt):
                    header_seen = True

        # Previously a missing section surfaced later as an
        # UnboundLocalError on list_files; fail with a clear message instead.
        if list_files is None:
            raise ValueError(
                "no file list found for %r in split_files.txt" % (str_opt,))

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Classification started')
        for count, fileName in enumerate(list_files, 1):
            fileName = fileName.strip("'")
            print("count= %d" % (count))
            fileName = fileName.replace("/home/sbp3624/PatternRecog/TrainINKML_v3/",file_path_till_Traininkml)
            print(fileName + "\n")
            root_obj, trace_obj_dict = load_obj.loadInkml(fileName)
            symbol_list = load_obj.get_symbol(root_obj,trace_obj_dict)

            X_test = []
            count_traces = 0
            for symbol in symbol_list:
                X_test.append(symbol.get_features())
                count_traces = count_traces + len(symbol.symbol_list)
            X_test_final = np.asarray(X_test)
            predict_labels = classifier_obj.predict(X_test_final)
            #Write this to lg file.
            file_write_obj.write_to_lg(predict_labels,fileName,symbol_list,count_traces,lg_folder_name)

Пример #7

0

Показать файл

Файл: Segmentation.py Проект: suhaspillai/Handwritten-Recognition-of-Math-Symbols

    def sym_segmentation(self,classifier_obj,file_path_till_traininkml,str_opt,rel_classifier_obj):
        '''
        The function calls MinimumSpanningTree.get_segmentation for every
        file of one split (segment, classify and parse symbols).

        Input
        classifier_obj - Classifier pretrained model
        file_path_till_traininkml - path substituted for the hard-coded
            '/home/sbp3624/PatternRecog/TrainINKML_v3/' prefix of each file
        str_opt - Train or Test; the line of 'split_files.txt' following the
            first line that starts with this string holds the file list
        rel_classifier_obj - Relationship classifier pretrained model.

        Raises
        ValueError - if 'split_files.txt' has no line starting with str_opt
            that is followed by a file-list line
        '''
        load_obj = loadData()
        m = MinimumSpanningTree()
        symbol_obj = Symbol()

        lg_folder_name = "parsing_" + str_opt
        file_write_obj = FileWrite(lg_folder_name)

        list_files = None
        header_seen = False
        with open('split_files.txt','r') as f:
            for line in f:
                if header_seen:
                    # str.strip takes a character set, not a prefix/suffix;
                    # this peels the "Set([" ... "])\n" wrapper off the line.
                    files = line.strip("Set([").strip("])\n")
                    list_files = files.split(', ')
                    break
                if line.startswith(str_opt):
                    header_seen = True

        # A missing section used to surface as UnboundLocalError below.
        if list_files is None:
            raise ValueError(
                "no file list found for %r in split_files.txt" % (str_opt,))

        for count, fileName in enumerate(list_files, 1):
            fileName = fileName.strip("'")
            # Single-argument print(...) works on both Python 2 and 3.
            print("count= %d" % (count))
            fileName = fileName.replace("/home/sbp3624/PatternRecog/TrainINKML_v3/",file_path_till_traininkml)
            # root_obj is not needed here; only the trace dictionary is used.
            root_obj, trace_obj_dict = load_obj.loadInkml(fileName)

            m.get_segmentation(trace_obj_dict,classifier_obj,symbol_obj,file_write_obj,fileName,lg_folder_name,rel_classifier_obj)

Пример #8

0

Показать файл

    def sym_segmentation(self, classifier_obj, file_path_till_traininkml,
                         str_opt, rel_classifier_obj):
        '''
        The function calls MinimumSpanningTree.get_segmentation for every
        file of one split (segment, classify and parse symbols).

        Input
        classifier_obj - Classifier pretrained model
        file_path_till_traininkml - path substituted for the hard-coded
            '/home/sbp3624/PatternRecog/TrainINKML_v3/' prefix of each file
        str_opt - Train or Test; the line of 'split_files.txt' following the
            first line that starts with this string holds the file list
        rel_classifier_obj - Relationship classifier pretrained model.

        Raises
        ValueError - if 'split_files.txt' has no line starting with str_opt
            that is followed by a file-list line
        '''
        load_obj = loadData()
        m = MinimumSpanningTree()
        symbol_obj = Symbol()

        lg_folder_name = "parsing_" + str_opt
        file_write_obj = FileWrite(lg_folder_name)

        list_files = None
        header_seen = False
        with open('split_files.txt', 'r') as f:
            for line in f:
                if header_seen:
                    # str.strip takes a character set, not a prefix/suffix;
                    # this peels the "Set([" ... "])\n" wrapper off the line.
                    files = line.strip("Set([").strip("])\n")
                    list_files = files.split(', ')
                    break
                if line.startswith(str_opt):
                    header_seen = True

        # A missing section used to surface as UnboundLocalError below.
        if list_files is None:
            raise ValueError(
                "no file list found for %r in split_files.txt" % (str_opt,))

        for count, fileName in enumerate(list_files, 1):
            fileName = fileName.strip("'")
            # Single-argument print(...) works on both Python 2 and 3.
            print("count= %d" % (count))
            fileName = fileName.replace(
                "/home/sbp3624/PatternRecog/TrainINKML_v3/",
                file_path_till_traininkml)
            # root_obj is not needed here; only the trace dictionary is used.
            root_obj, trace_obj_dict = load_obj.loadInkml(fileName)

            m.get_segmentation(trace_obj_dict, classifier_obj, symbol_obj,
                               file_write_obj, fileName, lg_folder_name,
                               rel_classifier_obj)

Пример #9

0

Показать файл

    def sym_parsing(self, rel_classifier_obj, file_path_till_traininkml,
                    str_opt):
        '''
        For every file of one split: load its symbols, build the
        line-of-sight graph, derive the parse layout and write the result
        to an lg file.

        Input
        rel_classifier_obj - relationship classifier passed on to
            Graph.LineOfSight and self.get_parse_layout
        file_path_till_traininkml - path substituted for the hard-coded
            '/home/sbp3624/PatternRecog/TrainINKML_v3/' prefix of each file
        str_opt - the line of 'split_files.txt' following the first line
            that starts with this string holds the file list

        Raises
        ValueError - if 'split_files.txt' has no line starting with str_opt
            that is followed by a file-list line
        '''
        load_obj = loadData()
        e = Graph()
        # symbol_obj and file_write_obj are not referenced below; they are
        # still constructed in case the constructors have side effects
        # (e.g. creating the output folder) - TODO confirm and remove.
        symbol_obj = Symbol()
        lg_folder_name = "parsing_" + str_opt
        file_write_obj = FileWrite(lg_folder_name)

        list_files = None
        header_seen = False
        with open('split_files.txt', 'r') as f:
            for line in f:
                if header_seen:
                    # str.strip takes a character set, not a prefix/suffix;
                    # this peels the "Set([" ... "])\n" wrapper off the line.
                    files = line.strip("Set([").strip("])\n")
                    list_files = files.split(', ')
                    break
                if line.startswith(str_opt):
                    header_seen = True

        # A missing section used to surface as UnboundLocalError below.
        if list_files is None:
            raise ValueError(
                "no file list found for %r in split_files.txt" % (str_opt,))

        for count, fileName in enumerate(list_files, 1):
            fileName = fileName.strip("'")
            # Single-argument print(...) works on both Python 2 and 3.
            print("count= %d" % (count))
            fileName = fileName.replace(
                "/home/sbp3624/PatternRecog/TrainINKML_v3/",
                file_path_till_traininkml)
            root_obj, trace_obj_dict = load_obj.loadInkml(fileName)
            symbols = load_obj.get_symbol(root_obj, trace_obj_dict)
            adj_matrix, dict_mapping_Symbol_index, index_to_symbol = e.LineOfSight(
                symbols, rel_classifier_obj)
            dict_map_rel_to_syms = self.get_parse_layout(
                adj_matrix, dict_mapping_Symbol_index, index_to_symbol,
                rel_classifier_obj)
            # get_parse_layout returns a tuple; only its first item is used
            dict_map_rel_to_syms = dict_map_rel_to_syms[0]
            self.write_to_lg(fileName, symbols, dict_map_rel_to_syms,
                             lg_folder_name)

Пример #10

0

Показать файл

Файл: RelationShip_Classifier.py Проект: suhaspillai/Handwritten-Recognition-of-Math-Symbols

    def sym_parsing(self,rel_classifier_obj,file_path_till_traininkml,str_opt):
        '''
        For every file of one split: load its symbols, build the
        line-of-sight graph, derive the parse layout and write the result
        to an lg file.

        Input
        rel_classifier_obj - relationship classifier passed on to
            Graph.LineOfSight and self.get_parse_layout
        file_path_till_traininkml - path substituted for the hard-coded
            '/home/sbp3624/PatternRecog/TrainINKML_v3/' prefix of each file
        str_opt - the line of 'split_files.txt' following the first line
            that starts with this string holds the file list

        Raises
        ValueError - if 'split_files.txt' has no line starting with str_opt
            that is followed by a file-list line
        '''
        load_obj = loadData()
        e = Graph()
        # symbol_obj and file_write_obj are not referenced below; they are
        # still constructed in case the constructors have side effects
        # (e.g. creating the output folder) - TODO confirm and remove.
        symbol_obj = Symbol()
        lg_folder_name = "parsing_" + str_opt
        file_write_obj = FileWrite(lg_folder_name)

        list_files = None
        header_seen = False
        with open('split_files.txt','r') as f:
            for line in f:
                if header_seen:
                    # str.strip takes a character set, not a prefix/suffix;
                    # this peels the "Set([" ... "])\n" wrapper off the line.
                    files = line.strip("Set([").strip("])\n")
                    list_files = files.split(', ')
                    break
                if line.startswith(str_opt):
                    header_seen = True

        # A missing section used to surface as UnboundLocalError below.
        if list_files is None:
            raise ValueError(
                "no file list found for %r in split_files.txt" % (str_opt,))

        for count, fileName in enumerate(list_files, 1):
            fileName = fileName.strip("'")
            # Single-argument print(...) works on both Python 2 and 3.
            print("count= %d" % (count))
            fileName = fileName.replace("/home/sbp3624/PatternRecog/TrainINKML_v3/",file_path_till_traininkml)
            root_obj, trace_obj_dict = load_obj.loadInkml(fileName)
            symbols = load_obj.get_symbol(root_obj,trace_obj_dict)
            adj_matrix,dict_mapping_Symbol_index,index_to_symbol = e.LineOfSight(symbols,rel_classifier_obj)
            dict_map_rel_to_syms = self.get_parse_layout(adj_matrix,dict_mapping_Symbol_index,index_to_symbol,rel_classifier_obj)
            # get_parse_layout returns a tuple; only its first item is used
            dict_map_rel_to_syms = dict_map_rel_to_syms[0]
            self.write_to_lg(fileName,symbols,dict_map_rel_to_syms,lg_folder_name)

Пример #11

0

Показать файл

Файл: create_data.py Проект: wsj188626/HandwritingRecognition-with-MultiDimensionalRecurrentNeuralNetworks

def main(argv):
    """Shuffle the loaded samples, split them into train / validation /
    test sets and dump the results to files in the working directory.

    argv[0] : xml path, passed to loadData_word as its second argument
    argv[1] : file location, passed to loadData_word as its first argument
    argv[2] : number of training samples
    argv[3] : number of validation samples; whatever remains after the
              training and validation slices becomes the test set

    Writes 'dict_data', 'training_data', 'validation_data', 'testing_data'
    and 'chars_data' via cp.dump (cp is presumably cPickle/pickle - confirm
    against the file's imports).
    """
    load_obj = loadData()
    file_xml_path = argv[0]
    file_location = argv[1]
    no_of_train = int(argv[2])
    no_of_val = int(argv[3])
    dict_data, chars, list_data = load_obj.loadData_word(
        file_location, file_xml_path)

    # Draw ids one at a time without replacement until list_data is empty.
    # The draw sequence is left as-is so a seeded run still yields the
    # same split as before.
    list_data_random = []
    dict_data_random = {}
    while len(list_data) > 0:
        rand_seed = np.random.randint(len(list_data))
        img_id = list_data[rand_seed]
        list_data_random.append(img_id)
        dict_data_random[img_id] = dict_data[img_id]
        list_data.remove(img_id)

    train_data = list_data_random[:no_of_train]
    val_data = list_data_random[no_of_train:no_of_train + no_of_val]
    test_data = list_data_random[no_of_train + no_of_val:]

    # Apply % to the string, not to the result of print(): the old
    # `print('...') % n` form raises TypeError on Python 3.
    print('Total no of training samples created =%d' % (len(train_data)))
    print('Total no of validation samples created =%d' % (len(val_data)))
    print('Total no of testing samples created =%d' % (len(test_data)))

    # `with` guarantees each handle is closed even if a dump raises.
    outputs = (
        ('dict_data', dict_data_random),
        ('training_data', train_data),
        ('validation_data', val_data),
        ('testing_data', test_data),
        ('chars_data', chars),
    )
    for out_name, payload in outputs:
        with open(out_name, 'wb') as out_file:
            cp.dump(payload, out_file)

    print('Finished Creating data')

Пример #12

0

Показать файл