def build_and_eval():
    utility.make_dir(OUTPUT_DIR)

    print('Building lexicon')
    poli_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZ_POLI_WORD_SORTED_FILE))
    sost_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZIONARIO_SOSTANTIVI_AUGMENTED_PAISA_FILE))
    agg_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZIONARIO_AGGETTIVI_AUGMENTED_PAISA_FILE))
    lex_set = set(poli_lexicon + sost_lexicon + agg_lexicon)
    lex_solution_set = set(sost_lexicon + agg_lexicon)
    lexicon.printLexiconToFile(lex_set, LEX_FREQ_FILE)
    lexicon.printLexiconToFile(lex_solution_set, SOLUTION_LEX_FREQ_FILE)

    print('Computing coverage')
    scorer.computeCoverageOfGameWordLex(lex_set, lex_solution_set,
                                        corpora.GAME_SET_100_FILE,
                                        COVERAGE_WORD_GAME100_FILE)

    print('Building association matrix')
    matrix = matrix_dict.Matrix_Dict(lex_set, lex_solution_set)
    matrix.add_patterns_from_corpus(corpora.PAISA_RAW_INFO)
    matrix.compute_association_scores()
    matrix.write_matrix_to_file(MATRIX_FILE)

    print('Eval')
    scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.GAME_SET_100_FILE, EVAL_WORD_GAME100_FILE)
    scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.NLP4FUN_DEV_TSV_v2_tv_FILE, EVAL_NLP4FUN_DEV_TV_FILE)
    scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.NLP4FUN_DEV_TSV_v2_bg_FILE, EVAL_NLP4FUN_DEV_BG_FILE)

def build_and_eval():
    utility.make_dir(OUTPUT_DIR)

    print('\nBuilding lexicon')
    lex_set = lexicon.loadLexiconFromFile(corpora.DIZ_POLI_WORD_SORTED_FILE)
    lex_solution_set = lex_set
    '''
    poli_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZ_POLI_WORD_SORTED_FILE))
    sost_lexicon = list(corpora.getSostantiviSetFromPaisa(min_freq=100, inflected=True))
    print('\nSize of sostantivi lex: {}'.format(len(sost_lexicon)))
    agg_lexicon = list(corpora.getAggettiviSetFromPaisa(min_freq=100, inflected=True))
    print('\nSize of agg lex: {}'.format(len(agg_lexicon)))
    lex_set = set(poli_lexicon + sost_lexicon + agg_lexicon)
    lex_solution_set = set(sost_lexicon + agg_lexicon)
    #lex_solution_set = lex_set
    '''

    print('\nComputing lex coverage')
    scorer.computeCoverageOfGameWordLex(lex_set, lex_solution_set,
                                        corpora.GAME_SET_100_FILE,
                                        COVERAGE_WORD_GAME100_FILE)

    print('\nBuilding association matrix')
    matrix = Matrix(lex_set, lex_solution_set)
    matrix.add_patterns_from_corpus(corpora.DE_MAURO_POLIREMATICHE_INFO)
    corpora.addBigramFromPolirematicheInMatrix(matrix, weight=1)
    #corpora.addBigramFromCompunds(matrix, lex_set, min_len=4, weight=10)
    matrix.compute_association_scores()
    matrix.write_matrix_to_file(MATRIX_FILE)

    print('\nEval')
    scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.GAME_SET_100_FILE, EVAL_WORD_GAME100_FILE)

def __init__(self, model, optimizer, gpu=-1, save_path='./', load_model=None,
             train_transform=None, test_transform=None,
             train_batch_size=64, test_batch_size=256,
             start_epoch=1, epochs=200, seed=1):
    self.model, self.optimizer = model, optimizer
    self.gpu = gpu
    self.save_path = utility.remove_slash(save_path)
    self.load_model = load_model
    self.train_transform, self.test_transform = train_transform, test_transform
    self.train_batch_size, self.test_batch_size = train_batch_size, test_batch_size
    self.start_epoch, self.epochs, self.seed = start_epoch, epochs, seed
    # load mnist
    self.init_dataset()
    # initialize seed
    self.init_seed()
    # create directory
    utility.make_dir(save_path)
    # load pretrained model if possible
    self.load(load_model)
    # init log
    self.init_log()

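# Usage sketch for the constructor above. Hedged: `Trainer` as the enclosing
# class name and the torch model/optimizer below are assumptions, not part of
# the original snippet; the "# load mnist" comment suggests a PyTorch-style
# MNIST setup.
import torch.nn as nn
import torch.optim as optim

model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))  # toy MNIST classifier
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
trainer = Trainer(model, optimizer, gpu=0, save_path='./checkpoints',
                  train_batch_size=64, test_batch_size=256, epochs=200, seed=1)
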
def saveParameters(fileDir):
    # Model name '1' refers to the dataset's folder 1.
    model_name = '1'
    detection_model = objectDetector.load_model(model_name)
    # fileDir is a directory: process every file inside it.
    files = utility.get_filenames(fileDir)
    fileNames = []
    domColors = []
    wallColors = []
    floorColors = []
    for f in files:
        if "." not in f:
            continue
        print("Now proceeding ", f, " [ ", files.index(f), " ]")
        coord, str_tag, number_tag, score = objectDetector.inference(detection_model, f)
        # Build the save file name.
        save_file_name = utility.add_name(f, "_od", extension="bin")
        dirs = save_file_name.split("/")
        save_image_name = ""
        for d in dirs[0:-1]:
            save_image_name += d + "/"
        save_image_name += f.split("/")[-1].split(".")[0] + "/"
        utility.make_dir(save_image_name)
        rect_files = []
        additional_infor = []
        for i in range(len(str_tag)):
            additional_infor.append(-1)
            rect_image = image_processing.get_rect_image(
                f, int(coord[i][0]), int(coord[i][1]), int(coord[i][2]), int(coord[i][3]))
            rect_image_name = save_image_name + f.split("/")[-1]
            rect_image_name = utility.add_name(rect_image_name, "_" + str(i))
            rect_files.append(rect_image_name)
            utility.save_image(rect_image, rect_image_name)
        dom_color = image_processing.get_dominant_color(f)
        n_color = utility.get_remarkable_color_n(dom_color, MAX_COLOR_LENGTH)
        fileNames.append(os.path.basename(f))
        domColors.append(n_color)
        wallColors.append([])
        floorColors.append([])
        utility.save_result(
            [coord, str_tag, number_tag, score, rect_files, additional_infor, n_color],
            save_file_name)
    utility.save_result([files, domColors, wallColors, floorColors],
                        config.RESEARCH_BASE_FILE)

def build_and_eval():
    utility.make_dir(OUTPUT_DIR)

    print('Building lexicon')
    poli_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZ_POLI_WORD_SORTED_FILE))
    sost_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZIONARIO_SOSTANTIVI_AUGMENTED_PAISA_FILE))
    agg_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZIONARIO_AGGETTIVI_AUGMENTED_PAISA_FILE))
    lex_set = set(poli_lexicon + sost_lexicon + agg_lexicon)
    lex_solution_set = set(sost_lexicon + agg_lexicon)
    '''
    poli_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZ_POLI_WORD_SORTED_FILE))
    sost_lexicon = list(corpora.getSostantiviSetFromPaisa(min_freq=1000, inflected=True))
    print('\nSize of sostantivi lex: {}'.format(len(sost_lexicon)))
    agg_lexicon = list(corpora.getAggettiviSetFromPaisa(min_freq=1000, inflected=True))
    print('\nSize of agg lex: {}'.format(len(agg_lexicon)))
    lex_set = set(poli_lexicon + sost_lexicon + agg_lexicon)
    '''
    lexicon.printLexiconToFile(lex_set, LEX_FREQ_FILE)
    lexicon.printLexiconToFile(lex_solution_set, SOLUTION_LEX_FREQ_FILE)

    print('Computing lex coverage')
    scorer.computeCoverageOfGameWordLex(lex_set, lex_solution_set,
                                        corpora.GAME_SET_100_FILE,
                                        COVERAGE_WORD_GAME100_FILE)

    print('Building association matrix')
    matrix = Matrix_Dict(lex_set, lex_solution_set)
    matrix.add_patterns_from_corpus(corpora.PAISA_RAW_INFO)
    matrix.add_patterns_from_corpus(corpora.DE_MAURO_POLIREMATICHE_INFO, weight=DE_MAURO_WEIGHT)
    matrix.add_patterns_from_corpus(corpora.PROVERBI_INFO, weight=PROVERBI_WEIGHT)
    matrix.add_patterns_from_corpus(corpora.ITWAC_RAW_INFO, weight=1)
    matrix.add_patterns_from_corpus(corpora.WIKI_IT_TITLES_INFO, weight=WIKI_IT_WEIGHT)
    #matrix.add_patterns_from_corpus(corpora.WIKI_IT_TEXT_INFO, weight=1)
    corpora.addBigramFromPolirematicheInMatrix(matrix, DE_MAURO_WEIGHT)
    corpora.addBigramFromCompunds(matrix, lex_set, min_len=4, weight=COMPOUNDS_WEIGHT)
    matrix.compute_association_scores()
    matrix.write_matrix_to_file(MATRIX_FILE)

    print('Eval')
    scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.GAME_SET_100_FILE, EVAL_WORD_GAME100_FILE)

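# The eval steps above score ranked guesses with Mean Reciprocal Rank. The
# scorer's internals are not shown in this excerpt; the following is a
# generic, self-contained MRR sketch (function name and data layout are
# assumptions for illustration):
def mean_reciprocal_rank(kbest_lists, gold_answers):
    """Average of 1/rank of the first correct answer (0 if absent from the k-best)."""
    total = 0.0
    for candidates, gold in zip(kbest_lists, gold_answers):
        rank = next((i for i, c in enumerate(candidates, 1) if c == gold), None)
        if rank is not None:
            total += 1.0 / rank
    return total / len(kbest_lists)

# gold 'sole' ranked 2nd, 'mare' ranked 1st, 'luna' missing -> (0.5 + 1 + 0) / 3
print(mean_reciprocal_rank([['mare', 'sole'], ['mare'], ['vino']],
                           ['sole', 'mare', 'luna']))  # 0.5
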
def saveParameter(fileName, detection_model):
    coord, str_tag, number_tag, score = objectDetector.inference(detection_model, fileName)
    # Build the save file name.
    save_file_name = config.RESEARCH_BASE_DIR + "/" + os.path.basename(utility.get_od_bin(fileName))
    dirs = save_file_name.split("/")
    save_image_name = ""
    for d in dirs[0:-1]:
        save_image_name += d + "/"
    save_image_name += fileName.split("/")[-1].split(".")[0] + "/"
    utility.make_dir(save_image_name)
    rect_files = []
    additional_infor = []
    for i in range(len(str_tag)):
        additional_infor.append(-1)
        rect_image = image_processing.get_rect_image(
            fileName, int(coord[i][0]), int(coord[i][1]), int(coord[i][2]), int(coord[i][3]))
        rect_image_name = save_image_name + fileName.split("/")[-1]
        rect_image_name = utility.add_name(rect_image_name, "_" + str(i))
        rect_files.append(rect_image_name)
        utility.save_image(rect_image, rect_image_name)
    dom_color = image_processing.get_dominant_color(fileName)
    n_color = utility.get_remarkable_color_n(dom_color, MAX_COLOR_LENGTH)
    utility.save_result(
        [coord, str_tag, number_tag, score, rect_files, additional_infor, n_color],
        save_file_name)
    return [coord, str_tag, number_tag, score, rect_files, additional_infor, n_color]

def build():
    utility.make_dir(OUTPUT_DIR)

    print('Building lexicon')
    poli_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZ_POLI_WORD_SORTED_FILE))
    sost_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZIONARIO_SOSTANTIVI_AUGMENTED_PAISA_FILE))
    agg_lexicon = list(lexicon.loadLexiconFromFile(corpora.DIZIONARIO_AGGETTIVI_AUGMENTED_PAISA_FILE))
    lex_set = set(poli_lexicon + sost_lexicon + agg_lexicon)
    lex_solution_set = set(sost_lexicon + agg_lexicon)
    lexicon.printLexiconToFile(lex_set, LEX_FREQ_FILE)
    lexicon.printLexiconToFile(lex_solution_set, SOLUTION_LEX_FREQ_FILE)

    def add_patterns_from_corpus(corpus_info):
        lines_extractor = corpora.extract_lines(corpus_info)
        source = corpus_info['name']
        patterns_count = 0
        print("Adding patterns from source: {}".format(source))
        tot_lines = corpus_info['lines']
        for n, line in enumerate(lines_extractor, 1):
            patterns_count += patterns_extraction.addPatternsFromLineInMongo(line, lex_set, source)
            if n % 1000 == 0:
                sys.stdout.write("Progress: {0:.1f}%\r".format(float(n) * 100 / tot_lines))
                sys.stdout.flush()
        print('Extracted patterns: {}'.format(patterns_count))

    # print('Computing lex coverage')
    # scorer.computeCoverageOfGameWordLex(lex_set, lex_solution_set, corpora.GAME_SET_100_FILE, COVERAGE_WORD_GAME100_FILE)

    print('Adding patterns in db')
    add_patterns_from_corpus(corpora.DE_MAURO_POLIREMATICHE_INFO)

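# The carriage-return progress idiom inside add_patterns_from_corpus works on
# its own; a minimal self-contained sketch (names and the total are
# illustrative only):
import sys

def show_progress(n, total):
    # '\r' moves the cursor back to the start of the line, so each write
    # overwrites the previous percentage in place instead of scrolling.
    sys.stdout.write("Progress: {0:.1f}%\r".format(float(n) * 100 / total))
    sys.stdout.flush()

total = 100000
for n in range(1, total + 1):
    if n % 1000 == 0:
        show_progress(n, total)
print()  # step past the progress line once the loop finishes
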
def build_and_eval():
    utility.make_dir(OUTPUT_DIR)

    print('Building lexicon')
    lex_set = lexicon.loadLexiconFromFile(corpora.DIZ_POLI_WORD_SORTED_FILE)
    lexicon_freq = {w: 1 for w in lex_set}
    #lex_solution_set = lexicon.loadLexiconFromFile(corpora.DIZIONARIO_BASE_SOSTANTIVI_FILE)
    print('Lex size: {}'.format(len(lex_set)))
    lexicon.printLexFreqToFile(lexicon_freq, LEX_FREQ_FILE)

    print('Computing coverage')
    scorer.computeCoverageOfGameWordLex(lexicon_freq, corpora.GAME_SET_100_FILE,
                                        COVERAGE_WORD_GAME100_FILE)

    print('Building association matrix')
    matrix = matrix_dict.Matrix_Dict(lex_set=lex_set)  # lex_solution_set
    matrix.add_patterns_from_corpus(corpora.PAISA_RAW_INFO)
    matrix.add_patterns_from_corpus(corpora.DE_MAURO_POLIREMATICHE_INFO, weight=DE_MAURO_WEIGHT)
    corpora.addBigramFromPolirematicheInMatrix(matrix, DE_MAURO_WEIGHT)
    matrix.compute_association_scores()
    matrix.write_matrix_to_file(MATRIX_FILE)

    print('Eval')
    scorer.evaluate_kbest_MeanReciprocalRank(matrix, corpora.GAME_SET_100_FILE, EVAL_WORD_GAME100_FILE)

def objectDetect(inputFile, outputFile):
    '''
    Runs object detection on the furniture in the given inputFile and saves
    the result to outputFile, in JSON form. Currently only bin files are
    supported for input/output. If a directory is passed, outputFile is ignored.
    '''
    if "." not in inputFile:
        # inputFile is a directory: process every file inside it.
        files = utility.get_filenames(inputFile)
        for f in files:
            if "." not in f:
                continue
            coord, str_tag, number_tag, score = objectDetector.inference(detection_model, f)
            # Build the save file name.
            save_file_name = utility.add_name(f, "_od", extension="bin")
            dirs = save_file_name.split("/")
            save_image_name = ""
            for d in dirs[0:-1]:
                save_image_name += d + "/"
            save_image_name += f.split("/")[-1].split(".")[0] + "/"
            utility.make_dir(save_image_name)
            rect_files = []
            additional_infor = []
            for i in range(len(str_tag)):
                additional_infor.append(-1)
                rect_image = image_processing.get_rect_image(
                    f, int(coord[i][0]), int(coord[i][1]), int(coord[i][2]), int(coord[i][3]))
                rect_image_name = save_image_name + f.split("/")[-1]
                rect_image_name = utility.add_name(rect_image_name, "_" + str(i))
                rect_files.append(rect_image_name)
                utility.save_image(rect_image, rect_image_name)
            utility.save_result(
                [coord, str_tag, number_tag, score, rect_files, additional_infor],
                save_file_name)
    else:
        coord, str_tag, number_tag, score = objectDetector.inference(detection_model, inputFile)
        # Build the save file name.
        save_file_name = utility.add_name(inputFile, "_od", extension="bin")
        dirs = save_file_name.split("/")
        save_image_name = ""
        for d in dirs[0:-1]:
            save_image_name += d + "/"
        save_image_name += inputFile.split("/")[-1].split(".")[0] + "/"
        utility.make_dir(save_image_name)
        rect_files = []
        additional_infor = []
        for i in range(len(str_tag)):
            additional_infor.append(-1)
            rect_image = image_processing.get_rect_image(
                inputFile, int(coord[i][0]), int(coord[i][1]), int(coord[i][2]), int(coord[i][3]))
            rect_image_name = save_image_name + inputFile.split("/")[-1]
            rect_image_name = utility.add_name(rect_image_name, "_" + str(i))
            rect_files.append(rect_image_name)
            utility.save_image(rect_image, rect_image_name)
        utility.save_result(
            [coord, str_tag, number_tag, score, rect_files, additional_infor],
            outputFile)

def _generate_file_names(dataframe_dir):
    make_dir(dataframe_dir)
    return (os.path.join(dataframe_dir, 'train_sampled.pkl'),
            os.path.join(dataframe_dir, 'dev.pkl'),
            os.path.join(dataframe_dir, 'cost_versus_epoch.pkl'))

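# Callers can unpack the returned tuple directly; a usage sketch (the
# 'output/dataframes' directory name is an assumption for illustration):
train_path, dev_path, cost_path = _generate_file_names('output/dataframes')
print(train_path)  # output/dataframes/train_sampled.pkl
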
        responsePattern.update(**responsePatternIn)
        TheLogger.debug("Incoming response pattern: \n" +
                        json_dumps(responsePattern))
        pyData_to_json_file(responsePattern, pathRespPatternFile)
    except Exception as e:
        TheLogger.error(str(e))


if __name__ == "__main__":
    """Main entry point."""
    pathTempDataDir = path_join(PATH_ROOT, "temp")
    pathRespPatternFile = path_join(pathTempDataDir, "response_pattern.json")
    make_dir(pathTempDataDir)
    TheLogger.init(pathTempDataDir, "server.log")

    mode = sys.argv[1]
    if mode == "start":
        serverHost = "127.0.0.1"
        try:
            serverPort = int(sys.argv[2])
        except Exception as e:
            print("Error: specify port correctly.")
            sys.exit(1)  # serverPort is undefined past this point; bail out
        start(serverHost, serverPort)
    elif mode == "set_response_pattern":
        pathRespPatternFileIn = sys.argv[2]
        flagAddData = bool(int(sys.argv[3]))
        set_response_pattern(pathRespPatternFileIn, flagAddData)
    elif mode == "get_response_pattern":
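# Invocation sketches derived from the argv handling above (the script name
# `server.py` is an assumption; the excerpt cuts off before the
# "get_response_pattern" handler):
#   python server.py start 8080                       # serve on 127.0.0.1:8080
#   python server.py set_response_pattern in.json 1   # flagAddData=True
#   python server.py get_response_pattern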