def infer(test_reader, window_size=5, use_cuda=False, model_path=None):
    """ inference function """
    if model_path is None or not os.path.exists(model_path):
        print(str(model_path) + " cannot be found")
        return

    # get the reverse dicts and define the index of the word of interest
    # in the window (must be the same as the index used during training)
    reverse_word_dict = reverse_dict(word_dict)
    reverse_lbl_dict = reverse_dict(lbl_dict)
    interest_index = int(window_size / 2)

    # define the input layers
    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)

    # init paddlepaddle
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data], place=place)

    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        [inference_program, feed_target_names,
         fetch_targets] = fluid.io.load_inference_model(model_path, exe)
        for data_ in test_reader():
            # get the word indices and the words in char format
            words_index = [[d[0]] for d in data_]
            words = [reverse_word_dict[d[0][interest_index]] for d in data_]

            # run inference to predict
            prediction = exe.run(inference_program,
                                 feed=feeder.feed(words_index),
                                 fetch_list=fetch_targets,
                                 return_numpy=True)

            # get the label tag and the prediction tag
            label_tag = [reverse_lbl_dict[d[1]] for d in data_]
            prediction_tag = [
                reverse_lbl_dict[p.argmax()] for p in prediction[0]
            ]

            # get the source string and prediction string of the POS tags
            source_POS = " ".join(
                ["/".join(items) for items in zip(words, label_tag)])
            prediction_POS = " ".join(
                ["/".join(items) for items in zip(words, prediction_tag)])

            # print the result for comparison
            print("%s\ns_POS = %s\np_POS = %s" %
                  ("-" * 40, source_POS, prediction_POS))
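# The snippets in this listing all rely on a small `reverse_dict` helper
# (sometimes imported as `utils.reverse_dict`) that is not defined here.
# A minimal sketch, assuming the values of the input dict are unique and
# hashable, could look like:
def reverse_dict(d):
    """Return a new dict mapping each value of `d` back to its key."""
    return {value: key for key, value in d.items()}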
def load(self, path, features='BoW', match_avitm=True):
    if path[:2] == '~/':
        path = os.path.join(os.path.expanduser(path[:2]), path[2:])

    ### Specify the file locations
    train_path = path + '/train.npz'
    dev_path = path + '/dev.npz'
    test_path = path + '/test.npz'
    vocab_path = path + '/train.vocab.json'

    ### Load train
    train_csr = load_sparse(train_path)
    train = np.array(train_csr.todense()).astype('float32')

    ### Load dev
    self.dev_counts = load_sparse(dev_path).tocsc()  # will be used for NPMI

    ### Load test
    test_csr = load_sparse(test_path)
    test = np.array(test_csr.todense()).astype('float32')

    ### load vocab
    # ENCODING = "ISO-8859-1"
    ENCODING = "utf-8"
    with open(vocab_path, encoding=ENCODING) as f:
        vocab_list = json.load(f)

    # construct maps
    vocab2dim = dict(zip(vocab_list, range(len(vocab_list))))
    dim2vocab = reverse_dict(vocab2dim)

    return [train, None, test, None, None, None], [None, None, None], \
        [vocab2dim, dim2vocab, None, None]
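# `load_sparse` is referenced here (and in the BERT-logit loader below) but
# is not defined in this listing. A plausible minimal version, assuming the
# files were written with `scipy.sparse.save_npz`, would be:
from scipy import sparse

def load_sparse(npz_path):
    """Load a scipy sparse matrix previously saved with sparse.save_npz."""
    return sparse.load_npz(npz_path).tocsr()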
def load(self, path='./data/wikitext-103', features='BoW', match_avitm=True):
    if path[:2] == '~/':
        path = os.path.join(os.path.expanduser(path[:2]), path[2:])

    ### Specify the file locations
    train_path = path + '/wikitext-103_tra.csr.npz'
    test_path = path + '/wikitext-103_test.csr.npz'
    vocab_path = path + '/vocab.txt'

    ### Load train
    train_csr = sparse.load_npz(train_path)
    train = np.array(train_csr.todense()).astype('float32')

    ### Load test
    test_csr = sparse.load_npz(test_path)
    test = np.array(test_csr.todense()).astype('float32')

    ### load vocab
    ENCODING = "ISO-8859-1"
    # ENCODING = "utf-8"
    with open(vocab_path, encoding=ENCODING) as f:
        vocab_list = [line.strip('\n') for line in f]

    # construct maps
    vocab2dim = dict(zip(vocab_list, range(len(vocab_list))))
    dim2vocab = reverse_dict(vocab2dim)

    return [train, None, test, None, None, None], [None, None, None], \
        [vocab2dim, dim2vocab, None, None]
def load(self, path='./nytimes-pbr', features='BoW', match_avitm=False):
    if path[:2] == '~/':
        path = os.path.join(os.path.expanduser(path[:2]), path[2:])

    ### Specify the file locations
    train_path = path + '/input/data/train_tra/nytimes-pbr_tra.csr.npz'
    test_path = path + '/input/data/validation_np/validation_data.csr.npz'
    vocab_path = path + '/vocab.nytimes.txt'

    ### Load train
    # train_csr = sparse.load_npz(train_path)
    # train = np.array(train_csr.todense()).astype('float32')
    train = sparse.load_npz(train_path).astype('float32')
    train = mx.nd.sparse.csr_matrix(train, dtype='float32')

    ### Load test
    # test_csr = sparse.load_npz(test_path)
    # test = np.array(test_csr.todense()).astype('float32')
    test = sparse.load_npz(test_path).astype('float32')
    test = mx.nd.sparse.csr_matrix(test, dtype='float32')

    ### load vocab
    ENCODING = "ISO-8859-1"
    # ENCODING = "utf-8"
    with open(vocab_path, encoding=ENCODING) as f:
        vocab_list = [line.strip('\n') for line in f]

    # construct maps
    vocab2dim = dict(zip(vocab_list, range(len(vocab_list))))
    dim2vocab = reverse_dict(vocab2dim)

    return [train, None, test, None, None, None], [None, None, None], \
        [vocab2dim, dim2vocab, None, None]
def load(self, path='~/20news_sklearn', features='BoW', match_avitm=True):
    if path[:2] == '~/':
        path = os.path.join(os.path.expanduser(path[:2]), path[2:])

    ### Specify the file locations
    train_path = path + '/train_sklearn_avitm.npy'
    train_labels_path = path + '/train_labels_sklearn_avitm.npy'
    test_path = path + '/test_sklearn_avitm.npy'
    test_labels_path = path + '/test_labels_sklearn_avitm.npy'
    vocab_path = path + '/vocab.txt'
    label_names_path = path + '/label_names.txt'

    ### Load train
    train = np.load(train_path).astype('float32')
    if train_labels_path:
        train_labels = np.load(train_labels_path)
    else:
        train_labels = None

    ### Load test
    test = np.load(test_path).astype('float32')
    if test_labels_path:
        test_labels = np.load(test_labels_path)
    else:
        test_labels = None

    ### load vocab
    ENCODING = "ISO-8859-1"
    # ENCODING = "utf-8"
    with open(vocab_path, encoding=ENCODING) as f:
        vocab_list = [line.strip('\n') for line in f]

    ### Load label names
    if label_names_path:
        with open(label_names_path, encoding=ENCODING) as f:
            label_name_list = [line.strip('\n') for line in f]
    else:
        label_name_list = None

    # construct maps
    vocab2dim = dict(zip(vocab_list, range(len(vocab_list))))
    dim2vocab = reverse_dict(vocab2dim)
    topic2dim = dict(zip(label_name_list, range(len(label_name_list))))
    dim2topic = reverse_dict(topic2dim)

    return [train, None, test, None, None, None], \
        [train_labels, None, test_labels], \
        [vocab2dim, dim2vocab, topic2dim, dim2topic]
def load(self, data_path, features='BoW', match_avitm=True):
    ### Specify the file locations
    train_path = data_path + '/train.npz'
    dev_path = data_path + '/dev.npz'
    test_path = data_path + '/test.npz'
    vocab_path = data_path + '/train.vocab.json'

    ### Load train
    train_csr = load_sparse(train_path)
    train_counts = np.array(train_csr.todense()).astype('float32')
    train_bert_logits = np.load(self.logit_path + "/train.npy")
    train = np.concatenate([train_counts, train_bert_logits], axis=1)

    if self.logit_clip is not None:
        # limit the document representations to the top k labels
        doc_tokens = np.sum(train_counts > 0, axis=1)
        vocab_size = train_counts.shape[1]
        for i, (row, total) in enumerate(zip(train_bert_logits, doc_tokens)):
            k = self.logit_clip * total  # keep this many logits
            if k < vocab_size:
                min_logit = np.quantile(row, 1 - k / vocab_size)
                train_bert_logits[
                    i, train_bert_logits[i] < min_logit] = -np.inf

        # min_logits = np.quantile(
        #     train_bert_logits,
        #     np.quantile(train_counts.sum(1), 0.9) / 20_000, axis=1)
        # train_bert_logits[train_bert_logits < min_logits.reshape(-1, 1)] = -np.inf

    ### Load dev
    self.dev_counts = load_sparse(dev_path).tocsc()  # will be used for NPMI

    ### Load test
    test_csr = load_sparse(test_path)
    test_counts = np.array(test_csr.todense()).astype('float32')
    test_bert_logits = np.ones_like(test_counts)
    test = np.concatenate([test_counts, test_bert_logits], axis=1)

    ### load vocab
    # ENCODING = "ISO-8859-1"
    ENCODING = "utf-8"
    with open(vocab_path, encoding=ENCODING) as f:
        vocab_list = json.load(f)

    # construct maps
    vocab2dim = dict(zip(vocab_list, range(len(vocab_list))))
    dim2vocab = reverse_dict(vocab2dim)

    return [train, None, test, None, None, None], [None, None, None], \
        [vocab2dim, dim2vocab, None, None]
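# Illustrative sketch of the per-document clipping rule used above, with made-up
# numbers: for a row of 10000 vocabulary logits, 50 distinct tokens in the
# document, and logit_clip = 2, roughly k = 2 * 50 = 100 logits survive and the
# rest are set to -inf.
import numpy as np

row = np.random.randn(10000)              # one document's logits over the vocab
k = 2 * 50                                # logit_clip * number of distinct tokens
min_logit = np.quantile(row, 1 - k / row.size)
clipped = np.where(row < min_logit, -np.inf, row)
print(np.isfinite(clipped).sum())         # approximately k values remain finite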
def get_display_states(component):
    """
    get configured display states from trac.ini

    if configured:

    [testmanager]
    passed = foo
    passed_comment = bar
    failed = doh
    skipped = n
    not_tested = -
    """
    states = dict(
        [option for option in component.config.options('testmanager')]
    )

    if not states or not len(states) == len(STATES_DISPLAY):
        return reverse_dict(STATES_DISPLAY)

    return states
def __init__(self,
             language="american_english",
             min_word_size=3,
             config="config.json",
             print_search_progress=False):
    self.digit_map = utils.get_digit_map(file=config)
    self.char_map = utils.reverse_dict(self.digit_map)
    allowed_languages = utils.get_language_map(file=config)
    self.csp_solver = CspSolver(
        config=config,
        language=allowed_languages[language],
        print_search_progress=print_search_progress)
    self.min_word_size = min_word_size
def load(self, path='./yelp_review_polarity_csv', features='BoW',
         match_avitm=False):
    if path[:2] == '~/':
        path = os.path.join(os.path.expanduser(path[:2]), path[2:])

    ### Specify the file locations
    train_path = path + '/yelp_review_polarity_csv_train.npz'
    val_path = path + '/yelp_review_polarity_csv_val.npz'
    test_path = path + '/yelp_review_polarity_csv_test.npz'
    vocab_path = path + '/vocab.txt'

    ### Load train
    # train_csr = sparse.load_npz(train_path)
    # train = np.array(train_csr.todense()).astype('float32')
    train = sparse.load_npz(train_path).astype('float32')
    train = mx.nd.sparse.csr_matrix(train, dtype='float32')

    ### Load val
    val = sparse.load_npz(val_path).astype('float32')
    val = mx.nd.sparse.csr_matrix(val, dtype='float32')

    ### Load test
    # test_csr = sparse.load_npz(test_path)
    # test = np.array(test_csr.todense()).astype('float32')
    test = sparse.load_npz(test_path).astype('float32')
    test = mx.nd.sparse.csr_matrix(test, dtype='float32')

    ### load vocab
    # ENCODING = "ISO-8859-1"
    ENCODING = "utf-8"
    with open(vocab_path, encoding=ENCODING) as f:
        vocab_list = [line.strip('\n') for line in f]

    # construct maps
    vocab2dim = dict(zip(vocab_list, range(len(vocab_list))))
    dim2vocab = reverse_dict(vocab2dim)

    return [train, val, test, None, None, None], [None, None, None], \
        [vocab2dim, dim2vocab, None, None]
def align_records(self, r1, r2):
    """
    partial align DOM tree list to another DOM tree list.

    e.g. (taken from [1]):

    >>> from lxml.html import fragment_fromstring
    >>> from .mdr import Record
    >>> pta = PartialTreeAligner()

    1. "flanked by 2 sibling nodes"

    >>> t1 = fragment_fromstring("<p> <a></a> <b></b> <e></e> </p>")
    >>> t2 = fragment_fromstring("<p> <b></b> <c></c> <d></d> <e></e> </p>")
    >>> _, _, mapping = pta.align_records(Record(t1), Record(t2))
    >>> [e.tag for e in t1]
    ['a', 'b', 'c', 'd', 'e']
    >>> sorted([e.tag for e in mapping.itervalues()])
    ['b', 'c', 'd', 'e', 'p']

    2. "rightmost nodes"

    >>> t1 = fragment_fromstring("<p> <a></a> <b></b> <e></e> </p>")
    >>> t2 = fragment_fromstring("<p> <e></e> <f></f> <g></g> </p>")
    >>> _, _, mapping = pta.align_records(Record(t1), Record(t2))
    >>> [e.tag for e in t1]
    ['a', 'b', 'e', 'f', 'g']
    >>> sorted([e.tag for e in mapping.itervalues()])
    ['e', 'f', 'g', 'p']

    3. "leftmost nodes"

    >>> t1 = fragment_fromstring("<p> <a></a> <b></b> <e></e> </p>")
    >>> t2 = fragment_fromstring("<p> <f></f> <g></g> <a></a> </p>")
    >>> _, _, mapping = pta.align_records(Record(t1), Record(t2))
    >>> [e.tag for e in t1]
    ['f', 'g', 'a', 'b', 'e']
    >>> sorted([e.tag for e in mapping.itervalues()])
    ['a', 'f', 'g', 'p']

    4. "no unique insertion"

    >>> t1 = fragment_fromstring("<p> <a></a> <b></b> <e></e> </p>")
    >>> t2 = fragment_fromstring("<p> <a></a> <g></g> <e></e> </p>")
    >>> _, _, mapping = pta.align_records(Record(t1), Record(t2))
    >>> [e.tag for e in t1]
    ['a', 'b', 'e']
    >>> sorted([e.tag for e in mapping.itervalues()])
    ['a', 'e', 'p']

    5. "multiple unaligned nodes"

    >>> t1 = fragment_fromstring("<p> <x></x> <b></b> <d></d> </p>")
    >>> t2 = fragment_fromstring("<p> <b></b> <c></c> <d></d> <h></h> <k></k> </p>")
    >>> _, _, mapping = pta.align_records(Record(t1), Record(t2))
    >>> [e.tag for e in t1]
    ['x', 'b', 'c', 'd', 'h', 'k']
    >>> sorted([e.tag for e in mapping.itervalues()])
    ['b', 'c', 'd', 'h', 'k', 'p']

    References
    ----------
    [1] Web Data Extraction Based on Partial Tree Alignment
    <http://dl.acm.org/citation.cfm?id=1060761>
    """
    alignment = self.sta.align_records(r1, r2)
    aligned = {alignment.first: alignment.second}

    for sub in alignment.subs:
        aligned[sub.first] = sub.second

    # add reverse mapping too
    reverse_aligned = reverse_dict(aligned)
    modified = False
    unaligned_elements = self.find_unaligned_elements(aligned, r2)

    for l in unaligned_elements:
        left_most = l[0]
        right_most = l[-1]
        prev_sibling = left_most.getprevious()
        next_sibling = right_most.getnext()

        if prev_sibling is None:
            if next_sibling is not None:
                # leftmost alignment
                next_sibling_match = reverse_aligned.get(next_sibling, None)
                for i, element in enumerate(l):
                    element_copy = copy.deepcopy(element)
                    next_sibling_match.getparent().insert(i, element_copy)
                    aligned.update({element_copy: element})
                modified = True
        elif next_sibling is None:
            # rightmost alignment
            prev_sibling_match = reverse_aligned.get(prev_sibling, None)
            previous_match_index = self._get_index(prev_sibling_match)
            # unique insertion
            for i, element in enumerate(l):
                element_copy = copy.deepcopy(element)
                prev_sibling_match.getparent().insert(
                    previous_match_index + 1 + i, element_copy)
                aligned.update({element_copy: element})
            modified = True
        else:
            # flanked by two sibling elements
            prev_sibling_match = reverse_aligned.get(prev_sibling, None)
            next_sibling_match = reverse_aligned.get(next_sibling, None)

            if prev_sibling_match is not None and next_sibling_match is not None:
                next_match_index = self._get_index(next_sibling_match)
                previous_match_index = self._get_index(prev_sibling_match)

                if next_match_index - previous_match_index == 1:
                    # unique insertion
                    for i, element in enumerate(l):
                        element_copy = copy.deepcopy(element)
                        prev_sibling_match.getparent().insert(
                            previous_match_index + 1 + i, element_copy)
                        aligned.update({element_copy: element})
                    modified = True

    return modified, len(unaligned_elements) > 0, aligned
def infer(test_reader, use_cuda=False, model_path=None):
    """ inference function """
    if model_path is None or not os.path.exists(model_path):
        print(str(model_path) + " cannot be found")
        return

    # get the reverse dict (must be built from the same dict used for training)
    reverse_word_dict = reverse_dict(word_dict)

    # define the input layers
    hidden = fluid.layers.data(name="hidden", shape=[4096], dtype="float32")
    cell = fluid.layers.data(name="cell", shape=[4096], dtype="float32")
    pre_word = fluid.layers.data(name="pre_words", shape=[1], dtype="int64")

    # init paddlepaddle
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[hidden, cell, pre_word], place=place)

    inference_scope = fluid.core.Scope()

    start_word_id = word_dict["__start__"]
    end_word_id = word_dict["__end__"]

    with fluid.scope_guard(inference_scope):
        [inference_program, feed_target_names,
         fetch_targets] = fluid.io.load_inference_model(model_path, exe)
        for data_ in test_reader():
            # get the source words in char format
            # words_index = [d[0] for d in data_]
            words = [reverse_word_dict[d] for d in data_[1]]
            img_feat, word_list = data_

            prev_hidden_, prev_cell_, prediction = img_feat, img_feat, start_word_id
            prediction_list = []

            # decode step by step until the end token (or MAX_LEN) is reached
            for ii in range(MAX_LEN):
                if ii == 0:
                    # data_lstm = [(start_word_id, start_word_id, img_feat, img_feat)]
                    data_lstm = [[prev_hidden_, prev_cell_, start_word_id]]
                else:
                    # pre_words = word_list[ii - 1]
                    data_lstm = [[prev_hidden_, prev_cell_, prediction]]

                # run one decoding step and carry the LSTM state forward
                prediction, prev_hidden_, prev_cell_ = exe.run(
                    inference_program,
                    feed=feeder.feed(data_lstm),
                    fetch_list=fetch_targets,
                    return_numpy=True)
                prediction = prediction[0].argmax()

                prediction_list.append(prediction)
                if prediction == end_word_id:
                    break

            prediction_tag = [reverse_word_dict[p] for p in prediction_list]
            prediction_words = " ".join(prediction_tag)
            source_words = " ".join(words)

            # print the result for comparison
            print("%s\ns_POS = %s\np_POS = %s" %
                  ("-" * 40, source_words, prediction_words))
import pandas as pd
import numpy as np
import snap
import community
import random
import networkx as nx  # needed for write_weighted_edgelist

import utils  # local helpers: loadGraphs, hicode, modularity, reverse_dict, ...

G, G_weighted = utils.loadGraphs()

R_t, G_2, layers_2 = utils.hicode(G_weighted, 2)
print("hicode 2", R_t)

nx.write_weighted_edgelist(G_weighted, 'results/G2_weighted.edgelist')
nodes_mapping = utils.load_nodes_mapping()

partitions = []
for num_layer, layer in enumerate(layers_2):
    print("community_count", len(layer))

    revised_community_count = 0
    for i, subgraph in enumerate(layer):
        if len(subgraph.nodes) > 100:
            revised_community_count += 1
    print("revised_community_count", revised_community_count)

    partition = utils.layer_to_partition(layer, G_weighted)
    print("layer number ", num_layer + 1)
    print(utils.modularity(partition, G_weighted))
    print(utils.modularity(partition, G_2))
    partitions.append(partition)

    reverse_comms = utils.reverse_dict(partition)
    utils.write_results_to_file(reverse_comms, nodes_mapping,
                                "layer_" + str(num_layer + 1))
def exportBio2RDFFeature():
    fin = open(const.BIO2RDF_DRUG_TRIPLE_PATH)

    featureMap = dict()
    featureCount = dict()
    dDrug2Bio2RDFFeature = dict()

    currentDrug = ""
    currentBio2RDFFeature = []

    while True:
        line = fin.readline()
        if line == "":
            # fout.write("%s|%s\n" % (currentDrug, ",".join(int2StringArray(currentBio2RDFFeature))))
            dDrug2Bio2RDFFeature[currentDrug] = currentBio2RDFFeature
            break

        parts = line.strip().split("\t")
        if len(parts) != 3:
            print("Error")
            print(line)
            exit(-1)

        drugId = parts[0]
        if drugId != currentDrug:
            if currentDrug != "":
                # fout.write("%s|%s\n" % (currentDrug, ",".join(int2StringArray(currentBio2RDFFeature))))
                dDrug2Bio2RDFFeature[currentDrug] = currentBio2RDFFeature
            currentDrug = drugId
            currentBio2RDFFeature = []

        predicate = parts[1]
        obj = parts[2]

        isSkipped = False
        for skipPattern in PREDICATE_SKIP_PATTERNS:
            if skipPattern in predicate:
                isSkipped = True
                break
        if isSkipped:
            continue

        feature = "%s|%s" % (predicate, obj)
        featureId = utils.get_update_dict_index(featureMap, feature)
        utils.add_dict_counter(featureCount, featureId)
        currentBio2RDFFeature.append(featureId)

    fin.close()

    # sorted = utils.sort_dict(featureCount)
    # print(sorted[-10:])

    # keep only features that occur at least MIN_FEATURE_COUNT times
    newFeatureMap = dict()
    for featureId, count in featureCount.items():
        if count < MIN_FEATURE_COUNT:
            continue
        utils.get_update_dict_index(newFeatureMap, featureId)
    print("After filtering: ", len(newFeatureMap))

    fout = open(const.BIO2RDF_FEATURE_PATH, "w")
    for drugId, features in dDrug2Bio2RDFFeature.items():
        newFeatureAr = []
        for feature in features:
            newFeatureId = utils.get_dict(newFeatureMap, feature, -1)
            if newFeatureId != -1:
                newFeatureAr.append(newFeatureId)
        strArr = int2StringArray(newFeatureAr)
        fout.write("%s|%s\n" % (drugId, ",".join(strArr)))
    fout.close()

    fout = open("%s_Feature" % const.BIO2RDF_FEATURE_PATH, "w")
    revertNewFeatureMap = utils.reverse_dict(newFeatureMap)
    revertOldFeatureMap = utils.reverse_dict(featureMap)
    for newFeatureMapId, oldFeatureMapId in revertNewFeatureMap.items():
        fout.write("%s|%s\n" %
                   (newFeatureMapId, revertOldFeatureMap[oldFeatureMapId]))
    fout.close()

    fout = open(const.BIO2RDF_INFO, "w")
    fout.write("Num feature: %s\n" % len(newFeatureMap))
    fout.close()
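# The function above uses a few small dictionary helpers from `utils` that are
# not shown in this listing. Minimal sketches consistent with how they are
# called here (assumptions, not the original implementations):
def get_update_dict_index(d, key):
    """Return the index assigned to `key`, assigning a new one if unseen."""
    if key not in d:
        d[key] = len(d)
    return d[key]

def add_dict_counter(d, key, amount=1):
    """Increment a counter stored under `key`."""
    d[key] = d.get(key, 0) + amount

def get_dict(d, key, default=None):
    """Look up `key`, falling back to `default` when it is missing."""
    return d.get(key, default)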
coord.join(threads)


def write_dict():
    cs = open("resource/gb2312_list.txt", 'r').read()
    index = 134
    with open("resource/new_dic2.txt", 'a') as f:
        for c in cs:
            f.write("%d\t%c\n" % (index, c))
            index = index + 1


# python gen_record_crnn.py --dataset_name=train --dataset_dir=out --dataset_nums=10000 --output_dir=datasets/vgg_train
if __name__ == '__main__':
    chinese_dict = read_dict(FLAGS.dict_text)
    chinese_dict_ids = reverse_dict(chinese_dict)
    # print([chinese_dict[code] for code in "你好呀!"])
    # print([chinese_dict_ids[code] for code in [chinese_dict[code] for code in "你好呀!"]])
    # make_tfrecord2(chinese_dict, FLAGS.dataset_name, FLAGS.dataset_nums)
    # write_dict()
    # words = open("resource/gb2312_list.txt", 'r').read()
    # print(words)
    parse_tfrecord_file()
    # import datasets
    # print(getattr(datasets, "my_data"))