def parse_params():
    """Build a ``MyArgsWebQsp`` populated from the job's ``kgt.conf`` file.

    The configuration file is read from ``<my_args.job_folder>/kgt.conf``.
    Lines starting with '#' and blank lines are ignored; every other line is
    expected to look like ``key=value``. A value is stored as an int when the
    int parser accepts it, otherwise as a float when the float parser accepts
    it, otherwise as the raw string. Each parsed pair is echoed to stdout.

    After parsing, three derived entries are added to the parameter dict:
    'dp_home' (``$LOD_HOME`` joined with the configured 'dp_name'),
    'job_folder' and 'job_number' (the base name of the job folder). The dict
    is then handed to ``my_args.set_details``.

    Returns:
        The ``MyArgsWebQsp`` instance after ``set_details`` has been called.

    Raises:
        IndexError: a non-blank, non-comment line contains no '='.
        KeyError: ``LOD_HOME`` is not set or the file defines no 'dp_name'.
    """
    my_args = MyArgsWebQsp()
    p = dict()
    conf_pf = os.path.join(my_args.job_folder, 'kgt.conf')
    # Both parsers are relied on to yield None (instead of raising) when the
    # text is not a valid number; see the `is None` checks below.
    sint = bas_utils.ignore_exception(ValueError)(int)
    sfloat = bas_utils.ignore_exception(ValueError)(float)
    with open(conf_pf) as f:
        content = f.readlines()
    for next_line in content:
        # Skip comments and lines holding nothing but whitespace.
        if next_line.startswith('#') or not next_line.strip():
            continue
        # Split on the first '=' only so values may themselves contain '='.
        toks = next_line.split('=', 1)
        key = toks[0]
        val = toks[1].replace('\n', '')
        print(key + '=' + str(val))
        # Prefer int, then float, then fall back to the raw string.
        parsed = sint(val)
        if parsed is None:
            parsed = sfloat(val)
        p[key] = val if parsed is None else parsed
    lod_home = os.environ['LOD_HOME']
    p['dp_home'] = os.path.join(lod_home, p['dp_name'])
    p['job_folder'] = my_args.job_folder
    p['job_number'] = os.path.basename(my_args.job_folder)
    my_args.set_details(p)
    print('=========================================================')
    return my_args
def parse_params(conf_pf):
    """Load ``conf_pf`` into the module-level dict ``p`` and set up a job folder.

    Lines starting with '#' and blank lines are ignored; every other line is
    expected to look like ``key=value``. A value is stored as an int when the
    int parser accepts it, otherwise as a float when the float parser accepts
    it, otherwise as the raw string. Each parsed pair is echoed to stdout.

    After parsing, 'dp_home' (``$WORK_DIR`` joined with the configured
    'dp_name') is added, a ``MyArgs`` is built from ``p``, and
    ``set_job_folder`` is called on ``<dp_home>/model/kgrep``. Its two results
    are stored as 'job_folder' and 'job_number'; 'conf_file' records
    ``conf_pf``. The configuration file is finally copied into the job folder.

    Args:
        conf_pf: path of the configuration file to read.

    Returns:
        The ``MyArgs`` instance, with ``job_folder`` set on it.

    Raises:
        IndexError: a non-blank, non-comment line contains no '='.
        KeyError: ``WORK_DIR`` is not set or the file defines no 'dp_name'.
    """
    # `p` is only mutated, never rebound, so it must already exist at module
    # level before this function is called.
    global p
    # Both parsers are relied on to yield None (instead of raising) when the
    # text is not a valid number; see the `is None` checks below.
    sint = bas_utils.ignore_exception(ValueError)(int)
    sfloat = bas_utils.ignore_exception(ValueError)(float)
    with open(conf_pf) as f:
        content = f.readlines()
    for next_line in content:
        # Skip comments and lines holding nothing but whitespace.
        if next_line.startswith('#') or not next_line.strip():
            continue
        # Split on the first '=' only so values may themselves contain '='.
        toks = next_line.split('=', 1)
        key = toks[0]
        val = toks[1].replace('\n', '')
        print(key + '=' + str(val))
        # Prefer int, then float, then fall back to the raw string.
        parsed = sint(val)
        if parsed is None:
            parsed = sfloat(val)
        p[key] = val if parsed is None else parsed
    work_dir = os.environ['WORK_DIR']
    p['dp_home'] = os.path.join(work_dir, p['dp_name'])
    # MyArgs is constructed before the job entries are added, as in the
    # original statement order.
    my_args = MyArgs(p)
    jf, jn = set_job_folder(os.path.join(p['dp_home'], 'model/kgrep'))
    p['job_folder'] = jf
    p['job_number'] = jn
    p['conf_file'] = conf_pf
    my_args.job_folder = jf
    # Keep a copy of the configuration next to the job's outputs.
    copyfile(conf_pf, os.path.join(p['job_folder'], os.path.basename(conf_pf)))
    print('=========================================================')
    return my_args
def is_rel_id(word):
    """Tell whether ``word`` has the form of a relation id ('r' + integer).

    Returns True when ``str(word)`` starts with 'r' and the ValueError-
    suppressing int parser yields a non-None result for ``word[1:]``;
    False otherwise.
    """
    to_int = bas_utils.ignore_exception(ValueError)(int)
    if not str(word).startswith('r'):
        return False
    return to_int(word[1:]) is not None
def get_rel_embd_mat(my_args, embedding_mat, word_idx):
    """Collect the embedding rows that belong to relation-id words.

    Args:
        my_args: not used by this function.
        embedding_mat: object indexed by the positions stored in ``word_idx``.
        word_idx: mapping from word to its index into ``embedding_mat``.

    Returns:
        A dict mapping every key of ``word_idx`` that starts with 'r' and is
        followed by text the int parser accepts to ``embedding_mat[index]``.
    """
    to_int = bas_utils.ignore_exception(ValueError)(int)

    def _is_relation(word):
        # 'r' prefix first, so the slice is only attempted on matching keys.
        return str(word).startswith('r') and to_int(word[1:]) is not None

    return {
        word: embedding_mat[word_idx[word]]
        for word in word_idx.keys()
        if _is_relation(word)
    }
def check_replace_src_ent(my_args, w='exxx'):
    """Decide whether ``w`` is an entity id of the form 'e' + integer.

    Args:
        my_args: object whose ``p['use_ere']`` setting is consulted; a value
            of 1 makes this function return False unconditionally.
        w: the word to examine.

    Returns:
        True only when 'use_ere' is not 1, ``w`` starts with 'e', and the
        int parser yields a non-None result for the rest of ``w``.
    """
    if my_args.p['use_ere'] == 1 or not w.startswith('e'):
        return False
    to_int = bas_utils.ignore_exception(ValueError)(int)
    return to_int(w[1:]) is not None
def get_all_relids(wi):
    """Return the set of keys of ``wi`` shaped like relation ids.

    A key qualifies when ``str(key)`` starts with 'r' and the ValueError-
    suppressing int parser yields a non-None result for ``key[1:]``.
    """
    to_int = bas_utils.ignore_exception(ValueError)(int)
    return {
        word
        for word in wi.keys()
        if str(word).startswith('r') and to_int(word[1:]) is not None
    }
def parse_params(conf_pf):
    """Read ``conf_pf`` into a parameter dict and wrap it in ``MyArgs``.

    Lines starting with '#' and blank lines are ignored; every other line is
    expected to look like ``key=value``. A value is stored as an int when the
    int parser accepts it, otherwise as the raw string. Each parsed pair is
    echoed to stdout. After parsing, 'dp_home' is added as ``$WORK_DIR``
    joined with the configured 'dp_name'.

    Args:
        conf_pf: path of the configuration file to read.

    Returns:
        A ``MyArgs`` instance built from the parameter dict.

    Raises:
        IndexError: a non-blank, non-comment line contains no '='.
        KeyError: ``WORK_DIR`` is not set or the file defines no 'dp_name'.
    """
    p = dict()
    # Relied on to yield None (instead of raising) for non-integer text.
    sint = bas_utils.ignore_exception(ValueError)(int)
    with open(conf_pf) as f:
        content = f.readlines()
    for next_line in content:
        # Skip comments and lines holding nothing but whitespace.
        if next_line.startswith('#') or not next_line.strip():
            continue
        # Split on the first '=' only so values may themselves contain '='.
        toks = next_line.split('=', 1)
        key = toks[0]
        val = toks[1].replace('\n', '')
        print(key + '=' + str(val))
        parsed = sint(val)
        p[key] = val if parsed is None else parsed
    word_dir = os.environ['WORK_DIR']
    p['dp_home'] = os.path.join(word_dir, p['dp_name'])
    args = MyArgs(p)
    print('=========================================================')
    return args
def __iter__(self):
    """Yield a cleaned token list for each line of each file in ``self.dirname``.

    Only regular files directly inside ``self.dirname`` are read. Every
    whitespace-separated token of a line is rewritten as follows:

    * a token ending in "'s" loses that suffix and gains ' apostrophe ';
    * a token that parses as an integer strictly between 1000 and 2020 is
      kept unchanged;
    * any other token is lower-cased, then punctuation and runs of non-ASCII
      characters are replaced by spaces. Digits are replaced too, unless the
      file name starts with 'ere'.

    The rewritten tokens are joined and re-split on whitespace, so a token
    that turned into several words contributes several list items.

    Yields:
        list of str: the cleaned tokens of one input line.
    """
    print('called - ------------------------------')
    # Relied on to yield None (instead of raising) for non-integer tokens.
    sint = bas_utils.ignore_exception(ValueError)(int)
    # Compiled once rather than per token; the patterns are unchanged.
    non_ascii = re.compile(r'[^\x00-\x7F]+')
    punct = re.compile(r"[!\"#$%&()*+,-./:;<=>?@\[\]'^_`{|}~\\\t\n]")
    punct_digits = re.compile(
        r"[!\"#$%&()*+,-./:;<=>?@\[\]0-9'^_`{|}~\\\t\n]")
    for fname in os.listdir(self.dirname):
        path = os.path.join(self.dirname, fname)
        if not os.path.isfile(path):
            continue
        print('Working on file - %s now ...' % fname)
        # Files named 'ere*' keep their digits; all others have them removed.
        strip_chars = punct if fname.startswith('ere') else punct_digits
        # The context manager closes the file even if iteration stops early.
        with open(path) as corpus:
            for next_line in corpus:
                cleaned = []
                for tok in next_line.split():
                    if tok.endswith('\'s'):
                        ntok = tok[:-2] + ' apostrophe '
                    else:
                        # Explicit None check: comparing None with an int
                        # only works by accident on Python 2 and raises
                        # TypeError on Python 3.
                        year = sint(tok)
                        if year is not None and 1000 < year < 2020:
                            ntok = tok
                        else:
                            ntok = non_ascii.sub(
                                ' ', strip_chars.sub(' ', tok.lower()))
                    cleaned.append(ntok)
                # NOTE(review): one yield per input line is assumed; the
                # original indentation was lost in the flattened source.
                yield ' '.join(cleaned).split()