Пример #1
0
def parse_params():
    """Build a ``MyArgsWebQsp`` from the ``kgt.conf`` file in its job folder.

    Every line that is neither blank nor starts with ``#`` is read as
    ``key=value``.  Only the first ``=`` separates key from value, so values
    may themselves contain ``=``.  A value is stored as ``int`` if it converts,
    else as ``float`` if it converts, else as the raw string.  Each pair is
    echoed to stdout as it is read.

    After parsing, ``dp_home`` (``$LOD_HOME/<dp_name>``), ``job_folder`` and
    ``job_number`` (base name of the job folder) are added and the dict is
    passed to ``my_args.set_details``.

    Returns:
        The ``MyArgsWebQsp`` instance after ``set_details`` has been called.

    Raises:
        IndexError: a non-blank, non-comment line contains no ``=``.
        KeyError: ``LOD_HOME`` is unset or the file defines no ``dp_name``.
    """
    my_args = MyArgsWebQsp()
    p = dict()
    conf_pf = os.path.join(my_args.job_folder, 'kgt.conf')
    # NOTE(review): ignore_exception is assumed to make the wrapped converter
    # return None instead of raising ValueError -- confirm in bas_utils.
    sint = bas_utils.ignore_exception(ValueError)(int)
    sfloat = bas_utils.ignore_exception(ValueError)(float)
    with open(conf_pf) as f:
        content = f.readlines()
    for next_line in content:
        # Whitespace-only lines count as blank instead of crashing below.
        if next_line.startswith('#') or not next_line.strip():
            continue
        # Split on the first '=' only so the value is never truncated.
        key, sep, raw = next_line.partition('=')
        if not sep:
            # Same exception type as the former toks[1] lookup, but explicit.
            raise IndexError('no "=" in config line: %r' % next_line)
        val = raw.replace('\n', '')
        print(key + '=' + str(val))
        # Convert once: int first, then float, else keep the string.
        parsed = sint(val)
        if parsed is None:
            parsed = sfloat(val)
        p[key] = val if parsed is None else parsed
    lod_home = os.environ['LOD_HOME']
    p['dp_home'] = os.path.join(lod_home, p['dp_name'])
    p['job_folder'] = my_args.job_folder
    p['job_number'] = os.path.basename(my_args.job_folder)
    my_args.set_details(p)
    print('=========================================================')
    return my_args
Пример #2
0
def parse_params(conf_pf):
    """Load ``conf_pf`` into the module-level dict ``p`` and set up a job folder.

    Every line that is neither blank nor starts with ``#`` is read as
    ``key=value``.  Only the first ``=`` separates key from value, so values
    may themselves contain ``=``.  A value is stored as ``int`` if it converts,
    else as ``float`` if it converts, else as the raw string.  Each pair is
    echoed to stdout as it is read.

    After parsing, ``dp_home`` (``$WORK_DIR/<dp_name>``) is added, a ``MyArgs``
    is built from ``p``, a job folder is obtained from ``set_job_folder`` for
    ``<dp_home>/model/kgrep``, and the config file is copied into that folder.

    Args:
        conf_pf: Path of the configuration file to read.

    Returns:
        The ``MyArgs`` instance, with ``job_folder`` set.

    Raises:
        IndexError: a non-blank, non-comment line contains no ``=``.
        KeyError: ``WORK_DIR`` is unset or the file defines no ``dp_name``.
    """
    global p
    # NOTE(review): ignore_exception is assumed to make the wrapped converter
    # return None instead of raising ValueError -- confirm in bas_utils.
    sint = bas_utils.ignore_exception(ValueError)(int)
    sfloat = bas_utils.ignore_exception(ValueError)(float)
    with open(conf_pf) as f:
        content = f.readlines()
    for next_line in content:
        # Whitespace-only lines count as blank instead of crashing below.
        if next_line.startswith('#') or not next_line.strip():
            continue
        # Split on the first '=' only so the value is never truncated.
        key, sep, raw = next_line.partition('=')
        if not sep:
            # Same exception type as the former toks[1] lookup, but explicit.
            raise IndexError('no "=" in config line: %r' % next_line)
        val = raw.replace('\n', '')
        print(key + '=' + str(val))
        # Convert once: int first, then float, else keep the string.
        parsed = sint(val)
        if parsed is None:
            parsed = sfloat(val)
        p[key] = val if parsed is None else parsed
    lod_home = os.environ['WORK_DIR']
    p['dp_home'] = os.path.join(lod_home, p['dp_name'])
    # MyArgs receives p before the job entries are added, as in the original
    # ordering; the entries below are written into that same dict object.
    my_args = MyArgs(p)
    jf, jn = set_job_folder(os.path.join(p['dp_home'], 'model/kgrep'))
    p['job_folder'] = jf
    p['job_number'] = jn
    p['conf_file'] = conf_pf
    my_args.job_folder = jf
    # Keep a copy of the configuration next to the job's outputs.
    copyfile(conf_pf, os.path.join(p['job_folder'], os.path.basename(conf_pf)))
    print('=========================================================')
    return my_args
Пример #3
0
def is_rel_id(word):
    """Return True when ``word`` is ``'r'`` followed by an int-convertible suffix.

    The prefix test is done on ``str(word)``; the suffix is ``word[1:]``.
    """
    # NOTE(review): the wrapped int is assumed to return None rather than
    # raise ValueError -- confirm in bas_utils.ignore_exception.
    to_int = bas_utils.ignore_exception(ValueError)(int)
    if not str(word).startswith('r'):
        return False
    return to_int(word[1:]) is not None
Пример #4
0
def get_rel_embd_mat(my_args, embedding_mat, word_idx):
    """Collect the embedding rows of all relation-id words.

    A key of ``word_idx`` is kept when its string form starts with ``'r'`` and
    the remainder (``key[1:]``) converts to an int.

    Args:
        my_args: Unused here; kept for interface compatibility.
        embedding_mat: Indexable container looked up with ``word_idx`` values.
        word_idx: Mapping from word to its index in ``embedding_mat``.

    Returns:
        dict mapping each kept word to ``embedding_mat[word_idx[word]]``.
    """
    # NOTE(review): the wrapped int is assumed to return None rather than
    # raise ValueError -- confirm in bas_utils.ignore_exception.
    to_int = bas_utils.ignore_exception(ValueError)(int)
    relation_rows = {}
    for token in word_idx.keys():
        looks_like_relation = (
            str(token).startswith('r') and to_int(token[1:]) is not None
        )
        if looks_like_relation:
            relation_rows[token] = embedding_mat[word_idx[token]]
    return relation_rows
Пример #5
0
def check_replace_src_ent(my_args, w='exxx'):
    """Return True when ``w`` is ``'e'`` plus an int suffix and ``use_ere`` is not 1.

    Args:
        my_args: Object whose ``p`` mapping holds the ``'use_ere'`` setting.
        w: Candidate word.

    Returns:
        False if ``my_args.p['use_ere'] == 1``, if ``w`` does not start with
        ``'e'``, or if ``w[1:]`` does not convert to an int; True otherwise.
    """
    if my_args.p['use_ere'] == 1 or not w.startswith('e'):
        return False
    # NOTE(review): the wrapped int is assumed to return None rather than
    # raise ValueError -- confirm in bas_utils.ignore_exception.
    to_int = bas_utils.ignore_exception(ValueError)(int)
    return to_int(w[1:]) is not None
Пример #6
0
def get_all_relids(wi):
    """Return the set of keys in ``wi`` that look like relation ids.

    A key qualifies when its string form starts with ``'r'`` and the rest of
    the key (``key[1:]``) converts to an int.
    """
    # NOTE(review): the wrapped int is assumed to return None rather than
    # raise ValueError -- confirm in bas_utils.ignore_exception.
    to_int = bas_utils.ignore_exception(ValueError)(int)
    return set(
        key for key in wi.keys()
        if str(key).startswith('r') and to_int(key[1:]) is not None
    )
Пример #7
0
def parse_params(conf_pf):
    """Read ``conf_pf`` into a dict and wrap it in a ``MyArgs``.

    Every line that is neither blank nor starts with ``#`` is read as
    ``key=value``.  Only the first ``=`` separates key from value, so values
    may themselves contain ``=``.  A value is stored as ``int`` if it converts,
    otherwise as the raw string.  Each pair is echoed to stdout as it is read.
    ``dp_home`` (``$WORK_DIR/<dp_name>``) is added after parsing.

    Args:
        conf_pf: Path of the configuration file to read.

    Returns:
        ``MyArgs(p)`` for the parsed dict ``p``.

    Raises:
        IndexError: a non-blank, non-comment line contains no ``=``.
        KeyError: ``WORK_DIR`` is unset or the file defines no ``dp_name``.
    """
    p = dict()
    # NOTE(review): ignore_exception is assumed to make the wrapped converter
    # return None instead of raising ValueError -- confirm in bas_utils.
    sint = bas_utils.ignore_exception(ValueError)(int)
    with open(conf_pf) as f:
        content = f.readlines()
    for next_line in content:
        # Whitespace-only lines count as blank instead of crashing below.
        if next_line.startswith('#') or not next_line.strip():
            continue
        # Split on the first '=' only so the value is never truncated.
        key, sep, raw = next_line.partition('=')
        if not sep:
            # Same exception type as the former toks[1] lookup, but explicit.
            raise IndexError('no "=" in config line: %r' % next_line)
        val = raw.replace('\n', '')
        print(key + '=' + str(val))
        # Convert once; fall back to the raw string when it is not an int.
        as_int = sint(val)
        p[key] = val if as_int is None else as_int
    word_dir = os.environ['WORK_DIR']
    p['dp_home'] = os.path.join(word_dir, p['dp_name'])
    args = MyArgs(p)
    print('=========================================================')
    return args
Пример #8
0
 def __iter__(self):
     """Yield one list of cleaned tokens per line of each file in ``self.dirname``.

     Directory entries that are not regular files are skipped.  Each line is
     split on whitespace and every token is rewritten as follows:

     * a token ending in 's loses that suffix and gets the word
       ``apostrophe`` appended;
     * a token that converts to an int strictly between 1000 and 2020 is
       kept unchanged;
     * any other token is lower-cased, has punctuation replaced by spaces
       (digits too, unless the file name starts with ``ere``) and then has
       runs of non-ASCII characters replaced by a space.

     The rewritten tokens are joined and split on whitespace again, so a
     token that turned into several words contributes several items.
     """
     print('called - ------------------------------')
     # NOTE(review): the wrapped int is assumed to return None rather than
     # raise ValueError -- confirm in bas_utils.ignore_exception.
     sint = bas_utils.ignore_exception(ValueError)(int)
     # Compile the patterns once instead of resolving them for every token.
     non_ascii = re.compile(r'[^\x00-\x7F]+')
     punct = re.compile(r"[!\"#$%&()*+,-./:;<=>?@\[\]'^_`{|}~\\\t\n]")
     punct_digits = re.compile(r"[!\"#$%&()*+,-./:;<=>?@\[\]0-9'^_`{|}~\\\t\n]")
     for fname in os.listdir(self.dirname):
         path = os.path.join(self.dirname, fname)
         if not os.path.isfile(path):
             continue
         print ('Working on file - %s now ...' % fname)
         # Files whose name starts with 'ere' keep their digits.
         strip = punct if fname.startswith('ere') else punct_digits
         # The context manager closes the file even if the consumer stops
         # iterating early and the generator is closed.
         with open(path) as lines:
             for next_line in lines:
                 cleaned = []
                 for tok in next_line.split():
                     if tok.endswith('\'s'):
                         ntok = tok[:-2] + ' apostrophe '
                     else:
                         year = sint(tok)
                         # Explicit None test: ordering None against an int
                         # is a TypeError on Python 3.
                         if year is not None and 2020 > year > 1000:
                             ntok = tok
                         else:
                             ntok = non_ascii.sub(
                                 ' ', strip.sub(' ', tok.lower()))
                     cleaned.append(ntok)
                 yield ' '.join(cleaned).split()