示例#1
0
def read_number_curr_result(doc_id, page_no):
    """Return the 'data' entry of a page's number_curr_result file.

    The sub-directory name is the 'number_curr_result' option of the
    'PageAnalysis' config section; the file read is
    opath/<doc_id>/<sub-directory>/<page_no>.sh.

    Args:
        doc_id: Document identifier; converted with str() for the path.
        page_no: Page identifier used as the file stem.

    Returns:
        The value stored under 'data', or an empty dict when that key is
        absent.
    """
    result_dir = cfgObj.get_config('PageAnalysis', 'number_curr_result')
    page_file = '%s.sh' % page_no
    full_path = os.path.join(opath, str(doc_id), result_dir, page_file)
    # isdb / isenc are forwarded unchanged to the datastore reader.
    stored = datastore.read_data_fname(full_path, isdb, isenc)
    return stored.get('data', {})
示例#2
0
def get_fp_curr_result(doc_id, page_no):
    """Load the 'data' payload of the fp_curr_result file for one page.

    The directory component is looked up in the config ('PageAnalysis' /
    'fp_curr_result'); the file is named '<page_no>.sh' and lives under
    opath/<doc_id>/.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier used to build the file name.

    Returns:
        Whatever is stored under the 'data' key, or {} if the key is missing.
    """
    fp_dir = cfgObj.get_config('PageAnalysis', 'fp_curr_result')
    page_file = '%s.sh' % page_no
    target = os.path.join(opath, str(doc_id), fp_dir, page_file)
    contents = datastore.read_data_fname(target, isdb, isenc)
    return contents.get('data', {})
示例#3
0
def get_metadata_dict(docid):
    """Return the 'data' entry of a document's entity-output file.

    The sub-directory comes from the 'EntityOutput_odir' option of the
    'ExtractEntity' config section. Unlike the per-page readers, the file is
    named after the document itself: opath/<docid>/<sub-directory>/<docid>.sh.

    Args:
        docid: Document identifier, used both as a directory and as the file
            stem.

    Returns:
        The value stored under 'data', or an empty dict when it is absent.
    """
    entity_dir = cfgObj.get_config('ExtractEntity', 'EntityOutput_odir')
    doc_file = '%s.sh' % docid
    entity_path = os.path.join(opath, str(docid), entity_dir, doc_file)
    stored = datastore.read_data_fname(entity_path, isdb, isenc)
    return stored.get('data', {})
 def get_number_curr_result(self, doc_id, page_no):
     """Return the 'data' entry of a page's number_curr_result file.

     Method variant that takes the base path and reader flags from the
     instance (self.opath, self.isdb, self.isenc). The sub-directory is the
     'number_curr_result' option of the 'PageAnalysis' config section.

     Args:
         doc_id: Document identifier; str() is applied for the path.
         page_no: Page identifier used as the file stem ('<page_no>.sh').

     Returns:
         The value stored under 'data', or {} when the key is missing.
     """
     result_dir = cfgObj.get_config('PageAnalysis', 'number_curr_result')
     page_file = '%s.sh' % page_no
     full_path = os.path.join(self.opath, str(doc_id), result_dir, page_file)
     stored = datastore.read_data_fname(full_path, self.isdb, self.isenc)
     return stored.get('data', {})
示例#5
0
def get_basic_hrvr_grps(doc_id, page_no):
    """Read the 'hrvr_bboxs', 'hrvr_cells' and 'font' entries of a page.

    The file read is opath/<doc_id>/MDB/<page_no>_HRVR.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier used as the file-name prefix.

    Returns:
        A 3-tuple (hrvr_bboxs, hrvr_cells, font); each element defaults to an
        empty dict when its key is absent.
    """
    hrvr_file = '%s_HRVR.sh' % page_no
    hrvr_path = os.path.join(opath, str(doc_id), "MDB", hrvr_file)
    # The third reader argument is a literal 0 here, not the shared isenc flag.
    stored = datastore.read_data_fname(hrvr_path, isdb, 0)
    bboxes = stored.get('hrvr_bboxs', {})
    cells = stored.get('hrvr_cells', {})
    fonts = stored.get('font', {})
    return bboxes, cells, fonts
示例#6
0
def read_projected_rm_linll(doc_id, page_no, level):
    """Return the 'data' entry of a page/level projected-rm-lnill file.

    The sub-directory is the 'projectedrmlnill_odir' option of the
    'PageAnalysis' config section; the file read is
    opath/<doc_id>/<sub-directory>/<page_no>_<level>.sh.

    Side effect: datastore.make_dirs() is called on the directory before the
    read, so the directory is requested even when nothing has been written.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier (first part of the file name).
        level: Level identifier (second part of the file name).

    Returns:
        The value stored under 'data', or an empty dict when it is absent.
    """
    prm_odir = cfgObj.get_config('PageAnalysis', 'projectedrmlnill_odir')
    # The trailing '' makes the joined directory path end with a separator.
    fpath = os.path.join(opath, str(doc_id), prm_odir, '')
    datastore.make_dirs(fpath)
    filename = '%s_%s.sh' % (str(page_no), level)
    fname = os.path.join(fpath, filename)
    # Fourth argument is an empty dict, as in the other four-plus-argument
    # call in this file (get_media_box); no local `d` exists in this function.
    # NOTE(review): confirm the meaning of this parameter against
    # datastore.read_data_fname.
    data = datastore.read_data_fname(fname, isdb, isenc, {})
    return data.get('data', {})
示例#7
0
def get_font_rm(doc_id, page_no):
    """Return the 'data' entry of opath/<doc_id>/MDB/<page_no>.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier used as the file stem.

    Returns:
        The value stored under 'data', or an empty dict when it is absent.
    """
    page_file = '%s.sh' % page_no
    mdb_path = os.path.join(opath, str(doc_id), "MDB", page_file)
    # The third reader argument is a literal 0 here, not the shared isenc flag.
    stored = datastore.read_data_fname(mdb_path, isdb, 0)
    return stored.get('data', {})
示例#8
0
def get_base_igs(doc_id, page_no):
    """Return the 'data' entry of opath/<doc_id>/MDD/<page_no>_TOK_HLPN.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier used as the file-name prefix.

    Returns:
        The value stored under 'data', or an empty dict when it is absent.
    """
    tok_file = '%s_TOK_HLPN.sh' % page_no
    # NOTE(review): this reader uses the "MDD" directory while several
    # neighbouring readers use "MDB" - confirm that is intended.
    tok_path = os.path.join(opath, str(doc_id), "MDD", tok_file)
    # The third reader argument is a literal 0 here, not the shared isenc flag.
    stored = datastore.read_data_fname(tok_path, isdb, 0)
    return stored.get('data', {})
 def get_font_dict(self, doc_id, page_no):
     """Return the 'font_dict' entry of a page's font-dictionary file.

     The sub-directory is the 'fontdict' option of the 'MOD_DIRNAME' config
     section; the file is self.opath/<doc_id>/<sub-directory>/<page_no>.sh.

     Args:
         doc_id: Document identifier; str() is applied for the path.
         page_no: Page identifier used as the file stem.

     Returns:
         The value stored under 'font_dict', or {} when the key is absent.
         When the file does not exist, a message with the path is written to
         stderr and {} is returned.
     """
     font_dir = cfgObj.get_config('MOD_DIRNAME', 'fontdict')
     page_file = '%s.sh' % page_no
     font_path = os.path.join(self.opath, str(doc_id), font_dir, page_file)
     if os.path.exists(font_path):
         stored = datastore.read_data_fname(font_path, self.isdb, self.isenc)
         return stored.get('font_dict', {})
     print >> sys.stderr, 'Font dict not found! ', font_path
     return {}
示例#10
0
def get_num_grid(doc_id, page_no):
    """Return the 'data' entry of a page's number-grid file.

    The sub-directory is the 'numgrid' option of the 'MOD_DIRNAME' config
    section; the file is opath/<doc_id>/<sub-directory>/<page_no>.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier used as the file stem.

    Returns:
        The value stored under 'data', or {} when the key is absent. When the
        file does not exist, a message with the path is written to stderr and
        {} is returned.
    """
    grid_dir = cfgObj.get_config('MOD_DIRNAME', 'numgrid')
    page_file = '%s.sh' % page_no
    grid_path = os.path.join(opath, str(doc_id), grid_dir, page_file)
    if os.path.exists(grid_path):
        stored = datastore.read_data_fname(grid_path, isdb, isenc)
        return stored.get('data', {})
    print >> sys.stderr, 'NUM GRID dict not found! ', grid_path
    return {}
示例#11
0
def get_hrvr_grps2(doc_id, page_no):
    """Read the 'bbox_unordered', 'cell_unordered' and 'font' entries.

    The file read is opath/<doc_id>/MDB/<page_no>_HRVR.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier used as the file-name prefix.

    Returns:
        A 3-tuple (bbox_unordered, cell_unordered, font); each element
        defaults to an empty dict when its key is absent.
    """
    hrvr_file = '%s_HRVR.sh' % page_no
    hrvr_path = os.path.join(opath, str(doc_id), "MDB", hrvr_file)
    # The third reader argument is a literal 0 here, not the shared isenc flag.
    stored = datastore.read_data_fname(hrvr_path, isdb, 0)
    unordered_bboxes = stored.get('bbox_unordered', {})
    unordered_cells = stored.get('cell_unordered', {})
    fonts = stored.get('font', {})
    return unordered_bboxes, unordered_cells, fonts
示例#12
0
def is_cell_info_dict_exists(doc_id, page_no):
    """Tell whether a page has a non-empty 'cell_info_dict' entry on disk.

    The sub-directory is the 'cell_info_dict_odir' option of the
    'PageAnalysis' config section; the file checked is
    opath/<doc_id>/<sub-directory>/<page_no>.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier used as the file stem.

    Returns:
        1 when the file exists and its 'cell_info_dict' entry is truthy,
        otherwise 0 (file missing, key missing, or entry empty).
    """
    cell_dir = cfgObj.get_config('PageAnalysis', 'cell_info_dict_odir')
    page_file = '%s.sh' % page_no
    cell_path = os.path.join(opath, str(doc_id), cell_dir, page_file)
    if not os.path.exists(cell_path):
        return 0
    stored = datastore.read_data_fname(cell_path, isdb, isenc)
    return 1 if stored.get('cell_info_dict', {}) else 0
示例#13
0
def get_tok_indexing(doc_id, page_no, inkey, level):
    """Return the 'data' entry of a token-indexing file, or {} on failure.

    The file read is opath/<doc_id>/MDD/<page_no>_<inkey>_<level>.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier (first part of the file name).
        inkey: Key name (second part of the file name).
        level: Level identifier (third part of the file name).

    Returns:
        The value stored under 'data' when the reader returns a dict (or a
        dict subclass); {} when the key is absent or the reader returns
        anything that is not a dict.
    """
    fname = '%s_%s_%s.sh' % (page_no, inkey, level)
    shname = os.path.join(opath, str(doc_id), "MDD", fname)
    # The third reader argument is a literal 0 here, not the shared isenc flag.
    d = datastore.read_data_fname(shname, isdb, 0)
    # The reader's result is not assumed to be a dict, hence the guard.
    if isinstance(d, dict):
        return d.get('data', {})
    return {}
示例#14
0
def get_visual_group_dict(doc_id, page_no):
    """Return the 'vis_dict' entry of a page's visual-group file.

    The sub-directory is the 'visdict' option of the 'MOD_DIRNAME' config
    section; the file is opath/<doc_id>/<sub-directory>/<page_no>.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier used as the file stem.

    Returns:
        The value stored under 'vis_dict', or {} when the key is absent.
        When the file does not exist, a message is written to stderr and {}
        is returned.
    """
    vis_dir = cfgObj.get_config('MOD_DIRNAME', 'visdict')
    page_file = '%s.sh' % page_no
    vis_path = os.path.join(opath, str(doc_id), vis_dir, page_file)
    if os.path.exists(vis_path):
        stored = datastore.read_data_fname(vis_path, isdb, isenc)
        return stored.get('vis_dict', {})
    print >> sys.stderr, 'Visual Group dict not found! '
    return {}
示例#15
0
def get_relation_dict(doc_id, page_no):
    """Return the 'data' entry of a page's relation-results file.

    The sub-directory is the 'RelationResults_odir' option of the
    'PageAnalysis' config section; the file is
    opath/<doc_id>/<sub-directory>/<page_no>.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier; str() is applied for the file name.

    Returns:
        The value stored under 'data', or {} when the key is absent. When the
        file does not exist, a message is written to stderr and {} is
        returned.
    """
    relation_dir = cfgObj.get_config('PageAnalysis', 'RelationResults_odir')
    page_file = '%s.sh' % str(page_no)
    relation_path = os.path.join(opath, str(doc_id), relation_dir, page_file)
    if os.path.exists(relation_path):
        stored = datastore.read_data_fname(relation_path, isdb, isenc)
        return stored.get('data', {})
    print >> sys.stderr, 'relation dict not found! '
    return {}
示例#16
0
def get_num_behave_shelve(doc_id, page_no):
    """Return the 'data' entry of a page's number-behaviour file.

    The sub-directory is the 'num_behave_odir' option of the 'PageAnalysis'
    config section; the file is opath/<doc_id>/<sub-directory>/<page_no>.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier; str() is applied for the file name.

    Returns:
        The value stored under 'data', or {} when the key is absent. When the
        file does not exist, a message is written to stderr and an empty
        *list* is returned.
    """
    behave_dir = cfgObj.get_config('PageAnalysis', 'num_behave_odir')
    page_file = '%s.sh' % str(page_no)
    behave_path = os.path.join(opath, str(doc_id), behave_dir, page_file)
    if os.path.exists(behave_path):
        stored = datastore.read_data_fname(behave_path, isdb, isenc)
        return stored.get('data', {})
    print >> sys.stderr, 'Number Behavior Shelve not found! '
    # NOTE(review): the missing-file result is a list while the missing-key
    # default is a dict - confirm which type callers expect.
    return []
示例#17
0
def get_visual_group_proj_dict(doc_id, page_no):
    """Return the 'vis_proj_dict' entry of a page's projected visual groups.

    The sub-directory is the 'visprojdict' option of the 'MOD_DIRNAME' config
    section; the file is ipath/<doc_id>/<sub-directory>/<page_no>.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier used as the file stem.

    Returns:
        The value stored under 'vis_proj_dict', or {} when the key is absent
        or the file does not exist (nothing is logged in that case).
    """
    proj_dir = cfgObj.get_config('MOD_DIRNAME', 'visprojdict')
    page_file = '%s.sh' % page_no
    # NOTE(review): rooted at ipath, whereas most sibling readers use opath -
    # confirm that is intended.
    proj_path = os.path.join(ipath, str(doc_id), proj_dir, page_file)
    if os.path.exists(proj_path):
        stored = datastore.read_data_fname(proj_path, isdb, isenc)
        return stored.get('vis_proj_dict', {})
    return {}
示例#18
0
def get_nonG_shelve(doc_id):
    """Return the 'nong_data' entry of a document's NonG file.

    The sub-directory is the 'TAS_Topic_Mapped_NonG' option of the
    'applicator' config section; the file is named after the document:
    opath/<doc_id>/<sub-directory>/<doc_id>.sh.

    Args:
        doc_id: Document identifier, used both as a directory and as the file
            stem.

    Returns:
        The value stored under 'nong_data', or {} when the key is absent.
        When the file does not exist, a message is written to stderr and an
        empty *list* is returned.
    """
    drm_odir = cfgObj.get_config('applicator', 'TAS_Topic_Mapped_NonG')
    fname = '%s.sh' % (str(doc_id))
    rm_path = os.path.join(opath, str(doc_id), drm_odir, fname)
    if not os.path.exists(rm_path):
        # Message names this reader's own file so the log line is not
        # confused with the number-behaviour reader's.
        print >> sys.stderr, 'NonG Shelve not found! '
        # NOTE(review): list here vs. dict default below - confirm which type
        # callers expect before unifying.
        return []

    dd = datastore.read_data_fname(rm_path, isdb, isenc)
    return dd.get('nong_data', {})
示例#19
0
def get_semantic_ph(doc_id):
    """Return the 'data' entry of a document's semantic PH file.

    The sub-directory is the 'oldPH_odir' option of the 'SemanticModule'
    config section; the file is named after the document:
    opath/<doc_id>/<sub-directory>/<doc_id>.sh.

    Args:
        doc_id: Document identifier, used both as a directory and as the file
            stem.

    Returns:
        The value stored under 'data', or {} when the key is absent. When the
        file does not exist, a message is written to stderr and {} is
        returned.
    """
    ci_odir = cfgObj.get_config('SemanticModule', 'oldPH_odir')
    fname = '%s.sh' % (doc_id)
    shname = os.path.join(opath, str(doc_id), ci_odir, fname)
    if not os.path.exists(shname):
        # Message names this reader's own file rather than the projected
        # visual-group dict, which a different function reads.
        print >> sys.stderr, 'Semantic PH dict not found! '
        return {}

    shv = datastore.read_data_fname(shname, isdb, isenc)
    return shv.get('data', {})
示例#20
0
def get_proj_rm(doc_id, page_no, level):
    """Return the 'data' entry of a page/level projected-rm file.

    The sub-directory is the 'projectedrm_odir' option of the 'PageAnalysis'
    config section; the file is
    opath/<doc_id>/<sub-directory>/<page_no>_<level>.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier (first part of the file name).
        level: Level identifier (second part of the file name).

    Returns:
        The value stored under 'data', or {} when the key is absent. When the
        file does not exist, a message is written to stderr and an empty
        *list* is returned.
    """
    drm_odir = cfgObj.get_config('PageAnalysis', 'projectedrm_odir')
    fname = '%s_%s.sh' % (str(page_no), level)
    rm_path = os.path.join(opath, str(doc_id), drm_odir, fname)
    if not os.path.exists(rm_path):
        print >> sys.stderr, 'Projected RM not found! '
        # NOTE(review): list here vs. dict default below - confirm which type
        # callers expect before unifying.
        return []

    # The third reader argument is a literal 0 here, not the shared isenc flag.
    dd = datastore.read_data_fname(rm_path, isdb, 0)
    return dd.get('data', {})
    def get_cell_info_dict_1(self, doc_id, page_no):
        """Return the 'cell_info_dict' entry of a page's '_HRA' file.

        The sub-directory is the 'cell_info_dict_odir' option of the
        'PageAnalysis' config section; the file is
        self.opath/<doc_id>/<sub-directory>/<page_no>_HRA.sh.

        Args:
            doc_id: Document identifier; str() is applied for the path.
            page_no: Page identifier used as the file-name prefix.

        Returns:
            The value stored under 'cell_info_dict', or {} when the key is
            absent or the file does not exist (nothing is logged for a
            missing file).
        """
        ci_odir = cfgObj.get_config('PageAnalysis', 'cell_info_dict_odir')
        fname = '%s_HRA.sh' % (page_no)
        shname = os.path.join(self.opath, str(doc_id), ci_odir, fname)
        if not os.path.exists(shname):
            return {}

        shv = datastore.read_data_fname(shname, self.isdb, self.isenc)
        return shv.get('cell_info_dict', {})
示例#22
0
def get_synthe_dict(doc_id, page_no, level):
    """Return the 'data' entry of a page/level cover-page-synthesizer file.

    The sub-directory is the 'coverpagesynthesizer_odir' option of the
    'PageAnalysis' config section; the file is
    opath/<doc_id>/<sub-directory>/<page_no>_<level>.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier (first part of the file name).
        level: Level identifier (second part of the file name).

    Returns:
        The value stored under 'data', or {} when the key is absent. When the
        file does not exist, a message is written to stderr and {} is
        returned.
    """
    synth_odir = cfgObj.get_config('PageAnalysis', 'coverpagesynthesizer_odir')
    fname = '%s_%s.sh' % (page_no, level)
    synth_path = os.path.join(opath, str(doc_id), synth_odir, fname)
    if not os.path.exists(synth_path):
        # Distinct wording from get_slt_dict's 'SLT not found! ' so the two
        # readers can be told apart in the error log.
        print >> sys.stderr, 'Cover page synthesizer data not found! '
        return {}

    # The third reader argument is a literal 0 here, not the shared isenc flag.
    dd = datastore.read_data_fname(synth_path, isdb, 0)
    return dd.get('data', {})
示例#23
0
def get_cell_info_dict(doc_id, page_no):
    """Return the 'cell_info_dict' entry of opath/<doc_id>/CID/<page_no>.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier used as the file stem.

    Returns:
        The value stored under 'cell_info_dict', or {} when the key is
        absent. When the file does not exist, a message is written to stderr
        and {} is returned.
    """
    # The directory name is the literal "CID" here; it is not read from the
    # 'PageAnalysis' / 'cell_info_dict_odir' config option.
    ci_odir = "CID"
    fname = '%s.sh' % (page_no)
    shname = os.path.join(opath, str(doc_id), ci_odir, fname)
    if not os.path.exists(shname):
        print >> sys.stderr, 'Cell INFO dict not found! '
        return {}

    shv = datastore.read_data_fname(shname, isdb, isenc)
    return shv.get('cell_info_dict', {})
示例#24
0
def get_cell_info_dict_level(doc_id, page_no, level):
    """Return the 'cell_info_dict' entry of a page/level cell-info file.

    The sub-directory is the 'cell_info_dict_odir' option of the
    'PageAnalysis' config section; the file is
    opath/<doc_id>/<sub-directory>/<page_no>_<level>.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier (first part of the file name).
        level: Level identifier (second part of the file name).

    Returns:
        The value stored under 'cell_info_dict', or {} when the key is
        absent. When the file does not exist, a message with the path is
        written to stderr and {} is returned.
    """
    cell_dir = cfgObj.get_config('PageAnalysis', 'cell_info_dict_odir')
    level_file = '%s_%s.sh' % (page_no, level)
    cell_path = os.path.join(opath, str(doc_id), cell_dir, level_file)
    if os.path.exists(cell_path):
        # The third reader argument is a literal 0, not the shared isenc flag.
        stored = datastore.read_data_fname(cell_path, isdb, 0)
        return stored.get('cell_info_dict', {})
    print >> sys.stderr, 'Cell INFO dict not found! ', cell_path
    return {}
示例#25
0
def get_fc_grps(doc_id, page_no):
    """Read the four fc/sfc entries of opath/<doc_id>/MDB/<page_no>_HRVR.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier used as the file-name prefix.

    Returns:
        A 4-tuple holding, in order, the values stored under 'fc_cells',
        'fc_sig_dict_list', 'sfc_cells' and 'sfc_sig_dict_list'; each
        defaults to its own empty list when the key is absent.
    """
    hrvr_file = '%s_HRVR.sh' % page_no
    hrvr_path = os.path.join(opath, str(doc_id), "MDB", hrvr_file)
    # The third reader argument is a literal 0 here, not the shared isenc flag.
    stored = datastore.read_data_fname(hrvr_path, isdb, 0)

    wanted_keys = ('fc_cells', 'fc_sig_dict_list',
                   'sfc_cells', 'sfc_sig_dict_list')
    # A fresh [] default is created per key, so the defaults are never shared.
    return tuple([stored.get(key, []) for key in wanted_keys])
示例#26
0
def get_slt_dict(doc_id, page_no, level):
    """Return everything the reader yields for a page/level SLT data file.

    The sub-directory is the 'slt_data_odir' option of the 'PageAnalysis'
    config section; the file is
    opath/<doc_id>/<sub-directory>/<page_no>_<level>.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier (first part of the file name).
        level: Level identifier (second part of the file name).

    Returns:
        The reader's whole result (no key is extracted). When the file does
        not exist, a message is written to stderr and {} is returned.
    """
    slt_dir = cfgObj.get_config('PageAnalysis', 'slt_data_odir')
    level_file = '%s_%s.sh' % (page_no, level)
    slt_path = os.path.join(opath, str(doc_id), slt_dir, level_file)
    if os.path.exists(slt_path):
        return datastore.read_data_fname(slt_path, isdb, isenc)
    print >> sys.stderr, 'SLT not found! '
    return {}
示例#27
0
def get_cell_dict(doc_id, page_no):
    """Return the 'cell_dict' entry of a page's cell-dictionary file.

    The sub-directory is the 'celldict' option of the 'MOD_DIRNAME' config
    section; the file is ipath/<doc_id>/<sub-directory>/<page_no>.sh.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        page_no: Page identifier used as the file stem.

    Returns:
        The value stored under 'cell_dict', or {} when the key is absent.
        When the file does not exist, a message with the path is written to
        stderr and {} is returned.
    """
    cell_dir = cfgObj.get_config('MOD_DIRNAME', 'celldict')
    page_file = '%s.sh' % page_no
    # NOTE(review): rooted at ipath, whereas most sibling readers use opath -
    # confirm that is intended.
    cell_path = os.path.join(ipath, str(doc_id), cell_dir, page_file)
    if os.path.exists(cell_path):
        stored = datastore.read_data_fname(cell_path, isdb, isenc)
        return stored.get('cell_dict', {})
    print >> sys.stderr, 'Cell dict not found! ', cell_path
    return {}
示例#28
0
def get_media_box(doc_id, pno):
    """Return a page's bounding box formatted as 'x0_y0_w_h'.

    Reads ipath/<doc_id>/db/<pno>/pdfdata.db through
    datastore.read_data_fname (with {} and 'pdfdata' as the trailing
    arguments), takes the first element of its 'page_master' entry and
    formats that element's 'bbox' mapping.

    Args:
        doc_id: Document identifier; str() is applied for the path.
        pno: Page identifier; str() is applied for the path.

    Returns:
        '<x0>_<y0>_<w>_<h>' built from the bbox values, or '' when the file
        is missing (a message is also written to stderr), 'page_master' is
        empty or absent, or the first element has no non-empty 'bbox'.

    Raises:
        KeyError: If a non-empty bbox dict lacks one of 'x0', 'y0', 'w', 'h'.
    """
    rm_path = os.path.join(ipath, str(doc_id), "db", str(pno), 'pdfdata.db')
    if not os.path.exists(rm_path):
        print >> sys.stderr, ' pdfdata not found'
        # Same empty-string result as the other "no box" paths below, so the
        # function always returns a str.
        return ""

    dd = datastore.read_data_fname(rm_path, isdb, isenc, {}, 'pdfdata')
    page_master = dd.get('page_master', [])
    if page_master:
        # Only the first page_master element is consulted.
        bbox_dict = page_master[0].get('bbox', {})
        if bbox_dict:
            return "%s_%s_%s_%s" % (bbox_dict['x0'], bbox_dict['y0'],
                                    bbox_dict['w'], bbox_dict['h'])
    return ""
示例#29
0
def return_data(data_path):
    """Return the 'data' entry of the file at data_path.

    Args:
        data_path: Path handed as-is to datastore.read_data_fname, together
            with the externally defined isdb and isenc values.

    Returns:
        The value stored under 'data', or an empty dict when it is absent.
    """
    stored = datastore.read_data_fname(data_path, isdb, isenc)
    return stored.get('data', {})