Пример #1
0
# Module-wide tokenizer, constructed with stem=False.
w_t = WordTokenizer(stem=False)

# The logger is named after the last '/'-separated component of this file's path.
logger = Logger(__file__.rsplit('/', 1)[-1]).logger

# Project path table; the annotation and cache locations are read from it.
path = constants.get_path()
_ANNS_DIR, _ANNS_PATH, CACHE = path['ann'], path['ann_json'], path['cache']

# Output directory for results and the list of accepted topic names.
result_outpath = 'tmp/tmpres/'
valid_topics = ['all']
# doc_mod = DocumentsModel(_ANNS_DIR)

# Best-effort load of the 'umls.json' cache file: when the file is missing,
# unreadable or malformed, start from an empty cache instead of failing the
# module import.
CACHE_FILE = constants.join_path(CACHE, 'umls.json')
cachefile = {}
if os.path.isfile(CACHE_FILE):
    try:
        with codecs.open(CACHE_FILE, 'rb', 'utf-8') as mf:
            cachefile = json.load(mf)
    except (IOError, OSError, ValueError):
        # IOError/OSError: the file could not be opened or read.
        # ValueError: invalid JSON or undecodable bytes (JSON decode errors
        # and UnicodeDecodeError both derive from ValueError).
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate and does not hide programming errors.
        cachefile = {}


def evaluate(opts, args):
    # Helper: render a number as a string with exactly three decimal places.
    def tofloat(s): return '%.3f' % s
    # round float representation

    # Helper: expand a key `s` into the tuple (s, s + '_cb', s + '_ce').
    # NOTE(review): the '_cb' / '_ce' suffixes presumably name the begin/end
    # bounds of a confidence interval -- confirm against the callers.
    def toCI(s): return s, s + '_cb', s + '_ce'
Пример #2
0
import codecs
import os
import sys
from copy import deepcopy
from libs.evaluate import merge_offsets
from libs.supervised.prep.prepare import Prep
from constants import get_path, join_path
from libs.supervised.classifiers.svm_rank import Supervised
from util.common import hash_obj
from util.cache import simple_caching, object_hashing
from importlib import import_module
import constants

# Project path table; every location below is derived from it.
path = get_path()

# Stop-word list, addressed relative to path['data'].
STOPWORDS_PATH = '/'.join((path['data'], 'stopwords.txt'))

# Directory holding the supervised classifier modules, under path['root'].
CLF_PATH = join_path(
    path['root'],
    'libs/supervised/classifiers',
)

# Training documents and the JSON data file, both under path['data'].
docs_path = join_path(
    path['data'],
    'TAC_2014_BiomedSumm_Training_Data',
)
json_data_path = join_path(
    path['data'],
    'v1-2a.json',
)

# root_proj_path = os.getcwd()
# while not('.git' in os.listdir(root_proj_path)):
#     root_proj_path = os.path.split(root_proj_path)[0]
# if not(root_proj_path in sys.path):
#     sys.path.append(root_proj_path)


class Reranker(RerankInterface):

    reranker_opts = {
        'cutoff': {
            'type': int,
Пример #3
0
import codecs
import os
import sys
from libs.evaluate import merge_offsets
from libs.supervised.prep.prepare import Prep
from constants import get_path, join_path
from libs.supervised.classifiers.svm_rank import Supervised
from util.common import hash_obj
from util.cache import simple_caching, object_hashing
from importlib import import_module
import constants
import operator

# Look up the project path table once; the constants below are built from it.
path = get_path()

# Stop-word file inside path['data'].
STOPWORDS_PATH = ''.join([path['data'], '/stopwords.txt'])

# Classifier package directory inside path['root'].
CLF_PATH = join_path(path['root'], 'libs/supervised/classifiers')

# Training-document directory and JSON data file inside path['data'].
docs_path = join_path(
    path['data'], 'TAC_2014_BiomedSumm_Training_Data')
json_data_path = join_path(
    path['data'], 'v1-2a.json')

# root_proj_path = os.getcwd()
# while not('.git' in os.listdir(root_proj_path)):
#     root_proj_path = os.path.split(root_proj_path)[0]
# if not(root_proj_path in sys.path):
#     sys.path.append(root_proj_path)


class Reranker(RerankInterface):

    reranker_opts = {'cutoff': {'type': int, 'default': 3},
                     'lookup': {'type': int, 'default': 15},
                     'relaxation': {'type': int, 'default': 0},