def cm_ebmcat(self):
    path = os.path.join(config.getpath('corpora', 'alta2012-ebm'), 'GS', 'gs1.txt')
    cm = {}
    with open(path) as f:
        reader = csv.reader(f, delimiter='\t')
        for row in reader:
            key = '{0}-{1}'.format(row[0], row[1])
            value = row[2].split(',')
            cm[key] = value
    return cm
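# Sketch of the gs1.txt layout assumed by cm_ebmcat above (inferred from the
# parsing, not verified against the corpus distribution): tab-separated rows of
# <document id> <sentence number> <comma-separated gold labels>, e.g. a row
#   12345678    3    background,other
# would yield cm['12345678-3'] == ['background', 'other'].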
def _parse_data(self):
    if self._data is None:
        ts = {}
        with open(os.path.join(config.getpath('corpora', 'alta2012-ebm'), 'test.csv')) as f:
            reader = csv.DictReader(f)
            for row in reader:
                docid = row['Document'] + '-' + row['Sentence']
                ts[docid] = row['Text']
        self._data = {}
        self._data['ts'] = dict(ts)
def _parse_data(self):
    if self._data is None:
        cm = defaultdict(list)
        ts = {}
        with open(os.path.join(config.getpath('corpora', 'alta2012-ebm'), 'train.csv')) as f:
            reader = csv.DictReader(f)
            for row in reader:
                docid = row['Document'] + '-' + row['Sentence']
                if row['Prediction'] == '1':
                    cm[docid].append(row['Label'])
                ts[docid] = row['Text']
        self._data = {}
        self._data['cm'] = dict(cm)
        self._data['ts'] = dict(ts)
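# Shape of the parsed training data (as implied by the loop above): self._data['ts']
# maps a '<Document>-<Sentence>' identifier to the sentence text, and self._data['cm']
# maps the same identifier to the labels whose 'Prediction' column is '1'. For example,
# a hypothetical train.csv row with Document=1001, Sentence=2, Prediction=1,
# Label=outcome would contribute cm['1001-2'] == ['outcome'].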
def sp_crossvalidation(self):
    sq_index = dict((s[0].split('-')[0], s) for s in self.sequence('abstract'))
    sp = {}
    with open(os.path.join(config.getpath('corpora', 'alta2012-ebm'), 'data.testset')) as f:
        for i, row in enumerate(f):
            key = 'fold{0}'.format(i)
            value = []
            for abs_id in row.split('\t', 1)[1].split(':'):
                if abs_id.strip():
                    try:
                        value.extend(sq_index[abs_id])
                    except KeyError:
                        # No sequence, single-sentence abstract
                        value.append('{0}-1'.format(abs_id))
            sp[key] = value
    return sp
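# Assumed data.testset layout (inferred from the split logic above, not from corpus
# documentation): one line per fold of the form '<fold label>\t<id>:<id>:...', so
# line i becomes sp['fold{i}'], listing every '<abstract>-<sentence>' identifier of
# the named abstracts and falling back to '<abstract>-1' when no sentence sequence
# is indexed for that abstract.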
def __unpack(self):
    if self.model is None:
        with Timer() as t:
            if self.path is None:
                logger.info("unpacking default model")
                model = PIBOSOModel(*load_default_model())
            else:
                logger.info("unpacking model from: {}".format(self.path))
                model = PIBOSOModel(*load_model(self.path))
        logger.info("unpacking took {0:.2f}s".format(t.elapsed))

        # hydrat hardcodes the path to the liblinear classifier, which needs to be
        # updated if the tools are installed at a different location
        classifier = config.getpath("tools", "liblinearclassifier")
        # TODO: Check that the tool exists
        if model.L1_cl.classifier != classifier:
            logger.debug("updating classifier path from {0} to {1}".format(model.L1_cl.classifier, classifier))
            model.L1_cl.classifier = classifier
            for c in model.L0_cl:
                c.classifier = classifier

        self.model = model
    else:
        logger.debug("already unpacked!")
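# Usage sketch (illustrative; the wrapper name and method are assumptions, not part
# of this module): __unpack() is a lazy initialiser, so the owning object can be
# constructed cheaply and the model-loading cost is paid once, on first use, e.g.
#
#   tagger = SomePIBOSOWrapper(path=None)   # hypothetical owner of .model / .path
#   tagger.classify(sentences)              # would call __unpack() once, then
#                                           # reuse the cached self.model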