def __init__(self, taxonomy_resources, voc_fpath="", relations_fpath="", lang="en"):
    """Bind shared resources and load or generate the relations table.

    Args:
        taxonomy_resources: object whose ``isas`` and ``freqs`` attributes
            are stored on this instance as ``_isas`` and ``_freqs``.
        voc_fpath: path of a vocabulary file. It is only read when
            ``relations_fpath`` does not exist.
        relations_fpath: path of a tab-separated relations file that has
            ``hyponym`` and ``hypernym`` columns.
        lang: language code handed to ``load_stoplist``.

    Raises:
        Exception: if neither ``relations_fpath`` nor ``voc_fpath`` exists.
    """
    self._isas = taxonomy_resources.isas
    self._freqs = taxonomy_resources.freqs
    self.voc_name = fpath2filename(voc_fpath)
    self._voc_fpath = voc_fpath
    self._stopwords = load_stoplist(lang=lang)
    self._lang = lang

    # An existing relations file always wins; the vocabulary file is only a
    # fallback used to generate relations when no such file is available.
    if exists(relations_fpath):
        print("Loading relations file: %s" % relations_fpath)
        self._relations_fpath = relations_fpath
        self._relations = read_csv(relations_fpath, encoding='utf-8', delimiter="\t", error_bad_lines=False)
        print("Loaded %d relations from: %s" % (len(self._relations), relations_fpath))
        # The vocabulary is every term seen on either side of a relation.
        self.voc = set(self._relations.hyponym) | set(self._relations.hypernym)
        print("Loaded %d voc from relations" % len(self.voc))
    elif exists(voc_fpath):
        self.voc = self._load_voc(voc_fpath)
        # The output path is derived from the vocabulary path (computed once).
        self._relations_fpath = voc_fpath + "-relations.csv"
        print("Generating new relations file: %s" % self._relations_fpath)
        # NOTE(review): _generate_relations presumably writes the file at
        # this path as well as returning the table -- confirm.
        self._relations = self._generate_relations(self.voc, self._relations_fpath)
    else:
        raise Exception("Error: cannot load relations or generate them. Specify either voc_fpath or relations_fpath.")
def __init__(self, freq_fpaths=None, isa_fpaths=None):
    """Load frequency and is-a dictionaries from the given files.

    Every resource is stored under the key that ``fpath2filename`` returns
    for its path, so two paths mapping to the same key overwrite each other.

    Args:
        freq_fpaths: iterable of paths, each passed to ``FreqDictionary``.
            ``None`` (the default) loads nothing.
        isa_fpaths: iterable of paths, each passed to ``ISAs``.
            ``None`` (the default) loads nothing.
    """
    # None replaces the former mutable ``[]`` defaults, which are shared
    # between calls; omitting the arguments still means "load nothing".
    if freq_fpaths is None:
        freq_fpaths = []
    if isa_fpaths is None:
        isa_fpaths = []

    tic = time()

    self._freqs = {}
    for fpath in freq_fpaths:
        fname = fpath2filename(fpath)
        self._freqs[fname] = FreqDictionary(fpath)
        print("Loaded freq dictionary '%s': %s" % (fname, fpath))

    self._isas = {}
    for fpath in isa_fpaths:
        fname = fpath2filename(fpath)
        self._isas[fname] = ISAs(fpath)
        print("Loaded isa dictionary (%d words) '%s': %s" % (len(self._isas[fname].data), fname, fpath))

    print("Loaded resources in %d sec." % (time() - tic))
def __init__(self, freq_fpaths=None, isa_fpaths=None):
    """Load frequency and is-a dictionaries from the given files.

    Every resource is stored under the key that ``fpath2filename`` returns
    for its path, so two paths mapping to the same key overwrite each other.

    Args:
        freq_fpaths: iterable of paths, each passed to ``FreqDictionary``.
            ``None`` (the default) loads nothing.
        isa_fpaths: iterable of paths, each passed to ``ISAs``.
            ``None`` (the default) loads nothing.
    """
    # None replaces the former mutable ``[]`` defaults, which are shared
    # between calls; omitting the arguments still means "load nothing".
    if freq_fpaths is None:
        freq_fpaths = []
    if isa_fpaths is None:
        isa_fpaths = []

    tic = time()

    self._freqs = {}
    for fpath in freq_fpaths:
        fname = fpath2filename(fpath)
        self._freqs[fname] = FreqDictionary(fpath)
        print("Loaded freq dictionary '%s': %s" % (fname, fpath))

    self._isas = {}
    for fpath in isa_fpaths:
        fname = fpath2filename(fpath)
        self._isas[fname] = ISAs(fpath)
        print("Loaded isa dictionary (%d words) '%s': %s" % (len(self._isas[fname].data), fname, fpath))

    print("Loaded resources in %d sec." % (time() - tic))
def _load_voc(self, voc_fpath):
    """Read a vocabulary from a tab-separated file into a set.

    The values are taken from the ``term`` column when the file has one,
    otherwise from the ``word`` column; a file with neither column yields
    an empty set.

    Args:
        voc_fpath: path of the vocabulary file.

    Returns:
        set: the vocabulary entries, or an empty set (after printing a
        warning) when ``voc_fpath`` does not exist.
    """
    if not exists(voc_fpath):
        print("Warning: vocabulary is not loaded. This means hypo2hyper features cannot be extracted.")
        return set()

    voc_df = read_csv(voc_fpath, encoding='utf-8', delimiter="\t", error_bad_lines=False)

    # Every row has the same columns, so the column is picked once for the
    # whole frame instead of being re-tested on each row.
    if "term" in voc_df.columns:
        voc = set(voc_df["term"])
    elif "word" in voc_df.columns:
        voc = set(voc_df["word"])
    else:
        voc = set()

    print("Loaded %d words vocabulary" % len(voc))
    return voc
def __init__(self, taxonomy_resources, voc_fpath="", relations_fpath="", lang="en"):
    """Bind shared resources and load or generate the relations table.

    Args:
        taxonomy_resources: object whose ``isas`` and ``freqs`` attributes
            are stored on this instance as ``_isas`` and ``_freqs``.
        voc_fpath: path of a vocabulary file. It is only read when
            ``relations_fpath`` does not exist.
        relations_fpath: path of a tab-separated relations file that has
            ``hyponym`` and ``hypernym`` columns.
        lang: language code handed to ``load_stoplist``.

    Raises:
        Exception: if neither ``relations_fpath`` nor ``voc_fpath`` exists.
    """
    self._isas = taxonomy_resources.isas
    self._freqs = taxonomy_resources.freqs
    self.voc_name = fpath2filename(voc_fpath)
    self._voc_fpath = voc_fpath
    self._stopwords = load_stoplist(lang=lang)
    self._lang = lang

    # An existing relations file always wins; the vocabulary file is only a
    # fallback used to generate relations when no such file is available.
    if exists(relations_fpath):
        print("Loading relations file: %s" % relations_fpath)
        self._relations_fpath = relations_fpath
        self._relations = read_csv(relations_fpath, encoding='utf-8', delimiter="\t", error_bad_lines=False)
        print("Loaded %d relations from: %s" % (len(self._relations), relations_fpath))
        # The vocabulary is every term seen on either side of a relation.
        self.voc = set(self._relations.hyponym) | set(self._relations.hypernym)
        print("Loaded %d voc from relations" % len(self.voc))
    elif exists(voc_fpath):
        self.voc = self._load_voc(voc_fpath)
        # The output path is derived from the vocabulary path (computed once).
        self._relations_fpath = voc_fpath + "-relations.csv"
        print("Generating new relations file: %s" % self._relations_fpath)
        # NOTE(review): _generate_relations presumably writes the file at
        # this path as well as returning the table -- confirm.
        self._relations = self._generate_relations(self.voc, self._relations_fpath)
    else:
        raise Exception("Error: cannot load relations or generate them. Specify either voc_fpath or relations_fpath.")