def test_get_dictionaries(self):
    """Compare the dictionaries built from a two-fact frame with the fixtures.

    `ent=True` is checked against `self.d1` and `ent=False` against
    `self.d2`.
    """
    rows = [['a', 'R1', 'b'], ['c', 'R2', 'd']]
    frame = pd.DataFrame(rows, columns=['from', 'rel', 'to'])

    entity_map = get_dictionaries(frame, ent=True)
    assert entity_map == self.d1

    relation_map = get_dictionaries(frame, ent=False)
    assert relation_map == self.d2
def __init__(
    self,
    df=None,
    kg=None,
    ent2ix=None,
    rel2ix=None,
    dict_of_heads=None,
    dict_of_tails=None,
):
    """Build the graph either from a DataFrame or from a dict of tensors.

    Exactly one of `df` and `kg` must be provided.

    Parameters
    ----------
    df: pandas.DataFrame, optional
        Facts, read through the columns `from`, `rel`, `to` and
        `how-much`.
    kg: dict, optional
        Pre-built graph. Must contain the keys `heads`, `tails` and
        `relations`; the key `magnitudes` is read as well.
    ent2ix: dict, optional
        Mapping from entity to integer index. Required when building from
        `kg`; computed with `get_dictionaries(df, ent=True)` when omitted
        and `df` is given.
    rel2ix: dict, optional
        Mapping from relation to integer index. Required when building
        from `kg`; computed with `get_dictionaries(df, ent=False)` when
        omitted and `df` is given.
    dict_of_heads: dict, optional
    dict_of_tails: dict, optional
        Used as provided when both are given; otherwise both are reset to
        empty `defaultdict(set)` and filled by `self.evaluate_dicts()`.

    Raises
    ------
    WrongArgumentsError
        If neither or both of `df` and `kg` are given, if `kg` is not a
        dict holding the required keys, or if `ent2ix`/`rel2ix` are
        missing when building from `kg`.
    SanityError
        If `self.sanity_check()` fails with an AssertionError.
    """
    # Validate with explicit raises rather than `assert`, so the checks
    # still run when Python is started with -O.
    if df is None:
        if kg is None:
            raise WrongArgumentsError(
                "Please provide at least one " "argument of `df` and kg`"
            )
        # Short-circuit: the key lookups only run once `kg` is known to be
        # a dict, so a non-dict `kg` is reported as a wrong argument.
        required_keys = ("heads", "tails", "relations")
        if not (
            isinstance(kg, dict) and all(key in kg for key in required_keys)
        ):
            raise WrongArgumentsError(
                "Keys in the `kg` dict should "
                "contain `heads`, `tails`, "
                "`relations`."
            )
        if rel2ix is None or ent2ix is None:
            raise WrongArgumentsError(
                "Please provide the two "
                "dictionaries ent2ix and rel2ix "
                "if building from `kg`."
            )
    elif kg is not None:
        raise WrongArgumentsError(
            "`df` and kg` arguments should not " "both be provided."
        )

    self.ent2ix = get_dictionaries(df, ent=True) if ent2ix is None else ent2ix
    self.rel2ix = get_dictionaries(df, ent=False) if rel2ix is None else rel2ix

    # Indices are assumed to start at 0, hence the `+ 1`.
    self.n_ent = max(self.ent2ix.values()) + 1
    self.n_rel = max(self.rel2ix.values()) + 1

    if df is not None:
        # build kg from a pandas dataframe
        self.n_facts = len(df)
        self.head_idx = tensor(df["from"].map(self.ent2ix).values).long()
        self.tail_idx = tensor(df["to"].map(self.ent2ix).values).long()
        self.relations = tensor(df["rel"].map(self.rel2ix).values).long()
        # Go through a NumPy array, like the columns above, so that the
        # conversion does not depend on the DataFrame's index labels.
        self.magnitudes = tensor(
            df["how-much"].to_numpy(dtype="float64"), dtype=float64
        )
    else:
        # build kg from another kg
        self.n_facts = kg["heads"].shape[0]
        self.head_idx = kg["heads"]
        self.tail_idx = kg["tails"]
        self.relations = kg["relations"]
        # NOTE: `magnitudes` is not part of the key validation above, so a
        # `kg` without it raises KeyError here.
        self.magnitudes = kg["magnitudes"]

    if dict_of_heads is None or dict_of_tails is None:
        self.dict_of_heads = defaultdict(set)
        self.dict_of_tails = defaultdict(set)
        self.evaluate_dicts()
    else:
        self.dict_of_heads = dict_of_heads
        self.dict_of_tails = dict_of_tails

    try:
        self.sanity_check()
    except AssertionError as err:
        raise SanityError("Please check the sanity of arguments.") from err
def __init__(self, df=None, kg=None, ent2ix=None, rel2ix=None,
             dict_of_heads=None, dict_of_tails=None, dict_of_rel=None,
             id2point=None, geo=None):
    """Build the graph either from a DataFrame or from a dict of tensors.

    Exactly one of `df` and `kg` must be provided.

    Parameters
    ----------
    df: pandas.DataFrame, optional
        Facts, read through the columns `from`, `rel` and `to`.
    kg: dict, optional
        Pre-built graph. Must contain the keys `heads`, `tails` and
        `relations`; an optional `point` entry is copied to `self.point`.
    ent2ix: dict, optional
        Mapping from entity to integer index. Required when building from
        `kg`; computed with `get_dictionaries(df, ent=True)` when omitted
        and `df` is given.
    rel2ix: dict, optional
        Mapping from relation to integer index. Required when building
        from `kg`; computed with `get_dictionaries(df, ent=False)` when
        omitted and `df` is given.
    dict_of_heads: dict, optional
    dict_of_tails: dict, optional
    dict_of_rel: dict, optional
        Used as provided when all three are given; otherwise all three are
        reset to empty `defaultdict(set)` and filled by
        `self.evaluate_dicts()`.
    id2point: optional
        Stored as `self.id2point` when given. It is overwritten by the
        result of `self.load_point(geo)` when both `geo` and `df` are
        given.
    geo: optional
        Stored as `self.geo` and forwarded to `self.load_point` when `df`
        is given.

    Raises
    ------
    WrongArgumentsError
        If neither or both of `df` and `kg` are given, if `kg` is not a
        dict holding the required keys, or if `ent2ix`/`rel2ix` are
        missing when building from `kg`.
    SanityError
        If `self.sanity_check()` fails with an AssertionError.
    """
    # Validate with explicit raises rather than `assert`, so the checks
    # still run when Python is started with -O.
    if df is None:
        if kg is None:
            raise WrongArgumentsError("Please provide at least one "
                                      "argument of `df` and kg`")
        # Short-circuit: the key lookups only run once `kg` is known to be
        # a dict, so a non-dict `kg` is reported as a wrong argument.
        required_keys = ('heads', 'tails', 'relations')
        if not (isinstance(kg, dict)
                and all(key in kg for key in required_keys)):
            raise WrongArgumentsError("Keys in the `kg` dict should "
                                      "contain `heads`, `tails`, "
                                      "`relations`.")
        if rel2ix is None or ent2ix is None:
            raise WrongArgumentsError("Please provide the two "
                                      "dictionaries ent2ix and rel2ix "
                                      "if building from `kg`.")
    elif kg is not None:
        raise WrongArgumentsError("`df` and kg` arguments should not "
                                  "both be provided.")

    self.ent2ix = get_dictionaries(df, ent=True) if ent2ix is None else ent2ix
    self.rel2ix = get_dictionaries(df, ent=False) if rel2ix is None else rel2ix

    # NOTE: `self.id2point` stays undefined when no value is passed and no
    # geo data is loaded below.
    if id2point is not None:
        self.id2point = id2point

    # Indices are assumed to start at 0, hence the `+ 1`.
    self.n_ent = max(self.ent2ix.values()) + 1
    self.n_rel = max(self.rel2ix.values()) + 1
    self.geo = geo

    if df is not None:
        # build kg from a pandas dataframe
        self.n_facts = len(df)
        self.head_idx = tensor(df['from'].map(self.ent2ix).values).long()
        self.tail_idx = tensor(df['to'].map(self.ent2ix).values).long()
        self.relations = tensor(df['rel'].map(self.rel2ix).values).long()
    else:
        # build kg from another kg
        self.n_facts = kg['heads'].shape[0]
        self.head_idx = kg['heads']
        self.tail_idx = kg['tails']
        self.relations = kg['relations']
        # `point` is optional: only its absence is tolerated, any other
        # failure is allowed to surface.
        try:
            self.point = kg['point']
        except KeyError:
            pass

    if (geo is not None) and (df is not None):
        # Geo: pair the point of the first column (position 0) with the
        # point of the third column (position 2) for every row of `df`.
        self.entity2point, self.id2point = self.load_point(geo)
        self.point = np.array([
            [self.entity2point[triplet[0]], self.entity2point[triplet[2]]]
            for triplet in df.values
        ])

    if dict_of_heads is None or dict_of_tails is None or dict_of_rel is None:
        self.dict_of_heads = defaultdict(set)
        self.dict_of_tails = defaultdict(set)
        self.dict_of_rel = defaultdict(set)
        self.evaluate_dicts()
    else:
        self.dict_of_heads = dict_of_heads
        self.dict_of_tails = dict_of_tails
        self.dict_of_rel = dict_of_rel

    try:
        self.sanity_check()
    except AssertionError as err:
        raise SanityError("Please check the sanity of arguments.") from err
def __init__(self, df=None, kg=None, ent2ix=None, rel2ix=None,
             dict_of_heads=None, dict_of_tails=None):
    """Build the graph either from a DataFrame or from a dict of tensors.

    Exactly one of `df` and `kg` must be provided. The reverse mappings
    `ix2ent` and `ix2rel` are derived from `ent2ix` and `rel2ix`.

    Parameters
    ----------
    df: pandas.DataFrame, optional
        Facts, read through the columns `from`, `rel` and `to`.
    kg: dict, optional
        Pre-built graph. Must contain the keys `heads`, `tails` and
        `relations`.
    ent2ix: dict, optional
        Mapping from entity to integer index. Required when building from
        `kg`; computed with `get_dictionaries(df, ent=True)` when omitted
        and `df` is given.
    rel2ix: dict, optional
        Mapping from relation to integer index. Required when building
        from `kg`; computed with `get_dictionaries(df, ent=False)` when
        omitted and `df` is given.
    dict_of_heads: dict, optional
    dict_of_tails: dict, optional
        Used as provided when both are given; otherwise both are reset to
        empty `defaultdict(set)` and filled by `self.evaluate_dicts()`.

    Raises
    ------
    WrongArgumentsError
        If neither or both of `df` and `kg` are given, if `kg` is not a
        dict holding the required keys, or if `ent2ix`/`rel2ix` are
        missing when building from `kg`.
    SanityError
        If `self.sanity_check()` fails with an AssertionError.
    """
    # Validate with explicit raises rather than `assert`, so the checks
    # still run when Python is started with -O.
    if df is None:
        if kg is None:
            raise WrongArgumentsError("Please provide at least one "
                                      "argument of `df` and kg`")
        # Short-circuit: the key lookups only run once `kg` is known to be
        # a dict, so a non-dict `kg` is reported as a wrong argument.
        required_keys = ('heads', 'tails', 'relations')
        if not (isinstance(kg, dict)
                and all(key in kg for key in required_keys)):
            raise WrongArgumentsError("Keys in the `kg` dict should "
                                      "contain `heads`, `tails`, "
                                      "`relations`.")
        if rel2ix is None or ent2ix is None:
            raise WrongArgumentsError("Please provide the two "
                                      "dictionaries ent2ix and rel2ix "
                                      "if building from `kg`.")
    elif kg is not None:
        raise WrongArgumentsError("`df` and kg` arguments should not "
                                  "both be provided.")

    self.ent2ix = get_dictionaries(df, ent=True) if ent2ix is None else ent2ix
    # Reverse lookup: index -> entity.
    self.ix2ent = {v: k for k, v in self.ent2ix.items()}

    self.rel2ix = get_dictionaries(df, ent=False) if rel2ix is None else rel2ix
    # Reverse lookup: index -> relation.
    self.ix2rel = {v: k for k, v in self.rel2ix.items()}

    # Indices are assumed to start at 0, hence the `+ 1`.
    self.n_ent = max(self.ent2ix.values()) + 1
    self.n_rel = max(self.rel2ix.values()) + 1

    if df is not None:
        # build kg from a pandas dataframe
        self.n_facts = len(df)
        self.head_idx = tensor(df['from'].map(self.ent2ix).values).long()
        self.tail_idx = tensor(df['to'].map(self.ent2ix).values).long()
        self.relations = tensor(df['rel'].map(self.rel2ix).values).long()
    else:
        # build kg from another kg
        self.n_facts = kg['heads'].shape[0]
        self.head_idx = kg['heads']
        self.tail_idx = kg['tails']
        self.relations = kg['relations']

    if dict_of_heads is None or dict_of_tails is None:
        self.dict_of_heads = defaultdict(set)
        self.dict_of_tails = defaultdict(set)
        self.evaluate_dicts()
    else:
        self.dict_of_heads = dict_of_heads
        self.dict_of_tails = dict_of_tails

    try:
        self.sanity_check()
    except AssertionError as err:
        raise SanityError("Please check the sanity of arguments.") from err