def fix_dim(maxsize, mindim, word2ballDic=None,
            bPath='/Users/tdong/data/glove/glove.6B/glove.6B.50Xball',
            outputPath=""):
    """Pad every ball vector stored under ``bPath`` up to ``maxsize`` values.

    Each entry of ``bPath`` is read as a single whitespace-separated line of
    numbers.  When that line holds fewer than ``maxsize`` values, ``mindim``
    is inserted ``maxsize - len(line)`` times just before the last two
    values; the leading part (everything except the last two values) is
    passed through ``vec_norm`` and the last two values are re-appended
    unchanged.  Entries that already hold ``maxsize`` or more values are
    left untouched and are not added to ``word2ballDic``.

    :param maxsize: target number of values per ball vector
    :param mindim: filler value, converted with ``decimal.Decimal``
    :param word2ballDic: dict updated in place, keyed by file name; a fresh
        dict is created when omitted
    :param bPath: directory holding one ball file per word
    :param outputPath: when truthy, every padded ball is written back with
        ``create_ball_file``
    :return: ``word2ballDic``
    """
    # A ``dict()`` default would be shared by every call of this function.
    if word2ballDic is None:
        word2ballDic = {}

    for bf in os.listdir(bPath):
        # Read first and close the handle before anything is written back
        # into the same directory.
        with open(os.path.join(bPath, bf), 'r') as ifh:
            wlst = ifh.readline().strip().split()

        ballv = [decimal.Decimal(ele) for ele in wlst]
        delta = maxsize - len(ballv)
        if delta <= 0:
            continue

        print(bf, len(wlst), ballv[-1])
        padding = [decimal.Decimal(mindim)] * delta
        word2ballDic[bf] = vec_norm(ballv[:-2] + padding) + ballv[-2:]
        if outputPath:
            # NOTE(review): the ball is written to ``bPath`` (in place), not
            # to ``outputPath``; ``outputPath`` only acts as an on/off
            # switch here.  Kept as in the original -- confirm the intent.
            create_ball_file(bf, outputPath=bPath, word2ballDic=word2ballDic)
    return word2ballDic
def initialize_dictionaries(word2vecFile=None, catDicFile=None, wsChildrenFile=None):
    """Load the word-vector, category-code and children dictionaries.

    All three files are plain text with one whitespace-separated record per
    line; the first token of a record is the key.  Blank lines are skipped.

    :param word2vecFile: pre-trained word2vec file (read as UTF-8); every
        vector is converted to floats and passed through ``vec_norm``
    :param catDicFile: optional file mapping a key to a list of integers
    :param wsChildrenFile: optional file mapping a key to a list of child
        names
    :return: ``(wsChildrenDic, word2vecDic, wscatCodeDic)``, or ``None``
        (after printing a message) when ``word2vecFile`` is missing.  A
        missing or omitted optional file yields an empty dictionary.
    """
    # ``os.path.isfile(None)`` raises TypeError, so test for None first.
    if word2vecFile is None or not os.path.isfile(word2vecFile):
        print('file does not exist:', word2vecFile)
        return None

    word2vecDic = dict()
    with open(word2vecFile, mode="r", encoding="utf-8") as w2v:
        for line in w2v:
            wlst = line.split()
            if not wlst:
                continue
            word2vecDic[wlst[0]] = vec_norm([float(ele) for ele in wlst[1:]])

    wscatCodeDic = dict()
    if catDicFile is not None and os.path.isfile(catDicFile):
        with open(catDicFile, 'r') as cfh:
            for ln in cfh:
                # ``split()`` already drops the newline; slicing ``ln[:-1]``
                # would eat a real character when the last line is not
                # newline-terminated.
                wlst = ln.split()
                if not wlst:
                    continue
                wscatCodeDic[wlst[0]] = [int(ele) for ele in wlst[1:]]

    wsChildrenDic = dict()
    if wsChildrenFile is not None and os.path.isfile(wsChildrenFile):
        with open(wsChildrenFile, 'r') as chfh:
            for ln in chfh:
                wlst = ln.split()
                if not wlst:
                    continue
                wsChildrenDic[wlst[0]] = wlst[1:]

    return wsChildrenDic, word2vecDic, wscatCodeDic
# NOTE(review): another ``initialize_ball`` with the same signature is defined
# later in this file; if both live at module level the later one replaces
# this one at import time.
def initialize_ball(root, addDim=None, L0=0.1, R0=0.1,
                    word2vecDic=None, wscatCodeDic=None, word2ballDic=None,
                    outputPath=None):
    """Create the initial ball of ``root`` and store it in ``word2ballDic``.

    The ball is built from the word vector of ``root`` (as returned by
    ``get_word2vector``, each value multiplied by 100 and converted to
    ``decimal.Decimal``), followed by the category code of ``root`` with 10
    added to every entry, followed by ``addDim``.  That point is passed
    through ``vec_norm`` and ``[L0, R0]`` is appended as the last two
    entries.

    :param root: key of the ball to create; must be present in
        ``wscatCodeDic``
    :param addDim: extra values appended to the point before ``vec_norm``
        (defaults to no extra values)
    :param L0: second-to-last entry of the stored ball
    :param R0: last entry of the stored ball
    :param word2vecDic: forwarded to ``get_word2vector``
    :param wscatCodeDic: maps ``root`` to its list of category codes
    :param word2ballDic: dict updated in place; a fresh dict is created when
        omitted
    :param outputPath: when truthy, the ball is written out with
        ``create_ball_file``
    :return: ``(word2ballDic[root], word2ballDic)``
    :raises KeyError: when ``root`` is not in ``wscatCodeDic``
    """
    # Mutable defaults would be shared between calls; build them per call.
    addDim = [] if addDim is None else addDim
    word2vecDic = {} if word2vecDic is None else word2vecDic
    wscatCodeDic = {} if wscatCodeDic is None else wscatCodeDic
    if word2ballDic is None:
        word2ballDic = {}

    w2v = [
        decimal.Decimal(ele * 100)
        for ele in get_word2vector(root, word2vecDic=word2vecDic)
    ]
    cat_code = [ele + 10 for ele in wscatCodeDic[root]]
    cpoint = w2v + cat_code + addDim
    word2ballDic[root] = vec_norm(cpoint) + [L0, R0]

    if outputPath:
        create_ball_file(root, outputPath=outputPath, word2ballDic=word2ballDic)
    return word2ballDic[root], word2ballDic
def initialize_ball(root, addDim=None, L0=0.1, R0=0.1,
                    word2vecDic=None, wscatCodeDic=None, word2ballDic=None,
                    outputPath=None):
    """Create the initial ball of ``root``, log it and store it.

    The ball is built from the word vector of ``root`` (as returned by
    ``get_word2vector``, each value multiplied by 100 and converted to
    ``decimal.Decimal``), followed by the category code of ``root`` with 10
    added to every entry, followed by ``addDim``.  That point is passed
    through ``vec_norm`` and ``[L0, R0]`` is appended as the last two
    entries.  A message is printed and an ``Operation.INITIALIZE`` record is
    appended to ``ball_generation_log``.

    :param root: key of the ball to create; must be present in
        ``wscatCodeDic``
    :param addDim: extra values appended to the point before ``vec_norm``
        (defaults to no extra values)
    :param L0: second-to-last entry of the stored ball
    :param R0: last entry of the stored ball
    :param word2vecDic: forwarded to ``get_word2vector``
    :param wscatCodeDic: maps ``root`` to its list of category codes
    :param word2ballDic: dict updated in place; a fresh dict is created when
        omitted
    :param outputPath: when truthy, the ball is written out with
        ``create_ball_file``
    :return: ``(word2ballDic[root], word2ballDic)``
    :raises KeyError: when ``root`` is not in ``wscatCodeDic``
    """
    # Mutable defaults would be shared between calls; build them per call.
    addDim = [] if addDim is None else addDim
    word2vecDic = {} if word2vecDic is None else word2vecDic
    wscatCodeDic = {} if wscatCodeDic is None else wscatCodeDic
    if word2ballDic is None:
        word2ballDic = {}

    w2v = [
        decimal.Decimal(ele * 100)
        for ele in get_word2vector(root, word2vecDic=word2vecDic)
    ]
    # add catcode -> path from root to here
    # e.g. 1 1 2 :> first tree, first child, second child
    cat_code = [ele + 10 for ele in wscatCodeDic[root]]
    cpoint = w2v + cat_code + addDim
    word2ballDic[root] = vec_norm(cpoint) + [L0, R0]

    print(f"Initialize Ball for root: {root}")
    ball_generation_log.append(
        Log(key=root,
            operation=Operation.INITIALIZE,
            operation_args=[],
            vector=word2ballDic[root]))

    if outputPath:
        create_ball_file(root, outputPath=outputPath, word2ballDic=word2ballDic)
    return word2ballDic[root], word2ballDic
def shift_whole_tree_of(tree, deltaVec, deltaL, wsChildrenDic=None,
                        word2ballDic=None, outputPath=None):
    """Shift the ball of ``tree`` and of all its descendants by one offset.

    Steps, in this order:

    1. recurse into every child returned by ``get_children`` so the whole
       subtree receives the same ``deltaVec`` / ``deltaL`` shift;
    2. recompute the ball of ``tree``: with ``centre`` being all entries but
       the last two and ``l1, r1`` the last two, the new second-to-last
       entry is ``sqrt(l1**2 + deltaL**2 + 2*l1*deltaL*vec_cos(deltaVec,
       centre))``, the new leading part is ``vec_norm(vec_point(centre, l1)
       + vec_point(deltaVec, deltaL))`` and ``r1`` is kept;
    3. for every pair of children whose ``qsr_DC_degree`` is negative, call
       ``rotate_vector_till`` on that pair;
    4. for every child, increase the last entry of the ball of ``tree``
       until ``qsr_P_degree(child ball, tree ball)`` is no longer negative;
    5. when ``outputPath`` is truthy, write the ball of ``tree`` with
       ``create_ball_file``.

    :param tree: key of the subtree root; must be present in
        ``word2ballDic``
    :param deltaVec: direction of the shift
    :param deltaL: length of the shift
    :param wsChildrenDic: forwarded to ``get_children``
    :param word2ballDic: dict of balls, updated in place (and possibly
        replaced by the value ``rotate_vector_till`` returns)
    :param outputPath: optional output location for ``create_ball_file``
    :return: the updated ``word2ballDic``
    """
    # Mutable defaults would be shared between calls; build them per call.
    wsChildrenDic = {} if wsChildrenDic is None else wsChildrenDic
    if word2ballDic is None:
        word2ballDic = {}

    # 1. shift the descendants first
    for child in get_children(tree, wsChildrenDic=wsChildrenDic,
                              word2ballDic=word2ballDic):
        word2ballDic = shift_whole_tree_of(child, deltaVec, deltaL,
                                           wsChildrenDic=wsChildrenDic,
                                           word2ballDic=word2ballDic,
                                           outputPath=outputPath)

    # 2. shift this ball
    l1, r1 = word2ballDic[tree][-2:]
    l = np.sqrt(l1 * l1
                + deltaL * deltaL
                + 2 * l1 * deltaL * vec_cos(deltaVec, word2ballDic[tree][:-2]))
    newVec = vec_norm(vec_point(word2ballDic[tree][:-2], l1)
                      + vec_point(deltaVec, deltaL))
    word2ballDic[tree] = list(newVec) + [l, r1]

    # 3. pull apart every pair of children reported as overlapping
    lst = get_children(tree, wsChildrenDic=wsChildrenDic,
                       word2ballDic=word2ballDic)
    for i in range(len(lst) - 1):
        for j in range(i + 1, len(lst)):
            dcDelta = qsr_DC_degree(word2ballDic[lst[i]], word2ballDic[lst[j]])
            if dcDelta < 0:
                print(lst[j], lst[i], j, i)
                word2ballDic = rotate_vector_till(lst[j], lst[i],
                                                  word2ballDic=word2ballDic,
                                                  logFile='word2ball.log')

    # 4. enlarge this ball until it contains every child; the extra margin
    #    ``gap`` doubles on each retry (and restarts at 1 for each child)
    for child in get_children(tree, wsChildrenDic=wsChildrenDic,
                              word2ballDic=word2ballDic):
        gap = 1
        while True:
            delta = qsr_P_degree(word2ballDic[child], word2ballDic[tree])
            if delta < 0:
                gap *= 2
                word2ballDic[tree][-1] += -delta + gap
            else:
                break

    # 5. persist
    if outputPath:
        create_ball_file(tree, outputPath=outputPath, word2ballDic=word2ballDic)
    return word2ballDic