示例#1
0
def fix_dim(maxsize,
            mindim,
            word2ballDic=None,
            bPath='/Users/tdong/data/glove/glove.6B/glove.6B.50Xball',
            outputPath=""):
    """
    Pad every ball vector stored in ``bPath`` that has fewer than ``maxsize``
    entries.

    The first line of each file in ``bPath`` is parsed as whitespace-separated
    decimal numbers. When it holds fewer than ``maxsize`` values, the missing
    entries are filled with ``mindim`` and inserted *before* the last two
    values; the padded leading part is passed through ``vec_norm`` and the
    last two values are appended unchanged (presumably they are the length
    and radius of the ball -- confirm against the ball file format).

    :param maxsize: number of entries every ball vector should have
    :param mindim: value used for each padded entry
    :param word2ballDic: dictionary updated in place with the padded vectors,
        keyed by file name; a new dictionary is created when omitted
    :param bPath: directory holding one ball file per word
    :param outputPath: when truthy, each padded ball is written back with
        ``create_ball_file``
    :return: ``word2ballDic``
    """
    # A fresh dict per call: a ``dict()`` default would be shared by all calls.
    if word2ballDic is None:
        word2ballDic = {}

    for bf in os.listdir(bPath):
        # Only the first line is needed; close the file before it may be
        # rewritten below.
        with open(os.path.join(bPath, bf), 'r') as ifh:
            wlst = ifh.readline().strip().split()

        ballv = [decimal.Decimal(ele) for ele in wlst]
        delta = maxsize - len(ballv)
        if delta <= 0:
            # Already has at least maxsize entries: nothing to pad.
            continue

        print(bf, len(wlst), ballv[-1])
        padding = [decimal.Decimal(mindim)] * delta
        word2ballDic[bf] = vec_norm(ballv[:-2] + padding) + ballv[-2:]
        if outputPath:
            # NOTE(review): the ball is written into bPath, not outputPath;
            # outputPath only acts as an on/off switch here -- confirm that
            # overwriting the input directory is intended.
            create_ball_file(bf,
                             outputPath=bPath,
                             word2ballDic=word2ballDic)
    return word2ballDic
示例#2
0
def initialize_dictionaries(word2vecFile=None,
                            catDicFile=None,
                            wsChildrenFile=None):
    """
    Load the word-vector, category-code and children dictionaries from files.

    Every file is read line by line; the first whitespace-separated token of
    a line is the key and the remaining tokens are the value. Blank lines are
    skipped.

    :param word2vecFile: pre-trained word2vec file (UTF-8); the remaining
        tokens of each line are converted to floats and passed through
        ``vec_norm``
    :param catDicFile: optional file whose remaining tokens are converted to
        a list of ints
    :param wsChildrenFile: optional file whose remaining tokens are kept as a
        list of strings
    :return: ``(wsChildrenDic, word2vecDic, wscatCodeDic)``, or ``None`` when
        ``word2vecFile`` is not given or is not an existing file. An optional
        file that is not given or does not exist yields an empty dictionary.
    """
    wscatCodeDic = dict()
    wsChildrenDic = dict()
    word2vecDic = dict()
    # os.path.isfile(None) raises TypeError, so test for None explicitly.
    if word2vecFile is None or not os.path.isfile(word2vecFile):
        print('file does not exist:', word2vecFile)
        return None

    with open(word2vecFile, mode="r", encoding="utf-8") as w2v:
        for line in w2v:
            wlst = line.split()
            if not wlst:
                continue
            word2vecDic[wlst[0]] = vec_norm([float(ele) for ele in wlst[1:]])

    if catDicFile is not None and os.path.isfile(catDicFile):
        with open(catDicFile, 'r') as cfh:
            for ln in cfh:
                # split() already discards the trailing newline; slicing off
                # the last character would corrupt a final line that has no
                # newline.
                wlst = ln.split()
                if not wlst:
                    continue
                wscatCodeDic[wlst[0]] = [int(ele) for ele in wlst[1:]]

    if wsChildrenFile is not None and os.path.isfile(wsChildrenFile):
        with open(wsChildrenFile, 'r') as chfh:
            for ln in chfh:
                wlst = ln.split()
                if not wlst:
                    continue
                wsChildrenDic[wlst[0]] = wlst[1:]
    return wsChildrenDic, word2vecDic, wscatCodeDic
示例#3
0
def initialize_ball(root,
                    addDim=None,
                    L0=0.1,
                    R0=0.1,
                    word2vecDic=None,
                    wscatCodeDic=None,
                    word2ballDic=None,
                    outputPath=None):
    """
    Create the initial ball of ``root`` and store it in ``word2ballDic``.

    The centre point is the concatenation of the word vector of ``root``
    (each component multiplied by 100 and converted to ``Decimal``), the
    category code of ``root`` with 10 added to every entry, and ``addDim``.
    The stored ball is ``vec_norm`` of that point followed by ``L0`` and
    ``R0``.

    :param root: word whose ball is initialised; must be a key of
        ``wscatCodeDic``
    :param addDim: extra entries appended to the centre point (default: none)
    :param L0: value stored as the second-to-last entry of the ball
        (presumably the initial length of the centre vector -- confirm)
    :param R0: value stored as the last entry of the ball (presumably the
        initial radius -- confirm)
    :param word2vecDic: dictionary handed to ``get_word2vector``
    :param wscatCodeDic: maps a word to its category code (list of numbers)
    :param word2ballDic: dictionary updated in place with the new ball; a new
        dictionary is created when omitted
    :param outputPath: when truthy, the ball is written with
        ``create_ball_file``
    :return: ``(word2ballDic[root], word2ballDic)``
    """
    # Fresh containers per call: literal list/dict defaults would be shared
    # between calls, and word2ballDic is mutated below.
    if addDim is None:
        addDim = []
    if word2vecDic is None:
        word2vecDic = {}
    if wscatCodeDic is None:
        wscatCodeDic = {}
    if word2ballDic is None:
        word2ballDic = {}

    w2v = [
        decimal.Decimal(ele * 100)
        for ele in get_word2vector(root, word2vecDic=word2vecDic)
    ]
    cpoint = w2v + [ele + 10 for ele in wscatCodeDic[root]] + addDim
    word2ballDic[root] = vec_norm(cpoint) + [L0, R0]
    if outputPath:
        create_ball_file(root,
                         outputPath=outputPath,
                         word2ballDic=word2ballDic)
    return word2ballDic[root], word2ballDic
示例#4
0
def initialize_ball(root,
                    addDim=None,
                    L0=0.1,
                    R0=0.1,
                    word2vecDic=None,
                    wscatCodeDic=None,
                    word2ballDic=None,
                    outputPath=None):
    """
    Create the initial ball of ``root``, store it in ``word2ballDic`` and
    record the operation in ``ball_generation_log``.

    The centre point is the concatenation of the word vector of ``root``
    (each component multiplied by 100 and converted to ``Decimal``), the
    category code of ``root`` with 10 added to every entry, and ``addDim``.
    The stored ball is ``vec_norm`` of that point followed by ``L0`` and
    ``R0``.

    :param root: word whose ball is initialised; must be a key of
        ``wscatCodeDic``
    :param addDim: extra entries appended to the centre point (default: none)
    :param L0: value stored as the second-to-last entry of the ball
        (presumably the initial length of the centre vector -- confirm)
    :param R0: value stored as the last entry of the ball (presumably the
        initial radius -- confirm)
    :param word2vecDic: dictionary handed to ``get_word2vector``
    :param wscatCodeDic: maps a word to its category code (list of numbers)
    :param word2ballDic: dictionary updated in place with the new ball; a new
        dictionary is created when omitted
    :param outputPath: when truthy, the ball is written with
        ``create_ball_file``
    :return: ``(word2ballDic[root], word2ballDic)``
    """
    # Fresh containers per call: literal list/dict defaults would be shared
    # between calls, and word2ballDic is mutated below.
    if addDim is None:
        addDim = []
    if word2vecDic is None:
        word2vecDic = {}
    if wscatCodeDic is None:
        wscatCodeDic = {}
    if word2ballDic is None:
        word2ballDic = {}

    w2v = [
        decimal.Decimal(ele * 100)
        for ele in get_word2vector(root, word2vecDic=word2vecDic)
    ]
    # add catcode -> path from root to here e.g. 1 1 2 :> first tree, first child, second child
    cpoint = w2v + [ele + 10 for ele in wscatCodeDic[root]] + addDim
    word2ballDic[root] = vec_norm(cpoint) + [L0, R0]
    print(f"Initialize Ball for root: {root}")
    ball_generation_log.append(
        Log(key=root,
            operation=Operation.INITIALIZE,
            operation_args=[],
            vector=word2ballDic[root]))
    if outputPath:
        create_ball_file(root,
                         outputPath=outputPath,
                         word2ballDic=word2ballDic)
    return word2ballDic[root], word2ballDic
def shift_whole_tree_of(tree,
                        deltaVec,
                        deltaL,
                        wsChildrenDic=None,
                        word2ballDic=None,
                        outputPath=None):
    """
    Shift the ball of ``tree`` and the balls of all its descendants by
    ``deltaL`` along ``deltaVec``.

    Steps, in order:

    1. every child sub-tree is shifted recursively;
    2. the ball of ``tree`` gets a new centre direction,
       ``vec_norm(vec_point(direction, l1) + vec_point(deltaVec, deltaL))``,
       and a new length computed with the law of cosines from the old length
       ``l1``, ``deltaL`` and the cosine between ``deltaVec`` and the old
       direction; the last entry ``r1`` is kept;
    3. for every pair of children whose ``qsr_DC_degree`` is negative,
       ``rotate_vector_till`` is called on the pair;
    4. for every child, the last entry of the ball of ``tree`` is enlarged
       until ``qsr_P_degree(child, tree)`` is no longer negative;
    5. when ``outputPath`` is truthy the ball of ``tree`` is written with
       ``create_ball_file``.

    :param tree: word at the root of the (sub-)tree to shift; must be a key
        of ``word2ballDic``
    :param deltaVec: direction of the shift
    :param deltaL: length of the shift
    :param wsChildrenDic: dictionary handed to ``get_children``
    :param word2ballDic: maps a word to its ball (direction entries followed
        by two trailing values); updated in place, a new dictionary is
        created when omitted
    :param outputPath: when truthy, directory handed to ``create_ball_file``
    :return: the updated ``word2ballDic``
    """
    # Fresh dicts per call: ``dict()`` defaults would be shared between calls.
    if wsChildrenDic is None:
        wsChildrenDic = {}
    if word2ballDic is None:
        word2ballDic = {}

    for child in get_children(tree,
                              wsChildrenDic=wsChildrenDic,
                              word2ballDic=word2ballDic):
        word2ballDic = shift_whole_tree_of(child,
                                           deltaVec,
                                           deltaL,
                                           wsChildrenDic=wsChildrenDic,
                                           word2ballDic=word2ballDic,
                                           outputPath=outputPath)

    l1, r1 = word2ballDic[tree][-2:]
    # Length of the sum of two vectors of lengths l1 and deltaL.
    l = np.sqrt(l1 * l1 + deltaL * deltaL +
                2 * l1 * deltaL * vec_cos(deltaVec, word2ballDic[tree][:-2]))
    # NOTE(review): presumably vec_point returns array-like values for which
    # ``+`` is element-wise addition -- confirm.
    newVec = vec_norm(
        vec_point(word2ballDic[tree][:-2], l1) + vec_point(deltaVec, deltaL))
    word2ballDic[tree] = list(newVec) + [l, r1]

    lst = get_children(tree,
                       wsChildrenDic=wsChildrenDic,
                       word2ballDic=word2ballDic)
    # Visit every unordered pair of children (i < j) exactly once.
    for i in range(len(lst) - 1):
        for j in range(i + 1, len(lst)):
            dcDelta = qsr_DC_degree(word2ballDic[lst[i]], word2ballDic[lst[j]])
            if dcDelta < 0:
                print(lst[j], lst[i], j, i)
                word2ballDic = rotate_vector_till(lst[j],
                                                  lst[i],
                                                  word2ballDic=word2ballDic,
                                                  logFile='word2ball.log')

    for child in get_children(tree,
                              wsChildrenDic=wsChildrenDic,
                              word2ballDic=word2ballDic):
        gap = 1
        while True:
            delta = qsr_P_degree(word2ballDic[child], word2ballDic[tree])
            if delta >= 0:
                break
            # Grow the last entry of the parent ball by the missing amount
            # plus a margin that doubles on every retry.
            gap *= 2
            word2ballDic[tree][-1] += -delta + gap
    if outputPath:
        create_ball_file(tree,
                         outputPath=outputPath,
                         word2ballDic=word2ballDic)
    return word2ballDic