Exemplo n.º 1
0
def fix_dim(maxsize,
            mindim,
            word2ballDic=None,
            bPath='/Users/tdong/data/glove/glove.6B/glove.6B.50Xball',
            outputPath=""):
    """
    Pad every ball stored in ``bPath`` that has fewer than ``maxsize`` values.

    Each file in ``bPath`` is read as a single whitespace-separated line of
    numbers.  When the line holds fewer than ``maxsize`` values, the missing
    slots are filled with ``mindim`` just before the last two values; the
    padded front part is passed through ``vec_norm`` and the last two values
    are appended unchanged (presumably the ball's length and radius --
    confirm against the ball file format).

    :param maxsize: number of values every ball should have after padding
    :param mindim: value used for each padded dimension
    :param word2ballDic: dict updated in place with the padded balls, keyed by
        file name; a fresh dict is created when omitted
    :param bPath: directory holding one ball file per entry
    :param outputPath: when truthy, each padded ball is written out with
        ``create_ball_file``.  NOTE(review): the ball is written to ``bPath``,
        not to ``outputPath`` -- this looks like a deliberate in-place
        rewrite, confirm.
    :return: ``word2ballDic``
    """
    # A dict() default would be shared (and keep growing) across calls.
    if word2ballDic is None:
        word2ballDic = {}

    for bf in os.listdir(bPath):
        with open(os.path.join(bPath, bf), 'r') as ifh:
            wlst = ifh.readline().strip().split()
        # The handle is closed before anything is written back into bPath.
        ballv = [decimal.Decimal(ele) for ele in wlst]
        delta = maxsize - len(ballv)
        if delta <= 0:
            # Already has (at least) the requested number of values.
            continue

        print(bf, len(wlst), ballv[-1])
        padding = [decimal.Decimal(mindim)] * delta
        vec = vec_norm(ballv[:-2] + padding) + ballv[-2:]
        word2ballDic[bf] = vec
        if outputPath:
            create_ball_file(bf,
                             outputPath=bPath,
                             word2ballDic=word2ballDic)
    return word2ballDic
Exemplo n.º 2
0
def initialize_ball(root,
                    addDim=None,
                    L0=0.1,
                    R0=0.1,
                    word2vecDic=None,
                    wscatCodeDic=None,
                    word2ballDic=None,
                    outputPath=None):
    """
    Create the initial ball for ``root`` and store it in ``word2ballDic``.

    The centre is built by concatenating three parts and normalising the
    result with ``vec_norm``:

    * the word vector of ``root`` (from ``get_word2vector``), each component
      multiplied by 100 and converted to ``Decimal``;
    * the entries of ``wscatCodeDic[root]``, each increased by 10;
    * the extra dimensions in ``addDim``.

    ``[L0, R0]`` is appended to the normalised centre.

    :param root: key of the ball to create
    :param addDim: list of extra dimensions appended to the centre
        (defaults to no extra dimensions)
    :param L0: value stored in the second-to-last slot of the ball
        (presumably the distance of the centre from the origin)
    :param R0: value stored in the last slot of the ball (presumably the
        radius)
    :param word2vecDic: passed through to ``get_word2vector``
    :param wscatCodeDic: must contain ``root``; maps it to its category code
    :param word2ballDic: dict updated in place with the new ball; a fresh
        dict is created when omitted
    :param outputPath: when truthy, the ball is also written with
        ``create_ball_file``
    :return: tuple ``(ball of root, word2ballDic)``
    :raises KeyError: if ``root`` is missing from ``wscatCodeDic``
    """
    # Mutable defaults would be shared between calls; build them per call.
    if addDim is None:
        addDim = []
    if word2vecDic is None:
        word2vecDic = {}
    if wscatCodeDic is None:
        wscatCodeDic = {}
    if word2ballDic is None:
        word2ballDic = {}

    w2v = [
        decimal.Decimal(ele * 100)
        for ele in get_word2vector(root, word2vecDic=word2vecDic)
    ]
    cat_code = [ele + 10 for ele in wscatCodeDic[root]]
    cpoint = w2v + cat_code + addDim
    word2ballDic[root] = vec_norm(cpoint) + [L0, R0]

    if outputPath:
        create_ball_file(root,
                         outputPath=outputPath,
                         word2ballDic=word2ballDic)
    return word2ballDic[root], word2ballDic
Exemplo n.º 3
0
def initialize_ball(root,
                    addDim=None,
                    L0=0.1,
                    R0=0.1,
                    word2vecDic=None,
                    wscatCodeDic=None,
                    word2ballDic=None,
                    outputPath=None):
    """
    Create the initial ball for ``root``, store it and log the operation.

    The centre is the ``vec_norm``-normalised concatenation of the word
    vector of ``root`` (each component times 100, as ``Decimal``), the
    category code ``wscatCodeDic[root]`` with 10 added to every entry, and
    ``addDim``.  ``[L0, R0]`` is appended to the centre.  An ``INITIALIZE``
    entry holding the new ball is appended to ``ball_generation_log``.

    :param root: key of the ball to create
    :param addDim: list of extra dimensions appended to the centre
        (defaults to no extra dimensions)
    :param L0: value stored in the second-to-last slot of the ball
        (presumably the distance of the centre from the origin)
    :param R0: value stored in the last slot of the ball (presumably the
        radius)
    :param word2vecDic: passed through to ``get_word2vector``
    :param wscatCodeDic: must contain ``root``; maps it to its category code
    :param word2ballDic: dict updated in place with the new ball; a fresh
        dict is created when omitted
    :param outputPath: when truthy, the ball is also written with
        ``create_ball_file``
    :return: tuple ``(ball of root, word2ballDic)``
    :raises KeyError: if ``root`` is missing from ``wscatCodeDic``
    """
    # Mutable defaults would be shared between calls; build them per call.
    if addDim is None:
        addDim = []
    if word2vecDic is None:
        word2vecDic = {}
    if wscatCodeDic is None:
        wscatCodeDic = {}
    if word2ballDic is None:
        word2ballDic = {}

    w2v = [
        decimal.Decimal(ele * 100)
        for ele in get_word2vector(root, word2vecDic=word2vecDic)
    ]
    # Category code: path from the tree root to this node,
    # e.g. 1 1 2 -> first tree, first child, second child.
    cat_code = [ele + 10 for ele in wscatCodeDic[root]]
    cpoint = w2v + cat_code + addDim
    word2ballDic[root] = vec_norm(cpoint) + [L0, R0]

    print(f"Initialize Ball for root: {root}")
    ball_generation_log.append(
        Log(key=root,
            operation=Operation.INITIALIZE,
            operation_args=[],
            vector=word2ballDic[root]))

    if outputPath:
        create_ball_file(root,
                         outputPath=outputPath,
                         word2ballDic=word2ballDic)
    return word2ballDic[root], word2ballDic
def homothetic_recursive_transform_of_decendents(tree,
                                                 root=None,
                                                 rate=None,
                                                 wsChildrenDic=None,
                                                 word2ballDic=None,
                                                 outputPath=None):
    """
    Scale the balls of all descendants of ``root`` by ``rate``.

    Outline (nothing happens when ``rate == 1``)::

        for child of tree:
            homothetic_recursive_transform_of_decendents(child, ...)
        if tree is root:
            stop                       # root itself is not rescaled here
        word2ballDic[tree][-2] *= rate
        word2ballDic[tree][-1] *= rate
        for every pair of children (i, j) with qsr_DC_degree < 0:
            rotate_vector_till(child_j, child_i)
        for child of tree:
            enlarge word2ballDic[tree][-1] until qsr_P_degree(child, tree) >= 0

    :param tree: node whose subtree is processed in this call
    :param root: node at which the transformation was started; its own ball
        is left untouched
    :param rate: scaling factor applied to the last two slots of each ball
    :param wsChildrenDic: passed through to ``get_children``
    :param word2ballDic: dict of balls, updated in place; a fresh dict is
        created when omitted
    :param outputPath: when truthy, changed balls are written with
        ``create_ball_file``
    :return: ``word2ballDic``
    """
    # Mutable defaults would be shared between calls; build them per call.
    if wsChildrenDic is None:
        wsChildrenDic = {}
    if word2ballDic is None:
        word2ballDic = {}

    if rate == 1:
        return word2ballDic

    # Post-order: descendants are transformed before the node itself.
    for child in get_children(tree,
                              wsChildrenDic=wsChildrenDic,
                              word2ballDic=word2ballDic):
        word2ballDic = homothetic_recursive_transform_of_decendents(
            child,
            root=root,
            rate=rate,
            word2ballDic=word2ballDic,
            wsChildrenDic=wsChildrenDic,
            outputPath=outputPath)

    if tree == root:
        return word2ballDic

    length = decimal.Decimal(word2ballDic[tree][-2])
    word2ballDic[tree][-2] = length * rate

    assert word2ballDic[tree][-2] != np.inf and word2ballDic[tree][-2] >= 0

    # Algebraically this is radius * rate; the original form is kept so the
    # Decimal rounding stays the same.
    word2ballDic[tree][-1] = (length * rate -
                              (length - word2ballDic[tree][-1]) * rate)
    if outputPath:
        create_ball_file(tree,
                         outputPath=outputPath,
                         word2ballDic=word2ballDic)

    # Check every unordered pair of children once; a negative degree triggers
    # a rotation of the later child with respect to the earlier one.
    lst = get_children(tree,
                       wsChildrenDic=wsChildrenDic,
                       word2ballDic=word2ballDic)
    for i in range(len(lst) - 1):
        for j in range(i + 1, len(lst)):
            dcDelta = qsr_DC_degree(word2ballDic[lst[i]],
                                    word2ballDic[lst[j]])
            if dcDelta < 0:
                print(lst[j], lst[i], j, i)
                word2ballDic = rotate_vector_till(
                    lst[j],
                    lst[i],
                    word2ballDic=word2ballDic,
                    logFile='word2ball.log')

    if outputPath:
        for child in get_children(tree,
                                  wsChildrenDic=wsChildrenDic,
                                  word2ballDic=word2ballDic):
            create_ball_file(child,
                             outputPath=outputPath,
                             word2ballDic=word2ballDic)

    # Grow the last slot of this ball until qsr_P_degree is non-negative for
    # every child; the extra gap grows tenfold per retry.
    for child in get_children(tree,
                              wsChildrenDic=wsChildrenDic,
                              word2ballDic=word2ballDic):
        gap = 1
        while True:
            delta = qsr_P_degree(word2ballDic[child], word2ballDic[tree])
            if not delta < 0:
                break
            print('delta:', delta)
            word2ballDic[tree][-1] += -delta + gap
            gap *= 10
    if outputPath:
        create_ball_file(tree,
                         outputPath=outputPath,
                         word2ballDic=word2ballDic)
    return word2ballDic
def ratio_homothetic_DC_transform(curTree,
                                  refTree,
                                  wsChildrenDic=None,
                                  word2ballDic=None,
                                  outputPath=None,
                                  logFile=None):
    """
    Rescale the ball of ``curTree`` until ``qsr_DC_degree`` against the ball
    of ``refTree`` becomes positive.

    Step 1 -- compute the ratio:
        curTree: central point P1, l1 = |OP1|, radius r1, k = r1 / l1
        refTree: central point P0, l0 = |OP0|, radius r0
        (r0 + k*x)^2 = l0^2 + x^2 - 2*l0*x*cos(alpha)
        x < (l0 + r0) / (1 - k) on the same line
        While k >= 1 the whole tree of ``curTree`` is first shifted with
        ``shift_whole_tree_of`` so that r/l drops to a target value that
        starts at 0.6 and is reduced by 10% per retry.
    Step 2 -- scale the last two slots of the ball of ``curTree`` by
        ``(margin + l0 + r0) / (l - r)``; on failure the margin is multiplied
        by 10 and the scaling is applied again.

    NOTE: every failed attempt in step 2 raises the precision of the global
    ``decimal`` context by 10 digits, and the increase is not undone.

    :param curTree: key of the ball to move away from ``refTree``
    :param refTree: key of the reference ball (not modified here)
    :param wsChildrenDic: passed through to ``shift_whole_tree_of``
    :param word2ballDic: dict of balls, updated in place; must contain both
        keys
    :param outputPath: when truthy, the ball of ``curTree`` is written with
        ``create_ball_file``
    :param logFile: path of a log file to append to; logging is skipped when
        it is ``None`` or empty
    :return: tuple ``(ratio, word2ballDic)`` where ``ratio`` is the factor
        applied in the last scaling attempt
    """
    # Mutable defaults would be shared between calls; build them per call.
    if wsChildrenDic is None:
        wsChildrenDic = {}
    if word2ballDic is None:
        word2ballDic = {}

    ball1 = word2ballDic[curTree]
    l1, r1 = decimal.Decimal(ball1[-2]), decimal.Decimal(ball1[-1])
    ball0 = word2ballDic[refTree]
    l0, r0 = decimal.Decimal(ball0[-2]), decimal.Decimal(ball0[-1])
    k = r1 / l1
    targetsin0 = 0.6
    while k >= 1:

        print("assertion -1 k=", k)
        L, R = word2ballDic[curTree][-2:]

        print('Shifting...', curTree)
        LNew = R / decimal.Decimal(targetsin0)
        # logFile defaults to None; open(None) would raise TypeError.
        if logFile:
            with open(logFile, 'a+') as wlog:
                wlog.write(
                    " ".join(["shifting", str(curTree)] +
                             [str(ele) for ele in word2ballDic[curTree][:-2]] +
                             [str(LNew - L)]))
                wlog.write("\n")
        word2ballDic = shift_whole_tree_of(curTree,
                                           word2ballDic[curTree][:-2],
                                           LNew - L,
                                           wsChildrenDic=wsChildrenDic,
                                           word2ballDic=word2ballDic,
                                           outputPath=outputPath)
        print('Ended of shifting...', curTree)

        ball1 = word2ballDic[curTree]
        l1, r1 = decimal.Decimal(ball1[-2]), decimal.Decimal(ball1[-1])
        k = r1 / l1
        targetsin0 *= 0.9

    margin = 10
    while True:
        assert word2ballDic[curTree][-2] != np.inf and word2ballDic[curTree][
            -2] >= 0

        ratio = decimal.Decimal(margin + l0 + r0) / decimal.Decimal(
            word2ballDic[curTree][-2] - word2ballDic[curTree][-1])
        length = word2ballDic[curTree][-2]
        word2ballDic[curTree][-2] = length * ratio
        # Algebraically radius * ratio; kept in the original form so the
        # Decimal rounding stays the same.
        word2ballDic[curTree][-1] = length * ratio - (
            length - word2ballDic[curTree][-1]) * ratio
        delta = qsr_DC_degree(word2ballDic[curTree], word2ballDic[refTree])
        if delta > 0:
            break
        # Retry with more digits and a larger margin.
        decimal.getcontext().prec += 10
        margin *= 10

    if outputPath:
        create_ball_file(curTree,
                         outputPath=outputPath,
                         word2ballDic=word2ballDic)
    if logFile:
        with open(logFile, 'a+') as wlog:
            wlog.write(" ".join(["h**o", str(curTree)] + [str(ratio)]))
            wlog.write("\n")
    return ratio, word2ballDic
def shift_whole_tree_of(tree,
                        deltaVec,
                        deltaL,
                        wsChildrenDic=None,
                        word2ballDic=None,
                        outputPath=None):
    """
    Shift the ball of ``tree`` and of all its descendants by ``deltaL`` in
    direction ``deltaVec``.

    Outline::

        for child of tree:
            shift_whole_tree_of(child, deltaVec, deltaL)

        l1, r1 = word2ballDic[tree][-2:]
        l = sqrt(l1*l1 + deltaL*deltaL
                 + 2*l1*deltaL*vec_cos(deltaVec, word2ballDic[tree][:-2]))
        newVec = vec_norm(vec_point(word2ballDic[tree][:-2], l1)
                          + vec_point(deltaVec, deltaL))
        word2ballDic[tree] = list(newVec) + [l, r1]

        for every pair of children (i, j) with qsr_DC_degree < 0:
            rotate_vector_till(child_j, child_i)

        for child of tree:
            enlarge word2ballDic[tree][-1] until qsr_P_degree(child, tree) >= 0

        create_ball_file(tree, outputPath=outputPath)

    :param tree: node whose subtree is shifted
    :param deltaVec: direction of the shift
    :param deltaL: length of the shift
    :param wsChildrenDic: passed through to ``get_children``
    :param word2ballDic: dict of balls, updated in place; a fresh dict is
        created when omitted
    :param outputPath: when truthy, the ball of each visited node is written
        with ``create_ball_file``
    :return: ``word2ballDic``
    """
    # Mutable defaults would be shared between calls; build them per call.
    if wsChildrenDic is None:
        wsChildrenDic = {}
    if word2ballDic is None:
        word2ballDic = {}

    # Post-order: descendants are shifted before the node itself.
    for child in get_children(tree,
                              wsChildrenDic=wsChildrenDic,
                              word2ballDic=word2ballDic):
        word2ballDic = shift_whole_tree_of(child,
                                           deltaVec,
                                           deltaL,
                                           wsChildrenDic=wsChildrenDic,
                                           word2ballDic=word2ballDic,
                                           outputPath=outputPath)

    l1, r1 = word2ballDic[tree][-2:]
    # Law of cosines for the length of the sum of the two vectors.
    new_length = np.sqrt(
        l1 * l1 + deltaL * deltaL +
        2 * l1 * deltaL * vec_cos(deltaVec, word2ballDic[tree][:-2]))
    # NOTE(review): "+" is assumed to add the two points element-wise
    # (i.e. vec_point returns arrays, not lists) -- confirm.
    newVec = vec_norm(
        vec_point(word2ballDic[tree][:-2], l1) + vec_point(deltaVec, deltaL))
    word2ballDic[tree] = list(newVec) + [new_length, r1]

    # Check every unordered pair of children once; a negative degree triggers
    # a rotation of the later child with respect to the earlier one.
    lst = get_children(tree,
                       wsChildrenDic=wsChildrenDic,
                       word2ballDic=word2ballDic)
    for i in range(len(lst) - 1):
        for j in range(i + 1, len(lst)):
            dcDelta = qsr_DC_degree(word2ballDic[lst[i]], word2ballDic[lst[j]])
            if dcDelta < 0:
                print(lst[j], lst[i], j, i)
                word2ballDic = rotate_vector_till(lst[j],
                                                  lst[i],
                                                  word2ballDic=word2ballDic,
                                                  logFile='word2ball.log')

    # Grow the last slot of this ball until qsr_P_degree is non-negative for
    # every child; the extra gap doubles before each enlargement.
    for child in get_children(tree,
                              wsChildrenDic=wsChildrenDic,
                              word2ballDic=word2ballDic):
        gap = 1
        while True:
            delta = qsr_P_degree(word2ballDic[child], word2ballDic[tree])
            if not delta < 0:
                break
            gap *= 2
            word2ballDic[tree][-1] += -delta + gap
    if outputPath:
        create_ball_file(tree,
                         outputPath=outputPath,
                         word2ballDic=word2ballDic)
    return word2ballDic
Exemplo n.º 7
0
def training_P_by_name(childName,
                       atreeName,
                       addDim=None,
                       wsChildrenDic=None,
                       word2vecDic=None,
                       wscatCodeDic=None,
                       word2ballDic=None,
                       sep='.',
                       outputPath="",
                       logFile=None):
    """
    Build the ball of ``atreeName`` such that ``qsr_P_degree`` of the ball of
    ``childName`` with respect to it is non-negative.

    The ball of ``atreeName`` is (re-)initialised with ``initialize_ball``
    using the module-level ``L0`` / ``R0``, then its last two slots
    (``ParentL``, ``ParentR``) are chosen:

    * when both names share the part before the first ``sep``, directly from
      the child's last two slots and the module-level ``cgap``;
    * otherwise from the angle between the two centre vectors and the
      child's ratio ``R / L``.  If the required cosine is not positive, the
      whole tree of ``childName`` is shifted with ``shift_whole_tree_of`` and
      the computation is retried.

    Finally ``ParentR`` is enlarged until ``qsr_P_degree`` is non-negative.

    :param childName: key of an existing ball in ``word2ballDic``
    :param atreeName: key of the ball to create
    :param addDim: extra dimensions passed to ``initialize_ball``
    :param wsChildrenDic: passed through to ``shift_whole_tree_of``
    :param word2vecDic: passed through to ``initialize_ball``
    :param wscatCodeDic: passed through to ``initialize_ball``
    :param word2ballDic: dict of balls, updated in place
    :param sep: separator used to split the names
    :param outputPath: when truthy, balls are written with
        ``create_ball_file``
    :param logFile: path of a log file to append to; logging is skipped when
        it is ``None`` or empty
    :return: tuple ``(ball of atreeName, word2ballDic)``
    """
    # Mutable defaults would be shared between calls; build them per call.
    if addDim is None:
        addDim = []
    if wsChildrenDic is None:
        wsChildrenDic = {}
    if word2vecDic is None:
        word2vecDic = {}
    if wscatCodeDic is None:
        wscatCodeDic = {}
    if word2ballDic is None:
        word2ballDic = {}

    if childName.split(sep)[0] == atreeName.split(sep)[0]:
        BallLeaf = word2ballDic[childName]
        BallParent, word2ballDic = initialize_ball(atreeName,
                                                   addDim=addDim,
                                                   L0=L0,
                                                   R0=R0,
                                                   word2vecDic=word2vecDic,
                                                   wscatCodeDic=wscatCodeDic,
                                                   word2ballDic=word2ballDic,
                                                   outputPath=outputPath)
        ParentO = BallParent[:-2]
        LeafL, LeafR = BallLeaf[-2], BallLeaf[-1]
        ParentL, ParentR = LeafL + LeafR + cgap, LeafR + LeafR + cgap + cgap
        BallParent = ParentO + [ParentL, ParentR]
        word2ballDic.update({atreeName: BallParent})
    else:
        targetsin0 = 0.6
        while True:
            BallLeaf = word2ballDic[childName]
            BallParent, word2ballDic = initialize_ball(
                atreeName,
                addDim=addDim,
                L0=L0,
                R0=R0,
                word2vecDic=word2vecDic,
                wscatCodeDic=wscatCodeDic,
                word2ballDic=word2ballDic,
                outputPath=outputPath)
            LeafO, ParentO = [decimal.Decimal(ele) for ele in BallLeaf[:-2]], \
                             [decimal.Decimal(ele) for ele in BallParent[:-2]]
            LeafL, LeafR = BallLeaf[-2], BallLeaf[-1]
            # beta: angle whose sine is R / L of the child ball.
            sin_beta = BallLeaf[-1] / BallLeaf[-2]

            delta = 1 - sin_beta * sin_beta
            if delta < 0:
                delta = 0
            cos_beta = np.sqrt(delta)
            # alpha: angle between the two centre vectors.
            cos_alpha = np.dot(
                LeafO,
                ParentO) / np.linalg.norm(LeafO) / np.linalg.norm(ParentO)

            delta = 1 - cos_alpha * cos_alpha
            if delta < 0:
                delta = 0
            sin_alpha = np.sqrt(delta)

            # begin alpha --> xalpha
            # Replace alpha by alpha + gamma with sin(gamma) = sin(alpha)/25,
            # using sin(a + g) = sin(g)*cos(a) + cos(g)*sin(a).
            xalpha = sin_alpha / 25
            yalpha = np.sqrt(1 - xalpha * xalpha)
            sin_xalpha = xalpha * cos_alpha + yalpha * sin_alpha
            delta = 1 - sin_xalpha * sin_xalpha
            if delta < 0:
                delta = 0
            cos_xalpha = np.sqrt(delta)

            sin_alpha = sin_xalpha
            cos_alpha = cos_xalpha
            # end

            dOO = LeafL * decimal.Decimal(cos_beta)

            # cos(alpha + beta)
            cos_alpha_beta = (
                decimal.Decimal(cos_beta) * decimal.Decimal(cos_alpha) -
                decimal.Decimal(sin_beta) * decimal.Decimal(sin_alpha))
            if cos_alpha_beta > 0:
                break

            # cos(alpha + beta) <= 0: shift the child's tree so that its
            # R / L becomes targetsin0, then retry with a smaller target.
            L, R = word2ballDic[childName][-2:]
            print('Shifting...', childName)
            LNew = R / decimal.Decimal(targetsin0)
            # logFile defaults to None; open(None) would raise TypeError.
            if logFile:
                with open(logFile, 'a+') as wlog:
                    wlog.write(" ".join(
                        ["shifting", str(childName)] +
                        [str(ele) for ele in word2ballDic[childName][:-2]] +
                        [str(LNew - L)]))
                    wlog.write("\n")
            word2ballDic = shift_whole_tree_of(
                childName,
                word2ballDic[childName][:-2],
                LNew - L,
                wsChildrenDic=wsChildrenDic,
                word2ballDic=word2ballDic,
                outputPath=outputPath)
            checkResult = check_DC_for_sibilings_in_one_family(childName)
            if checkResult:
                print("check_DC_for_sibilings_in_one_family", childName,
                      checkResult)
            targetsin0 *= 0.9

        ParentL = dOO / cos_alpha_beta
        assert ParentL > 0 and ParentL != np.inf

        # ParentL * sin(alpha + beta), plus a small constant margin.
        ParentR = ParentL * (
            decimal.Decimal(sin_alpha) * decimal.Decimal(cos_beta) +
            decimal.Decimal(cos_alpha) *
            decimal.Decimal(sin_beta)) + decimal.Decimal(0.1)
        BallParent = ParentO + [ParentL, ParentR]
        word2ballDic.update({atreeName: BallParent})

    while qsr_P_degree(word2ballDic[childName], word2ballDic[atreeName]) < 0:
        oldParentR, delta = ParentR, 10
        ParentR += decimal.Decimal(2) - qsr_P_degree(word2ballDic[childName],
                                                     word2ballDic[atreeName])
        # If the increment was lost to rounding, force a visible change with
        # ever larger steps.
        while oldParentR == ParentR:
            ParentR += delta
            delta *= 10
        BallParent = ParentO + [ParentL, ParentR]
        word2ballDic.update({atreeName: BallParent})

    if outputPath:
        create_ball_file(atreeName,
                         outputPath=outputPath,
                         word2ballDic=word2ballDic)
    return BallParent, word2ballDic
Exemplo n.º 8
0
def training_DC_by_name(childrenNames,
                        wsChildrenDic=None,
                        word2ballDic=None,
                        outputPath=None,
                        ordered=False,
                        logFile=None):
    """
    Transform the balls of ``childrenNames`` until ``qsr_DC`` holds for every
    pair, then rescale each child's descendants accordingly.

    Every ball is compared with the balls that follow it in the working
    order.  A ball for which ``qsr_DC`` fails is transformed with
    ``ratio_homothetic_DC_transform`` (after a shift of its whole tree when
    its last two slots are equal) and a ``SEPERATE`` entry is appended to
    ``ball_generation_log``.  Afterwards the descendants of each child are
    scaled by the ratio between its new and its original second-to-last slot.

    :param childrenNames: keys of the sibling balls
    :param wsChildrenDic: passed through to the tree helpers
    :param word2ballDic: dict of balls, updated in place
    :param outputPath: when truthy, changed balls are written with
        ``create_ball_file``
    :param ordered: when true, the first pass uses the order of
        ``childrenNames``; otherwise children are sorted by ascending
        ``word2ballDic[name][-2]``.  After each outer pass the list is
        re-sorted by that value regardless of this flag.
    :param logFile: path of a log file to append to; logging is skipped when
        it is ``None`` or empty
    :return: ``word2ballDic``
    """
    # Mutable defaults would be shared between calls; build them per call.
    if wsChildrenDic is None:
        wsChildrenDic = {}
    if word2ballDic is None:
        word2ballDic = {}

    dic = dict()
    for name in childrenNames:
        dic[name] = word2ballDic[name][-2]
    # Original values, needed for the final homothetic ratio.
    dic0 = copy.deepcopy(dic)

    if ordered:
        lst = [(node, word2ballDic[node]) for node in childrenNames]
    else:
        lst = [(item[0], word2ballDic[item[0]])
               for item in sorted(dic.items(), key=operator.itemgetter(1))]

    i = 0

    while i < len(lst) - 1:
        refTreeName = lst[i][0]
        for j in range(i + 1, len(lst)):
            curTreeName = lst[j][0]
            targetsin0 = 0.6
            while not qsr_DC(word2ballDic[curTreeName],
                             word2ballDic[refTreeName]):
                ball1 = word2ballDic[curTreeName]
                l1, r1 = decimal.Decimal(ball1[-2]), decimal.Decimal(ball1[-1])
                k = r1 / l1
                if k == 1:
                    # The tree being shifted is curTreeName.  The previous
                    # code used a stale loop variable here and therefore
                    # shifted/logged the last entry of childrenNames.
                    L, R = word2ballDic[curTreeName][-2:]
                    print('Shifting...', curTreeName)
                    LNew = R / decimal.Decimal(targetsin0)
                    # logFile defaults to None; open(None) would raise.
                    if logFile:
                        with open(logFile, 'a+') as wlog:
                            wlog.write(" ".join(
                                ["shifting", str(curTreeName)] + [
                                    str(ele)
                                    for ele in word2ballDic[curTreeName][:-2]
                                ] + [str(LNew - L)]))
                            wlog.write("\n")
                    word2ballDic = shift_whole_tree_of(
                        curTreeName,
                        word2ballDic[curTreeName][:-2],
                        LNew - L,
                        wsChildrenDic=wsChildrenDic,
                        word2ballDic=word2ballDic,
                        outputPath=outputPath)
                    checkResult = check_DC_for_sibilings_in_one_family(
                        curTreeName)
                    if checkResult:
                        print("check_DC_for_sibilings_in_one_family",
                              curTreeName, checkResult)
                    targetsin0 *= 0.9

                ratio0, word2ballDic = ratio_homothetic_DC_transform(
                    curTreeName,
                    refTreeName,
                    wsChildrenDic=wsChildrenDic,
                    word2ballDic=word2ballDic,
                    outputPath=outputPath,
                    logFile=logFile)

                ball_generation_log.append(
                    Log(key=curTreeName,
                        operation=Operation.SEPERATE,
                        operation_args=[refTreeName],
                        vector=word2ballDic[curTreeName]))

                assert ratio0 != -1

            if outputPath:
                create_ball_file(curTreeName,
                                 outputPath=outputPath,
                                 word2ballDic=word2ballDic)
        # Re-sort by the updated second-to-last slot before the next pass.
        for name in childrenNames:
            dic[name] = word2ballDic[name][-2]
        lst = [(item[0], word2ballDic[item[0]])
               for item in sorted(dic.items(), key=operator.itemgetter(1))]
        i += 1

    # Debugging hook kept from the original code.
    if "herd.n.02" in childrenNames and "gathering.n.01" in childrenNames:
        print('break')

    #####
    # homothetic transformation
    #####
    for child in childrenNames:
        ratio = word2ballDic[child][-2] / decimal.Decimal(dic0[child])
        word2ballDic = homothetic_recursive_transform_of_decendents(
            child,
            root=child,
            rate=ratio,
            wsChildrenDic=wsChildrenDic,
            word2ballDic=word2ballDic,
            outputPath=outputPath)
    return word2ballDic
Exemplo n.º 9
0
def making_ball_contains(root,
                         children,
                         addDim=None,
                         word2vecDic=None,
                         wsChildrenDic=None,
                         wscatCodeDic=None,
                         word2ballDic=None,
                         outputPath=None,
                         logFile=None):
    """
    Build the ball of ``root`` from the balls ``training_P_by_name`` produces
    for each of its ``children``.

    For every child a candidate ball ``pBall`` of ``root`` is computed.  The
    largest ``pBall[-2]`` (``maxL``) and the smallest ``pBall[-2] -
    pBall[-1]`` (``minL_R``) seen so far are tracked, and the ball of
    ``root`` gets ``[maxL, maxL - minL_R + cgap]`` as its last two slots
    (``cgap`` is a module-level value).  If a candidate has ``pBall[-2] -
    pBall[-1] <= 0``, the whole tree of ``root`` is shifted by that amount
    and the pass over the children starts again.

    :param root: key of the ball to build
    :param children: keys of the balls processed for ``root``
    :param addDim: extra dimensions passed to ``training_P_by_name``
    :param word2vecDic: passed through to ``training_P_by_name``
    :param wsChildrenDic: passed through to the tree helpers
    :param wscatCodeDic: passed through to ``training_P_by_name``
    :param word2ballDic: dict of balls, updated in place; a fresh dict is
        created when omitted
    :param outputPath: when truthy, balls are written with
        ``create_ball_file``
    :param logFile: path of a log file to append to; logging is skipped when
        it is ``None`` or empty
    :return: ``word2ballDic``
    """
    # Mutable defaults would be shared between calls; build them per call.
    if addDim is None:
        addDim = []
    if word2vecDic is None:
        word2vecDic = {}
    if wsChildrenDic is None:
        wsChildrenDic = {}
    if wscatCodeDic is None:
        wscatCodeDic = {}
    if word2ballDic is None:
        word2ballDic = {}

    maxL = -1  # sentinel: no candidate ball seen yet
    flag = False
    while not flag:
        flag = True
        for childName in children:
            # todo P == parent?
            pBall, word2ballDic = training_P_by_name(
                childName,
                root,
                addDim=addDim,
                wsChildrenDic=wsChildrenDic,
                word2vecDic=word2vecDic,
                wscatCodeDic=wscatCodeDic,
                word2ballDic=word2ballDic,
                outputPath=outputPath,
                logFile=logFile)
            assert pBall != -1
            delta = decimal.Decimal(pBall[-2]) - decimal.Decimal(pBall[-1])
            if maxL == -1:  # initialize maxL, minL_R
                maxL, minL_R = pBall[-2], delta
            if maxL < pBall[-2]:
                maxL = pBall[-2]
            if delta <= 0:
                print('Shifting...mbc', root)
                # logFile defaults to None; open(None) would raise TypeError.
                if logFile:
                    with open(logFile, 'a+') as wlog:
                        wlog.write(
                            " ".join(["shifting", str(root)] +
                                     [str(ele)
                                      for ele in word2ballDic[root][:-2]] +
                                     [str(-delta)]))
                        wlog.write("\n")
                word2ballDic = shift_whole_tree_of(root,
                                                   word2ballDic[root][:-2],
                                                   -delta,
                                                   wsChildrenDic=wsChildrenDic,
                                                   word2ballDic=word2ballDic,
                                                   outputPath=outputPath)
                # Start over with the first child after the shift.
                flag = False
                break
            elif delta < minL_R:
                minL_R = delta

            word2ballDic[root] = word2ballDic[root][:-2] + [
                maxL, maxL - minL_R + cgap
            ]
            if outputPath:
                create_ball_file(root,
                                 outputPath=outputPath,
                                 word2ballDic=word2ballDic)
    return word2ballDic