예제 #1
0
def test_save_and_load():
    """Check that a Zipf written with ``save`` equals the one read back by ``load``.

    The scratch file is created in the current working directory and is
    removed again whether or not the round trip succeeds.
    """
    path = "save_and_load_test.json"
    z1 = Zipf({"one": 0.2, "two": 0.25, "three": 0.6})
    try:
        z1.save(path)
        z2 = Zipf.load(path)
    finally:
        # Clean up even when save/load raises, so a failing run does not
        # leave the scratch file behind. The existence check covers the
        # case where ``save`` failed before creating the file.
        if os.path.exists(path):
            os.remove(path)
    assert z1 == z2
예제 #2
0
def _plot_pareto_pdf(ax, alpha, xmin, degrees, max_degree, hist=False):
    """Draw a scaled Zipf curve on ``ax`` for the integers ``xmin .. max_degree - 1``.

    A ``Zipf(alpha, max_degree, xmin)`` distribution is evaluated at every
    integer of the range.  Each value is multiplied by the number of entries
    in ``degrees`` that are at least ``xmin`` and divided by the sum of the
    pdf values over the range.

    Args:
        ax: Object exposing ``plot``; the curve is drawn on it.
        alpha: First argument handed to ``Zipf``.
        xmin: Lower bound of the plotted range and threshold for counting.
        degrees: Iterable of observed degrees.
        max_degree: Exclusive upper bound of the plotted range.
        hist: When falsy the pdf values are plotted as a line; otherwise the
            cdf values are plotted with the ``"v"`` format, shifted left by 0.5.

    Returns:
        None.
    """
    dist = Zipf(alpha, max_degree, xmin)

    # Number of observations that fall at or above the threshold.
    tail_count = sum(1 for degree in degrees if degree >= xmin)

    support = range(xmin, max_degree)
    values = []
    mass = 0
    for k in support:
        density = dist.pdf(k)
        values.append(dist.cdf(k) if hist else density)
        mass += density

    scaled = [value * tail_count / mass for value in values]

    if hist:
        ax.plot([k - 0.5 for k in support], scaled, "v")
    else:
        ax.plot(support, scaled)
예제 #3
0
def test_sort():
    """``sort`` should give a Zipf equal to one built from 0.6, 0.25, 0.2 in that order."""
    unsorted = Zipf({"one": 0.2, "two": 0.25, "three": 0.6})
    expected = Zipf({"three": 0.6, "two": 0.25, "one": 0.2})

    assert expected == unsorted.sort()
예제 #4
0
def test_mean():
    """``mean`` over the values 0.2, 0.25 and 0.6 is expected to equal 0.35."""
    frequencies = {"one": 0.2, "two": 0.25, "three": 0.6}
    observed = Zipf(frequencies).mean()

    assert observed == 0.35
예제 #5
0
def test_neg():
    """Negating twice should restore the Zipf; negating once should flip every value."""
    original = Zipf({"one": 1, "two": 0.5})
    flipped_expected = Zipf({"two": -0.5, "one": -1})

    double_negated = (-(-original)).sort()
    negated = (-original).sort()

    assert (original, flipped_expected) == (double_negated, negated)
예제 #6
0
def test_mul():
    """Exercise ``*`` with a number and with other Zipf objects."""
    base = Zipf({"one": 0.75, "two": 0.5})
    all_ones = Zipf({"one": 1.0, "two": 1.0})
    only_one = Zipf({"one": 1.0})

    expected = (
        Zipf({"one": 1.5, "two": 1.0}),
        base,
        Zipf({"one": 0.75}),
    )
    actual = (
        base * 2,
        (base * all_ones * all_ones * all_ones).sort(),
        (base * only_one * only_one * only_one).sort(),
    )

    assert expected == actual
예제 #7
0
def test_str():
    """``str`` should give the indented JSON-like text and match ``repr``."""
    single = Zipf({"g": 1})

    rendered = (str(single), str(Zipf()), str(single), str(Zipf()))
    wanted = ('{\n  "g": 1\n}', '{}', repr(single), repr(Zipf()))

    assert rendered == wanted
예제 #8
0
def test_ksequence_factory():
    """Check ``ZipfFromKSequence`` against a stored expected result.

    Two things are verified and collected into one error list:

    * constructing the factory with a negative ``k`` raises;
    * running a factory built with ``k = 5`` on the text in
      ``factory_utils/sequence/sequence.txt`` gives, after rounding, the Zipf
      stored in ``factory_utils/expected_results/sequence.json``.
    """
    errors = []

    try:
        ZipfFromKSequence(-3)
    except Exception:
        # The concrete exception type raised for a negative k is not visible
        # from this test, so the broad catch is kept deliberately.
        pass
    else:
        errors.append("ZipfFromKSequence should fail with k less than zero")

    k = 5

    factory = ZipfFromKSequence(k)

    # os.path.join keeps the path relative when dirname(__file__) is empty;
    # plain string concatenation would turn it into "/factory_utils".
    base_dir = os.path.join(os.path.dirname(__file__), "factory_utils")
    expected_path = os.path.join(base_dir, "expected_results", "sequence.json")
    sequence_path = os.path.join(base_dir, "sequence", "sequence.txt")

    zipf = Zipf.load(expected_path).sort().round()

    with open(sequence_path, "r") as f:
        sequence = f.read()

    factory_run = factory.run(sequence).round()

    if factory_run != zipf:
        errors.append(
            "Sequence zipf run is different than expected: %s != %s" % (zipf, factory_run))

    assert not errors, "errors occurred:\n{}".format("\n".join(errors))
예제 #9
0
def test_check_empty():
    """``check_empty`` must raise ValueError for an empty Zipf and not for a populated one."""
    populated = Zipf({"one": 0.2, "two": 0.25, "three": 0.6})

    try:
        Zipf().check_empty()
    except ValueError:
        empty_rejected = True
    else:
        empty_rejected = False

    try:
        populated.check_empty()
    except ValueError:
        populated_rejected = True
    else:
        populated_rejected = False

    assert empty_rejected and not populated_rejected
예제 #10
0
def get_data(q, path, test_size=0.3, random_state=42):
    """Load data from ``path``, run it through ``Zipf`` and split it for training.

    The frame returned by ``getData(path)`` is fitted, transformed and
    filtered by language through a ``Zipf(q=q)`` object.  Its ``'Text'`` and
    ``'Category'`` items are then passed to ``train_test_split``.

    Args:
        q: Forwarded to ``Zipf`` as the ``q`` keyword.
        path: Forwarded to ``getData``.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.

    Returns:
        ``(X_train, X_test, y_train, y_test, dataframe)`` where ``dataframe``
        is the ``dataframe`` attribute of the ``Zipf`` object.
    """
    raw_frame = getData(path)

    model = Zipf(q=q)
    model.fit(raw_frame)
    model.transform()
    model.filter_by_language()

    texts = model['Text']
    categories = model['Category']
    X_train, X_test, y_train, y_test = train_test_split(
        texts, categories, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test, model.dataframe
예제 #11
0
def test_truediv():
    """Exercise ``/``: dividing by zero must raise ValueError; other cases are compared."""
    base = Zipf({"one": 1, "two": 0.5})

    try:
        _ = base / 0
    except ValueError:
        pass
    else:
        # Reaching this point means the division by zero was accepted.
        assert False

    all_ones = Zipf({"one": 1, "two": 1})
    only_one = Zipf({"one": 1})

    expected = (
        Zipf({"one": 0.5, "two": 0.25}),
        base,
        Zipf({"one": 1}),
    )
    actual = (
        base / 2,
        (base / all_ones).sort(),
        (base / only_one).sort(),
    )

    assert expected == actual
예제 #12
0
def fail_empty(lamb):
    """Call ``lamb`` with an empty Zipf and report whether it raised ValueError.

    Args:
        lamb: Callable taking a single Zipf argument.

    Returns:
        An empty list when ``lamb`` raised ValueError, otherwise a list
        holding one error message.  Other exceptions propagate.
    """
    try:
        lamb(Zipf())
    except ValueError:
        return []
    return ["Set has not raised exception with empty Zipf"]
예제 #13
0
def _fail_types(lamb, tests):
    """Call ``lamb`` with an empty Zipf and each value in ``tests``, expecting ValueError.

    Args:
        lamb: Callable taking ``(zipf, value)``.
        tests: Iterable of values that ``lamb`` is expected to reject.

    Returns:
        A list with one message per value for which ``lamb`` did not raise
        ValueError; empty when every value was rejected.  Exceptions other
        than ValueError propagate.
    """
    z = Zipf()
    errors = []
    for test in tests:
        try:
            lamb(z, test)
        except ValueError:
            pass
        else:
            # The original message referenced the undefined name ``fail_test``,
            # which raised NameError instead of recording the failure.  The
            # value is wrapped in a tuple so tuple inputs format correctly.
            errors.append("Set has not raised exception with value %s" %
                          (test,))
    return errors
예제 #14
0
def test_normalize():
    """``normalize`` should turn 3:1 into 0.75:0.25 and leave a single value of 1 unchanged."""
    uneven = Zipf({"one": 3, "two": 1})
    unit = Zipf({"one": 1})

    actual = (uneven.normalize(), unit.normalize())
    expected = (Zipf({"one": 0.75, "two": 0.25}), unit)

    assert actual == expected
예제 #15
0
def _bipartite_op_stats_flat(g, output=None):
    """Log summary statistics of graph ``g`` and plot its degree distributions.

    Logged through ``info``: vertex and edge counts, highest degree,
    connectedness, number of connected components, size of the largest
    component, and the value of ``Zipf.mle`` over all degrees.

    Args:
        g: Graph object providing ``vcount``, ``ecount``, ``degree``,
            ``maxdegree``, ``is_connected`` and ``clusters``.
        output: When truthy, used as the suffix of two figure file names
            (``degree-hist-<output>`` and ``degree-dist-loglog-<output>``)
            that are saved.  Otherwise both plots are shown side by side in
            one figure.

    Returns:
        The graph ``g`` that was passed in.
    """
    # Vertex/edge count
    info("  Number of vertices: %d" % (g.vcount()))
    info("  Number of edges: %d" % (g.ecount()))
    degrees = g.degree()
    max_degree = g.maxdegree()
    info("  Highest degree: %d" % (max_degree))

    # Connectedness
    info("  Graph is connected" if g.is_connected() else "  Graph is not connected")
    components = g.clusters()
    info("  Number of connected components = %d" % (len(components)))
    largest = components.giant()
    info("  Size of largest component = %d" % (largest.vcount()))

    # Power law exponent estimator
    alpha_estimate = Zipf.mle(degrees)
    info("  Maximum Likelihood Estimator:")
    info("    alpha (xm=1): %.20f" % (alpha_estimate))

    # Node degree distributions
    if output:
        # One figure per plot, each saved under its own file name.
        panels = (
            (_plot_degree_hist, "degree-hist-%s"),
            (_plot_degree_dist_loglog, "degree-dist-loglog-%s"),
        )
        for draw, pattern in panels:
            fig = plt.figure()
            ax = plt.subplot(1, 1, 1)
            draw(ax, degrees, max_degree)
            filename = pattern % (output)
            info('  Create "%s"' % (filename))
            fig.savefig(filename)
    else:
        # Both plots share one figure that is displayed interactively.
        plt.figure()
        left_ax = plt.subplot(1, 2, 1)
        _plot_degree_hist(left_ax, degrees, max_degree)
        right_ax = plt.subplot(1, 2, 2)
        _plot_degree_dist_loglog(right_ax, degrees, max_degree)
        plt.show()

    return g
예제 #16
0
def test_add():
    """Exercise ``+`` between Zipf objects, including adding a negation."""
    base = Zipf({"one": 0.5, "two": 0.4})
    partial_overlap = Zipf({"one": 0.25, "four": 0.25})
    disjoint = Zipf({"three": 1})

    expected = (
        Zipf({"one": 1, "two": 0.8}),
        Zipf(),
        Zipf({"one": 0.75, "two": 0.4, "four": 0.25}),
        Zipf({"three": 1, "one": 0.75, "two": 0.4, "four": 0.25}),
    )
    actual = (
        (base + base).sort(),
        (base + (-base)).sort(),
        (base + partial_overlap).sort(),
        (base + partial_overlap + disjoint).sort(),
    )

    assert expected == actual
예제 #17
0
def factory_fails(Factory, path, prepare=None, run=None):
    if prepare is None:
        prepare = Factory
    if run is None:

        def run(factory, data):
            return factory.run(data)

    current_path = os.path.dirname(__file__)
    errors = factory_break_options(Factory)
    global _options_for_tests
    tests = ["default", "empty"] + list(_options_for_tests.keys())
    for test in tests:
        data_path_name = map_test_to_data(test)
        data_path_json = os.path.join(current_path,
                                      "%s/%s.json" % (path, data_path_name))
        data_path_text = os.path.join(current_path,
                                      "%s/%s.txt" % (path, data_path_name))
        result_path = os.path.join(current_path,
                                   "expected_results/%s.json" % test)
        if os.path.isfile(data_path_json):
            with open(data_path_json, "r") as f:
                data = json.load(f)
        elif os.path.isfile(data_path_text):
            with open(data_path_text, "r") as f:
                data = f.read()
        else:
            pytest.skip(
                "While testing %s, data for testing '%s' was not found in %s or %s."
                % (Factory.__name__, test, data_path_text, data_path_json))
        if not os.path.isfile(result_path):
            pytest.skip(
                "While testing %s, result for testing '%s' was not found in %s."
                % (Factory.__name__, test, result_path))
        result = Zipf.load(result_path).sort().round()
        factory = prepare(options=_get_options_for(test))

        factory_run = run(factory, data).sort().round()
        if result != factory_run:
            errors.append(
                "%s has not expected result on run test '%s': %s != %s" %
                (Factory.__name__, test, result, factory_run))
    return errors
예제 #18
0
def bipartite_op_gen_zipf(g, params):
    """Build a bipartite graph whose two levels follow Zipf degree distributions.

    Args:
        g: Not used by this function.
        params: Sequence of eight values, in order
            ``alpha2, N2, xmin2, n2, alpha3, N3, xmin3, n3``.  The alphas are
            converted with ``float`` and the rest with ``int``.  A value of
            ``n2`` or ``n3`` that is ``<= 0`` means "undefined"; it is then
            derived from the other one so that the expected total degrees of
            the two levels match.

    Returns:
        The result of ``bipartite_cm`` applied to the two degree sequences
        produced by ``random_matching_degrees``.

    Note:
        ``error`` is called when ``params`` does not have eight entries or
        when both ``n2`` and ``n3`` are undefined.  Whether ``error`` aborts
        is not visible from this function.
    """
    if len(params) != 8:
        error('invalid number of arguments for "genzipf"')

    zipf_alpha2 = float(params[0])
    zipf_N2 = int(params[1])
    zipf_xmin2 = int(params[2])
    n2 = int(params[3])
    zipf_alpha3 = float(params[4])
    zipf_N3 = int(params[5])
    zipf_xmin3 = int(params[6])
    n3 = int(params[7])

    z2 = Zipf(zipf_alpha2, zipf_N2, zipf_xmin2)
    z3 = Zipf(zipf_alpha3, zipf_N3, zipf_xmin3)

    z2_exp = z2.expectation()
    z3_exp = z3.expectation()
    info("  L2 distrib expectation = %.20f" % (z2_exp))
    info("  L3 distrib expectation = %.20f" % (z3_exp))

    # Only the "both undefined" case is an error.  Previously the final
    # ``else`` also caught the case where both sizes were supplied and
    # reported them as undefined.
    if (n2 <= 0) and (n3 <= 0):
        error("  n2 and n3 cannot both be undefined")
    elif n2 <= 0:
        n2 = int(round((n3 * z3_exp) / z2_exp))
        info("  n2 auto-computed = %d" % (n2))
    elif n3 <= 0:
        n3 = int(round((n2 * z2_exp) / z3_exp))
        info("  n3 auto-computed = %d" % (n3))

    info("  a2=%f, N2=%d, xmin2=%d, n2=%d" % (zipf_alpha2, zipf_N2, zipf_xmin2, n2))
    info("  a3=%f, N3=%d, xmin3=%d, n3=%d" % (zipf_alpha3, zipf_N3, zipf_xmin3, n3))

    # Check expected number of edges at level 2
    # against expected number of edges at level 3
    l2_exp = z2_exp * n2
    l3_exp = z3_exp * n3
    info("  Expected total L2 degree = %.20f" % (l2_exp))
    info("  Expected total L3 degree = %.20f" % (l3_exp))

    (degrees2, degrees3) = random_matching_degrees(z2, n2, z3, n3)

    log(1, "  MLE L2 = %f" % (Zipf.mle(degrees2, zipf_xmin2)))
    log(1, "  MLE L3 = %f" % (Zipf.mle(degrees3, zipf_xmin3)))

    return bipartite_cm(degrees2, degrees3)
예제 #19
0
def test_sub():
    """Exercise ``-`` between Zipf objects, including subtracting a Zipf from itself."""
    base = Zipf({"one": 0.5, "two": 0.5})
    partial_overlap = Zipf({"one": 0.25, "four": 0.25})
    disjoint = Zipf({"three": 1})

    expected = (
        Zipf(),
        Zipf({"two": 0.5, "one": 0.25, "four": -0.25}),
        Zipf({"two": 0.5, "one": 0.25, "four": -0.25, "three": -1}),
    )
    actual = (
        base - base,
        (base - partial_overlap).sort(),
        (base - partial_overlap - disjoint).sort(),
    )

    assert expected == actual
예제 #20
0
def test_var():
    """``var`` over the values 0.2, 0.25 and 0.6 is expected to equal 0.03166666666667."""
    frequencies = {"one": 0.2, "two": 0.25, "three": 0.6}
    observed = Zipf(frequencies).var()

    assert observed == 0.03166666666667
예제 #21
0
def _bipartite_op_stats_bipartite(g, output=None):
    """Log statistics for a two-level graph and plot its L2/L3 degree distributions.

    Vertices whose ``type`` attribute is falsy are treated as L2, the others
    as L3.  Logged through ``info``: vertex counts per level, edge count,
    highest degree per level, connectedness, component statistics, and the
    estimated exponents (L2 for ``xmin`` in 1..3, L3 via ``Zipf.mle``).
    L3 vertices of degree zero are left out of the L3 degree sample.

    Args:
        g: Graph object providing ``vcount``, ``ecount``, ``vs``, ``degree``,
            ``maxdegree``, ``is_connected`` and ``clusters``.
        output: When truthy, used as the suffix of four figure file names
            that are saved (histogram and log-log plot for each level).
            Otherwise the four plots are shown in one 2x2 figure.

    Returns:
        The graph ``g`` that was passed in.
    """
    # Vertex/edge count: split the vertex indices by level.
    l2_vertices, l3_vertices = [], []
    for idx in range(g.vcount()):
        (l3_vertices if g.vs["type"][idx] else l2_vertices).append(idx)
    info("  Number of L2 vertices: %d" % (len(l2_vertices)))
    info("  Number of L3 vertices: %d" % (len(l3_vertices)))
    info("  Number of edges: %d" % (g.ecount()))

    l2_degrees = g.degree(l2_vertices)
    l3_degrees = [deg for deg in g.degree(l3_vertices) if deg > 0]

    max_l2_degree = g.maxdegree(l2_vertices)
    max_l3_degree = g.maxdegree(l3_vertices)
    max_degree = max_l2_degree if max_l2_degree > max_l3_degree else max_l3_degree
    info("  Highest L2 degree: %d" % (max_l2_degree))
    info("  Highest L3 degree: %d" % (max_l3_degree))

    # The histogram results are not used further in this function; the calls
    # are retained so behaviour (including any error they raise) is unchanged.
    hist_range = (0, max_degree)
    l2_degrees_hist = numpy.histogram(l2_degrees, bins=max_degree, range=hist_range)
    l3_degrees_hist = numpy.histogram(l3_degrees, bins=max_degree, range=hist_range)

    # Connectedness
    info("  Graph is connected" if g.is_connected() else "  Graph is not connected")
    components = g.clusters()
    info("  Number of connected components = %d" % (len(components)))
    largest = components.giant()
    info("  Size of largest component = %d" % (largest.vcount()))

    # L2/L3 power law exponent estimators
    info("  Maximum Likelihood Estimator:")
    l2_xmin_range = range(1, 4)
    l2_mle = _estimate_pareto_params(l2_degrees, l2_xmin_range)
    for xmin in l2_xmin_range:
        info("    alpha (L2, xm=%d): %.20f" % (xmin, l2_mle[xmin]))
    l3_mle = Zipf.mle(l3_degrees)
    info("    alpha (L3, xm=1): %.20f" % (l3_mle))

    def overlay_l2_fits(ax):
        # One fitted curve per candidate xmin.
        for xmin in l2_xmin_range:
            _plot_pareto_pdf(ax, l2_mle[xmin], xmin, l2_degrees, max_degree)

    def overlay_l3_fit(ax):
        _plot_pareto_pdf(ax, l3_mle, 1, l3_degrees, max_degree)

    # L2/L3 degree distributions
    if output:
        # One saved figure per (level, plot kind), each with its fitted curves.
        panels = (
            ("l2-degree-hist-%s", _plot_degree_hist, l2_degrees, overlay_l2_fits),
            ("l2-degree-dist-loglog-%s", _plot_degree_dist_loglog, l2_degrees, overlay_l2_fits),
            ("l3-degree-hist-%s", _plot_degree_hist, l3_degrees, overlay_l3_fit),
            ("l3-degree-dist-loglog-%s", _plot_degree_dist_loglog, l3_degrees, overlay_l3_fit),
        )
        for pattern, draw, sample, overlay in panels:
            fig = plt.figure()
            ax = plt.subplot(1, 1, 1)
            draw(ax, sample, max_degree)
            overlay(ax)
            filename = pattern % (output)
            info('  Create "%s"' % (filename))
            fig.savefig(filename)
    else:
        # Single 2x2 figure; only the log-log panels get the fitted curves.
        plt.figure()
        ax = plt.subplot(2, 2, 1)
        _plot_degree_hist(ax, l2_degrees, max_degree)
        ax = plt.subplot(2, 2, 2)
        _plot_degree_dist_loglog(ax, l2_degrees, max_degree)
        overlay_l2_fits(ax)

        ax = plt.subplot(2, 2, 3)
        _plot_degree_hist(ax, l3_degrees, max_degree)
        ax = plt.subplot(2, 2, 4)
        _plot_degree_dist_loglog(ax, l3_degrees, max_degree)
        overlay_l3_fit(ax)
        plt.show()

    return g
예제 #22
0
def test_remap():
    """``remap`` should keep the source values for the keys of the remapper, in its key order."""
    source = Zipf({"one": 0.2, "two": 0.25, "three": 0.6})
    remapper = Zipf({"three": 0.7, "one": 0.2})
    expected = Zipf({"three": 0.6, "one": 0.2})

    assert expected == source.remap(remapper)
예제 #23
0
def test_setitem():
    """Item assignment on an empty Zipf should match building it from a dict."""
    assigned = Zipf()
    assigned["my_key"] = 0.5
    built = Zipf({"my_key": 0.5})

    assert assigned == built
예제 #24
0
def test_getitem():
    """Item access should return stored values and 0 for a key that was never set."""
    z = Zipf({"g": 1})

    looked_up = (z.__getitem__("g"), z["no_key"], z.__getitem__("g"))
    wanted = (1, 0, z["g"])

    assert looked_up == wanted
예제 #25
0
def test_missing():
    """``__missing__`` on an empty Zipf is expected to return 0."""
    fallback = Zipf().__missing__("my_missing_key")

    assert fallback == 0
예제 #26
0
def test_min():
    """``min`` is expected to return the key holding the smallest value."""
    frequencies = {"one": 0.2, "two": 0.25, "three": 0.6}
    smallest_key = Zipf(frequencies).min()

    assert smallest_key == "one"
예제 #27
0
def _estimate_pareto_params(degrees, beta_range):
    """Return a dict mapping each value of ``beta_range`` to ``Zipf.mle(degrees, value)``.

    Args:
        degrees: Degree sample handed unchanged to ``Zipf.mle``.
        beta_range: Iterable of values passed as the second ``mle`` argument.
    """
    return {beta: Zipf.mle(degrees, beta) for beta in beta_range}
예제 #28
0
def test_cut():
    """Exercise ``cut`` with one and two bounds.

    In the expectations below, an entry is kept when its value is strictly
    above the first bound and, when a second bound is given, not above it.
    """
    z = Zipf({"one": 0.2, "two": 0.25, "three": 0.6})

    actual = (
        z.cut(0.2),
        z.cut(0.25),
        z.cut(0.6),
        z.cut(0, 0.6),
        z.cut(0.2, 0.6),
        z.cut(0.25, 0.6),
        z.cut(0.2, 0.25),
    )
    expected = (
        Zipf({"two": 0.25, "three": 0.6}),
        Zipf({"three": 0.6}),
        Zipf(),
        z,
        Zipf({"two": 0.25, "three": 0.6}),
        Zipf({"three": 0.6}),
        Zipf({"two": 0.25}),
    )

    assert actual == expected