示例#1
0
 def test_simple(self):
     """Check ``blaze.abs`` element by element on complex, inf and nan input."""
     values = blaze.array([1+1j, 0+2j, 1+2j, blaze.inf, blaze.nan])
     expected = blaze.array([blaze.sqrt(2.), 2, blaze.sqrt(5),
                             blaze.inf, blaze.nan])
     magnitudes = blaze.abs(values)
     # Compare position by position rather than whole arrays at once.
     count = len(values)
     for position in range(count):
         assert_almost_equal(magnitudes[position], expected[position])
示例#2
0
文件: viz.py 项目: kcompher/topik
    def plot(self, output_file="termite.html"):
        """Build the termite plot for ``self.input_file`` and show it.

        The data loaded from ``self.input_file`` is accessed through its
        ``word``, ``topic`` and ``weight`` columns. Topics go on the x axis,
        words on the y axis, and each circle is sized from the weight.

        Parameters
        ----------
        output_file : str
            Path passed to ``plt.output_file`` before the figure is shown.
            Defaults to ``"termite.html"``.
        """
        t = blz.Data(self.input_file)

        max_weight = blz.compute(t.weight.max())
        min_weight = blz.compute(t.weight.min())

        # Circle size: min-max normalised weight, square-rooted, scaled by 50.
        # NOTE(review): when all weights are equal the denominator is zero;
        # confirm how the backend handles that case.
        normalised = (t.weight - min_weight) / (max_weight - min_weight)
        t = blz.transform(t, size=blz.sqrt(normalised) * 50)

        words = into(list, t['word'].distinct())
        topics = into(list, t['topic'].distinct())
        # The x range is built from strings, so topics are converted here.
        topic_labels = [str(topic) for topic in topics]

        source = into(pd.DataFrame, t)

        plt.output_file(output_file)

        data_source = ColumnDataSource(source)

        p = plt.figure(x_range=topic_labels, y_range=words,
                       plot_width=1000, plot_height=1700,
                       title=self.title)

        p.circle(x="topic", y="word", size="size", fill_alpha=0.6,
                 source=data_source)
        # Let logging interpolate the argument instead of formatting eagerly.
        logging.info("generating termite plot for file %s", self.input_file)
        plt.show(p)
示例#3
0
文件: viz.py 项目: kwinkunks/topik
    def plot(self, output_file="termite.html"):
        """Draw the termite plot for ``self.input_file`` and show it.

        Topics are placed on the x axis and words on the y axis; each circle
        is sized from the ``weight`` column. ``output_file`` is passed to
        bokeh's ``output_file`` before the figure is shown.
        """
        import blaze as blz
        from odo import into
        import pandas as pd
        import bokeh.plotting as plt
        from bokeh.models.sources import ColumnDataSource

        table = blz.Data(self.input_file)

        heaviest = blz.compute(table.weight.max())
        lightest = blz.compute(table.weight.min())

        # Circle size: min-max normalised weight, square-rooted, times 50.
        radius = blz.sqrt((table.weight - lightest)/(heaviest - lightest))*50
        table = blz.transform(table, size=radius)

        word_labels = into(list, table['word'].distinct())
        raw_topics = into(list, table['topic'].distinct())
        # Topics are turned into strings before being used as the x range.
        topic_labels = [str(topic) for topic in raw_topics]

        frame = into(pd.DataFrame, table)

        plt.output_file(output_file)

        circles_source = ColumnDataSource(frame)

        fig = plt.figure(
            x_range=topic_labels,
            y_range=word_labels,
            plot_width=1000,
            plot_height=1700,
            title=self.title,
        )

        fig.circle(
            x="topic",
            y="word",
            size="size",
            fill_alpha=0.6,
            source=circles_source,
        )
        plt.show(fig)
示例#4
0
    def plot(self):
        """Build the termite plot for ``self.input_file`` as embeddable parts.

        The data loaded from ``self.input_file`` is accessed through its
        ``word``, ``topic`` and ``weight`` columns. Topics go on the x axis,
        words on the y axis, and each circle is sized from the weight.

        Returns
        -------
        tuple
            The ``(script, div)`` pair produced by ``components(p, CDN)``.
        """
        t = blz.Data(self.input_file)

        max_weight = blz.compute(t.weight.max())
        min_weight = blz.compute(t.weight.min())

        # Circle size: min-max normalised weight, square-rooted, scaled by 50.
        # NOTE(review): when all weights are equal the denominator is zero;
        # confirm how the backend handles that case.
        normalised = (t.weight - min_weight) / (max_weight - min_weight)
        t = blz.transform(t, size=blz.sqrt(normalised) * 50)

        words = into(list, t['word'].distinct())
        topics = into(list, t['topic'].distinct())
        # The x range is built from strings, so topics are converted here.
        topic_labels = [str(topic) for topic in topics]

        source = into(pd.DataFrame, t)

        data_source = ColumnDataSource(source)

        p = plt.figure(x_range=topic_labels, y_range=words,
                       plot_width=1000, plot_height=1700, title=None)

        p.circle(x="topic", y="word", size="size", fill_alpha=0.6,
                 source=data_source)
        # Let logging interpolate the argument instead of formatting eagerly.
        logging.info("generating termite plot for file %s", self.input_file)

        script, div = components(p, CDN)

        return script, div
示例#5
0
def termite(modeled_corpus, plot_title="Termite plot", topn=15):
    """A Bokeh Termite Visualization for LDA results analysis.

    Parameters
    ----------
    modeled_corpus : object
        Model output passed unchanged to ``_termite_data``. The data that
        helper returns is accessed through its "word", "topic" and "weight"
        columns.
        NOTE(review): the accepted type is defined by ``_termite_data`` -
        confirm against that helper.
    plot_title : str
        The title for your termite plot. Defaults to "Termite plot".
    topn : int
        Forwarded to ``_termite_data``; presumably the number of top words
        kept per topic - confirm against that helper. Defaults to 15.

    Returns
    -------
    The figure created by ``plt.figure`` with the circle glyphs added.
    The caller is responsible for showing or saving it.

    Examples
    --------
    >>> plot = termite(test_model_output, plot_title="My model results", topn=5)

    """
    prepared_model_vis_data = _termite_data(modeled_corpus, topn)

    t = blz.Data(prepared_model_vis_data)

    MAX = blz.compute(t.weight.max())
    MIN = blz.compute(t.weight.min())

    # Circle size: min-max normalised weight, square-rooted, scaled by 50.
    # NOTE(review): when all weights are equal the denominator is zero;
    # confirm how the backend handles that case.
    t = blz.transform(t, size=blz.sqrt((t.weight - MIN) / (MAX - MIN)) * 50)

    WORDS = t['word'].distinct()
    WORDS = into(list, WORDS)
    topics = t['topic'].distinct()
    topics = into(list, topics)
    # The x range is built from strings, so topics are converted here.
    TOPICS = [str(i) for i in topics]

    source = into(pd.DataFrame, t)

    data_source = sources.ColumnDataSource(source)

    p = plt.figure(x_range=TOPICS,
                   y_range=WORDS,
                   plot_width=1000,
                   plot_height=1700,
                   title=plot_title)

    p.circle(x="topic",
             y="word",
             size="size",
             fill_alpha=0.6,
             source=data_source)
    return p
示例#6
0
def termite(modeled_corpus, plot_title="Termite plot", topn=15):
    """A Bokeh Termite Visualization for LDA results analysis.

    Parameters
    ----------
    modeled_corpus : object
        Model output passed unchanged to ``_termite_data``. The data that
        helper returns is accessed through its "word", "topic" and "weight"
        columns.
        NOTE(review): the accepted type is defined by ``_termite_data`` -
        confirm against that helper.
    plot_title : str
        The title for your termite plot. Defaults to "Termite plot".
    topn : int
        Forwarded to ``_termite_data``; presumably the number of top words
        kept per topic - confirm against that helper. Defaults to 15.

    Returns
    -------
    The figure created by ``plt.figure`` with the circle glyphs added.
    The caller is responsible for showing or saving it.

    Examples
    --------
    >>> plot = termite(test_model_output, plot_title="My model results", topn=5)

    """
    prepared_model_vis_data = _termite_data(modeled_corpus, topn)

    t = blz.Data(prepared_model_vis_data)

    MAX = blz.compute(t.weight.max())
    MIN = blz.compute(t.weight.min())

    # Circle size: min-max normalised weight, square-rooted, scaled by 50.
    # NOTE(review): when all weights are equal the denominator is zero;
    # confirm how the backend handles that case.
    t = blz.transform(t, size=blz.sqrt((t.weight - MIN)/(MAX - MIN))*50)

    WORDS = t['word'].distinct()
    WORDS = into(list, WORDS)
    topics = t['topic'].distinct()
    topics = into(list, topics)
    # The x range is built from strings, so topics are converted here.
    TOPICS = [str(i) for i in topics]

    source = into(pd.DataFrame, t)

    data_source = sources.ColumnDataSource(source)

    p = plt.figure(x_range=TOPICS, y_range=WORDS,
                   plot_width=1000, plot_height=1700,
                   title=plot_title)

    p.circle(x="topic", y="word", size="size", fill_alpha=0.6, source=data_source)
    return p
示例#7
0
 def distance(lat1, lon1, lat2, lon2, R=3959):
     """Great-circle distance between two points using the haversine formula.

     All four coordinates are taken in degrees and converted with
     ``radians`` before any trigonometric function is applied.

     Parameters
     ----------
     lat1, lon1 : latitude and longitude of the first point, in degrees.
     lat2, lon2 : latitude and longitude of the second point, in degrees.
     R : sphere radius; the result is in the same unit. The default 3959
         is presumably the Earth's radius in miles.

     Returns
     -------
     The distance along the sphere's surface, in the unit of ``R``.
     """
     # http://andrew.hedges.name/experiments/haversine/
     dlon = radians(lon2 - lon1)
     dlat = radians(lat2 - lat1)
     # The latitudes feed cos() directly, so they need the same
     # degree-to-radian conversion as the deltas above.
     a = (sin(dlat / 2.0) ** 2
          + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2.0) ** 2)
     return R * 2 * atan2(sqrt(a), sqrt(1 - a))
示例#8
0
 def distance(lat1, lon1, lat2, lon2, R=3959):
     """Great-circle distance between two points using the haversine formula.

     All four coordinates are taken in degrees and converted with
     ``radians`` before any trigonometric function is applied.

     Parameters
     ----------
     lat1, lon1 : latitude and longitude of the first point, in degrees.
     lat2, lon2 : latitude and longitude of the second point, in degrees.
     R : sphere radius; the result is in the same unit. The default 3959
         is presumably the Earth's radius in miles.

     Returns
     -------
     The distance along the sphere's surface, in the unit of ``R``.
     """
     # http://andrew.hedges.name/experiments/haversine/
     dlon = radians(lon2 - lon1)
     dlat = radians(lat2 - lat1)
     # The latitudes feed cos() directly, so they need the same
     # degree-to-radian conversion as the deltas above.
     a = (sin(dlat / 2.0) ** 2
          + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2.0) ** 2)
     return R * 2 * atan2(sqrt(a), sqrt(1 - a))
示例#9
0
 def test_cos(self):
     """``blaze.cos`` matches reference values for angles and negated angles."""
     angles = blaze.array([
         0, math.pi/6, math.pi/3, 0.5*math.pi,
         math.pi, 1.5*math.pi, 2*math.pi,
     ])
     expected = blaze.array([1, 0.5*blaze.sqrt(3), 0.5, 0, -1, 0, 1])
     tolerance = {'rtol': 1e-15, 'atol': 1e-15}
     assert_allclose(blaze.cos(angles), expected, **tolerance)
     # Negated angles are checked against the same expected values.
     assert_allclose(blaze.cos(-angles), expected, **tolerance)
示例#10
0
 def test_sin(self):
     """``blaze.sin`` matches reference values for angles and negated angles."""
     angles = blaze.array([
         0, math.pi/6, math.pi/3, 0.5*math.pi,
         math.pi, 1.5*math.pi, 2*math.pi,
     ])
     expected = blaze.array([0, 0.5, 0.5*blaze.sqrt(3), 1, 0, -1, 0])
     tolerance = {'rtol': 1e-15, 'atol': 1e-15}
     assert_allclose(blaze.sin(angles), expected, **tolerance)
     # Negated angles are checked against the negated expected values.
     assert_allclose(blaze.sin(-angles), -expected, **tolerance)
示例#11
0
 def test_sqrt(self):
     """``blaze.sqrt`` matches reference roots, including zero and 1e20."""
     inputs = blaze.array([0., 9., 64., 1e20, 12345])
     expected_roots = blaze.array([0., 3., 8., 1e10, math.sqrt(12345)])
     assert_almost_equal(blaze.sqrt(inputs), expected_roots)