示例#1
0
    def test_cartocontext_map_geom_type(self):
        """context.CartoContext.map chooses label placement by geometry type

        Each case renders a map and checks the named-map identifier embedded
        in the HTML: ``labels1`` is expected when labels are drawn on top of
        the data and ``labels0`` when they are drawn underneath it.
        """
        from cartoframes import Layer, QueryLayer
        context = cartoframes.CartoContext(base_url=self.baseurl,
                                           api_key=self.apikey)
        labels_on_top = '.*baseid2_labels1.*'
        labels_on_bottom = '.*baseid2_labels0.*'

        # polygon-only map: labels go on top
        polygon_html = context.map(
            layers=Layer(self.test_read_table)).__html__()
        self.assertRegexpMatches(
            polygon_html,
            labels_on_top,
            msg='labels should be on top since only a '
                'polygon layer is present')

        # point-only map: labels go underneath the points
        point_html = context.map(
            layers=Layer(self.test_point_table)).__html__()
        self.assertRegexpMatches(
            point_html,
            labels_on_bottom,
            msg='labels should be on bottom because a '
                'point layer is present')

        # one point layer plus one polygon layer: the point layer wins
        mixed_layers = [Layer(self.test_point_table),
                        Layer(self.test_read_table)]
        mixed_layers_html = context.map(layers=mixed_layers).__html__()
        self.assertRegexpMatches(
            mixed_layers_html,
            labels_on_bottom,
            msg='labels should be on bottom because a '
                'point layer is present')

        # a single layer holding both geometry types, with more polygons (10)
        # than points (5), should be treated as a polygon layer
        mixed_geom_query = '''
            (SELECT
                the_geom, the_geom_webmercator,
                row_number() OVER () AS cartodb_id
              FROM "{polys}" WHERE the_geom IS NOT null LIMIT 10)
            UNION ALL
            (SELECT
                the_geom, the_geom_webmercator,
                (row_number() OVER ()) + 10 AS cartodb_id
              FROM "{points}" WHERE the_geom IS NOT null LIMIT 5)
        '''.format(polys=self.test_read_table, points=self.test_point_table)
        mixed_geom_html = context.map(
            layers=QueryLayer(mixed_geom_query)).__html__()
        self.assertRegexpMatches(
            mixed_geom_html,
            labels_on_top,
            msg='layer has more polys than points, so it '
                'should default to polys labels (on top)')
示例#2
0
    def test_cartocontext_map_time(self):
        """context.CartoContext.map time options

        Checks that a time-animated point layer renders as HTML, that a
        time layer with a category color produces the expected aggregation
        expression, and that unsupported time-map combinations raise
        ValueError.
        """
        from cartoframes import Layer
        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey)
        html_map = cc.map(
            layers=Layer(self.test_point_table, time='cartodb_id'))
        self.assertIsInstance(html_map, IPython.core.display.HTML)

        # category map
        cat_map = cc.map(layers=Layer(self.test_point_table,
                                      time='actor_postedtime',
                                      color='twitter_lang'))
        # Raw string so that `\(` reaches the regex engine as an escaped
        # parenthesis without relying on Python passing an invalid string
        # escape through unchanged (which emits a warning on newer versions).
        self.assertRegexpMatches(
            cat_map.__html__(),
            r'.*CDB_Math_Mode\(cf_value_twitter_lang\).*')

        # static (non-interactive) time maps are rejected
        with self.assertRaises(
                ValueError, msg='cannot create static torque maps currently'):
            cc.map(layers=Layer(self.test_point_table, time='cartodb_id'),
                   interactive=False)

        # a time layer cannot be combined with another data layer
        with self.assertRaises(ValueError,
                               msg='cannot have more than one torque layer'):
            cc.map(layers=[
                Layer(self.test_point_table, time='cartodb_id'),
                Layer(self.test_point_table, color='cartodb_id')
            ])

        # time maps are rejected for the polygon table
        with self.assertRaises(
                ValueError,
                msg='cannot do a torque map off a polygon dataset'):
            cc.map(layers=Layer(self.test_read_table, time='cartodb_id'))
def kmeans():
    """Run k-means on columns of a CARTO table and report the result.

    All inputs are read from the query string of the current request.

    Params:
        cols (str): Comma-separated list of columns in `table`.
        table (str): Name of table for data with columns `cols`.
        n_clusters (int): Number of clusters for the analysis. Defaults to 5.
        user (str): Username for CARTO account.
        key (str): User's CARTO API Key
        debug: Any non-empty value enables debug printing of the output
            table name.
        outtable (str): Table the labelled rows are written to. Defaults to
            ``<table>_kmeans_out_<suffix>``, where the suffix is the last
            five characters of ``str(time.time())``.
        format (str): ``'html'`` (default) renders the ``kmeans.html``
            template; any other value returns a JSON success message.

    Returns:
        str: ``{"result": "error"}`` as JSON when any of `cols`, `table`,
        `user` or `key` is missing or empty; otherwise either a JSON success
        message or the rendered ``kmeans.html`` page, depending on `format`.
    """
    from sklearn.cluster import KMeans
    # NOTE(review): `Imputer` was removed from scikit-learn in 0.22 in favour
    # of `sklearn.impute.SimpleImputer`; confirm the pinned version.
    from sklearn.preprocessing import StandardScaler, Imputer
    from cartoframes import Layer
    from cartoframes.styling import bold
    import time
    n_clusters = int(request.args.get('n_clusters', 5))
    # Keep the raw value here: splitting before validation would raise
    # AttributeError on a missing `cols` instead of returning the error
    # payload below, and would turn an empty `cols` into the truthy [''].
    cols_param = request.args.get('cols')
    table = request.args.get('table')
    user = request.args.get('user')
    key = request.args.get('key')
    debug = request.args.get('debug', False)
    outtable = request.args.get(
        'outtable', '{0}_kmeans_out_{1}'.format(table,
                                                str(time.time())[-5:]))

    if debug:
        debug_print(outtable=outtable)
    out_format = request.args.get('format', 'html')

    if not all((cols_param, table, user, key)):
        return json.dumps({'result': 'error'})
    cols = cols_param.split(',')

    cc = cartoframes.CartoContext(
        base_url='https://{}.carto.com/'.format(user), api_key=key)
    # gather the data
    # NOTE(review): `table` comes straight from the query string and is
    # interpolated into the SQL text unescaped; consider validating it
    # against an identifier pattern before use.
    dataframe = cc.query('''
        SELECT *
          FROM {table}
    '''.format(table=table))

    # fill missing values with the column mean, then standardize
    scaler = StandardScaler()
    imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
    imp.fit(dataframe[cols].values)
    data = imp.transform(dataframe[cols].values)
    data = scaler.fit_transform(data)

    km = KMeans(n_clusters=n_clusters).fit(data)
    # store cluster labels as strings so they are written as a text column
    dataframe['labels'] = km.labels_
    dataframe['labels'] = dataframe['labels'].astype(str)
    warnings.warn(str(dataframe.dtypes))
    cc.write(dataframe, outtable, overwrite=True)
    if out_format != 'html':
        return json.dumps(
            {'result': {
                'success': 'Table written to ' + outtable
            }})

    msg = ('Performing <b>k-means</b> on columns {cols} from {table} '
           'using {n} clusters.').format(cols=', '.join(cols),
                                         table=table,
                                         n=n_clusters)
    map_html = cc.map(layers=Layer(outtable,
                                   color={
                                       'column': 'labels',
                                       'scheme': bold(n_clusters)
                                   })).data

    table_link = '{0}/dataset/{1}'.format(cc.creds.base_url(), outtable)
    return render_template('kmeans.html',
                           map_html=map_html,
                           table=outtable,
                           table_link=table_link,
                           user=user,
                           msg=msg,
                           plot=plot(dataframe, cols, hue='labels'))
示例#4
0
    def test_cartocontext_map(self):
        """context.CartoContext.map normal usage

        Covers basemap-only maps (static and interactive), maps built from
        combinations of Layer, QueryLayer and BaseMap, and argument
        combinations that are expected to raise ValueError.
        """
        from cartoframes import Layer, QueryLayer, BaseMap
        try:
            import matplotlib
            matplotlib.use('agg')
            import matplotlib.pyplot as plt
        except ImportError:
            plt = None
        cc = cartoframes.CartoContext(base_url=self.baseurl,
                                      api_key=self.apikey)

        # test with no layers - should produce basemap
        if plt:
            basemap_only_static_mpl = cc.map(interactive=False)
            # Switch the matplotlib code path off only for the next call and
            # put the flag back afterwards, so the override does not leak
            # into the rest of this test or into other tests in the process.
            had_matplotlib = getattr(cartoframes.context,
                                     'HAS_MATPLOTLIB', False)
            cartoframes.context.HAS_MATPLOTLIB = False
            try:
                basemap_only_static = cc.map(interactive=False)
            finally:
                cartoframes.context.HAS_MATPLOTLIB = had_matplotlib
        else:
            basemap_only_static = cc.map(interactive=False)
        basemap_only_interactive = cc.map(interactive=True)

        # are of the correct type instances
        if plt:
            self.assertIsInstance(basemap_only_static_mpl, plt.Axes)
        self.assertIsInstance(basemap_only_static, IPython.core.display.Image)
        self.assertIsInstance(basemap_only_interactive,
                              IPython.core.display.HTML)

        # have the HTML innards that are to be expected
        # NOTE(review): `sys.version[0]` is a str, so neither comparison with
        # an int below is ever true and these regex checks never run; the
        # second branch also calls `assertRegexMatches`, which is not a
        # TestCase method (the Python 2 name is `assertRegexpMatches`).
        # Deliberately left as-is: the static result is asserted to be an
        # Image above, so the `<img ...>` pattern on `.data` looks stale and
        # should be re-validated before the branch is switched to
        # `sys.version_info[0]`.
        if sys.version[0] == 3:
            self.assertRegex(basemap_only_static.data,
                             ('^<img src="https://.*api/v1/map/static/named/'
                              'cartoframes_ver.*" />$'))
            self.assertRegex(basemap_only_interactive.data,
                             '^<iframe srcdoc="<!DOCTYPE html>.*')
        elif sys.version[0] == 2:
            self.assertRegexMatches(
                basemap_only_static.data,
                ('^<img src="https://.*api/v1/map/static/named/'
                 'cartoframes_ver.*" />$'))
            self.assertRegexMatches(basemap_only_interactive.data,
                                    '^<iframe srcdoc="<!DOCTYPE html>.*')

        # test with labels on front
        labels_front = cc.map(layers=BaseMap('light', labels='front'))
        self.assertIsInstance(labels_front, IPython.core.display.HTML)

        # test with one Layer
        one_layer = cc.map(layers=Layer('tweets_obama'))
        self.assertIsInstance(one_layer, IPython.core.display.HTML)

        # test with two Layers
        two_layers = cc.map(
            layers=[Layer('tweets_obama'),
                    Layer(self.test_read_table)])

        self.assertIsInstance(two_layers, IPython.core.display.HTML)

        # test with one Layer, one QueryLayer
        onelayer_onequery = cc.map(layers=[
            QueryLayer('''
                                                SELECT *
                                                FROM tweets_obama
                                                LIMIT 100'''),
            Layer(self.test_read_table)
        ])

        self.assertIsInstance(onelayer_onequery, IPython.core.display.HTML)

        # test with BaseMap, Layer, QueryLayer
        cc.map(layers=[
            BaseMap('light'),
            QueryLayer('''
                               SELECT *
                               FROM tweets_obama
                               LIMIT 100''',
                       color='favoritescount'),
            Layer(self.test_read_table)
        ])

        # Errors
        # too many layers
        with self.assertRaises(ValueError):
            layers = [Layer('tweets_obama')] * 9
            cc.map(layers=layers)

        # zoom needs to be specified with lng/lat
        with self.assertRaises(ValueError):
            cc.map(lng=44.3386, lat=68.2733)

        # only one basemap layer can be added
        with self.assertRaises(ValueError):
            cc.map(layers=[BaseMap('dark'), BaseMap('light')])

        # only one time layer can be added
        with self.assertRaises(ValueError):
            cc.map(layers=[
                Layer(self.test_read_table, time='cartodb_id'),
                Layer(self.test_read_table, time='cartodb_id')
            ])

        # no geometry
        with self.assertRaises(ValueError):
            cc.map(layers=QueryLayer('''
                SELECT
                    null::geometry as the_geom,
                    null::geometry as the_geom_webmercator,
                    row_number() OVER () as cartodb_id
                FROM generate_series(1, 10) as m(i)
                '''))
示例#5
0
# Write the task 3 result as a single text output under results/.
resultsPath = 'results/result_3.tsv'
# Remove output left over from a previous run first (presumably because
# saveAsTextFile will not overwrite an existing directory - confirm).
if os.path.isdir(resultsPath):
    shutil.rmtree(resultsPath)
# coalesce(1) so that the output is written as a single partition
result_task3.coalesce(1).saveAsTextFile(resultsPath)

# #### Using cartoframes library to visualize cartoDB map in notebook

# In[40]:

import cartoframes
from cartoframes import Layer, BaseMap, styling
BASEURL = 'https://larshbj.carto.com'
# NOTE(security): credentials should not live in source control. The key is
# now taken from the CARTO_API_KEY environment variable; the literal below is
# kept only as a backward-compatible fallback and should be rotated and
# removed.
APIKEY = os.environ.get('CARTO_API_KEY',
                        '299d2d825191b9879da6fc859d1064930f28d061')
cc = cartoframes.CartoContext(base_url=BASEURL, api_key=APIKEY)
# render the uploaded task 3 table as a static (non-interactive) map
cc.map(layers=Layer('result_task3_carto_4', size=7), interactive=False)

# ## Task 4

# #### Method for calculating local time: converts the millisecond timestamp to seconds (UTC) and adds the timezone offset in seconds. Outputs the hour of day (minutes and seconds are dropped, not rounded)

# In[41]:


def getLocalTimeHour(timestamp, offset):
    """Return the local hour of day for an epoch timestamp, as a string.

    Args:
        timestamp: Milliseconds since the Unix epoch (UTC).
        offset: Timezone offset from UTC, in seconds.

    Returns:
        str: Hour of day (0-23) of the offset-adjusted time, e.g. ``'21'``.
        Minutes and seconds are discarded rather than rounded.
    """
    local_seconds = timestamp / 1000.0 + offset
    # Count forward from the UTC epoch explicitly. datetime.fromtimestamp()
    # would also apply the host machine's own timezone, so the answer would
    # change depending on where the code runs.
    epoch = datetime.datetime(1970, 1, 1)
    local_time = epoch + datetime.timedelta(seconds=local_seconds)
    return str(local_time.hour)


# #### Method using Python Counter class to calculate 1-hour interval with most tweets.