Exemplo n.º 1
0
def load_world_bank_health_n_pop():
    """Load the World Bank health/population example data set.

    The work happens in four stages, in this order:

    1. ``countries.json.gz`` (under ``DATA_FOLDER``) is read into a
       DataFrame, dots in column names are replaced by underscores, and the
       frame is written to the ``wb_health_population`` SQL table, replacing
       any table already there.
    2. The ``TBL`` record pointing at that table is fetched or created,
       updated, merged into the session and committed.
    3. One ``Slice`` per entry of the spec table below is built and passed
       to ``merge_slice``.
    4. The dashboard with slug ``world_health`` is fetched or created and
       saved; it carries every slice except the last one in the list.
    """
    tbl_name = 'wb_health_population'
    source_path = os.path.join(DATA_FOLDER, 'countries.json.gz')
    with gzip.open(source_path) as f:
        pdf = pd.read_json(f)
    pdf.columns = [name.replace('.', '_') for name in pdf.columns]
    pdf.year = pd.to_datetime(pdf.year)

    column_types = {
        'year': DateTime(),
        'country_code': String(3),
        'country_name': String(255),
        'region': String(255),
    }
    pdf.to_sql(
        tbl_name,
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype=column_types,
        index=False,
    )

    print("Creating table [wb_health_population] reference")
    tbl = (
        db.session.query(TBL).filter_by(table_name=tbl_name).first()
        or TBL(table_name=tbl_name)
    )
    tbl.description = utils.readfile(os.path.join(DATA_FOLDER, 'countries.md'))
    tbl.main_dttm_col = 'year'
    tbl.is_featured = True
    tbl.database = get_or_create_db(db.session)
    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    # Base form parameters shared by every slice; each spec below passes its
    # own keyword overrides to get_slice_json on top of these.
    defaults = {
        "compare_lag": "10",
        "compare_suffix": "o10Y",
        "datasource_id": "1",
        "datasource_name": "birth_names",
        "datasource_type": "table",
        "limit": "25",
        "granularity": "year",
        "groupby": [],
        "metric": 'sum__SP_POP_TOTL',
        "metrics": ["sum__SP_POP_TOTL"],
        "row_limit": config.get("ROW_LIMIT"),
        "since": "2014-01-01",
        "until": "2014-01-01",
        "where": "",
        "markup_type": "markdown",
        "country_fieldtype": "cca3",
        "secondary_metric": "sum__SP_POP_TOTL",
        "entity": "country_code",
        "show_bubbles": "y",
    }

    print("Creating slices")
    # (slice name, keyword overrides) pairs. The order of the list is
    # significant: the dashboard layout further down is matched to it by
    # position. Every override dict names its own "viz_type", which is also
    # used for the Slice's viz_type column.
    slice_specs = [
        ("Region Filter", {
            "viz_type": "filter_box",
            "groupby": ["region", "country_name"],
        }),
        ("World's Population", {
            "since": "2000",
            "viz_type": "big_number",
            "compare_lag": "10",
            "metric": "sum__SP_POP_TOTL",
            "compare_suffix": "over 10Y",
        }),
        ("Most Populated Countries", {
            "viz_type": "table",
            "metrics": ["sum__SP_POP_TOTL"],
            "groupby": ["country_name"],
        }),
        ("Growth Rate", {
            "viz_type": "line",
            "since": "1960-01-01",
            "metrics": ["sum__SP_POP_TOTL"],
            "num_period_compare": "10",
            "groupby": ["country_name"],
        }),
        ("% Rural", {
            "viz_type": "world_map",
            "metric": "sum__SP_RUR_TOTL_ZS",
            "num_period_compare": "10",
        }),
        ("Life Expexctancy VS Rural %", {
            "viz_type": "bubble",
            "since": "2011-01-01",
            "until": "2011-01-01",
            "series": "region",
            "limit": "0",
            "entity": "country_name",
            "x": "sum__SP_RUR_TOTL_ZS",
            "y": "sum__SP_DYN_LE00_IN",
            "size": "sum__SP_POP_TOTL",
            "max_bubble_size": "50",
            "flt_col_1": "country_code",
            "flt_op_1": "not in",
            "flt_eq_1":
                "TCA,MNP,DMA,MHL,MCO,SXM,CYM,TUV,IMY,KNA,ASM,ADO,AMA,PLW",
            "num_period_compare": "10",
        }),
        ("Rural Breakdown", {
            "viz_type": "sunburst",
            "groupby": ["region", "country_name"],
            "secondary_metric": "sum__SP_RUR_TOTL",
            "since": "2011-01-01",
            "until": "2011-01-01",
        }),
        ("World's Pop Growth", {
            "since": "1960-01-01",
            "until": "now",
            "viz_type": "area",
            "groupby": ["region"],
        }),
        ("Box plot", {
            "since": "1960-01-01",
            "until": "now",
            "whisker_options": "Tukey",
            "viz_type": "box_plot",
            "groupby": ["region"],
        }),
        ("Treemap", {
            "since": "1960-01-01",
            "until": "now",
            "viz_type": "treemap",
            "metrics": ["sum__SP_POP_TOTL"],
            "groupby": ["region", "country_code"],
        }),
        ("Parallel Coordinates", {
            "since": "2011-01-01",
            "until": "2011-01-01",
            "viz_type": "para",
            "limit": 100,
            "metrics": [
                "sum__SP_POP_TOTL",
                "sum__SP_RUR_TOTL_ZS",
                "sum__SH_DYN_AIDS",
            ],
            "secondary_metric": "sum__SP_POP_TOTL",
            "series": ["country_name"],
        }),
    ]
    slices = [
        Slice(
            slice_name=slice_name,
            viz_type=overrides["viz_type"],
            datasource_type='table',
            table=tbl,
            params=get_slice_json(defaults, **overrides),
        )
        for slice_name, overrides in slice_specs
    ]
    for slc in slices:
        merge_slice(slc)

    print("Creating a World's Health Bank dashboard")
    dash_name = "World's Bank Data"
    slug = "world_health"
    dash = db.session.query(Dash).filter_by(slug=slug).first() or Dash()

    # Grid layout template. The "slice_id" values in it are placeholders
    # that get overwritten with the real slice ids right below.
    layout_template = textwrap.dedent("""\
    [
        {
            "col": 1,
            "row": 0,
            "size_x": 2,
            "size_y": 2,
            "slice_id": "1231"
        },
        {
            "col": 1,
            "row": 2,
            "size_x": 2,
            "size_y": 2,
            "slice_id": "1232"
        },
        {
            "col": 10,
            "row": 0,
            "size_x": 3,
            "size_y": 7,
            "slice_id": "1233"
        },
        {
            "col": 1,
            "row": 4,
            "size_x": 6,
            "size_y": 3,
            "slice_id": "1234"
        },
        {
            "col": 3,
            "row": 0,
            "size_x": 7,
            "size_y": 4,
            "slice_id": "1235"
        },
        {
            "col": 5,
            "row": 7,
            "size_x": 8,
            "size_y": 4,
            "slice_id": "1236"
        },
        {
            "col": 7,
            "row": 4,
            "size_x": 3,
            "size_y": 3,
            "slice_id": "1237"
        },
        {
            "col": 1,
            "row": 7,
            "size_x": 4,
            "size_y": 4,
            "slice_id": "1238"
        },
        {
            "col": 9,
            "row": 11,
            "size_x": 4,
            "size_y": 4,
            "slice_id": "1239"
        },
        {
            "col": 1,
            "row": 11,
            "size_x": 8,
            "size_y": 4,
            "slice_id": "1240"
        }
    ]
    """)
    positions = json.loads(layout_template)
    # Pair each layout cell with the slice at the same index; the layout has
    # fewer cells than there are slices, so the trailing slice gets no cell.
    for cell, slc in zip(positions, slices):
        cell['slice_id'] = str(slc.id)

    dash.dashboard_title = dash_name
    dash.position_json = json.dumps(positions, indent=4)
    dash.slug = slug

    # The last slice in the list is deliberately left off the dashboard.
    dash.slices = slices[:-1]
    db.session.merge(dash)
    db.session.commit()
Exemplo n.º 2
0
def load_world_bank_health_n_pop():
    """Load the World Bank health/population example data set.

    The work happens in four stages, in this order:

    1. ``countries.json.gz`` (under ``DATA_FOLDER``) is read into a
       DataFrame, dots in column names are replaced by underscores, and the
       frame is written to the ``wb_health_population`` SQL table, replacing
       any table already there.
    2. The ``TBL`` record pointing at that table is fetched or created,
       updated, merged into the session and committed.
    3. One ``Slice`` per entry of the spec table below is built and passed
       to ``merge_slice``.
    4. The dashboard titled "World's Health Bank Dashboard" is fetched or
       created, given the slug ``world_health`` and all slices, and saved.
    """
    tbl_name = 'wb_health_population'
    source_path = os.path.join(DATA_FOLDER, 'countries.json.gz')
    with gzip.open(source_path) as f:
        pdf = pd.read_json(f)
    pdf.columns = [name.replace('.', '_') for name in pdf.columns]
    pdf.year = pd.to_datetime(pdf.year)

    column_types = {
        'year': DateTime(),
        'country_code': String(3),
        'country_name': String(255),
        'region': String(255),
    }
    pdf.to_sql(
        tbl_name,
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype=column_types,
        index=False,
    )

    print("Creating table [wb_health_population] reference")
    tbl = (
        db.session.query(TBL).filter_by(table_name=tbl_name).first()
        or TBL(table_name=tbl_name)
    )
    tbl.description = utils.readfile(os.path.join(DATA_FOLDER, 'countries.md'))
    tbl.main_dttm_col = 'year'
    tbl.is_featured = True
    tbl.database = get_or_create_db(db.session)
    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    # Base form parameters shared by every slice; each spec below passes its
    # own keyword overrides to get_slice_json on top of these.
    defaults = {
        "compare_lag": "10",
        "compare_suffix": "o10Y",
        "datasource_id": "1",
        "datasource_name": "birth_names",
        "datasource_type": "table",
        "limit": "25",
        "granularity": "year",
        "groupby": [],
        "metric": 'sum__SP_POP_TOTL',
        "metrics": ["sum__SP_POP_TOTL"],
        "row_limit": config.get("ROW_LIMIT"),
        "since": "2014-01-01",
        "until": "2014-01-01",
        "where": "",
        "markup_type": "markdown",
        "country_fieldtype": "cca3",
        "secondary_metric": "sum__SP_POP_TOTL",
        "entity": "country_code",
        "show_bubbles": "y",
    }

    print("Creating slices")
    # (slice name, keyword overrides) pairs. The order of the list is
    # significant: the dashboard layout further down is matched to it by
    # position. Every override dict names its own "viz_type", which is also
    # used for the Slice's viz_type column.
    slice_specs = [
        ("Region Filter", {
            "viz_type": "filter_box",
            "groupby": ["region", "country_name"],
        }),
        ("World's Population", {
            "since": "2000",
            "viz_type": "big_number",
            "compare_lag": "10",
            "metric": "sum__SP_POP_TOTL",
            "compare_suffix": "over 10Y",
        }),
        ("Most Populated Countries", {
            "viz_type": "table",
            "metrics": ["sum__SP_POP_TOTL"],
            "groupby": ["country_name"],
        }),
        ("Growth Rate", {
            "viz_type": "line",
            "since": "1960-01-01",
            "metrics": ["sum__SP_POP_TOTL"],
            "num_period_compare": "10",
            "groupby": ["country_name"],
        }),
        ("% Rural", {
            "viz_type": "world_map",
            "metric": "sum__SP_RUR_TOTL_ZS",
            "num_period_compare": "10",
        }),
        ("Life Expexctancy VS Rural %", {
            "viz_type": "bubble",
            "since": "2011-01-01",
            "until": "2011-01-01",
            "series": "region",
            "limit": "0",
            "entity": "country_name",
            "x": "sum__SP_RUR_TOTL_ZS",
            "y": "sum__SP_DYN_LE00_IN",
            "size": "sum__SP_POP_TOTL",
            "max_bubble_size": "50",
            "flt_col_1": "country_code",
            "flt_op_1": "not in",
            "flt_eq_1":
                "TCA,MNP,DMA,MHL,MCO,SXM,CYM,TUV,IMY,KNA,ASM,ADO,AMA,PLW",
            "num_period_compare": "10",
        }),
        ("Rural Breakdown", {
            "viz_type": "sunburst",
            "groupby": ["region", "country_name"],
            "secondary_metric": "sum__SP_RUR_TOTL",
            "since": "2011-01-01",
            "until": "2011-01-01",
        }),
        ("World's Pop Growth", {
            "since": "1960-01-01",
            "until": "now",
            "viz_type": "area",
            "groupby": ["region"],
        }),
    ]
    slices = [
        Slice(
            slice_name=slice_name,
            viz_type=overrides["viz_type"],
            datasource_type='table',
            table=tbl,
            params=get_slice_json(defaults, **overrides),
        )
        for slice_name, overrides in slice_specs
    ]
    for slc in slices:
        merge_slice(slc)

    print("Creating a World's Health Bank dashboard")
    dash_name = "World's Health Bank Dashboard"
    dash = (
        db.session.query(Dash).filter_by(dashboard_title=dash_name).first()
        or Dash()
    )

    # Grid layout template. The "slice_id" values in it are placeholders
    # that get overwritten with the real slice ids right below.
    layout_template = textwrap.dedent("""\
        [
            {
                "size_y": 2,
                "size_x": 3,
                "col": 1,
                "slice_id": "1",
                "row": 1
            },
            {
                "size_y": 3,
                "size_x": 3,
                "col": 1,
                "slice_id": "2",
                "row": 3
            },
            {
                "size_y": 8,
                "size_x": 3,
                "col": 10,
                "slice_id": "3",
                "row": 1
            },
            {
                "size_y": 3,
                "size_x": 6,
                "col": 1,
                "slice_id": "4",
                "row": 6
            },
            {
                "size_y": 5,
                "size_x": 6,
                "col": 4,
                "slice_id": "5",
                "row": 1
            },
            {
                "size_y": 4,
                "size_x": 6,
                "col": 7,
                "slice_id": "6",
                "row": 9
            },
            {
                "size_y": 3,
                "size_x": 3,
                "col": 7,
                "slice_id": "7",
                "row": 6
            },
            {
                "size_y": 4,
                "size_x": 6,
                "col": 1,
                "slice_id": "8",
                "row": 9
            }
        ]
    """)
    positions = json.loads(layout_template)
    # Pair each layout cell with the slice at the same index.
    for cell, slc in zip(positions, slices):
        cell['slice_id'] = str(slc.id)

    dash.dashboard_title = dash_name
    dash.position_json = json.dumps(positions, indent=4)
    dash.slug = "world_health"

    dash.slices = slices
    db.session.merge(dash)
    db.session.commit()