示例#1
0
def attrs(attr="bra", Attr_id=None, depth=None):

    Attr = globals()[attr.capitalize()]
    Attr_weight_mergeid = "{0}_id".format(attr)

    if attr == "bra":
        Attr_weight_tbl = Yb
        Attr_weight_col = "population"
    elif attr == "cnae":
        Attr_weight_tbl = Yi
        Attr_weight_col = "num_jobs"
    elif attr == "cbo":
        Attr_weight_tbl = Yo
        Attr_weight_col = "num_jobs"
    elif attr == "hs":
        Attr_weight_tbl = Ymp
        Attr_weight_col = "export_val"
    elif attr == "wld":
        Attr_weight_tbl = Ymw
        Attr_weight_col = "export_val"
    elif attr == "course_hedu":
        Attr_weight_tbl = Yc_hedu
        Attr_weight_col = "enrolled"
    elif attr == "university":
        Attr_weight_tbl = Yu
        Attr_weight_col = "enrolled"
    elif attr == "school":
        Attr_weight_tbl = Ys
        Attr_weight_col = "enrolled"
    elif attr == "course_sc":
        Attr_weight_tbl = Yc_sc
        Attr_weight_col = "enrolled"

    depths = {}
    depths["bra"] = [1, 3, 5, 7, 9]
    depths["cnae"] = [1, 3, 6]
    depths["cbo"] = [1, 4]
    depths["hs"] = [2, 6]
    depths["wld"] = [2, 5]
    depths["course_hedu"] = [2, 6]
    depths["university"] = [5]
    depths["course_sc"] = [2, 5]
    depths["school"] = [8]

    depth = request.args.get('depth', depth)
    order = request.args.get('order', None)
    offset = request.args.get('offset', None)
    limit = request.args.get('limit', None)
    if offset:
        offset = float(offset)
        limit = limit or 50
    elif limit:
        offset = float(0)

    lang = request.args.get('lang', None) or g.locale
    ret = {}
    dataset = "rais"
    if Attr == Wld or Attr == Hs:
        dataset = "secex"
    elif Attr == Course_hedu or Attr == University:
        dataset = "hedu"
    elif Attr == Course_sc or Attr == School:
        dataset = "sc"
    elif Attr == Bra:
        dataset = "population"

    cache_id = "attrs:" + request.path + lang
    if depth:
        cache_id = cache_id + "/" + depth
    # first lets test if this query is cached
    cached_q = cached_query(cache_id)
    if cached_q and limit is None:
        ret = make_response(cached_q)
        ret.headers['Content-Encoding'] = 'gzip'
        ret.headers['Content-Length'] = str(len(ret.data))
        return ret

    # if an ID is supplied only return that
    if Attr_id:

        # the '.show.' indicates that we are looking for a specific nesting
        if ".show." in Attr_id:
            this_attr, ret["nesting_level"] = Attr_id.split(".show.")
            # filter table by requested nesting level
            attrs = Attr.query \
                    .filter(Attr.id.startswith(this_attr)) \
                    .filter(func.char_length(Attr.id) == ret["nesting_level"]).all()

        # the 'show.' indicates that we are looking for a specific nesting
        elif "show." in Attr_id:
            ret["nesting_level"] = Attr_id.split(".")[1]
            # filter table by requested nesting level
            attrs = Attr.query.filter(
                func.char_length(Attr.id) == ret["nesting_level"]).all()

        # the '.' here means we want to see all attrs within a certain distance
        elif "." in Attr_id:
            this_attr, distance = Attr_id.split(".")
            this_attr = Attr.query.get_or_404(this_attr)
            attrs = this_attr.get_neighbors(distance)

        else:
            attrs = [Attr.query.get_or_404(Attr_id)]

        ret["data"] = [fix_name(a.serialize(), lang) for a in attrs]
    # an ID/filter was not provided
    else:
        latest_year = __year_range__[dataset][-1]
        latest_month = False
        if "-" in latest_year:
            latest_year, latest_month = latest_year.split("-")
            latest_month = int(latest_month)
        latest_year = int(latest_year)

        conds = [
            getattr(Attr_weight_tbl, "{0}_id".format(attr)) == Attr.id,
            Attr_weight_tbl.year == latest_year
        ]
        if latest_month:
            conds.append(Attr_weight_tbl.month == latest_month)

        query = db.session.query(Attr, Attr_weight_tbl).outerjoin(
            Attr_weight_tbl, and_(*conds))
        if Attr == School:
            query = query.filter(Attr.is_vocational == 1)

        if depth:
            query = query.filter(func.char_length(Attr.id) == depth)
        else:
            query = query.filter(func.char_length(Attr.id).in_(depths[attr]))

        if order:
            direction = "asc"

            if "." in order:
                o, direction = order.split(".")
            else:
                o = order

            if o == "name":
                o = "name_{0}".format(lang)

            if o == Attr_weight_col:
                order_table = Attr_weight_tbl
            else:
                order_table = Attr

            if direction == "asc":
                query = query.order_by(asc(getattr(order_table, o)))
            elif direction == "desc":
                query = query.order_by(desc(getattr(order_table, o)))

        if limit:
            query = query.limit(limit).offset(offset)

        attrs_all = query.all()

        # just get items available in DB
        attrs_w_data = None
        if depth is None and limit is None:
            attrs_w_data = db.session.query(Attr, Attr_weight_tbl) \
                .filter(getattr(Attr_weight_tbl, Attr_weight_mergeid) == Attr.id) \
                .group_by(Attr.id)
            attrs_w_data = [a[0].id for a in attrs_w_data]

        attrs = []

        # all_planning_regions = {}

        for i, a in enumerate(attrs_all):
            b = a[0].serialize()
            if a[1]:
                c = a[1].serialize()
                if Attr_weight_col in c:
                    b[Attr_weight_col] = c[Attr_weight_col]
                else:
                    b[Attr_weight_col] = 0
            else:
                b[Attr_weight_col] = 0
            a = b
            if attrs_w_data:
                a["available"] = False
                if a["id"] in attrs_w_data:
                    a["available"] = True
            # if Attr_weight_col == "population" and len(a["id"]) == 9 and a["id"][:3] == "4mg":
            #     if not all_planning_regions:
            #         all_planning_regions = get_planning_region_map()
            #     if a["id"] in all_planning_regions:
            #         plr = all_planning_regions[a["id"]]
            #         a["plr"] = plr
            if order:
                a["rank"] = int(i + offset + 1)
            if attr == "bra" and "id_ibge" not in a:
                a["id_ibge"] = False
            attrs.append(fix_name(a, lang))

        ret["data"] = attrs

    ret = jsonify(ret)
    ret.data = gzip_data(ret.data)

    if limit is None and cached_q is None:
        cached_query(cache_id, ret.data)

    ret.headers['Content-Encoding'] = 'gzip'
    ret.headers['Content-Length'] = str(len(ret.data))

    return ret
示例#2
0
def embed(app_name="tree_map", dataset="rais", bra_id="4mg",
          filter1="all", filter2="all", output="cbo"):
    prefix = "apps:embed:xhr:"
    lang = request.args.get('lang', None) or g.locale

    if (g.user is None or not g.user.is_authenticated()) and request.is_xhr:
        cache_id = prefix + request.path + lang
        cached_q = cached_query(cache_id)
        if cached_q:
            ret = make_response(cached_q)
            ret.headers['Content-Encoding'] = 'gzip'
            ret.headers['Content-Length'] = str(len(ret.data))
            return ret

    build_filter1, build_filter2 = filler(dataset, filter1, filter2)

    '''Grab attrs for bra and filters
    '''
    if bra_id == "all":
        bra_attr = Wld.query.get_or_404("sabra")
    else:
        bra_attr = [Bra.query.get_or_404(b) for b in bra_id.split("_")]
    filter1_attr = filter1
    filter2_attr = filter2
    if filter1 != "all":
        filter1_attr = globals()[build_filter1.capitalize()].query.get_or_404(filter1)
    if filter2 != "all":
        filter2_attr = globals()[build_filter2.capitalize()].query.get_or_404(filter2)

    if build_filter1 != "all":
        build_filter1 = "<{}>".format(build_filter1)
    if build_filter2 != "all":
        build_filter2 = "<{}>".format(build_filter2)

    '''This is an instance of the Build class for the selected app,
    determined by the combination of app_type, dataset, filters and output.
    '''
    current_app = App.query.filter_by(type=app_name).first_or_404()
    current_build = Build.query.filter_by(app=current_app, dataset=dataset, filter1=build_filter1, filter2=build_filter2, output=output).first_or_404()
    current_build.set_filter1(filter1_attr)
    current_build.set_filter2(filter2_attr)
    current_build.set_bra(bra_attr)

    '''Every possible build, required by the embed page for building the build
    dropdown.
    '''
    # all_builds = Build.query.all()
    # all_builds.sort(key=lambda x: x.dataset)
    # for build in all_builds:
    #     build.set_filter1(filter1_attr)
    #     build.set_filter2(filter2_attr)
    #     build.set_bra(bra_attr)

    '''Get URL query parameters from reqest.args object to return to the view.
    '''
    global_vars = {x[0]:x[1] for x in request.args.items()}
    if "controls" not in global_vars:
        global_vars["controls"] = "true"

    '''If user is logged in see if they have starred this app.'''
    starred = 0
    app_id = "/".join([app_name, dataset, bra_id, filter1, filter2, output])
    if g.user and g.user.is_authenticated():
        is_starred = Starred.query.filter_by(user=g.user, app_id=app_id).first()
        starred = 1 if is_starred else -1

    if request.is_xhr:
        ret = jsonify({
            "current_build": current_build.serialize(),
            # "all_builds": [b.json() for b in all_builds],
            "starred": starred
        })
        ret.data = gzip_data(ret.data)
        ret.headers['Content-Encoding'] = 'gzip'
        ret.headers['Content-Length'] = str(len(ret.data))
        if starred == 0 and cached_q is None:
            cached_query(cache_id, ret.data)
    else:

        year_range = json.dumps(__year_range__)

        ret = make_response(render_template("apps/embed.html",
            # apps = App.query.all(),
            # all_builds = all_builds,
            starred = starred,
            form = DownloadForm(),
            current_build = current_build,
            global_vars = json.dumps(global_vars),
            facebook_id = FACEBOOK_OAUTH_ID,
            year_range = year_range))
        ret.data = gzip_data(ret.data)
        ret.headers['Content-Encoding'] = 'gzip'
        ret.headers['Content-Length'] = str(len(ret.data))

    ret.headers.add('Last-Modified', datetime.now())
    ret.headers.add('Cache-Control', 'no-store, no-cache, must-revalidate, post-check=0, pre-check=0')
    ret.headers.add('Pragma', 'no-cache')

    return ret
示例#3
0
def make_query(data_table, url_args, lang, **kwargs):

    from dataviva import db
    from dataviva.attrs.models import Bra, Isic, Cbo, Hs, Wld

    ops = {
        ">": operator.gt,
        ">=": operator.ge,
        "<": operator.lt,
        "<=": operator.le
    }

    check_keys = ["bra_id", "isic_id", "cbo_id", "hs_id", "wld_id"]
    unique_keys = []

    download = url_args.get("download", None)
    raw = True if "raw" in kwargs else None
    order = url_args.get("order", None)
    offset = url_args.get("offset", None)
    limit = url_args.get("limit", None)
    cols = url_args.get("cols", None)
    if type(cols) == str or type(cols) == unicode:
        cols = cols.split(".")
    excluding = url_args.get("excluding", None)
    if offset:
        offset = float(offset)
        limit = limit or 50
    filter = url_args.get("filter", None)
    if filter:
        filter = re.split("(>=|>|<=|<)", filter)
    join = kwargs["join"] if "join" in kwargs else False
    show_id = None
    cache_id = request.path
    ret = {}
    # first lets test if this query is cached (be sure we are not paginating
    # results) as these should not get cached
    if limit is None and download is None and raw is None and cols is None:
        cached_q = cached_query(cache_id)
        if cached_q:
            return cached_q

    query = db.session.query(data_table)
    if join:
        for j in join:
            query = query.add_entity(j["table"])
            for col in j["on"]:
                query = query.filter(
                    getattr(data_table, col) == getattr(j["table"], col))

    query = query.group_by(data_table.year)

    # handle year (if specified)
    if "year" in kwargs:
        ret["year"] = parse_years(kwargs["year"])
        query = query \
            .filter(data_table.year.in_(ret["year"]))

    # parse all filters
    for key in check_keys:
        if key in kwargs:
            if key != "bra_id":
                unique_keys.append(key)
            if "show" in kwargs[key]:
                show_id = key
            parse_results = parse_filter(kwargs, key, query, data_table, ret)
            query = parse_results["query"]
            ret = parse_results["ret"]

    if filter:
        query = query.filter(ops[filter[1]](getattr(data_table, filter[0]),
                                            float(filter[2])))

    if excluding:
        for e in excluding:
            query = query.filter(
                not_(getattr(data_table, e).startswith(excluding[e])))

    # lastly we want to get the actual data held in the table requested
    if "aggregate" not in ret:
        # handle ordering
        if order:
            direction = "asc"

            if "." in order:
                o, direction = order.split(".")
            else:
                o = order

            order_table = None
            if join:
                for j in join:
                    if o in j["columns"]:
                        order_table = j["table"]

            if order_table == None:
                order_table = data_table
            if o in query:
                if direction == "asc":
                    query = query.order_by(asc(getattr(order_table, o)))
                elif direction == "desc":
                    query = query.order_by(desc(getattr(order_table, o)))

        if limit:
            query = query.limit(limit).offset(offset)

    # raise Exception(compile_query(query))
    if join:
        ret["data"] = []
        items = query.all()
        for row in items:
            datum = row[0].serialize()
            join_data = []
            for i, r in enumerate(row):
                if i != 0:
                    serialized = r.serialize()
                    for k in serialized:
                        if k in join[i - 1]["columns"]:
                            datum[k] = serialized[k]
            ret["data"].append(datum)
    elif raw:
        return query.all()
    else:
        ret["data"] = [d.serialize() for d in query.all()]

    if "aggregate" in ret:

        agg_data = []
        ret["data"] = sorted(ret["data"], key=lambda x: x["year"])

        if "bra" not in ret:
            ret["bra"] = {}

        for bra in ret["bra"]:

            if "aggregates" in bra:
                filtered_objs = []
                for key, group in groupby(ret["data"], lambda x: x["year"]):
                    year_data = []
                    for obj in group:
                        if obj["bra_id"] in bra["aggregates"]:
                            year_data.append(obj)

                    if len(unique_keys) > 0:

                        def check_filter(d, keys, i):
                            if i == len(keys):
                                merged_data = merge_objects(d)
                                merged_data["year"] = key
                                merged_data["bra_id"] = bra["id"]
                                agg_data.append(merged_data)
                            else:
                                d = sorted(d, key=lambda x: x[keys[i]])
                                for x, g in groupby(d, lambda x: x[keys[i]]):
                                    new_array = []
                                    for o in g:
                                        new_array.append(o)
                                    check_filter(new_array, keys, i + 1)

                        check_filter(year_data, unique_keys, 0)
                    else:
                        merged_data = merge_objects(year_data)
                        merged_data["year"] = key
                        merged_data["bra_id"] = bra["id"]
                        agg_data.append(merged_data)
            else:
                bra_data = [
                    obj for obj in ret["data"] if obj["bra_id"] == bra["id"]
                ]
                agg_data = agg_data + bra_data
        ret["data"] = agg_data

        # handle ordering
        if order:
            direction = "asc"
            if "." in order:
                o, direction = order.split(".")
            else:
                o = order
            if direction == "asc":
                ret["data"].sort(key=lambda x: x[o] if o in x else None)
            elif direction == "desc":
                ret["data"].sort(key=lambda x: x[o] if o in x else None,
                                 reverse=True)

        if limit:
            ret["data"] = ret["data"][int(offset):int(offset) + int(limit)]

    if cols:
        cols = ["year", "bra_id"] + unique_keys + cols
        new_return = []
        attrs = None
        if ("name" or "id_ibge" or "id_mdic" in cols) and show_id:
            attr_table = locals()[show_id.split("_")[0].title()]
            attrs = [x.serialize() for x in attr_table.query.all()]
            attrs = {x["id"]: x or None for x in attrs}
        for d in ret["data"]:
            new_obj = {}
            for k in d:
                if k in cols:
                    new_obj[k] = d[k]
            if attrs:
                if "name" in cols and "name_{0}".format(lang) in attrs[
                        d[show_id]]:
                    new_obj["name"] = attrs[d[show_id]]["name_{0}".format(
                        lang)]
                if "id_ibge" in cols and "id_ibge" in attrs[d[show_id]]:
                    new_obj["id_ibge"] = attrs[d[show_id]]["id_ibge"]
                if "id_mdic" in cols and "id_mdic" in attrs[d[show_id]]:
                    new_obj["id_mdic"] = attrs[d[show_id]]["id_mdic"]
            new_return.append(new_obj)
        ret["data"] = new_return

    if order:
        for i, d in enumerate(ret["data"]):
            r = i + 1
            if offset:
                r = r + offset
            d["rank"] = int(r)

    if download is not None:
        header = [str(c).split(".")[1] for c in data_table.__table__.columns]
        if cols:
            stickies = [c for c in header if c in unique_keys]
            header = stickies + cols

        def generate():
            for i, data_dict in enumerate(ret["data"]):
                row = [
                    str(data_dict[c]) if c in data_dict else '' for c in header
                ]
                if i == 0:
                    yield ';'.join(header) + '\n' + ';'.join(row) + '\n'
                yield ';'.join(row) + '\n'

        content_disposition = "attachment;filename=%s.csv" % (
            cache_id[1:-1].replace('/', "_"))

        if sys.getsizeof(ret["data"]) > 10485760:
            resp = Response(
                ['Unable to download, request is larger than 10mb'],
                mimetype="text/csv;charset=UTF-8",
                headers={"Content-Disposition": content_disposition})
        else:
            resp = Response(
                generate(),
                mimetype="text/csv;charset=UTF-8",
                headers={"Content-Disposition": content_disposition})
        return resp

    # gzip and jsonify result
    ret = gzip_data(jsonify(ret).data)

    if limit is None and download is None and raw is None and cols is None:
        cached_query(cache_id, ret)

    return ret
示例#4
0
def embed(app_name="tree_map",
          dataset="rais",
          bra_id="4mg",
          filter1="all",
          filter2="all",
          output="cbo"):
    prefix = "apps:embed:xhr:"
    lang = request.args.get('lang', None) or g.locale

    if (g.user is None or not g.user.is_authenticated()) and request.is_xhr:
        cache_id = prefix + request.path + lang
        cached_q = cached_query(cache_id)
        if cached_q:
            ret = make_response(cached_q)
            ret.headers['Content-Encoding'] = 'gzip'
            ret.headers['Content-Length'] = str(len(ret.data))
            return ret

    build_filter1, build_filter2 = filler(dataset, filter1, filter2)
    '''Grab attrs for bra and filters
    '''
    if bra_id == "all":
        bra_attr = Wld.query.get_or_404("sabra")
    else:
        bra_attr = [Bra.query.get_or_404(b) for b in bra_id.split("_")]
    filter1_attr = filter1
    filter2_attr = filter2
    if filter1 != "all":
        filter1_attr = globals()[build_filter1.capitalize()].query.get_or_404(
            filter1)
    if filter2 != "all":
        filter2_attr = globals()[build_filter2.capitalize()].query.get_or_404(
            filter2)

    if build_filter1 != "all":
        build_filter1 = "<{}>".format(build_filter1)
    if build_filter2 != "all":
        build_filter2 = "<{}>".format(build_filter2)
    '''This is an instance of the Build class for the selected app,
    determined by the combination of app_type, dataset, filters and output.
    '''
    current_app = App.query.filter_by(type=app_name).first_or_404()
    current_build = Build.query.filter_by(app=current_app,
                                          dataset=dataset,
                                          filter1=build_filter1,
                                          filter2=build_filter2,
                                          output=output).first_or_404()
    current_build.set_filter1(filter1_attr)
    current_build.set_filter2(filter2_attr)
    current_build.set_bra(bra_attr)
    '''Every possible build, required by the embed page for building the build
    dropdown.
    '''
    # all_builds = Build.query.all()
    # all_builds.sort(key=lambda x: x.dataset)
    # for build in all_builds:
    #     build.set_filter1(filter1_attr)
    #     build.set_filter2(filter2_attr)
    #     build.set_bra(bra_attr)
    '''Get URL query parameters from reqest.args object to return to the view.
    '''
    global_vars = {x[0]: x[1] for x in request.args.items()}
    if "controls" not in global_vars:
        global_vars["controls"] = "true"
    '''If user is logged in see if they have starred this app.'''
    starred = 0
    app_id = "/".join([app_name, dataset, bra_id, filter1, filter2, output])
    if g.user and g.user.is_authenticated():
        is_starred = Starred.query.filter_by(user=g.user,
                                             app_id=app_id).first()
        starred = 1 if is_starred else -1

    if request.is_xhr:
        ret = jsonify({
            "current_build": current_build.serialize(),
            # "all_builds": [b.json() for b in all_builds],
            "starred": starred
        })
        ret.data = gzip_data(ret.data)
        ret.headers['Content-Encoding'] = 'gzip'
        ret.headers['Content-Length'] = str(len(ret.data))
        if starred == 0 and cached_q is None:
            cached_query(cache_id, ret.data)
    else:

        year_range = json.dumps(__year_range__)

        ret = make_response(
            render_template(
                "apps/embed.html",
                # apps = App.query.all(),
                # all_builds = all_builds,
                starred=starred,
                form=DownloadForm(),
                current_build=current_build,
                global_vars=json.dumps(global_vars),
                facebook_id=FACEBOOK_OAUTH_ID,
                year_range=year_range))
        ret.data = gzip_data(ret.data)
        ret.headers['Content-Encoding'] = 'gzip'
        ret.headers['Content-Length'] = str(len(ret.data))

    ret.headers.add('Last-Modified', datetime.now())
    ret.headers.add(
        'Cache-Control',
        'no-store, no-cache, must-revalidate, post-check=0, pre-check=0')
    ret.headers.add('Pragma', 'no-cache')

    return ret
示例#5
0
def make_query(data_table, url_args, lang, **kwargs):

    from dataviva import db
    from dataviva.attrs.models import Bra, Cnae, Cbo, Hs, Wld, University, Course_hedu, Course_sc

    ops = {">": operator.gt,
           ">=": operator.ge,
           "<": operator.lt,
           "<=": operator.le}

    check_keys = ["bra_id", "cnae_id", "cbo_id", "hs_id", "wld_id", "university_id", "course_hedu_id", "course_sc_id"]
    unique_keys = []

    download = url_args.get("download", None)
    raw = True if "raw" in kwargs else None
    order = url_args.get("order", None)
    offset = url_args.get("offset", None)
    limit = url_args.get("limit", None)
    cols = url_args.get("cols", None)
    if type(cols) == str or type(cols) == unicode:
        cols = cols.split(".")
    excluding = url_args.get("excluding", None)
    if offset:
        offset = float(offset)
        limit = limit or 50
    filter = url_args.get("filter", None)
    if filter:
        filter = re.split("(>=|>|<=|<)", filter)
    join = kwargs["join"] if "join" in kwargs else False
    show_id = None
    cache_id = request.path
    ret = {}
    # first lets test if this query is cached (be sure we are not paginating
    # results) as these should not get cached
    if limit is None and download is None and raw is None and cols is None:
        cached_q = cached_query(cache_id)
        if cached_q:
            return cached_q

    query = db.session.query(data_table)
    if join:
        for j in join:
            query = query.add_entity(j["table"])
            for col in j["on"]:
                query = query.filter(getattr(data_table, col) == getattr(j["table"], col))

    query = query.group_by(data_table.year)

    # handle year (if specified)
    if "year" in kwargs:
        ret["year"] = parse_years(kwargs["year"])
        query = query \
            .filter(data_table.year.in_(ret["year"]))

    # parse all filters
    for key in check_keys:
        if key in kwargs:
            if key != "bra_id":
                unique_keys.append(key)
            if "show" in kwargs[key]:
                show_id = key
            parse_results = parse_filter(kwargs,key,query,data_table,ret)
            query = parse_results["query"]
            ret = parse_results["ret"]

    if filter:
        query = query.filter(ops[filter[1]](getattr(data_table, filter[0]), float(filter[2])))

    if excluding:
        for e in excluding:
            query = query.filter(not_(getattr(data_table, e).startswith(excluding[e])))

    # lastly we want to get the actual data held in the table requested
    if "aggregate" not in ret:
        # handle ordering
        if order:
            direction = "asc"

            if "." in order:
                o, direction = order.split(".")
            else:
                o = order

            order_table = None
            if join:
                for j in join:
                    if o in j["columns"]:
                        order_table = j["table"]

            if order_table == None:
                order_table = data_table

            all_col_names = data_table.__table__.columns.keys() + order_table.__table__.columns.keys()
            if o in all_col_names:
                if direction == "asc":
                    query = query.order_by(asc(getattr(order_table,o)))
                elif direction == "desc":
                    query = query.order_by(desc(getattr(order_table,o)))

        if limit:
            query = query.limit(limit).offset(offset)

    # raise Exception(compile_query(query))
    if join:
        ret["data"] = []
        items = query.all()
        for row in items:
            datum = row[0].serialize()
            join_data = []
            for i, r in enumerate(row):
                if i != 0:
                    serialized = r.serialize()
                    for k in serialized:
                        if k in join[i-1]["columns"]:
                            datum[k] = serialized[k]
            ret["data"].append(datum)
    elif raw:
        return query.all()
    else:
        ret["data"] = [d.serialize() for d in query.all()]

    if "aggregate" in ret:

        agg_data = []
        ret["data"] = sorted(ret["data"],key=lambda x: x["year"])

        if "bra" not in ret:
            ret["bra"] = {}

        for bra in ret["bra"]:

            if "aggregates" in bra:
                filtered_objs = []
                for key, group in groupby(ret["data"],lambda x: x["year"]):
                    year_data = []
                    for obj in group:
                        if obj["bra_id"] in bra["aggregates"]:
                            year_data.append(obj)

                    if len(unique_keys) > 0:

                        def check_filter(d,keys,i):
                            if i == len(keys):
                                merged_data = merge_objects(d)
                                merged_data["year"] = key
                                merged_data["bra_id"] = bra["id"]
                                agg_data.append(merged_data)
                            else:
                                d = sorted(d,key=lambda x: x[keys[i]])
                                for x, g in groupby(d,lambda x: x[keys[i]]):
                                    new_array = []
                                    for o in g:
                                        new_array.append(o)
                                    check_filter(new_array,keys,i+1)

                        check_filter(year_data,unique_keys,0)
                    else:
                        merged_data = merge_objects(year_data)
                        merged_data["year"] = key
                        merged_data["bra_id"] = bra["id"]
                        agg_data.append(merged_data)
            else:
                bra_data = [obj for obj in ret["data"] if obj["bra_id"] == bra["id"]]
                agg_data = agg_data + bra_data
        ret["data"] = agg_data

        # handle ordering
        if order:
            direction = "asc"
            if "." in order:
                o, direction = order.split(".")
            else:
                o = order
            if direction == "asc":
                ret["data"].sort(key=lambda x: x[o] if o in x else None)
            elif direction == "desc":
                ret["data"].sort(key=lambda x: x[o] if o in x else None, reverse=True)

        if limit:
            ret["data"] = ret["data"][int(offset):int(offset)+int(limit)]

    if cols:
        cols = ["year","bra_id"]+unique_keys+cols
        new_return = []
        attrs = None
        if ("name" or "id_ibge" or "id_mdic" in cols) and show_id:
            attr_table = locals()[show_id[:-3].capitalize()]
            attrs = [x.serialize() for x in attr_table.query.all()]
            attrs = {x["id"]:x or None for x in attrs}
        for d in ret["data"]:
            new_obj = {}
            for k in d:
                if k in cols:
                    new_obj[k] = d[k]
            if attrs:
                if "name" in cols and "name_{0}".format(lang) in attrs[d[show_id]]:
                    new_obj["name"] = attrs[d[show_id]]["name_{0}".format(lang)]
                if "id_ibge" in cols and "id_ibge" in attrs[d[show_id]]:
                    new_obj["id_ibge"] = attrs[d[show_id]]["id_ibge"]
                if "id_mdic" in cols and "id_mdic" in attrs[d[show_id]]:
                    new_obj["id_mdic"] = attrs[d[show_id]]["id_mdic"]
            new_return.append(new_obj)
        ret["data"] = new_return

    if order:
        for i, d in enumerate(ret["data"]):
            r = i+1
            if offset:
                r = r+offset
            d["rank"] = int(r)

    if download is not None:
        header = [str(c).split(".")[1] for c in data_table.__table__.columns]
        if cols:
            stickies = [c for c in header if c in unique_keys]
            header = stickies+cols

        def generate():
            for i, data_dict in enumerate(ret["data"]):
                row = [str(data_dict[c]) if c in data_dict else '' for c in header]
                if i == 0:
                    yield ';'.join(header) + '\n' + ';'.join(row) + '\n'
                yield ';'.join(row) + '\n'

        content_disposition = "attachment;filename=%s.csv" % (cache_id[1:-1].replace('/', "_"))

        if sys.getsizeof(ret["data"]) > 10485760:
            resp = Response(['Unable to download, request is larger than 10mb'],
                            mimetype="text/csv;charset=UTF-8",
                            headers={"Content-Disposition": content_disposition})
        else:
            resp = Response(generate(), mimetype="text/csv;charset=UTF-8",
                            headers={"Content-Disposition": content_disposition})
        return resp

    # gzip and jsonify result
    ret = gzip_data(jsonify(ret).data)

    if limit is None and download is None and raw is None and cols is None:
        cached_query(cache_id, ret)

    return ret
示例#6
0
def attrs(attr="bra",Attr_id=None, depth=None):

    Attr = globals()[attr.capitalize()]
    Attr_weight_mergeid = "{0}_id".format(attr)

    if attr == "bra":
        Attr_weight_tbl = Yb
        Attr_weight_col = "population"
    elif attr == "cnae":
        Attr_weight_tbl = Yi
        Attr_weight_col = "num_jobs"
    elif attr == "cbo":
        Attr_weight_tbl = Yo
        Attr_weight_col = "num_jobs"
    elif attr == "hs":
        Attr_weight_tbl = Ymp
        Attr_weight_col = "export_val"
    elif attr == "wld":
        Attr_weight_tbl = Ymw
        Attr_weight_col = "export_val"
    elif attr == "course_hedu":
        Attr_weight_tbl = Yc_hedu
        Attr_weight_col = "enrolled"
    elif attr == "university":
        Attr_weight_tbl = Yu
        Attr_weight_col = "enrolled"
    elif attr == "school":
        Attr_weight_tbl = Ys
        Attr_weight_col = "enrolled"
    elif attr == "course_sc":
        Attr_weight_tbl = Yc_sc
        Attr_weight_col = "enrolled"

    depths = {}
    depths["bra"] = [1, 3, 5, 7, 9]
    depths["cnae"] = [1, 3, 6]
    depths["cbo"] = [1, 4]
    depths["hs"] = [2, 6]
    depths["wld"] = [2, 5]
    depths["course_hedu"] = [2, 6]
    depths["university"] = [5]
    depths["course_sc"] = [2, 5]
    depths["school"] = [8]

    depth = request.args.get('depth', depth)
    order = request.args.get('order', None)
    offset = request.args.get('offset', None)
    limit = request.args.get('limit', None)

    if offset:
        offset = float(offset)
        limit = limit or 50
    elif limit:
        offset = float(0)

    lang = request.args.get('lang', None) or g.locale
    ret = {}
    dataset = "rais"
    if Attr == Wld or Attr == Hs:
        dataset = "secex"
    elif Attr == Course_hedu or Attr == University:
        dataset = "hedu"
    elif Attr == Course_sc or Attr == School:
        dataset = "sc"
    elif Attr == Bra:
        dataset = "population"

    cache_id = "attrs:" + request.path + lang
    if depth:
        cache_id = cache_id + "/" + depth
    # first lets test if this query is cached
    cached_q = cached_query(cache_id)
    if cached_q and limit is None:
        ret = make_response(cached_q)
        ret.headers['Content-Encoding'] = 'gzip'
        ret.headers['Content-Length'] = str(len(ret.data))
        return ret

    # if an ID is supplied only return that
    if Attr_id:

        # the '.show.' indicates that we are looking for a specific nesting
        if ".show." in Attr_id:
            this_attr, ret["nesting_level"] = Attr_id.split(".show.")
            # filter table by requested nesting level
            attrs = Attr.query \
                    .filter(Attr.id.startswith(this_attr)) \
                    .filter(func.char_length(Attr.id) == ret["nesting_level"]).all()

        # the 'show.' indicates that we are looking for a specific nesting
        elif "show." in Attr_id:
            ret["nesting_level"] = Attr_id.split(".")[1]
            # filter table by requested nesting level
            attrs = Attr.query.filter(func.char_length(Attr.id) == ret["nesting_level"]).all()

        # the '.' here means we want to see all attrs within a certain distance
        elif "." in Attr_id:
            this_attr, distance = Attr_id.split(".")
            this_attr = Attr.query.get_or_404(this_attr)
            attrs = this_attr.get_neighbors(distance)

        else:
            attrs = [Attr.query.get_or_404(Attr_id)]

        ret["data"] = [fix_name(a.serialize(), lang) for a in attrs]
    # an ID/filter was not provided
    else:
        latest_year = __year_range__[dataset][-1]
        latest_month = False
        if "-" in latest_year:
            latest_year, latest_month = latest_year.split("-")
            latest_month = int(latest_month)
        latest_year = int(latest_year)

        conds = [getattr(Attr_weight_tbl,"{0}_id".format(attr)) == Attr.id, Attr_weight_tbl.year == latest_year]
        if latest_month:
            conds.append(Attr_weight_tbl.month == latest_month)

        query = db.session.query(Attr,Attr_weight_tbl).outerjoin(Attr_weight_tbl, and_(*conds))
        if Attr == School:
            query = query.filter(Attr.is_vocational == 1)


        if depth:
            query = query.filter(func.char_length(Attr.id) == depth)
        else:
            query = query.filter(func.char_length(Attr.id).in_(depths[attr]))

        if order:
            direction = "asc"

            if "." in order:
                o, direction = order.split(".")
            else:
                o = order

            if o == "name":
                o = "name_{0}".format(lang)

            if o == Attr_weight_col:
                order_table  = Attr_weight_tbl
            else:
                order_table = Attr

            if direction == "asc":
                query = query.order_by(asc(getattr(order_table,o)))
            elif direction == "desc":
                query = query.order_by(desc(getattr(order_table,o)))

        if limit:
            query = query.limit(limit).offset(offset)

        attrs_all = query.all()

        # just get items available in DB
        attrs_w_data = None
        if depth is None and limit is None:
            attrs_w_data = db.session.query(Attr, Attr_weight_tbl) \
                .filter(getattr(Attr_weight_tbl, Attr_weight_mergeid) == Attr.id) \
                .group_by(Attr.id)
            attrs_w_data = [a[0].id for a in attrs_w_data]

        attrs = []

        # all_planning_regions = {}

        for i, a in enumerate(attrs_all):
            b = a[0].serialize()
            if a[1]:
                c = a[1].serialize()
                if Attr_weight_col in c:
                    b[Attr_weight_col] = c[Attr_weight_col]
                else:
                    b[Attr_weight_col] = 0
            else:
                b[Attr_weight_col] = 0
            a = b
            if attrs_w_data:
                a["available"] = False
                if a["id"] in attrs_w_data:
                    a["available"] = True
            # if Attr_weight_col == "population" and len(a["id"]) == 9 and a["id"][:3] == "4mg":
            #     if not all_planning_regions:
            #         all_planning_regions = get_planning_region_map()
            #     if a["id"] in all_planning_regions:
            #         plr = all_planning_regions[a["id"]]
            #         a["plr"] = plr
            if order:
                a["rank"] = int(i+offset+1)
            if attr == "bra" and "id_ibge" not in a:
                a["id_ibge"] = False
            attrs.append(fix_name(a, lang))

        ret["data"] = attrs

    ret = jsonify(ret)
    ret.data = gzip_data(ret.data)

    if limit is None and cached_q is None:
        cached_query(cache_id, ret.data)

    ret.headers['Content-Encoding'] = 'gzip'
    ret.headers['Content-Length'] = str(len(ret.data))

    return ret
示例#7
0
def embed(app_name=None,
          dataset=None,
          bra_id=None,
          filter1=None,
          filter2=None,
          output=None):

    lang = request.args.get('lang', None) or g.locale

    if (g.user is None or not g.user.is_authenticated()) and request.is_xhr:
        cache_id = request.path + lang
        cached_q = cached_query(cache_id)
        if cached_q:
            ret = make_response(cached_q)
            ret.headers['Content-Encoding'] = 'gzip'
            ret.headers['Content-Length'] = str(len(ret.data))
            return ret
    '''Since the "builds" are held in the database with placeholders for
    attributes i.e. <cbo>, <hs>, <isic> we need to convert the IDs given
    in the URL to these placeholders. i.e.
         - a0111    = <isic>
         - 010101   = <hs>
         - all      = all
    '''
    build_filter1 = filter1
    if dataset == "rais" and build_filter1 != "all":
        build_filter1 = "<isic>"
    if dataset == "secex" and build_filter1 != "all":
        build_filter1 = "<hs>"

    build_filter2 = filter2
    if dataset == "rais" and build_filter2 != "all":
        build_filter2 = "<cbo>"
    if dataset == "secex" and build_filter2 != "all":
        build_filter2 = "<wld>"
    '''This is an instance of the Build class for the selected app,
    determined by the combination of app_type, dataset, filters and output.
    '''
    current_app = App.query.filter_by(type=app_name).first_or_404()
    current_build = Build.query.filter_by(app=current_app,
                                          dataset=dataset,
                                          filter1=build_filter1,
                                          filter2=build_filter2,
                                          output=output).first_or_404()
    current_build.set_filter1(filter1)
    current_build.set_filter2(filter2)
    current_build.set_bra(bra_id)
    '''Get the recommended app list to pass with data'''
    filler_bra = bra_id
    filler1 = filter1
    filler2 = filter2
    if output == "isic" or output == "hs":
        filler1 = "filler"
    elif output == "cbo" or output == "wld":
        filler2 = "filler"
    elif output == "bra":
        filler_bra = "filler"

    recs = recommend(app_name=app_name, dataset=dataset, bra_id=filler_bra, \
                        filter1=filler1, filter2=filler2, output=output)
    '''Every possible build, required by the embed page for building the build
    dropdown.
    '''
    all_builds = Build.query.all()
    for build in all_builds:
        build.set_filter1(filter1)
        build.set_filter2(filter2)
        build.set_bra(bra_id)
    '''Get URL query parameters from reqest.args object to return to the view.
    '''
    global_vars = {x[0]: x[1] for x in request.args.items()}
    if "controls" not in global_vars:
        global_vars["controls"] = "true"
    '''If user is logged in see if they have starred this app.'''
    starred = 0
    app_id = "/".join([app_name, dataset, bra_id, filter1, filter2, output])
    if g.user and g.user.is_authenticated():
        is_starred = Starred.query.filter_by(user=g.user,
                                             app_id=app_id).first()
        starred = 1 if is_starred else -1
    '''Get the actual data for the current build'''
    # view_data = rais_ybi(bra_id='sp', isic_id='a0112').data
    # app.url_map.bind('/').match('/attrs/wld/nausa/')

    if request.is_xhr:
        ret = jsonify({
            "current_build": current_build.serialize(),
            "all_builds": [b.serialize() for b in all_builds],
            "recommendations": json.loads(recs.data),
            "starred": starred
        })
        ret.data = gzip_data(ret.data)
        ret.headers['Content-Encoding'] = 'gzip'
        ret.headers['Content-Length'] = str(len(ret.data))
        if starred == 0 and cached_q is None:
            cached_query(cache_id, ret.data)
    else:
        ret = make_response(
            render_template("apps/embed.html",
                            all_builds=all_builds,
                            starred=starred,
                            form=DownloadForm(),
                            current_build=current_build,
                            global_vars=json.dumps(global_vars),
                            facebook_id=FACEBOOK_OAUTH_ID))

    ret.headers.add('Last-Modified', datetime.now())
    ret.headers.add(
        'Cache-Control',
        'no-store, no-cache, must-revalidate, post-check=0, pre-check=0')
    ret.headers.add('Pragma', 'no-cache')

    return ret
示例#8
0
def attrs(attr="bra",Attr_id=None):
    
    Attr = globals()[attr.title()]
    Attr_weight_mergeid = "{0}_id".format(attr)
    
    if attr == "bra":
        Attr_weight_tbl = Yb
        Attr_weight_col = "population"
    elif attr == "isic":
        Attr_weight_tbl = Yi
        Attr_weight_col = "num_emp"
    elif attr == "cbo":
        Attr_weight_tbl = Yo
        Attr_weight_col = "num_emp"
    elif attr == "hs":
        Attr_weight_tbl = Yp
        Attr_weight_col = "val_usd"
    elif attr == "wld":
        Attr_weight_tbl = Yw
        Attr_weight_col = "val_usd"
        
    depths = {}
    depths["bra"] = [2,4,7,8]
    depths["isic"] = [1,3,5]
    depths["cbo"] = [1,2,4]
    depths["hs"] = [2,4,6]
    depths["wld"] = [2,5]
    
    depth = request.args.get('depth', None)
    order = request.args.get('order', None)
    offset = request.args.get('offset', None)
    limit = request.args.get('limit', None)
    if offset:
        offset = float(offset)
        limit = limit or 50
        
    lang = request.args.get('lang', None) or g.locale
    ret = {}
    dataset = "rais"
    if Attr == Cbo or Attr == Hs:
        dataset = "secex"
    latest_year = __latest_year__[dataset]
        
    cache_id = request.path + lang
    if depth:
        cache_id = cache_id + "/" + depth
    # first lets test if this query is cached
    cached_q = cached_query(cache_id)
    if cached_q and limit is None:
        ret = make_response(cached_q)
        ret.headers['Content-Encoding'] = 'gzip'
        ret.headers['Content-Length'] = str(len(ret.data))
        return ret
    
    # if an ID is supplied only return that
    if Attr_id:
        
        # the '.show.' indicates that we are looking for a specific nesting
        if ".show." in Attr_id:
            this_attr, ret["nesting_level"] = Attr_id.split(".show.")
            # filter table by requested nesting level
            attrs = Attr.query \
                    .filter(Attr.id.startswith(this_attr)) \
                    .filter(func.char_length(Attr.id) == ret["nesting_level"]).all()

        # the 'show.' indicates that we are looking for a specific nesting
        elif "show." in Attr_id:
            ret["nesting_level"] = Attr_id.split(".")[1]
            # filter table by requested nesting level
            attrs = Attr.query.filter(func.char_length(Attr.id) == ret["nesting_level"]).all()
        
        # the '.' here means we want to see all attrs within a certain distance
        elif "." in Attr_id:
            this_attr, distance = Attr_id.split(".")
            this_attr = Attr.query.get_or_404(this_attr)
            attrs = this_attr.get_neighbors(distance)
        
        else:
            attrs = [Attr.query.get_or_404(Attr_id)]
        
        ret["data"] = [fix_name(a.serialize(), lang) for a in attrs]
    # an ID/filter was not provided
    else:
        query = db.session.query(Attr,Attr_weight_tbl) \
            .outerjoin(Attr_weight_tbl, and_(getattr(Attr_weight_tbl,"{0}_id".format(attr)) == Attr.id, Attr_weight_tbl.year == latest_year))
        if depth:
            query = query.filter(func.char_length(Attr.id) == depth)
        else:
            query = query.filter(func.char_length(Attr.id).in_(depths[attr]))
            
        if order:
            direction = "asc"
        
            if "." in order:
                o, direction = order.split(".")
            else:
                o = order
                
            if o == "name":
                o = "name_{0}".format(lang)
                
            if o == Attr_weight_col:
                order_table  = Attr_weight_tbl
            else:
                order_table = Attr
                
            if direction == "asc":
                query = query.order_by(asc(getattr(order_table,o)))
            elif direction == "desc":
                query = query.order_by(desc(getattr(order_table,o)))
                
        if limit:
            query = query.limit(limit).offset(offset)
        
        attrs_all = query.all()
        
        # just get items available in DB
        attrs_w_data = None
        if depth is None and limit is None:
            attrs_w_data = db.session.query(Attr, Attr_weight_tbl) \
                .filter(getattr(Attr_weight_tbl, Attr_weight_mergeid) == Attr.id) \
                .group_by(Attr.id)
            # raise Exception(attrs_w_data.all())
            attrs_w_data = [a[0].id for a in attrs_w_data]
                        
        attrs = []
        for i, a in enumerate(attrs_all):
            b = a[0].serialize()
            if a[1]:
                b[Attr_weight_col] = a[1].serialize()[Attr_weight_col]
            else:
                b[Attr_weight_col] = 0
            a = b
            if attrs_w_data:
                a["available"] = False
                if a["id"] in attrs_w_data:
                    a["available"] = True
            if Attr_weight_col == "population" and len(a["id"]) == 8 and a["id"][:2] == "mg":
                plr = Bra.query.get_or_404(a["id"]).pr2.first()
                if plr: a["plr"] = plr.id
            if order:
                a["rank"] = int(i+offset+1)
            attrs.append(fix_name(a, lang))
        
        ret["data"] = attrs
    
    ret = jsonify(ret)
    ret.data = gzip_data(ret.data)
    
    if limit is None and cached_q is None:
        cached_query(cache_id, ret.data)
            
    ret.headers['Content-Encoding'] = 'gzip'
    ret.headers['Content-Length'] = str(len(ret.data))
        
    return ret