Example #1
def app(request):
    request.max_content_length = MAX_CONTENT_LENGTH

    auth = request.authorization
    if not auth:
        return login_required()

    match = location_re.match(request.path)

    if not match:
        return NotFound()

    groups = match.groups()
    if not groups:
        return NotFound()

    db, schema, func_name, path = groups
    path = '' if path is None else path
    username = auth.username
    password = auth.password

    try:
        environ = Json({k: v for k, v in request.environ.items() if k.isupper()})
        path = Json(list(filter(None, path.split('/'))))  # list() so json.dumps can serialize it
        args = Json(dict(request.args))
        data = Json(loads(request.get_data() or 'null'))
        db = connect(database=db, user=username, password=password)
    except Exception:
        logger.exception(request.path)
        return InternalServerError()
    with db.cursor() as cur:
        try:
            cur.callproc(
                'http.process_request',
                (request.method,
                 schema, func_name,
                 environ, path, args,
                 data))
            result = cur.fetchone()[0]
            response = Response(dumps(result), mimetype='application/json')
            db.commit()
            return response
        except psycopg2.Error as e:
            db.rollback()
            logger.exception(request.path)
            if e.diag.message_primary.isdigit():
                code = int(e.diag.message_primary)
                return default_exceptions.get(code, InternalServerError)()
            else:
                return InternalServerError(e.diag.message_primary)
        except Exception:
            db.rollback()
            logger.exception(request.path)
            return InternalServerError()
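
All of the examples on this page revolve around psycopg2.extras.Json, which wraps a Python object so that it is serialized with json.dumps and safely quoted at execute time. A minimal, self-contained sketch of that core pattern (the DSN and table name are placeholders, not taken from the example above):

import psycopg2
from psycopg2.extras import Json

# Placeholder DSN; point this at a real database to run the sketch.
conn = psycopg2.connect("dbname=example")
with conn, conn.cursor() as cur:
    cur.execute("CREATE TABLE IF NOT EXISTS docs (id serial PRIMARY KEY, body json)")
    # Json(...) adapts the dict to a JSON literal when the query executes.
    cur.execute("INSERT INTO docs (body) VALUES (%s)", [Json({"a": 1, "b": [2, 3]})])
conn.close()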
Example #2
def add_user_privileges(db, userPrivileges, userId):
    db.call_procedure("AddUserPrivileges", [Json(userPrivileges), userId])
    return {"privileges": userPrivileges, "user_id": userId}
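Here db.call_procedure is the application's own wrapper, but if it is a thin one (an assumption), the underlying psycopg2 call would look roughly like this:

from psycopg2.extras import Json

def call_procedure(conn, name, args):
    # Sketch: cursor.callproc() invokes the stored function with adapted args.
    with conn.cursor() as cur:
        cur.callproc(name, args)
        return cur.fetchall()

# call_procedure(conn, "AddUserPrivileges", [Json(user_privileges), user_id])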
Example #3
def create_multiprod_range_entry(dc, product, crses):
    conn = get_sqlconn(dc)
    txn = conn.begin()
    prodids = [ p.id for p in product.products ]
    wms_name = product.name

    # Attempt to insert row
    conn.execute("""
        INSERT INTO wms.multiproduct_ranges
        (wms_product_name,lat_min,lat_max,lon_min,lon_max,dates,bboxes)
        VALUES
        (%(p_id)s, 0, 0, 0, 0, %(empty)s, %(empty)s)
        ON CONFLICT (wms_product_name) DO NOTHING
        """,
                 {"p_id": wms_name, "empty": Json("")})

    # Update extents
    conn.execute("""
        UPDATE wms.multiproduct_ranges
        SET (lat_min,lat_max,lon_min,lon_max) =
        (wms_get_min(%(p_prodids)s, 'lat'), wms_get_max(%(p_prodids)s, 'lat'), wms_get_min(%(p_prodids)s, 'lon'), wms_get_max(%(p_prodids)s, 'lon'))
        WHERE wms_product_name=%(p_id)s
        """,
                 {"p_id": wms_name, "p_prodids": prodids})

    # Create sorted list of dates
    conn.execute("""
        WITH sorted
        AS (SELECT to_jsonb(array_agg(dates.d))
            AS dates
            FROM (SELECT DISTINCT to_date(metadata::json->'extent'->>'center_dt', 'YYYY-MM-DD')
                  AS d
                  FROM agdc.dataset
                  WHERE dataset_type_ref = any (%(p_prodids)s)
                  AND archived IS NULL
                  ORDER BY d) dates)
        UPDATE wms.multiproduct_ranges
        SET dates=sorted.dates
        FROM sorted
        WHERE wms_product_name=%(p_id)s
        """,
                 {"p_id": wms_name, "p_prodids": prodids})

    # calculate bounding boxes
    results = list(conn.execute("""
        SELECT lat_min,lat_max,lon_min,lon_max
        FROM wms.multiproduct_ranges
        WHERE wms_product_name=%(p_id)s
        """,
        {"p_id": wms_name} ))

    r = results[0]

    epsg4326 = datacube.utils.geometry.CRS("EPSG:4326")
    box = datacube.utils.geometry.box(
        float(r[2]),
        float(r[0]),
        float(r[3]),
        float(r[1]),
        epsg4326)

    svc = get_service_cfg()
    conn.execute("""
        UPDATE wms.multiproduct_ranges
        SET bboxes = %s::jsonb
        WHERE wms_product_name=%s
        """,
                 Json({crsid: jsonise_bbox(box.to_crs(crs).boundingbox) for crsid, crs in get_crses(svc).items()}),
                 wms_name
    )

    txn.commit()
    conn.close()
    return
Example #4
def _to_tuple(obj):
    """Serialize dataclass into SQL insertable tuple"""
    return tuple(
        Json(v) if isinstance(v, dict) else v for _, v in obj.__dict__.items())
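A usage sketch for _to_tuple above with a hypothetical dataclass; dict insertion order follows the field declaration order, so the tuple lines up with positional %s placeholders:

from dataclasses import dataclass
from psycopg2.extras import Json

@dataclass
class Event:  # hypothetical record type, not from the snippet above
    name: str
    payload: dict

row = _to_tuple(Event("login", {"ip": "10.0.0.1"}))
# ('login', Json({'ip': '10.0.0.1'})) -- ready for positional placeholders:
# cur.execute("INSERT INTO events (name, payload) VALUES (%s, %s)", row)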
Example #5
 def contains(self, other):
     clone = self.as_json(True)
     if isinstance(other, (list, dict)):
         return Expression(clone, JSONB_CONTAINS, Json(other))
     return Expression(clone, JSONB_EXISTS, other)
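contains() here is a peewee playhouse.postgres_ext internal; at the query level these helpers surface on a BinaryJSONField roughly like this (the model and data are hypothetical):

from peewee import Model
from playhouse.postgres_ext import PostgresqlExtDatabase, BinaryJSONField

db = PostgresqlExtDatabase("example")  # placeholder database name

class Doc(Model):
    data = BinaryJSONField()

    class Meta:
        database = db

Doc.select().where(Doc.data.contains({"status": "ok"}))   # jsonb @> containment
Doc.select().where(Doc.data.contains("status"))           # jsonb ? key existence
Doc.select().where(Doc.data.contained_by({"status": "ok", "extra": 1}))  # jsonb <@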
Example #6
 def contained_by(self, other):
     return Expression(cast_jsonb(self), JSONB_CONTAINED_BY, Json(other))
Example #7
 def concat(self, value):
     return super(JSONField, self).concat(Json(value))
Example #8
 def concat(self, rhs):
     if not isinstance(rhs, Node):
         rhs = Json(rhs)
     return Expression(self.as_json(True), OP.CONCAT, rhs)
Example #9
retweet_filter = '-filter:retweets'
reply_filter = '-filter:replies'
atuser = '******'
tweetsPerQry = 100
searchQuery = atuser + ' AND ' + retweet_filter + ' AND ' + reply_filter
test_tweets = api.search(q=searchQuery,
                         count=tweetsPerQry,
                         tweet_mode='extended')
tweetframe = pd.DataFrame(columns=['tweet_id', 'tweet_json'],
                          index=range(len(test_tweets)))
for i, tweet in enumerate(test_tweets):
    #print(tweet.id)
    # .loc[i][col] is chained indexing and may assign to a copy; use .loc[i, col].
    tweetframe.loc[i, 'tweet_id'] = tweet.id
    tweetframe.loc[i, 'tweet_json'] = Json(tweet._json)

tablename = 'test_tweets'
cur = con.cursor()

tweetframe.to_sql(tablename, engine, if_exists='replace')
#if not engine.dialect.has_table(engine,tablename):
#    command = """
#        CREATE TABLE test_tweets(
#        tweet_id int,
#        tweet_json varchar(10000)
#        PRIMARY KEY (tweet_id)
#        )
#        """
#    cur.execute(command)
Example #10
 def convert_to_column(self, value, record, values=None, validate=True):
     val = self.convert_to_cache(value, record, validate=validate)
     return Json(val) if val else None
Example #11
 def _wrap_json(self, arguments: Dict[str, Any]):
     return {
         key: Json(value, dumps=self.json_dumps)
         if isinstance(value, dict) else value
         for key, value in arguments.items()
     }
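Json accepts a dumps callable, which is what lets _wrap_json plug in self.json_dumps. A sketch of a custom serializer that makes datetimes JSON-safe (the encoder policy is an assumption, not part of the snippet above):

import json
from datetime import datetime, timezone
from functools import partial
from psycopg2.extras import Json

def _default(o):
    # Assumed policy: ISO-format datetimes, reject everything else.
    if isinstance(o, datetime):
        return o.isoformat()
    raise TypeError("not JSON serializable: %r" % type(o))

json_dumps = partial(json.dumps, default=_default)
param = Json({"seen_at": datetime.now(timezone.utc)}, dumps=json_dumps)
# cur.execute("INSERT INTO events (data) VALUES (%s)", [param])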
Example #12
def insert(items=None,
           commerce_listings=None,
           commerce_prices=None,
           scratch=False):

    # Nothing to do when no data was passed at all.
    if items is None and commerce_listings is None and commerce_prices is None:
        return

    if items is None:
        items = []
    if commerce_listings is None:
        commerce_listings = []
    if commerce_prices is None:
        commerce_prices = []

    con = None
    try:
        con = psycopg2.connect("dbname='market_manipulator' user='******'")
        cur = con.cursor()

        if scratch:
            for item in items:
                cur.execute(
                    "INSERT INTO scratch_items "
                    "(id, name, icon, type, rarity, level, vendor_value, flags, game_types, restrictions) "
                    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", [
                        item['id'], item['name'], item['icon'],
                        item['type'], item['rarity'], item['level'],
                        item['vendor_value'], item['flags'],
                        item['game_types'], item['restrictions']
                    ])

            for commerce_listing in commerce_listings:
                for buy in commerce_listing['buys']:
                    cur.execute(
                        "INSERT INTO scratch_commerce_listings_buy "
                        "(id, buy_listings, buy_unit_price, buy_quantity) "
                        "VALUES (%s, %s, %s, %s)", [
                            commerce_listing['id'], buy['listings'],
                            buy['unit_price'], buy['quantity']
                        ])

                for sell in commerce_listing['sells']:
                    cur.execute(
                        "INSERT INTO scratch_commerce_listings_sell "
                        "(id, sell_listings, sell_unit_price, sell_quantity) "
                        "VALUES (%s, %s, %s, %s)", [
                            commerce_listing['id'], sell['listings'],
                            sell['unit_price'], sell['quantity']
                        ])

            for commerce_price in commerce_prices:
                cur.execute(
                    "INSERT INTO scratch_commerce_prices "
                    "(id, buy_quantity, buy_unit_price, sell_quantity, sell_unit_price) "
                    "VALUES (%s, %s, %s, %s, %s)", [
                        commerce_price['id'],
                        commerce_price['buys']['quantity'],
                        commerce_price['buys']['unit_price'],
                        commerce_price['sells']['quantity'],
                        commerce_price['sells']['unit_price']
                    ])

        else:
            for item in items:
                cur.execute("INSERT INTO items (id, data) VALUES (%s, %s)",
                            [item['id'], Json(item)])

            for commerce_listing in commerce_listings:
                cur.execute(
                    "INSERT INTO commerce_listings (id, data) VALUES (%s, %s)",
                    [commerce_listing['id'], Json(commerce_listing)])

            for commerce_price in commerce_prices:
                cur.execute(
                    "INSERT INTO commerce_prices (id, data) VALUES (%s, %s)",
                    [commerce_price['id'], Json(commerce_price)])

        con.commit()

    except psycopg2.DatabaseError as e:
        if con:
            con.rollback()
        print('Database Error: %s' % e)
        sys.exit(1)
Example #13
parser.add_argument('--host', type=str, help='db host', default="127.0.0.1")
parser.add_argument('--dryrun', action="store_true")
args = parser.parse_args()
conn = psycopg2.connect(dbname=args.dbname,
                        user=args.user,
                        password=args.password,
                        host=args.host)

SELECT_SQL = "SELECT created, updated, id, json, version_id from records_metadata"
UPDATE_SQL = "UPDATE records_metadata SET json = %s WHERE id = %s AND version_id = %s"

with conn.cursor() as cur:
    with conn.cursor() as cur2:
        cur.execute(SELECT_SQL)
        for rec in cur:
            md = rec[3]
            if md is not None:
                b2rec_value = get_b2rec_value(md['_pid'])
                if not pid_has_doi(md['_pid']) and b2rec_value:
                    doi_value = {
                        'type': 'DOI',
                        'value': '10.23728/b2share.{0}'.format(b2rec_value)
                    }
                    md['_pid'].append(doi_value)
                    pprint(doi_value)
                    pprint(md)
                    if not args.dryrun:
                        cur2.execute(UPDATE_SQL, (Json(md), rec[2], rec[4]))
if not args.dryrun:
    conn.commit()
Example #14
            input_script = tx[counter:counter + scriptLen]
            counter += scriptLen
            #input_sequence is the separator 'FF FF FF FF' for inputs and outputs
            input_sequence = tx[counter:counter + 8]
            counter += 8

            input_details = {
                "in_num": input_num,
                "in_tx": input_tx,
                "in_index": input_index,
                "in_script": input_script,
                "address": "",
                "in_val": 0
            }
            trans["inputs"].append(input_details)
            trans["JSONinputs"].append(Json(input_details))
            #print "input:num,tx,index,script_len,sequence", input_num, input_tx, input_index, input_sequence

        # -----------------------------OUTPUT PARSING-----------------------------------------------------
        #let's get output count
        tx_outCount = tx[counter:counter + 2]
        #print tx_outCount
        if tx_outCount != "fd":  # < 255 outputs
            outCount = int(tx_outCount, 16)
            counter += 2
        elif tx_outCount == "fd" and len(
                access.getrawtransaction(bInfo["tx"][r],
                                         1)["vout"]) >= 255:  # > 255 outputs
            outCount = getLitEndian(tx[counter + 2:counter + 6])
            counter += 6
        else:  # the actual hex byte === fd --> exactly 253 outputs
Example #15
 def run(pg_txn):
     val = Json(txn._marshal())
     pg_txn.execute("SELECT pgetcd.submit(%s,%s)", (val, 10))
     rows = pg_txn.fetchall()
     res = "{0}".format(rows[0][0])
     return res
Example #16
 def concat(self, value):
     if not isinstance(value, Node):
         value = Json(value)
     return super(JSONField, self).concat(value)
Example #17
def upsert_summary(
    msm,
    scores,
    anomaly: bool,
    confirmed: bool,
    msm_failure: bool,
    measurement_uid: str,
    software_name: str,
    software_version: str,
    platform: str,
    update: bool,
) -> None:
    """Insert a row in the fastpath_scores table. Overwrite an existing one."""
    sql_base_tpl = dedent("""\
    INSERT INTO fastpath (measurement_uid, report_id, domain, input, probe_cc, probe_asn, test_name,
        test_start_time, measurement_start_time, platform, software_name, software_version, scores,
        anomaly, confirmed, msm_failure)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    ON CONFLICT ON CONSTRAINT fastpath_pkey DO
    """)
    sql_update = dedent("""\
    UPDATE SET
        report_id = excluded.report_id,
        domain = excluded.domain,
        input = excluded.input,
        probe_cc = excluded.probe_cc,
        probe_asn = excluded.probe_asn,
        test_name = excluded.test_name,
        test_start_time = excluded.test_start_time,
        measurement_start_time = excluded.measurement_start_time,
        platform = excluded.platform,
        software_name = excluded.software_name,
        software_version = excluded.software_version,
        scores = excluded.scores,
        anomaly = excluded.anomaly,
        confirmed = excluded.confirmed,
        msm_failure = excluded.msm_failure
    """)
    sql_noupdate = " NOTHING"

    tpl = sql_base_tpl + (sql_update if update else sql_noupdate)

    # TODO: remove msmt parsing from upsert_summary
    asn = int(msm["probe_asn"][2:])  # AS123
    test_name = msm.get("test_name", None)
    input_ = msm.get("input", None)

    if test_name == "meek_fronted_requests_test" and isinstance(input_, list):
        domain = None if input_ is None else urlparse(input_[0]).netloc
        input_ = ":".join(input_)
    else:
        domain = None if input_ is None else urlparse(input_).netloc

    args = (
        measurement_uid,
        msm["report_id"],
        domain,
        input_,
        msm["probe_cc"],
        asn,
        test_name,
        msm["test_start_time"],
        msm["measurement_start_time"],
        platform,
        software_name,
        software_version,
        Json(scores, dumps=ujson.dumps),
        anomaly,
        confirmed,
        msm_failure,
    )

    # Send notification using pg_notify
    # TODO: do not send notifications during manual run or in devel mode
    notif_cols = (
        "report_id",
        "input",
        "probe_cc",
        "probe_asn",
        "test_name",
        "test_start_time",
        "measurement_start_time",
    )

    assert _autocommit_conn
    with _autocommit_conn.cursor() as cur:
        try:
            cur.execute(tpl, args)
        except psycopg2.ProgrammingError:
            log.error("upsert syntax error in %r", tpl, exc_info=True)
            return

        if cur.rowcount == 0:
            if update:
                log.error("Failed to upsert")
            else:
                metrics.incr("measurement_noupsert_count")
                log.info(f"measurement tid/uid collision")
                return
        else:
            metrics.incr("measurement_upsert_count")
Example #18
 def db_value(self, value):
     if value is None:
         return value
     if not isinstance(value, Json):
         return Json(value, dumps=self.dumps)
     return value
Example #19
def seed_database(conn, schema="public"):
    with conn.cursor() as cur:

        cur.execute(
            sql.SQL("SET search_path TO {};").format(sql.Identifier(schema)))

        print("Writing metadata maps...", end="", flush=True)

        # Load metadata map
        with open(data_path / "metadata_map.json", "r") as fp:
            metadata_map = json.loads(fp.read())

        # Make a table for each metadata field
        for field in config["metadata_cols"].keys():
            table_name = "metadata_{}".format(field)
            cur.execute('DROP TABLE IF EXISTS "{table_name}";'.format(
                table_name=table_name))
            cur.execute("""
                CREATE TABLE "{table_name}" (
                    id INTEGER PRIMARY KEY,
                    value TEXT NOT NULL
                );
            """.format(table_name=table_name))
            metadata_df = pd.DataFrame.from_records(
                list(metadata_map[field].items()),
                columns=["id", "value"]).set_index("id")
            df_to_sql(cur, metadata_df, table_name, index_label="id")
            cur.execute("""
                CREATE INDEX "ix_{table_name}_id" ON "{table_name}"("id");
                """.format(table_name=table_name))

        print("done")

        print("Writing SNV maps...", end="", flush=True)

        # DNA SNVs
        dna_snp = process_dna_snvs(metadata_map["dna_snp"])
        cur.execute('DROP TABLE IF EXISTS "dna_snp";')
        cur.execute("""
            CREATE TABLE "dna_snp" (
                id          INTEGER  PRIMARY KEY,
                snp_str     TEXT     NOT NULL,
                pos         INTEGER  NOT NULL,
                ref         TEXT     NOT NULL,
                alt         TEXT     NOT NULL,
                color       TEXT     NOT NULL,
                snv_name    TEXT     NOT NULL
            );
            """)
        df_to_sql(cur, dna_snp, "dna_snp", index_label="id")
        cur.execute('CREATE INDEX "ix_dna_snp_pos" ON "dna_snp"("pos");')

        # AA SNVs
        gene_aa_snp = process_aa_snvs(metadata_map["gene_aa_snp"], "gene",
                                      genes)
        cur.execute('DROP TABLE IF EXISTS "gene_aa_snp";')
        cur.execute("""
            CREATE TABLE "gene_aa_snp" (
                id          INTEGER  PRIMARY KEY,
                snp_str     TEXT     NOT NULL,
                gene        TEXT     NOT NULL,
                pos         INTEGER  NOT NULL,
                ref         TEXT     NOT NULL,
                alt         TEXT     NOT NULL,
                color       TEXT     NOT NULL,
                snv_name    TEXT     NOT NULL,
                nt_pos      INTEGER  NOT NULL
            );
            """)
        df_to_sql(cur, gene_aa_snp, "gene_aa_snp", index_label="id")
        cur.execute(
            'CREATE INDEX "ix_gene_aa_snp_pos" ON "gene_aa_snp"("pos");')
        cur.execute(
            'CREATE INDEX "ix_gene_aa_snp_nt_pos" ON "gene_aa_snp"("nt_pos");')
        cur.execute(
            'CREATE INDEX "ix_gene_aa_snp_gene" ON "gene_aa_snp"("gene");')

        protein_aa_snp = process_aa_snvs(metadata_map["protein_aa_snp"],
                                         "protein", proteins)
        cur.execute('DROP TABLE IF EXISTS "protein_aa_snp";')
        cur.execute("""
            CREATE TABLE "protein_aa_snp" (
                id          INTEGER  PRIMARY KEY,
                snp_str     TEXT     NOT NULL,
                protein     TEXT     NOT NULL,
                pos         INTEGER  NOT NULL,
                ref         TEXT     NOT NULL,
                alt         TEXT     NOT NULL,
                color       TEXT     NOT NULL,
                snv_name    TEXT     NOT NULL,
                nt_pos      INTEGER  NOT NULL
            );
            """)
        df_to_sql(cur, protein_aa_snp, "protein_aa_snp", index_label="id")
        cur.execute(
            'CREATE INDEX "ix_protein_aa_snp_pos" ON "protein_aa_snp"("pos");')
        cur.execute(
            'CREATE INDEX "ix_protein_aa_snp_nt_pos" ON "protein_aa_snp"("nt_pos");'
        )
        cur.execute(
            'CREATE INDEX "ix_protein_aa_snp_protein" ON "protein_aa_snp"("protein");'
        )

        print("done")

        print("Writing location map...", end="", flush=True)

        # Locations
        location_map = pd.read_json(data_path / "location_map.json")
        cur.execute('DROP TABLE IF EXISTS "location";')
        cur.execute("""
            CREATE TABLE "location" (
                id        INTEGER  PRIMARY KEY,
                region    TEXT     NOT NULL,
                country   TEXT     NOT NULL,
                division  TEXT     NOT NULL,
                location  TEXT     NOT NULL
            );
            """)
        df_to_sql(cur, location_map, "location", index_label="id")

        print("done")

        print("Writing groups and consensus SNVs...", end="", flush=True)

        # Consensus SNVs
        with (data_path / "group_consensus_snps.json").open("r") as fp:
            group_consensus_snps = json.loads(fp.read())

        snp_fields = ["dna", "gene_aa", "protein_aa"]
        for grouping in group_consensus_snps.keys():
            for snp_field in snp_fields:
                table_name = "{grouping}_consensus_{snp_field}_snp".format(
                    grouping=grouping, snp_field=snp_field)
                # Create tables
                cur.execute('DROP TABLE IF EXISTS "{table_name}";'.format(
                    table_name=table_name))
                cur.execute("""
                    CREATE TABLE "{table_name}" (
                        name    TEXT     NOT NULL,
                        snp_id  INTEGER  NOT NULL
                    );
                    """.format(table_name=table_name))

                # Collect tuples of (group, snp_id)
                group_snps = []
                for group in group_consensus_snps[grouping].keys():
                    for snp_id in group_consensus_snps[grouping][group][
                            snp_field + "_snp_ids"]:
                        group_snps.append((group, snp_id))

                group_snp_df = pd.DataFrame.from_records(
                    group_snps, columns=["name", "snp_id"]).set_index("name")
                df_to_sql(cur, group_snp_df, table_name, index_label="name")

                cur.execute("""
                    CREATE INDEX "idx_{table_name}_name" ON "{table_name}"("name");
                    """.format(table_name=table_name))

        # Grouping tables

        # Build colormaps
        for grouping in group_consensus_snps.keys():
            group_df = pd.DataFrame(
                {"name": list(group_consensus_snps[grouping].keys())})
            group_df["color"] = group_df["name"].map(
                get_categorical_colormap(group_df["name"]))

            cur.execute(
                'DROP TABLE IF EXISTS "{grouping}";'.format(grouping=grouping))
            cur.execute("""
                CREATE TABLE "{grouping}" (
                    id     INTEGER  PRIMARY KEY,
                    name   TEXT     NOT NULL,
                    color  TEXT     NOT NULL
                );
                """.format(grouping=grouping))
            df_to_sql(cur, group_df, grouping, index_label="id")
            cur.execute("""
                CREATE INDEX "idx_{grouping}_name" on "{grouping}"("name")
                """.format(grouping=grouping))

        print("done")

        print("Writing sequence metadata...", end="", flush=True)

        # Sequence metadata
        case_data = pd.read_json(data_path / "case_data.json")
        # case_data = case_data.set_index("Accession ID")
        case_data["collection_date"] = pd.to_datetime(
            case_data["collection_date"])
        case_data["submission_date"] = pd.to_datetime(
            case_data["submission_date"])
        # print(case_data.columns)

        # Make a column for each metadata field
        metadata_cols = []
        metadata_col_defs = ""
        for field in config["metadata_cols"].keys():
            metadata_col_defs += "{field} INTEGER NOT NULL,\n".format(
                field=field)
            metadata_cols.append(field)

        # Make a column for each grouping
        grouping_cols = []
        grouping_col_defs = ""
        for grouping in group_consensus_snps.keys():
            grouping_col_defs += "{grouping} TEXT NOT NULL,\n".format(
                grouping=grouping)
            grouping_cols.append(grouping)

        cur.execute('DROP TABLE IF EXISTS "metadata";')
        cur.execute("""
            CREATE TABLE "metadata" (
                id               INTEGER    PRIMARY KEY,
                "Accession ID"   TEXT       NOT NULL,
                collection_date  TIMESTAMP  NOT NULL,
                submission_date  TIMESTAMP  NOT NULL,
                {metadata_col_defs}
                {grouping_col_defs}
                location_id      INTEGER    NOT NULL
            );
            """.format(metadata_col_defs=metadata_col_defs,
                       grouping_col_defs=grouping_col_defs))
        df_to_sql(
            cur,
            case_data[["Accession ID", "collection_date", "submission_date"] +
                      metadata_cols + grouping_cols + ["location_id"]],
            "metadata",
            index_label="id",
        )
        # Create indices
        cur.execute(
            'CREATE INDEX "ix_metadata_collection_date" ON "metadata"("collection_date");'
        )
        cur.execute(
            'CREATE INDEX "ix_metadata_submission_date" ON "metadata"("submission_date");'
        )
        cur.execute(
            'CREATE INDEX "ix_metadata_location_id" ON "metadata"("location_id");'
        )
        for field in config["metadata_cols"].keys():
            cur.execute(
                'CREATE INDEX "ix_metadata_{field}" ON "metadata"("{field}");'.
                format(field=field))

        print("done")

        print("Writing sequence SNVs...", end="", flush=True)

        # Sequence SNV data
        snp_fields = ["dna", "gene_aa", "protein_aa"]
        for snp_field in snp_fields:
            snp_col = snp_field + "_snp_str"
            table_name = "sequence_{snp_field}_snp".format(snp_field=snp_field)
            cur.execute('DROP TABLE IF EXISTS "{table_name}";'.format(
                table_name=table_name))
            cur.execute("""
                CREATE TABLE "{table_name}" (
                    sequence_id  INTEGER  NOT NULL,
                    snp_id       INTEGER  NOT NULL
                );
                """.format(table_name=table_name))
            df_to_sql(
                cur,
                case_data[[snp_col]].explode(snp_col),
                table_name,
                index_label="sequence_id",
            )
            cur.execute(
                'CREATE INDEX "ix_{table_name}_sequence_id" ON "{table_name}"("sequence_id");'
                .format(table_name=table_name))
            cur.execute(
                'CREATE INDEX "ix_{table_name}_snp_id" ON "{table_name}"("snp_id");'
                .format(table_name=table_name))

        print("done")

        print("Writing JSONs...", end="", flush=True)

        # Stats table
        cur.execute('DROP TABLE IF EXISTS "stats";')
        cur.execute("""
            CREATE TABLE "stats" (
                value JSON NOT NULL
            );
            """)

        stats = {
            "num_sequences": len(case_data),
            "data_date": datetime.date.today().isoformat(),
        }

        cur.execute(
            """
            INSERT INTO "stats" (value) VALUES (%s)
            """,
            [Json(stats)],
        )

        # Country score
        # Just dump this as a big JSON
        cur.execute('DROP TABLE IF EXISTS "country_score";')
        cur.execute("""
            CREATE TABLE "country_score" (
                value JSON NOT NULL
            );
            """)
        with (data_path / "country_score.json").open("r") as fp:
            country_score = json.loads(fp.read())

        cur.execute(
            """
            INSERT INTO "country_score" (value) VALUES (%s)
            """,
            [Json(country_score)],
        )

        # Geo select tree
        # Just dump this as a JSON string in a table
        cur.execute('DROP TABLE IF EXISTS "geo_select_tree";')
        cur.execute("""
            CREATE TABLE "geo_select_tree" (
                value JSON NOT NULL
            )
            """)
        with (data_path / "geo_select_tree.json").open("r") as fp:
            geo_select_tree = json.loads(fp.read())

        cur.execute(
            """
            INSERT INTO "geo_select_tree" (value) VALUES (%s)
            """,
            [Json(geo_select_tree)],
        )

        print("done")
Example #20
 def contains(self, other):
     if isinstance(other, (list, dict)):
         return Expression(self, JSONB_CONTAINS, Json(other))
     return Expression(cast_jsonb(self), JSONB_EXISTS, other)
Example #21
def save_package_metadata(package_metadata):
    cur.execute("insert into package (id, metadata) values (%s, %s)",
                (package_metadata['name'], Json(package_metadata)))
Example #22
 def concat(self, rhs):
     return Expression(self.as_json(True), OP.CONCAT, Json(rhs))
Example #23
 def contains(self, other):
     if isinstance(other, (list, dict)):
         return Expression(self, OP.JSONB_CONTAINS, Json(other))
     return Expression(self, OP.JSONB_EXISTS, Passthrough(other))
Example #24
def _to_dict(obj):
    """Serialize dataclass into SQL insertable dictionary"""
    return {
        k: Json(v) if isinstance(v, dict) else v
        for k, v in obj.__dict__.items()
    }
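The dict variant pairs with psycopg2's named %(key)s placeholders, which keeps column order out of the Python code; a minimal sketch (the table and columns are hypothetical):

from psycopg2.extras import Json

params = {"name": "login", "payload": Json({"ip": "10.0.0.1"})}
# Named placeholders pull values from the dict by key:
# cur.execute("INSERT INTO events (name, payload) VALUES (%(name)s, %(payload)s)", params)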
Example #25
 def contained_by(self, other):
     return Expression(self, OP.JSONB_CONTAINED_BY, Json(other))
Example #26
def rng_update(conn, rng, product, path=None):
    # pylint: disable=bad-continuation
    if isinstance(product, ProductLayerDef):
        if product.multi_product:
            assert path is None
            conn.execute("""
            UPDATE wms.multiproduct_ranges
            SET
                  lat_min=%s,
                  lat_max=%s,
                  lon_min=%s,
                  lon_max=%s,   
                  dates=%s,
                  bboxes=%s
            WHERE wms_product_name=%s
            """,
                         rng["lat"]["min"],
                         rng["lat"]["max"],
                         rng["lon"]["min"],
                         rng["lon"]["max"],
                         Json([t.strftime("%Y-%m-%d") for t in rng["times"]]),
                         Json({crsid: jsonise_bbox(bbox) for crsid, bbox in rng["bboxes"].items() }),
                         product.name)
            return
        product = product.product
    if path is not None:
        conn.execute("""
            UPDATE wms.sub_product_ranges
            SET
                  lat_min=%s,
                  lat_max=%s,
                  lon_min=%s,
                  lon_max=%s,   
                  dates=%s,
                  bboxes=%s
            WHERE product_id=%s
            AND   sub_product_id=%s
                 """,
                     rng["lat"]["min"],
                     rng["lat"]["max"],
                     rng["lon"]["min"],
                     rng["lon"]["max"],

                     Json([t.strftime("%Y-%m-%d") for t in rng["times"]]),
                     Json({crsid: jsonise_bbox(bbox) for crsid, bbox in rng["bboxes"].items() }),
                     product.id,
                     path
                     )
    else:
        conn.execute("""
            UPDATE wms.product_ranges
            SET
                  lat_min=%s,
                  lat_max=%s,
                  lon_min=%s,
                  lon_max=%s,   
                  dates=%s,
                  bboxes=%s
            WHERE id=%s
                 """,
                 rng["lat"]["min"],
                 rng["lat"]["max"],
                 rng["lon"]["min"],
                 rng["lon"]["max"],

                 Json([t.strftime("%Y-%m-%d") for t in rng["times"]]),
                 Json({crsid: jsonise_bbox(bbox) for crsid, bbox in rng["bboxes"].items() }),
                 product.id
                 )
Example #27
        def fit(df, fval, i, n):
            """
            Given a dataframe df with group-by query results, partition attributes f (outer scope), and values fval for the partition attributes, try to find local patterns for all aggregation functions and n (TODO explain n). Also keep track of deviation of agg function results from the median for this values of the partition attributes.
            """
            nonlocal global_dev_pos, global_dev_neg, valid_l_f, valid_c_f
            if not self.config.fit:
                return
            self.stats.startTimer('regression')
            for agg in aggList:
                if agg not in sample_invalid[i] or 'c' not in sample_invalid[
                        i][agg]:
                    avg = mean(df[agg])
                    describe = [
                        avg,
                        mode(df[agg]),
                        percentile(df[agg], 25),
                        percentile(df[agg], 50),
                        percentile(df[agg], 75)
                    ]
                    dev_pos = max(df[agg]) - avg
                    dev_neg = min(df[agg]) - avg
                    # fitting constant
                    theta_c = chisquare(df[agg].dropna())[1]
                    self.stats.incr('patcand.local')
                    if theta_c > self.config.theta_c:
                        try:
                            valid_c_f[i][agg] += 1
                        except KeyError:
                            valid_c_f[i][agg] = 1
                        global_dev_pos[i][agg]['c'] = max(
                            global_dev_pos[i][agg]['c'], dev_pos)
                        global_dev_neg[i][agg]['c'] = min(
                            global_dev_neg[i][agg]['c'], dev_neg)
                        # self.pc.add_local(f,oldKey,v,a,agg,'const',theta_c)
                        log.debug(
                            "local constant pattern holds: (f: %s, %s, %s, agg: %s, GOF: %s) - dev-:%f - dev+:%f",
                            f[i], fval, v[i], agg, theta_c,
                            global_dev_neg[i][agg]['c'],
                            global_dev_pos[i][agg]['c'])
                        pattern.append(
                            self.addLocal(f[i], fval, v[i], agg, 'const',
                                          theta_c, describe, 'NULL', dev_pos,
                                          dev_neg))
                        self.stats.incr('patterns.local')

                # fitting linear
                if agg not in sample_invalid[i] or 'l' not in sample_invalid[
                        i][agg]:
                    if theta_c != 1 and (
                        (self.config.reg_package == 'sklearn'
                         and all(attr in self.num for attr in v[i]) or
                         (self.config.reg_package == 'statsmodels'
                          and all(attr in self.num for attr in v[i])))):

                        if self.config.reg_package == 'sklearn':
                            lr = LinearRegression()
                            lr.fit(df[v[i]], df[agg])
                            theta_l = lr.score(df[v[i]], df[agg])
                            theta_l = 1 - (1 - theta_l) * (n - 1) / (
                                n - len(v[i]) - 1)
                            param = lr.coef_.tolist()
                            param.append(lr.intercept_.tolist())
                            param = "'" + str(param) + "'"
                        else:  # statsmodels
                            if n <= len(v[i]) + 1:  # negative R^2 for sure
                                return
                            lr = sm.ols(agg + '~' + '+'.join([
                                attr if attr in self.num else 'C(' + attr + ')'
                                for attr in v[i]
                            ]),
                                        data=df,
                                        missing='drop').fit()
                            theta_l = lr.rsquared_adj
                            param = Json(dict(lr.params))
                            dev_pos = max(lr.resid)
                            dev_neg = min(lr.resid)

                        self.stats.incr('patcand.local')
                        if theta_l and theta_l > self.config.theta_l:
                            try:
                                valid_l_f[i][agg] += 1
                            except KeyError:
                                valid_l_f[i][agg] = 1
                            global_dev_pos[i][agg]['l'] = max(
                                global_dev_pos[i][agg]['l'], dev_pos)
                            global_dev_neg[i][agg]['l'] = min(
                                global_dev_neg[i][agg]['l'], dev_neg)
                            log.debug(
                                "local linear pattern holds: (f: %s, %s, %s, agg: %s, GOF: %s) dev-: %f, dev+: %f",
                                f[i], fval, v[i], agg, theta_l,
                                global_dev_neg[i][agg]['l'],
                                global_dev_pos[i][agg]['l'])
                            pattern.append(
                                self.addLocal(f[i], fval, v[i], agg, 'linear',
                                              theta_l, describe, param,
                                              dev_pos, dev_neg))
                            self.stats.incr('patterns.local')

            self.stats.stopTimer('regression')
Example #28
def create_range_entry(dc, product, crses):
  conn = get_sqlconn(dc)
  txn = conn.begin()
  prodid = product.id

  # Attempt to insert row
  conn.execute("""
    INSERT INTO wms.product_ranges
    (id,lat_min,lat_max,lon_min,lon_max,dates,bboxes)
    VALUES
    (%(p_id)s, 0, 0, 0, 0, %(empty)s, %(empty)s)
    ON CONFLICT (id) DO NOTHING
    """,
    {"p_id": prodid, "empty": Json("")})

  # Update extents
  conn.execute("""
    UPDATE wms.product_ranges
    SET (lat_min,lat_max,lon_min,lon_max) =
    (wms_get_min(%(p_idarr)s, 'lat'), wms_get_max(%(p_idarr)s, 'lat'), wms_get_min(%(p_idarr)s, 'lon'), wms_get_max(%(p_idarr)s, 'lon'))
    WHERE id=%(p_id)s
    """,
    {"p_id": prodid, "p_idarr": [ prodid ]})

  # Create sorted list of dates
  conn.execute("""
    WITH sorted
    AS (SELECT to_jsonb(array_agg(dates.d))
        AS dates
        FROM (SELECT DISTINCT to_date(metadata::json->'extent'->>'center_dt', 'YYYY-MM-DD')
              AS d
              FROM agdc.dataset
              WHERE dataset_type_ref=%(p_id)s
              AND archived IS NULL
              ORDER BY d) dates)
    UPDATE wms.product_ranges
    SET dates=sorted.dates
    FROM sorted
    WHERE id=%(p_id)s
    """,
    {"p_id": prodid})

  # calculate bounding boxes
  results = list(conn.execute("""
    SELECT lat_min,lat_max,lon_min,lon_max
    FROM wms.product_ranges
    WHERE id=%s
    """,
    prodid))

  r = results[0]

  epsg4326 = datacube.utils.geometry.CRS("EPSG:4326")
  box = datacube.utils.geometry.box(
    float(r[2]),
    float(r[0]),
    float(r[3]),
    float(r[1]),
    epsg4326)

  conn.execute("""
    UPDATE wms.product_ranges
    SET bboxes = %s::jsonb
    WHERE id=%s
    """,
    Json(
      {crsid: {"top": box.to_crs(crs).boundingbox.top,
               "bottom": box.to_crs(crs).boundingbox.bottom,
               "left": box.to_crs(crs).boundingbox.left,
               "right": box.to_crs(crs).boundingbox.right}
        for crsid, crs in crses.items()
       }
    ),
    product.id)

  txn.commit()
  conn.close()
Example #29
        def fit(df, f, fval, v, n):
            if not self.config.fit:
                return
            log.debug("do regression for F=%s, f=%s", f, fval)
            self.stats.startTimer('regression')
            for agg in aggList:
                nonlocal global_dev_pos, global_dev_neg
                if agg not in sample_invalid or 'c' not in sample_invalid[agg]:
                    avg = mean(df[agg])
                    describe = [
                        avg,
                        mode(df[agg]),
                        percentile(df[agg], 25),
                        percentile(df[agg], 50),
                        percentile(df[agg], 75)
                    ]
                    dev_pos = max(df[agg]) - avg
                    dev_neg = min(df[agg]) - avg
                    # fitting constant
                    theta_c = chisquare(df[agg].dropna())[1]
                    self.stats.incr('patcand.local')
                    if theta_c > self.config.theta_c:
                        nonlocal valid_c_f
                        try:
                            valid_c_f[agg] += 1
                        except KeyError:
                            valid_c_f[agg] = 1
                        global_dev_pos[agg]['c'] = max(
                            global_dev_pos[agg]['c'], dev_pos)
                        global_dev_neg[agg]['c'] = min(
                            global_dev_neg[agg]['c'], dev_neg)

                        pattern.append(
                            self.addLocal(f, fval, v, agg, 'const', theta_c,
                                          describe, 'NULL', dev_pos, dev_neg))
                        self.stats.incr('patterns.local')

                # fitting linear
                if agg not in sample_invalid or 'l' not in sample_invalid[agg]:
                    if theta_c != 1 and (
                        (self.config.reg_package == 'sklearn'
                         and all(attr in self.num for attr in v) or
                         (self.config.reg_package == 'statsmodels'
                          and all(attr in self.num for attr in v)))):

                        if self.config.reg_package == 'sklearn':
                            lr = LinearRegression()
                            lr.fit(df[v], df[agg])
                            theta_l = lr.score(df[v], df[agg])
                            theta_l = 1 - (1 - theta_l) * (n - 1) / (
                                n - len(v) - 1)
                            param = lr.coef_.tolist()
                            param.append(lr.intercept_.tolist())
                            param = "'" + str(param) + "'"
                        else:  # statsmodels
                            if n <= len(v) + 1:  # negative R^2 for sure
                                return
                            lr = sm.ols(agg + '~' + '+'.join([
                                attr if attr in self.num else 'C(' + attr + ')'
                                for attr in v
                            ]),
                                        data=df,
                                        missing='drop').fit()
                            # theta_l=lr.rsquared_adj
                            theta_l = chisquare(df[agg], lr.predict())[1]
                            param = Json(dict(lr.params))
                            dev_pos = max(lr.resid)
                            dev_neg = min(lr.resid)
                        self.stats.incr('patcand.local')

                        if theta_l and theta_l > self.config.theta_l:
                            nonlocal valid_l_f
                            try:
                                valid_l_f[agg] += 1
                            except KeyError:
                                valid_l_f[agg] = 1
                            global_dev_pos[agg]['l'] = max(
                                global_dev_pos[agg]['l'], dev_pos)
                            global_dev_neg[agg]['l'] = min(
                                global_dev_neg[agg]['l'], dev_neg)
                            pattern.append(
                                self.addLocal(f, fval, v, agg, 'linear',
                                              theta_l, describe, param,
                                              dev_pos, dev_neg))
                            self.stats.incr('patterns.local')

            self.stats.stopTimer('regression')
Example #30
 def get_prep_value(self, value):
     if value is not None:
         return Json(value,
                     dumps=partial(json.dumps,
                                   cls=DateTimeAwareJSONEncoder))
     return value
Example #31
def PGJson(data):
    return Json(data, dumps=json_dumps)
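
On the reading side, psycopg2 can mirror a custom dumps with a custom loads; a sketch using register_default_json/register_default_jsonb (conn is assumed to be an open connection, and json_dumps a module-level serializer as above):

import json
from psycopg2.extras import register_default_json, register_default_jsonb

# Parse json/jsonb columns coming back from this connection with json.loads.
register_default_json(conn, loads=json.loads)
register_default_jsonb(conn, loads=json.loads)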