Example #1
def update(dataset_id, dictionary, author_id):
    # TODO(roman): Make author_id argument optional (keep old author if None).
    jsonschema.validate(dictionary, BASE_JSON_SCHEMA)

    with create_cursor() as cursor:
        if "description" not in dictionary:
            dictionary["description"] = None

        cursor.execute("""UPDATE dataset
                          SET (name, description, public, author) = (%s, %s, %s, %s)
                          WHERE id = %s""",
                       (dictionary["name"], dictionary["description"], dictionary["public"], author_id, dataset_id))

        # Replacing old classes with new ones
        cursor.execute("""DELETE FROM dataset_class WHERE dataset = %s""", (dataset_id,))

        for cls in dictionary["classes"]:
            if "description" not in cls:
                cls["description"] = None
            cursor.execute("""INSERT INTO dataset_class (name, description, dataset)
                              VALUES (%s, %s, %s) RETURNING id""",
                           (cls["name"], cls["description"], dataset_id))
            cls_id = cursor.fetchone()[0]

            for recording_mbid in cls["recordings"]:
                cursor.execute("INSERT INTO dataset_class_member (class, mbid) VALUES (%s, %s)",
                               (cls_id, recording_mbid))

    # If anything bad happens above, it should just rollback by default.
    commit()
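
A hypothetical usage sketch for update() above. BASE_JSON_SCHEMA is not shown in
this listing, so the dictionary shape is an assumption based on the keys the
code reads ("name", "public", "classes", and an optional "description"):

# Assumed import path; the listing does not show the module layout.
from db.dataset import update

dataset = {
    "name": "Mood samples",
    "public": True,
    "classes": [
        {"name": "happy",
         "recordings": ["770cc467-8dde-4d22-bc4c-a42f91e7515e"]},  # recording MBIDs
    ],
}
update(existing_dataset_id, dataset, author_id=1)  # existing_dataset_id: UUID of a dataset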
Example #2
def drop_tables(self):
    with create_cursor() as cursor:
        # TODO(roman): See if there's a better way to drop all tables.
        # FIXME: Need to drop all tables that we have there.
        # cursor.execute('DROP TABLE IF EXISTS i_am_a_table CASCADE;')
        pass
    commit()
Example #3
def get_brewery():
    b = True  # True: search Breweries by name regex; False: by StateID (sid)
    if 'brewery' in request.args:
        bry = '.*' + request.args['brewery'] + '.*'
    elif 'sid' in request.args:
        bry = request.args['sid']
        b = False
    else:
        return "No brewery provided. You fool."
    conn = db.establish_connection()
    cur = db.create_cursor(conn)
    out = ""
    try:
        if b:
            cur.execute('SELECT * FROM Breweries WHERE name ~* %s', (bry, ))
        else:
            cur.execute('SELECT * FROM Breweries WHERE StateID=%s', (bry, ))
        for brew_data in cur:
            out += "<h1>Here's a brewery</h1><p>" + str(
                brew_data) + "</p><a href='/beer?id=" + str(
                    brew_data[4]) + "'>BEERS</a>"
        # cur.execute('SELECT * FROM Beers WHERE BreweryId=%s',(brew_data[4],))
        # for item in cur:
        #     out+="<p>"+str(item)+"</p>"

    except Exception as e:
        out = "<h1>Here's an error</h1><p>" + str(e) + "</p>"
    finally:
        # conn.commit()
        db.close_connection(conn, cur)
        return out
Example #4
def get(id):
    """Get user with a specified ID (integer)."""
    with create_cursor() as cursor:
        cursor.execute('SELECT id, created, musicbrainz_id FROM "user" WHERE id = %s',
                       (id,))
        row = cursor.fetchone()
        return dict(row) if row else None
Example #5
def import_db_dump(archive_path):
    """Import data from .tar.xz archive into the database."""
    pxz_command = ["pxz", "--decompress", "--stdout", archive_path]
    pxz = subprocess.Popen(pxz_command, stdout=subprocess.PIPE)

    table_names = _TABLES.keys()

    with create_cursor() as cursor:

        with tarfile.open(fileobj=pxz.stdout, mode="r|") as tar:
            for member in tar:
                file_name = member.name.split("/")[-1]

                if file_name == "SCHEMA_SEQUENCE":
                    # Verifying schema version
                    schema_seq = int(tar.extractfile(member).read().strip())
                    if schema_seq != db.SCHEMA_VERSION:
                        raise Exception("Incorrect schema version! Expected: %d, got: %d. "
                                        "Please get the latest version of the dump."
                                        % (db.SCHEMA_VERSION, schema_seq))
                    else:
                        logging.info("Schema version verified.")

                elif file_name in table_names:
                    logging.info(" - Importing data into %s table..." % file_name)
                    cursor.copy_from(tar.extractfile(member), '"%s"' % file_name,
                                     columns=_TABLES[file_name])

    commit()
    pxz.stdout.close()
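
A note on the streaming pattern above: mode="r|" makes tarfile read the archive
as a forward-only stream, which is required because a pipe is not seekable. A
minimal standalone sketch of the same idea, assuming plain xz is installed and
a dump.tar.xz file exists:

import subprocess
import tarfile

proc = subprocess.Popen(["xz", "--decompress", "--stdout", "dump.tar.xz"],
                        stdout=subprocess.PIPE)
# "r|" = non-seekable streaming read; plain "r" would try to seek the pipe and fail.
with tarfile.open(fileobj=proc.stdout, mode="r|") as tar:
    for member in tar:
        print(member.name)
proc.stdout.close()
proc.wait()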
Example #6
def create_from_dict(dictionary, author_id):
    """Creates a new dataset from a dictionary.

    Returns:
        ID of the newly created dataset.
    """
    jsonschema.validate(dictionary, BASE_JSON_SCHEMA)

    with create_cursor() as cursor:
        if "description" not in dictionary:
            dictionary["description"] = None

        cursor.execute("""INSERT INTO dataset (id, name, description, public, author)
                          VALUES (uuid_generate_v4(), %s, %s, %s, %s) RETURNING id""",
                       (dictionary["name"], dictionary["description"], dictionary["public"], author_id))
        dataset_id = cursor.fetchone()[0]

        for cls in dictionary["classes"]:
            if "description" not in cls:
                cls["description"] = None
            cursor.execute("""INSERT INTO dataset_class (name, description, dataset)
                              VALUES (%s, %s, %s) RETURNING id""",
                           (cls["name"], cls["description"], dataset_id))
            cls_id = cursor.fetchone()[0]

            for recording_mbid in cls["recordings"]:
                cursor.execute("INSERT INTO dataset_class_member (class, mbid) VALUES (%s, %s)",
                               (cls_id, recording_mbid))

    # If anything bad happens above, it should just rollback by default.
    commit()

    return dataset_id
Example #7
def _get_incremental_dump_timestamp(dump_id=None):
    with create_cursor() as cursor:
        if dump_id:
            cursor.execute("SELECT created FROM incremental_dumps WHERE id = %s", (dump_id,))
        else:
            cursor.execute("SELECT created FROM incremental_dumps ORDER BY id DESC")
        row = cursor.fetchone()
    return row[0] if row else None
Example #8
def _create_new_inc_dump_record():
    """Creates a new record for an incremental dump and returns its ID and creation time."""
    with create_cursor() as cursor:
        cursor.execute("INSERT INTO incremental_dumps (created) VALUES (now()) RETURNING id, created")
        commit()
        row = cursor.fetchone()
    logging.info("Created new incremental dump record (ID: %s)." % row[0])
    return row
Example #9
def count_lowlevel(mbid):
    """Count number of stored low-level submissions for a specified MBID."""
    with db.create_cursor() as cursor:
        cursor.execute(
            "SELECT count(*) FROM lowlevel WHERE mbid = %s",
            (str(mbid),)
        )
        return cursor.fetchone()[0]
Example #10
def create(musicbrainz_id):
    with create_cursor() as cursor:
        # TODO(roman): Do we need to make sure that musicbrainz_id is case insensitive?
        cursor.execute('INSERT INTO "user" (musicbrainz_id) VALUES (%s) RETURNING id',
                       (musicbrainz_id,))
        commit()
        new_id = cursor.fetchone()[0]
        return new_id
Example #11
def set_job_result(job_id, result):
    with create_cursor() as cursor:
        cursor.execute(
            "UPDATE dataset_eval_jobs "
            "SET (result, updated) = (%s, current_timestamp) "
            "WHERE id = %s",
            (result, job_id)
        )
    commit()
Example #12
def _get_recordings_in_class(class_id):
    with create_cursor() as cursor:
        cursor.execute("SELECT mbid FROM dataset_class_member WHERE class = %s",
                       (class_id,))
        return [row["mbid"] for row in cursor.fetchall()]
Example #13
def api_get_beer():
    name = None
    abbr = None
    abv = None
    brew = None
    style = None
    results = 20
    vals = []
    if 'name' in request.args:
        name = ".*" + request.args['name'] + ".*"
        vals.append(name)
    if 'brew' in request.args:
        brew = ".*" + request.args['brew'] + ".*"
        vals.append(brew)
    if 'style' in request.args:
        style = ".*" + request.args['style'] + ".*"
        vals.append(style)
    if 'abv' in request.args:
        abv = request.args['abv']
        vals.append(abv)
    if 'abbr' in request.args:
        abbr = request.args['abbr']
        vals.append(abbr)
    if 'results' in request.args:
        try:
            results = int(request.args['results'])
        except Exception as e:
            return str(e)

    if len(vals) == 0:
        return jsonify(
            'Usage: supply a beername, brewery, state abbreviation, style, or abv'
        )
    conn = db.establish_connection()
    cur = db.create_cursor(conn)
    vals.append(results)
    try:
        qbase = 'SELECT * FROM Beers WHERE TRUE'
        abbrq = '' if abbr == None else ' AND stateid=(SELECT stateid FROM States WHERE abbr=%s)'
        brewq = '' if brew == None else ' AND breweryid= ANY (SELECT breweryid FROM Breweries WHERE name ~* %s)'
        beerq = '' if name == None else ' AND name ~* %s'
        abvq = '' if abv == None else ' AND abv=%s'
        styleq = '' if style == None else ' AND style ~* %s'
        qend = ' ORDER BY ratings DESC LIMIT %s'
        cur.execute(qbase + beerq + brewq + styleq + abvq + abbrq + qend, vals)
        # if name != None:
        #     cur.execute(qbase+brewq+abbrq,vals)
        # elif beer != None:
        #     cur.execute(qbase+beerq+abbrq,vals)
        # else:
        #     cur.execute(qbase+'TRUE'+abbrq,vals)
        results = cur.fetchall()
    except Exception as e:
        results = str(e)
    finally:
        db.close_connection(conn, cur)
        return jsonify(results)
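
The assembly in api_get_beer() works only because each optional clause is
concatenated in the same order its value was appended to vals (name, brew,
style, abv, abbr, results). A condensed sketch of that invariant with a smaller,
illustrative filter set:

def build_query(filters):
    """Build a WHERE clause whose %s placeholders match the parameter order."""
    query = "SELECT * FROM Beers WHERE TRUE"
    vals = []
    if "name" in filters:   # appended first, so its placeholder comes first
        query += " AND name ~* %s"
        vals.append(".*" + filters["name"] + ".*")
    if "abv" in filters:    # appended second, placeholder comes second
        query += " AND abv = %s"
        vals.append(filters["abv"])
    return query, vals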
Example #14
def get_job(job_id):
    with create_cursor() as cursor:
        cursor.execute(
            "SELECT id, dataset_id, status, status_msg, result, created, updated "
            "FROM dataset_eval_jobs "
            "WHERE id = %s",
            (job_id,)
        )
        return dict(cursor.fetchone()) if cursor.rowcount > 0 else None
Example #15
def list_incremental_dumps():
    """Get information about all created incremental dumps.

    Returns:
        List of (id, created) pairs ordered by dump ID (newest first), or an
        empty list if there are no incremental dumps yet.
    """
    with create_cursor() as cursor:
        cursor.execute("SELECT id, created FROM incremental_dumps ORDER BY id DESC")
        return cursor.fetchall()
Example #16
def _create_job(dataset_id):
    with create_cursor() as cursor:
        cursor.execute(
            "INSERT INTO dataset_eval_jobs (id, dataset_id, status) "
            "VALUES (uuid_generate_v4(), %s, %s) RETURNING id",
            (dataset_id, STATUS_PENDING)
        )
        job_id = cursor.fetchone()[0]
    commit()
    return job_id
Example #17
def _copy_tables(location, start_time=None, end_time=None):
    """Copies all tables into separate files within a specified location (directory).

    You can also define a time frame that will be used during data selection.
    Files in the specified directory will only contain rows with timestamps
    within the specified time frame. We assume that each table contains some sort
    of timestamp that can be used as a reference.
    """
    def generate_where(row_name, start_t=start_time, end_t=end_time):
        """This function generates SQL WHERE clause that can be used to select
        rows only within specified time frame using `row_name` as a reference.
        """
        if start_t or end_t:
            start_cond = "%s > '%s'" % (row_name, str(start_t)) if start_t else ""
            end_cond = "%s <= '%s'" % (row_name, str(end_t)) if end_t else ""
            if start_t and end_t:
                return "WHERE %s AND %s" % (start_cond, end_cond)
            else:
                return "WHERE %s%s" % (start_cond, end_cond)
        else:
            return ""

    with create_cursor() as cursor:

        # lowlevel
        with open(os.path.join(location, "lowlevel"), "w") as f:
            logging.info(" - Copying table lowlevel...")
            cursor.copy_to(f, "(SELECT %s FROM lowlevel %s)" %
                           (", ".join(_TABLES["lowlevel"]), generate_where("submitted")))

        # highlevel
        with open(os.path.join(location, "highlevel"), "w") as f:
            logging.info(" - Copying table highlevel...")
            cursor.copy_to(f, "(SELECT %s FROM highlevel %s)" %
                           (", ".join(_TABLES["highlevel"]), generate_where("submitted")))

        # highlevel_json
        with open(os.path.join(location, "highlevel_json"), "w") as f:
            logging.info(" - Copying table highlevel_json...")
            query = "SELECT %s FROM highlevel_json WHERE id IN (SELECT data FROM highlevel %s)" \
                    % (", ".join(_TABLES["highlevel_json"]), generate_where("submitted"))
            cursor.copy_to(f, "(%s)" % query)

        # statistics
        with open(os.path.join(location, "statistics"), "w") as f:
            logging.info(" - Copying table statistics...")
            cursor.copy_to(f, "(SELECT %s FROM statistics %s)" %
                           (", ".join(_TABLES["statistics"]), generate_where("collected")))

        # incremental_dumps
        with open(os.path.join(location, "incremental_dumps"), "w") as f:
            logging.info(" - Copying table incremental_dumps...")
            cursor.copy_to(f, "(SELECT %s FROM incremental_dumps %s)" %
                           (", ".join(_TABLES["incremental_dumps"]), generate_where("created")))
Example #18
def get_by_mb_id(musicbrainz_id):
    """Get user with a specified MusicBrainz ID."""
    with create_cursor() as cursor:
        cursor.execute(
            'SELECT id, created, musicbrainz_id '
            'FROM "user" '
            'WHERE LOWER(musicbrainz_id) = LOWER(%s)',
            (musicbrainz_id,)
        )
        row = cursor.fetchone()
        return dict(row) if row else None
Example #19
def get_next_pending_job():
    with create_cursor() as cursor:
        cursor.execute(
            "SELECT id, dataset_id, status, status_msg, result, created, updated "
            "FROM dataset_eval_jobs "
            "WHERE status = %s "
            "ORDER BY created ASC "
            "LIMIT 1",
            (STATUS_PENDING,)
        )
        return dict(cursor.fetchone()) if cursor.rowcount > 0 else None
Example #20
def get_all_bids():
    conn = db.establish_connection()
    cur = db.create_cursor(conn)
    out = []
    try:
        cur.execute('SELECT BreweryID FROM Breweries')
        for bid in cur:
            out.append(bid[0])
    except Exception as e:
        logger.info(e)
    finally:
        db.close_connection(conn, cur)
        return out
Example #21
def get_blinks_from_bid(bid):
    conn = db.establish_connection()
    cur = db.create_cursor(conn)
    out = []
    try:
        cur.execute('SELECT Link FROM Beers WHERE BreweryID=%s', (bid, ))
        for link in cur:
            out.append(link[0])
    except Exception as e:
        logger.info(e)
    finally:
        db.close_connection(conn, cur)
        return out
Example #22
def get_summary_data(mbid, offset=0):
    """Fetches the low-level and high-level features for the specified MBID.

    Args:
        offset: Offset can be specified if you need to get the summary for a
            different submission. Submissions are ordered by creation time.

    Returns:
        Dictionary with low-level data ("lowlevel" key) for the specified MBID
        and, if it has been calculated, high-level data ("highlevel" key).
    """
    summary = {}
    mbid = str(mbid)
    with db.create_cursor() as cursor:
        cursor.execute(
            "SELECT id, data "
            "FROM lowlevel "
            "WHERE mbid = %s "
            "ORDER BY submitted "
            "OFFSET %s",
            (mbid, offset)
        )
        if not cursor.rowcount:
            raise db.exceptions.NoDataFoundException("Can't find low-level data for this recording.")

        ll_row_id, lowlevel = cursor.fetchone()
        if 'artist' not in lowlevel['metadata']['tags']:
            lowlevel['metadata']['tags']['artist'] = ["[unknown]"]
        if 'release' not in lowlevel['metadata']['tags']:
            lowlevel['metadata']['tags']['release'] = ["[unknown]"]
        if 'title' not in lowlevel['metadata']['tags']:
            lowlevel['metadata']['tags']['title'] = ["[unknown]"]

        lowlevel['metadata']['audio_properties']['length_formatted'] = \
            time.strftime("%M:%S", time.gmtime(lowlevel['metadata']['audio_properties']['length']))

        summary['lowlevel'] = lowlevel

        cursor.execute(
            "SELECT highlevel_json.data "
            "FROM highlevel, highlevel_json "
            "WHERE highlevel.id = %s "
            "      AND highlevel.data = highlevel_json.id "
            "      AND highlevel.mbid = %s",
            (ll_row_id, mbid)
        )
        if cursor.rowcount:
            summary['highlevel'] = cursor.fetchone()[0]

        return summary
Example #23
def _any_new_data(from_time):
    """Checks if there's any new data since the specified time in tables that
    support incremental dumps.

    Returns:
        True if there is new data in one of the tables that support incremental
        dumps, False otherwise.
    """
    with create_cursor() as cursor:
        cursor.execute("SELECT count(*) FROM lowlevel WHERE submitted > %s", (from_time,))
        lowlevel_count = cursor.fetchone()[0]
        cursor.execute("SELECT count(*) FROM highlevel WHERE submitted > %s", (from_time,))
        highlevel_count = cursor.fetchone()[0]
    return lowlevel_count > 0 or highlevel_count > 0
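
Since only existence matters here, the same check can short-circuit instead of
counting every row. A sketch using EXISTS against the same tables:

with create_cursor() as cursor:
    cursor.execute(
        "SELECT EXISTS(SELECT 1 FROM lowlevel WHERE submitted > %s) "
        "OR EXISTS(SELECT 1 FROM highlevel WHERE submitted > %s)",
        (from_time, from_time)
    )
    has_new_data = cursor.fetchone()[0]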
Example #24
def drop_tables(self):
    with create_cursor() as cursor:
        # TODO(roman): See if there's a better way to drop all tables.
        cursor.execute('DROP TABLE IF EXISTS highlevel_json       CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS highlevel            CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS lowlevel             CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS statistics           CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS incremental_dumps    CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS dataset_eval_jobs    CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS dataset_class_member CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS dataset_class        CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS dataset              CASCADE;')
        cursor.execute('DROP TABLE IF EXISTS "user"               CASCADE;')
    commit()
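
The TODO above asks for a better way; one option (a sketch, assuming the same
create_cursor()/commit() helpers) is to read table names from pg_tables instead
of hard-coding them:

def drop_all_tables(self):
    with create_cursor() as cursor:
        cursor.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public'")
        for (table,) in cursor.fetchall():
            # Identifiers cannot be passed as %s parameters, hence the manual quoting.
            cursor.execute('DROP TABLE IF EXISTS "%s" CASCADE;' % table)
    commit()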
Example #25
def get_by_user_id(user_id, public_only=True):
    """Get datasets created by a specified user.

    Returns:
        List of dictionaries with dataset details.
    """
    with create_cursor() as cursor:
        where = "WHERE author = %s"  # only constant strings are concatenated; user_id stays parameterized
        if public_only:
            where += " AND public = TRUE"
        cursor.execute("SELECT id, name, description, author, created "
                       "FROM dataset " + where,
                       (user_id,))
        return [dict(row) for row in cursor.fetchall()]
Example #26
def _get_classes(dataset_id):
    with create_cursor() as cursor:
        cursor.execute(
            "SELECT id, name, description "
            "FROM dataset_class "
            "WHERE dataset = %s",
            (dataset_id,)
        )
        rows = cursor.fetchall()
        classes = []
        for row in rows:
            row = dict(row)
            row["recordings"] = _get_recordings_in_class(row["id"])
            classes.append(row)
        return classes
Example #27
def load_low_level(mbid, offset=0):
    """Load low-level data for a given MBID."""
    with db.create_cursor() as cursor:
        cursor.execute(
            "SELECT data::text "
            "FROM lowlevel "
            "WHERE mbid = %s "
            "ORDER BY submitted "
            "OFFSET %s",
            (str(mbid), offset)
        )
        if not cursor.rowcount:
            raise db.exceptions.NoDataFoundException

        row = cursor.fetchone()
        return row[0]
Example #28
def load_high_level(mbid, offset=0):
    """Load high-level data for a given MBID."""
    with db.create_cursor() as cursor:
        cursor.execute(
            "SELECT hlj.data::text "
            "FROM highlevel hl "
            "JOIN highlevel_json hlj "
            "ON hl.data = hlj.id "
            "WHERE mbid = %s "
            "ORDER BY submitted "
            "OFFSET %s",
            (str(mbid), offset)
        )
        if not cursor.rowcount:
            raise db.exceptions.NoDataFoundException
        return cursor.fetchone()[0]
Example #29
def get_stats():
    stats_keys = ["lowlevel-lossy", "lowlevel-lossy-unique", "lowlevel-lossless", "lowlevel-lossless-unique"]
    # TODO: Port this to new implementation:
    stats = cache._mc.get_multi(stats_keys, key_prefix="ac-num-")
    last_collected = cache.get('last-collected')

    # Recalculate everything together, always.
    if sorted(stats_keys) != sorted(stats.keys()) or last_collected is None:
        stats_parameters = dict([(a, 0) for a in stats_keys])

        with create_cursor() as cursor:
            cursor.execute("SELECT now() as now, collected FROM statistics ORDER BY collected DESC LIMIT 1")
            update_db = False
            if cursor.rowcount > 0:
                (now, last_collected) = cursor.fetchone()
            if cursor.rowcount == 0 or now - last_collected > datetime.timedelta(minutes=59):
                update_db = True

            cursor.execute("SELECT lossless, count(*) FROM lowlevel GROUP BY lossless")
            for row in cursor.fetchall():
                if row[0]: stats_parameters['lowlevel-lossless'] = row[1]
                if not row[0]: stats_parameters['lowlevel-lossy'] = row[1]

            cursor.execute("SELECT lossless, count(*) FROM (SELECT DISTINCT ON (mbid) mbid, lossless FROM lowlevel ORDER BY mbid, lossless DESC) q GROUP BY lossless;")
            for row in cursor.fetchall():
                if row[0]: stats_parameters['lowlevel-lossless-unique'] = row[1]
                if not row[0]: stats_parameters['lowlevel-lossy-unique'] = row[1]

            if update_db:
                for key, value in six.iteritems(stats_parameters):
                    cursor.execute("INSERT INTO statistics (collected, name, value) VALUES (now(), %s, %s) RETURNING collected", (key, value))
                commit()

            cursor.execute("SELECT now()")
            last_collected = cursor.fetchone()[0]

        value = stats_parameters

        # TODO: Port this to new implementation:
        cache._mc.set_multi(stats_parameters, key_prefix="ac-num-", time=STATS_CACHE_TIMEOUT)
        cache.set('last-collected', last_collected, time=STATS_CACHE_TIMEOUT)
    else:
        value = stats

    return value, last_collected
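
For context, the get_multi()/set_multi() round trip used above, sketched against
python-memcached (an assumption about the client that cache._mc wraps):

import memcache  # python-memcached

mc = memcache.Client(["127.0.0.1:11211"])
mc.set_multi({"lowlevel-lossy": 10, "lowlevel-lossless": 20},
             key_prefix="ac-num-", time=600)
# get_multi returns only the keys actually present in the cache, which is why
# get_stats() compares the returned key set against stats_keys.
stats = mc.get_multi(["lowlevel-lossy", "lowlevel-lossless"], key_prefix="ac-num-")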
Example #30
def get_beers():
    if 'id' in request.args:
        id = request.args['id']
    else:
        return "No brewery id provided. You fool."
    conn = db.establish_connection()
    cur = db.create_cursor(conn)
    out = ""
    try:
        cur.execute('SELECT * FROM Beers WHERE BreweryId=%s', (id, ))
        for item in cur:
            out += "<p>" + str(item) + "</p>"
    except Exception as e:
        out = "<h1>Here's an error</h1><p>" + str(e) + "</p>"
    finally:
        # conn.commit()
        db.close_connection(conn, cur)
        return out
Example #31
def get_last_submitted_recordings():
    last_submitted_data = cache.get('last-submitted-data')
    if not last_submitted_data:
        with create_cursor() as cursor:
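            # -> steps into the JSON document; ->> 0 extracts element 0 of the
            # tags array as text.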
            cursor.execute("""SELECT mbid,
                                     data->'metadata'->'tags'->'artist'->>0,
                                     data->'metadata'->'tags'->'title'->>0
                                FROM lowlevel
                            ORDER BY id DESC
                               LIMIT 5
                              OFFSET 10""")
            last_submitted_data = cursor.fetchall()
        last_submitted_data = [
            (r[0], r[1], r[2]) for r in last_submitted_data if r[1] and r[2]
        ]
        cache.set('last-submitted-data', last_submitted_data, time=LAST_MBIDS_CACHE_TIMEOUT)

    return last_submitted_data
Example #32
def get_jobs_for_dataset(dataset_id):
    """Get a list of evaluation jobs for the specified dataset.

    Args:
        dataset_id: UUID of the dataset.

    Returns:
        List of evaluation jobs (dicts) for the dataset, ordered by creation
        time (oldest job first).
    """
    with create_cursor() as cursor:
        cursor.execute(
            "SELECT id, dataset_id, status, status_msg, result, created, updated "
            "FROM dataset_eval_jobs "
            "WHERE dataset_id = %s "
            "ORDER BY created ASC",
            (dataset_id,)
        )
        return [dict(j) for j in cursor.fetchall()]
Example #33
def evaluate_dataset(dataset_id):
    """Add dataset into evaluation queue.

    Args:
        dataset_id: ID of the dataset that needs to be added into the list of
            jobs.

    Returns:
        ID of the newly created evaluation job.
    """
    with create_cursor() as cursor:
        cursor.execute(
            "SELECT count(*) FROM dataset_eval_jobs WHERE dataset_id = %s AND status IN %s",
            (dataset_id, (STATUS_PENDING, STATUS_RUNNING))
        )
        if cursor.fetchone()[0] > 0:
            raise JobExistsException
    validate_dataset(db.dataset.get(dataset_id))
    return _create_job(dataset_id)
Example #34
def get(id):
    """Get dataset with a specified ID.

    Returns:
        Dictionary with dataset details if it has been found, None
        otherwise.
    """
    with create_cursor() as cursor:
        cursor.execute(
            "SELECT id, name, description, author, created, public "
            "FROM dataset "
            "WHERE id = %s",
            (str(id),)
        )
        if cursor.rowcount > 0:
            row = dict(cursor.fetchone())
            row["classes"] = _get_classes(row["id"])
            return row
        else:
            return None
Example #35
def get_state():
    if 'ab' in request.args:
        ab = request.args['ab']
    else:
        ab = '__'  # LIKE pattern that matches any two-character abbreviation
    conn = db.establish_connection()
    cur = db.create_cursor(conn)
    out = ""
    try:
        cur.execute('SELECT * FROM States WHERE Abbr LIKE %s', (ab, ))
        for item in cur:
            out += "<p>" + str(item) + "</p><a href='/brewery?sid=" + str(
                item[2]) + "'>BREWERIES</a>"

    except Exception as e:
        out = "<h1>Here's an error</h1><p>" + str(e) + "</p>"
    finally:
        # conn.commit()
        db.close_connection(conn, cur)
        return out
Example #36
def main():
    ts = time()
    s = requests.session()
    f = open("test.html", "a")
    #download_dir = setup_download_dir()
    # links = []
    login(s)
    bids = get_all_bids()
    # for state in states:
    #     l = get_links(state, s)
    #     links += l
    #     p_to_f(str(l))
    # print(links)

    # logger.info("Total brew and beer data: %s",download_link(links[0],s))
    # Create a queue to communicate with the worker threads
    queue = Queue()
    # # Create 8 worker threads
    bc = 0

    for x in range(8):
        conn = db.establish_connection()
        cur = db.create_cursor(conn)
        worker = DownloadWorker(queue, cur, conn, bc)
        #     # Setting daemon to True will let the main thread exit even though the workers are blocking
        worker.daemon = True
        worker.start()
    # # Put the tasks into the queue as a tuple
    for bid in bids:
        links = get_blinks_from_bid(bid)
        for link in links:
            # logger.info('Queueing {}'.format(link))
            queue.put((link, s))
    # # Causes the main thread to wait for the queue to finish processing all the tasks
    queue.join()
    # conn.commit()
    # db.close_connection(conn,cur)
    f.close()
    print(bids)
    print(ld)
    logging.info('Took %s', time() - ts)
Example #37
def api_get_brewery():
    name = None
    abbr = None
    beer = None
    vals = []
    if 'name' in request.args:
        name = ".*" + request.args['name'] + ".*"
        vals.append(name)
    if 'beer' in request.args:
        beer = ".*" + request.args['beer'] + ".*"
        vals.append(beer)
    if 'abbr' in request.args:
        abbr = request.args['abbr']
        vals.append(abbr)
    results = None
    if name == None and abbr == None and beer == None:
        return jsonify(
            'Usage: supply a brewery name: ?name=brewname, and or a state abbreviation: ?abbr=stabbr, and or a beer name: ?beer=bname'
        )
    conn = db.establish_connection()
    cur = db.create_cursor(conn)

    try:
        qbase = 'SELECT * FROM Breweries WHERE '
        abbrq = '' if abbr == None else ' AND stateid=(SELECT stateid FROM States WHERE abbr=%s)'
        beerq = '' if beer == None else 'breweryid= ANY (SELECT breweryid FROM Beers WHERE name ~* %s)'
        brewq = '' if name == None else 'name ~* %s'

        if name != None:
            cur.execute(qbase + brewq + abbrq, vals)
        elif beer != None:
            cur.execute(qbase + beerq + abbrq, vals)
        else:
            cur.execute(qbase + 'TRUE' + abbrq, vals)
        results = cur.fetchall()
    except Exception as e:
        results = str(e)
    finally:
        db.close_connection(conn, cur)
        return jsonify(results)
Example #38
def get_statistics_data():
    with create_cursor() as cursor:
        cursor.execute(
            "SELECT name, array_agg(collected ORDER BY collected ASC) AS times,"
            "       array_agg(value ORDER BY collected ASC) AS values "
            "FROM statistics "
            "GROUP BY name"
        )
        stats_key_map = {
            "lowlevel-lossy": "Lossy (all)",
            "lowlevel-lossy-unique": "Lossy (unique)",
            "lowlevel-lossless": "Lossless (all)",
            "lowlevel-lossless-unique": "Lossless (unique)"
        }
        ret = []
        total_unique = {"name": "Total (unique)", "data": {}}
        total_all = {"name": "Total (all)", "data": {}}
        for val in cursor:
            # Materialize: pairs is iterated twice below, and zip() is a
            # one-shot iterator on Python 3.
            pairs = list(zip([_make_timestamp(v) for v in val[1]], val[2]))
            ret.append({
                "name": stats_key_map.get(val[0], val[0]),
                "data": [[v[0], v[1]] for v in pairs]
            })
            second = {}
            if val[0] in ["lowlevel-lossy", "lowlevel-lossless"]:
                second = total_all
            elif val[0] in ["lowlevel-lossy-unique", "lowlevel-lossless-unique"]:
                second = total_unique
            for pair in pairs:
                if pair[0] in second['data']:
                    second['data'][pair[0]] = second['data'][pair[0]] + pair[1]
                else:
                    second['data'][pair[0]] = pair[1]

    total_unique['data'] = [[k, total_unique['data'][k]] for k in sorted(total_unique['data'].keys())]
    total_all['data'] = [[k, total_all['data'][k]] for k in sorted(total_all['data'].keys())]
    ret.extend([total_unique, total_all])
    return ret
Example #39
    prob1 = clf1.predict_proba(data)
    prob2 = clf2.predict_proba(data)
    pre1 = clf1.predict(data)
    pre2 = clf2.predict(data)
    result = [None] * len(data)
    for i in range(len(prob1)):
        if max(prob1[i]) > max(prob2[i]):
            result[i] = pre1[i]
        else:
            result[i] = pre2[i]
    return result


if __name__ == "__main__":

    conn = establish_connection()
    cur = create_cursor(conn)
    data, style = grab_rand_data(cur, length)
    svd = TruncatedSVD(n_components=dim, n_iter=7, random_state=42)
    print(len(data))
    # print(vals)
    # print(len(data[999]))
    data = check_array(data, accept_sparse=True)
    # print("d1: ",data[0])
    svd.fit(data)
    d = svd.fit_transform(data)
    # print("d1: ",data[0])
    #DummyClassifier(strategy='most_frequent').fit(d,[styledict[s] for s in style])
    # classifier1 = MLPClassifier(random_state=1,max_iter=1000).fit(d,[styledict[s] for s in style])
    classifier = GradientBoostingClassifier(
        n_estimators=100,
        init=MLPClassifier(max_iter=1000)).fit(d, [styledict[s] for s in style])