def set_row_mode(connection: sqlite3.Connection, row_factory: Callable) -> None:
    """Temporarily make a sqlite3 Connection build rows with ``row_factory``.

    The connection's current row factory is saved, ``row_factory`` is
    installed, control is yielded exactly once, and the saved factory is put
    back afterwards.  The restore step runs in a ``finally`` clause, so the
    connection gets its previous row factory back even when the code running
    while this generator is suspended raises an exception.

    Can be used just like this:

    >>> with set_row_mode(my_connection, UserRow.from_row):
    ...     return my_cursor.execute("select * from users;").fetchall()

    NOTE(review): the ``with`` usage above requires this generator to be
    wrapped by ``contextlib.contextmanager``; no decorator is visible on this
    definition -- confirm it is applied where the function is defined/used.

    Args:
        connection: The connection whose ``row_factory`` is swapped.
        row_factory: Callable installed as the temporary row factory.

    Yields:
        None, once, while the temporary row factory is active.
    """
    prev_row_factory = connection.row_factory
    connection.row_factory = row_factory
    try:
        yield
    finally:
        # Always undo the swap, including when the managed block raised.
        connection.row_factory = prev_row_factory
def _Get(self,
         colNamesAndValues: typing.Optional[typing.List[typing.Tuple[str, typing.Any]]] = None,
         conn: sqlite3.Connection = None) -> typing.List[T]:
    """Gets items that match an AND based column and value filter.

    Typically a manager will have a series of more type specific Get methods
    to call. If you pass no filter (default) or an empty list, it gets all of
    them, which is probably faster than listing them.

    Side effect: ``conn.row_factory`` is set to ``sqlite3.Row`` on the
    connection that is used (the one passed in, or the one returned by
    ``self._GetDBConnection()``).

    @param colNamesAndValues: a list of tuples of column name and value pairs
        e.g. [(foo,1),(bar,100)] means: WHERE foo = 1 AND bar = 100.
        ``None`` (the default) is treated as an empty list; a fresh list is
        created per call so no default object is shared between calls.
    @param conn: connection to query; when ``None`` one is obtained from
        ``self._GetDBConnection()``.
    @return: A list of items, each built by ``self._ItemFromRow``.
        Empty list if none meet the criteria.
    """
    if colNamesAndValues is None:
        colNamesAndValues = []
    if conn is None:
        conn = self._GetDBConnection()
    conn.row_factory = sqlite3.Row
    cur = conn.cursor()
    # The helper is expected to run the filtered SELECT on ``cur``.
    self._GetRowsByMultipleAND(cur, colNamesAndValues)
    return [self._ItemFromRow(row) for row in cur.fetchall()]
def _configure_db_connection(self, connection: sqlite3.Connection): ''' Configure connection-level settings on the SQLite database. ''' # set database-specific settings connection.isolation_level = None # autocommit mode; transactions can be explicitly created with BEGIN/COMMIT statements connection.row_factory = sqlite3.Row # return dictionaries instead of tuples from SELECT statements
def query(
        conn: Connection,
        table_name: str,
        fields: Union[List[str], Tuple[str]] = None,
        row_factory: Callable[[Cursor, Tuple], Any] = None,
        where: Union[OperatorSQLArgument, ConditionSQLArgument] = None) -> List:
    """Select rows from a table, optionally restricted by columns and filter.

    The statement is assembled as text from ``fields``, ``table_name`` and
    ``where.sql`` and logged at INFO level before it is executed.

    Args:
        conn (Connection): The Connection object
        table_name (str): Name of table
        fields (Union[List[str], Tuple[str]], optional): Columns to select;
            ``None`` or an empty sequence selects ``*``. Defaults to None.
        row_factory (Callable, optional): When given, it is installed on
            ``conn`` (and stays installed after the call). Defaults to None.
        where (Union[OperatorSQLArgument, ConditionSQLArgument], optional):
            Object whose ``sql`` attribute is used as the WHERE clause.
            Defaults to None.

    Returns:
        List: List of rows. If the database raises ``Error`` the error is
        logged and ``None`` is returned instead.
    """
    has_fields = fields is not None and len(fields) > 0
    columns = ', '.join(map(str, fields)) if has_fields else '*'
    condition = '' if where is None else f'WHERE {where.sql}'
    sql = f"""SELECT {columns} FROM {table_name} {condition} """
    logging.info(sql)

    if row_factory is not None:
        conn.row_factory = row_factory

    try:
        return conn.cursor().execute(sql).fetchall()
    except Error as e:
        logging.error(e)
    return None
def get_one_variant(
    conn: sqlite3.Connection,
    variant_id: int,
    with_annotations=False,
    with_samples=False,
):
    r"""Get the variant with the given id

    The id is bound as an SQL parameter in every query instead of being
    interpolated into the statement text.

    Side effect: ``conn.row_factory`` is set to ``sqlite3.Row``.

    TODO: with_annotations, with_samples are quite useless and not used for now

    Args:
        conn (sqlite3.Connection): sqlite3 connection
        variant_id (int): Database id of the variant
        with_annotations (bool, optional): Add annotations items. Default is False
        with_samples (bool, optional): add samples items. Default is False

    Returns:
        dict: A variant item with all fields in "variants" table;
            \+ all fields of annotations table if `with_annotations` is True;
            \+ all fields of sample_has_variant associated to all samples if
            `with_samples` is True.
            The "annotations" and "samples" keys are always present and hold
            an empty list when the corresponding flag is False.

    Raises:
        TypeError: If no row of "variants" has the given id (``fetchone()``
            returns None, which cannot be converted to a dict).

    Example:

        .. code-block:: python

            {
                variant fields as keys...,
                "annotations": list of dicts with annotations fields as keys...,
                "samples": list of dicts with samples fields as keys...,
            }

    """
    conn.row_factory = sqlite3.Row
    params = (variant_id,)

    # Cast sqlite3.Row object to dict because later, we use items() method.
    variant = dict(
        conn.execute(
            "SELECT * FROM variants WHERE variants.id = ?", params
        ).fetchone()
    )

    variant["annotations"] = []
    if with_annotations:
        variant["annotations"] = [
            dict(annotation)
            for annotation in conn.execute(
                "SELECT * FROM annotations WHERE variant_id = ?", params
            )
        ]

    variant["samples"] = []
    if with_samples:
        variant["samples"] = [
            dict(sample)
            for sample in conn.execute(
                """SELECT samples.name, sample_has_variant.* FROM sample_has_variant
                LEFT JOIN samples on samples.id = sample_has_variant.sample_id
                WHERE variant_id = ?""",
                params,
            )
        ]

    return variant
def read(data: Connection, id: int) -> Contact:
    """Fetch the contact stored under ``id`` in the ``contacts`` table.

    A row factory that turns each (f_name, l_name, tel) row into a
    ``Contact`` is installed on ``data``; it stays installed after the call.

    Returns the ``Contact`` built from the matching row, or ``None`` when no
    row has that id (``fetchone()`` yields nothing to convert).
    """

    def build_contact(_cursor, row):
        f_name, l_name, tel = row
        return Contact(f_name, l_name, tel)

    data.row_factory = build_contact
    statement = "SELECT f_name,l_name,tel FROM contacts WHERE id = ?"
    # The parameters are passed as a one-element tuple.
    return data.cursor().execute(statement, (id, )).fetchone()
def load_id(conn: Connection, path_id):
    """Load the ``items`` row with id ``path_id`` and wrap it in an ``Item``.

    ``conn.row_factory`` is switched to ``sqlite3.Row`` so the selected
    columns (path, url, hash, author, title, description, alias) can be
    expanded as keyword arguments of ``Item``; ``path_id`` is passed as
    ``_id``.

    Raises ``TypeError`` when no row matches, because ``fetchone()`` then
    returns ``None`` and cannot be expanded with ``**``.
    """
    select_item = (
        " SELECT path, url, hash, author, title, description, alias"
        " FROM items WHERE id=?;"
    )
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()
    cursor.execute(select_item, (path_id, ))
    columns = cursor.fetchone()
    return Item(_id=path_id, **columns)
def get_metadatas(conn: sqlite3.Connection):
    """Return the content of the "metadatas" table as a dictionary

    ``conn.row_factory`` is set to ``sqlite3.Row`` so columns can be read by
    name.

    Returns:
        [dict]: metadata field names ("key" column) mapped to their "value"
    """
    conn.row_factory = sqlite3.Row
    metadatas = {}
    for record in conn.execute("SELECT key, value FROM metadatas"):
        metadatas[record["key"]] = record["value"]
    return metadatas
def findall(data: Connection) -> list[[Contact]]:
    """Return every row of the ``contacts`` table as a ``Contact``.

    A row factory that converts each (f_name, l_name, tel) row into a
    ``Contact`` is installed on ``data`` (and left in place), so
    ``fetchall()`` directly returns the list of contacts; the list is empty
    when the table has no rows.
    """

    def build_contact(_cursor, row):
        f_name, l_name, tel = row
        return Contact(f_name, l_name, tel)

    data.row_factory = build_contact
    statement = "SELECT f_name,l_name, tel FROM contacts"
    return data.cursor().execute(statement).fetchall()
def __init__(self, conn: sqlite3.Connection, es_loader: ESLoader):
    """Store the SQLite connection and the loader on the instance.

    The connection's ``row_factory`` is replaced so that every fetched row
    is a plain ``dict`` mapping column name to value.
    """

    def dict_factory(cursor: sqlite3.Cursor, row: sqlite3.Row):
        # cursor.description holds one entry per column; item 0 is its name.
        column_names = [column[0] for column in cursor.description]
        return dict(zip(column_names, row))

    conn.row_factory = dict_factory
    self.sqlite = conn
    self.loader = es_loader
def update(data: Connection, old_contact: Contact, new_contact: Contact):
    """Replace the stored values of ``old_contact`` with ``new_contact``.

    The id of the row matching ``old_contact`` (its values are used directly
    as the query parameters) is looked up first, then that row is rewritten
    with the fields of ``new_contact`` and the change is committed.

    Side effect: ``data.row_factory`` is replaced by a factory returning only
    the first column of each row, and it stays installed after the call.
    When no row matches ``old_contact`` the looked-up id is ``None`` and the
    UPDATE changes nothing.
    """

    def first_column(_cursor, row):
        return row[0]

    find_id = " SELECT id FROM contacts WHERE f_name=? AND l_name=? AND tel=?"
    rewrite = " UPDATE contacts SET f_name=?, l_name=?, tel=? WHERE id = ?"

    data.row_factory = first_column
    cursor = data.cursor()
    old_id = cursor.execute(find_id, old_contact).fetchone()
    new_values = (new_contact.f_name, new_contact.l_name, new_contact.tel,
                  old_id)
    cursor.execute(rewrite, new_values)
    data.commit()
async def composers_tracks(composer_name: str,
                           db: sqlite3.Connection = Depends(get_db)):
    """Return the names of all tracks by ``composer_name``, sorted by name.

    ``db.row_factory`` is replaced by a factory that keeps only the first
    column, so the result is a flat list of track names.

    Raises:
        HTTPException: 404 with an ``error`` detail when the query returns no
            track for that composer.
    """
    db.row_factory = lambda c, x: x[0]
    cursor = db.execute(
        "SELECT name FROM tracks WHERE composer = ? ORDER BY name;",
        (composer_name, ))
    track_names = cursor.fetchall()
    if track_names:
        return track_names
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail={"error": f"Composer: {composer_name} not found"},
    )
def filter_processed_files(log_files: List[str], conn: sqlite3.Connection,
                           key=apache_access_log_file_chronological_decode) -> List[str]:
    """Drop log files whose content hash is already recorded in the database.

    The ``content_hash`` column of table ``processed_log_file`` is read into a
    set, so each membership test below is a constant-time lookup. If the table
    cannot be read (``sqlite3.OperationalError``) a warning is logged and
    every file is treated as not yet processed.

    Side effect: ``conn.row_factory`` is set to ``sqlite3.Row``; the SELECT
    runs inside ``with conn:``.

    Args:
        log_files: Paths of the candidate log files.
        conn: Connection to the database holding ``processed_log_file``.
        key: Callable applied to each file path to produce its sort key.

    Returns:
        A list of ``(content_hash(path), path)`` pairs for the files whose
        hash is not in the database, sorted by ``key(path)``.
        NOTE(review): the ``List[str]`` annotation does not match these
        pairs -- confirm what callers expect before changing either.
    """
    processed_hashes = set()
    try:
        with conn:
            conn.row_factory = sqlite3.Row
            c = conn.cursor()
            c.execute("SELECT content_hash, path FROM processed_log_file")
            # Consumed incrementally, so hashes read before a failure are kept.
            processed_hashes.update(row[0] for row in c)
    except sqlite3.OperationalError:
        LOGGER.warning(
            "Cannot read table processed_files in database so use all expanded files")

    hashed_files = [(content_hash(f), f) for f in log_files]
    unprocessed = [hf for hf in hashed_files if hf[0] not in processed_hashes]
    return sorted(unprocessed, key=lambda hf: key(hf[1]))
def get_all_results(connection: sqlite3.Connection) -> List[Dict]:
    """
    Returns all measurements.

    Each measurement is a dict with the keys `timestamp`, `download_rate`,
    `upload_rate` and `ping`, obtained by joining `measurements` with
    `downloads`, `uploads` and `pings` on `measurement_id`.
    `connection.row_factory` is set to `sqlite3.Row` as a side effect.
    """
    query = (
        " SELECT m.timestamp, d.download_rate, u.upload_rate, p.ping"
        " FROM measurements m"
        " JOIN downloads d ON m.measurement_id = d.measurement_id"
        " JOIN uploads u ON m.measurement_id = u.measurement_id"
        " JOIN pings p ON m.measurement_id = p.measurement_id "
    )
    connection.row_factory = sqlite3.Row
    rows = connection.execute(query).fetchall()
    return list(map(dict, rows))
def get_selections(conn: sqlite3.Connection):
    """Get selections in "selections" table

    The query is executed immediately (and ``conn.row_factory`` is set to
    ``sqlite3.Row``); the rows are converted to dictionaries lazily, as the
    returned generator is consumed.

    Args:
        conn (sqlite3.connection): Sqlite3 connection

    Yield:
        Dictionaries with as many keys as there are columns in the table.

    Example::
        {"id": ..., "name": ..., "count": ..., "query": ...}

    """
    conn.row_factory = sqlite3.Row
    cursor = conn.execute("SELECT * FROM selections")
    return (dict(selection) for selection in cursor)
def get_result_for_measurement_id(connection: sqlite3.Connection,
                                  measurement_id: int) -> Dict:
    """
    Returns the download, upload, and ping measurements for `measurement_id`.

    The result is a dict with the keys `timestamp`, `download_rate`,
    `upload_rate` and `ping`. `connection.row_factory` is set to
    `sqlite3.Row` as a side effect. When no measurement has that id,
    `fetchone()` returns None and the `dict(...)` conversion raises
    `TypeError`.
    """
    query = (
        " SELECT m.timestamp, d.download_rate, u.upload_rate, p.ping"
        " FROM measurements m"
        " JOIN downloads d ON m.measurement_id = d.measurement_id"
        " JOIN uploads u ON m.measurement_id = u.measurement_id"
        " JOIN pings p ON m.measurement_id = p.measurement_id"
        " WHERE m.measurement_id = ?; "
    )
    connection.row_factory = sqlite3.Row
    row = connection.execute(query, [measurement_id]).fetchone()
    return dict(row)
def get_results_for_start_and_end_date(connection: sqlite3.Connection,
                                       start_datetime: datetime,
                                       end_datetime: datetime) -> List[Dict]:
    """
    Returns all measurements between `start_datetime` and `end_datetime`.

    Both bounds are inclusive. Each measurement is a dict with the keys
    `timestamp`, `download_rate`, `upload_rate` and `ping`.
    `connection.row_factory` is set to `sqlite3.Row` as a side effect.
    """
    query = (
        " SELECT m.timestamp, d.download_rate, u.upload_rate, p.ping"
        " FROM measurements m"
        " JOIN downloads d ON m.measurement_id = d.measurement_id"
        " JOIN uploads u ON m.measurement_id = u.measurement_id"
        " JOIN pings p ON m.measurement_id = p.measurement_id"
        " WHERE m.timestamp >= ? AND m.timestamp <= ?; "
    )
    connection.row_factory = sqlite3.Row
    time_window = [start_datetime, end_datetime]
    rows = connection.execute(query, time_window).fetchall()
    return list(map(dict, rows))
def update_videos(conn: sqlite3.Connection,
                  api_auth,
                  update_age_cutoff=86400,
                  verbosity=1):
    """Refresh stored video records from the YouTube API, yielding progress.

    This is a generator: nothing runs until it is iterated, and it must be
    consumed to completion for all records to be processed.

    Videos are selected from the ``videos`` table (skipping titles 'unknown'
    and 'YouTube Music' and status 'deleted'), and only those whose
    ``last_updated`` is more than ``update_age_cutoff`` seconds older than
    the current UTC time are re-queried through ``youtube.get_video_info``.
    Depending on the API answer a record is updated, marked 'active',
    'inactive' or 'deleted'; its tags, channel and topics are passed to the
    corresponding helper functions, and the record is written with
    ``update_video``.

    Side effects: ``conn.row_factory`` is set to ``sqlite3.Row`` at the start
    and to ``None`` at the end; the connection is committed every
    ``commit_interval`` processed records and once after the loop; ``VACUUM``
    is executed at the end.

    :param conn: connection to the database holding the tables read here
        (videos, channels, tags, videos_tags, videos_topics)
    :param api_auth: passed unchanged to ``youtube.get_video_info``
    :param update_age_cutoff: minimum age, in seconds, a record's
        ``last_updated`` must have to be refreshed
    :param verbosity: 1 enables the summary/status log lines here; levels 2
        and 3 are forwarded as booleans to the helper functions
    :return: yields tuples ``((records_passed // sub_percent) / 10,
        records_passed, updated, newly_inactive, newly_active, deleted)``
        whenever ``records_passed`` is a multiple of ``sub_percent_int``.
        The final ``results`` dict is only logged, not returned.
    """
    verbosity_level_1 = verbosity >= 1
    verbosity_level_2 = verbosity >= 2
    verbosity_level_3 = verbosity >= 3
    records_passed, updated, newly_inactive, newly_active, deleted = [0] * 5
    conn.row_factory = sqlite3.Row
    cur = conn.cursor()
    cur.execute(
        """SELECT id, last_updated FROM videos WHERE title NOT IN (?, ?) AND NOT status = ? ORDER BY last_updated;""",
        ('unknown', 'YouTube Music', 'deleted'))
    # video id -> last_updated
    records = {k: v for k, v in cur.fetchall()}
    cur.execute("""SELECT * FROM channels WHERE title is not NULL;""")
    # Two-value unpacking: assumes channels has exactly two columns,
    # presumably (id, title) -- TODO confirm against the schema
    channels = {k: v for k, v in cur.fetchall()}
    cur.execute("""SELECT * FROM tags;""")
    # Inverted mapping: second column -> first column of the tags table
    existing_tags = {v: k for k, v in cur.fetchall()}
    cur.execute("""SELECT * FROM videos_tags""")
    # first column -> list of second-column values
    existing_videos_tags = {}
    for video_tag_entry in cur.fetchall():
        existing_videos_tags.setdefault(video_tag_entry[0], [])
        existing_videos_tags[video_tag_entry[0]].append(video_tag_entry[1])
    cur.execute("""SELECT * FROM videos_topics""")
    # first column -> list of second-column values
    existing_topics_tags = {}
    for video_topic_entry in cur.fetchall():
        existing_topics_tags.setdefault(video_topic_entry[0], [])
        existing_topics_tags[video_topic_entry[0]].append(video_topic_entry[1])
    cur.close()
    now = datetime.utcnow()  # for determining if the record is old enough
    dt_strp = datetime.strptime
    dt_format = '%Y-%m-%d %H:%M:%S'
    # Ids of the records whose last update is older than the cutoff
    records_filtered_by_age = [
        k for k, v in records.items()
        if (now - dt_strp(v, dt_format)).total_seconds() > update_age_cutoff
    ]
    sub_percent, sub_percent_int = calculate_subpercentage(
        len(records_filtered_by_age))
    commit_interval = calculate_commit_interval(sub_percent_int)
    commit_interval_counter = 0
    del records

    if verbosity_level_1:
        logger.info(f'\nStarting records\' updating...\n' + '-' * 100)
    for record in records_filtered_by_age:
        records_passed += 1
        # Progress report every sub_percent_int records.
        # NOTE(review): raises ZeroDivisionError if sub_percent_int can be 0
        # -- confirm what calculate_subpercentage returns for few records
        if records_passed % sub_percent_int == 0:
            yield ((records_passed // sub_percent) / 10, records_passed,
                   updated, newly_inactive, newly_active, deleted)
        # ``record`` is an id up to here; from now on it is the full row as a
        # dict
        record = execute_query(conn, 'SELECT * FROM videos WHERE id = ?',
                               (record, ))
        record = dict(record[0])
        video_id = record['id']

        # Up to 5 API attempts, sleeping 0.01 * attempt**attempt seconds after
        # each request; the loop is left on the first truthy response
        for attempt in range(1, 6):
            api_response = youtube.get_video_info(video_id, api_auth)
            time.sleep(0.01 * attempt**attempt)
            if api_response:
                if api_response['items']:
                    api_video_data = wrangle_video_record(
                        api_response['items'])
                    if len(api_video_data) >= 7:
                        # a record must have at least 7 fields after
                        # going through wrangle_video_record, otherwise it's a
                        # record of a deleted video with no valid data
                        api_video_data.pop('published_at', None)
                        if 'channel_title' not in api_video_data:
                            # the video is somehow available through API
                            # (though some data is missing), but not on YouTube
                            pass
                        else:
                            if record['status'] == 'inactive':
                                record['status'] = 'active'
                                newly_active += 1
                                if verbosity_level_1:
                                    logger.info(
                                        f'{get_record_id_and_title(record)}, '
                                        f'is now active')
                            record.update(api_video_data)
                    else:
                        record['status'] = 'deleted'
                        deleted += 1
                        if verbosity_level_1:
                            logger.info(f'{get_record_id_and_title(record)}, '
                                        f'is now deleted from YT')
                else:
                    # Response without items: the video is treated as inactive
                    if record['status'] == 'active':
                        record['status'] = 'inactive'
                        newly_inactive += 1
                        if verbosity_level_1:
                            logger.info(f'{get_record_id_and_title(record)}, '
                                        f'is now inactive')
                record['last_updated'] = datetime.utcnow().replace(
                    microsecond=0)
                break
        else:
            # No attempt returned a truthy response: skip this record entirely
            # (it is neither written nor counted towards the commit interval)
            continue

        if 'tags' in record:
            tags = record.pop('tags')
            add_tags_to_table_and_videos(conn, tags, video_id, existing_tags,
                                         existing_videos_tags,
                                         verbosity_level_3)
            # perhaps, the record should also be checked for tags that have
            # been removed from the updated version and have them removed from
            # the DB as well. However, keeping a fuller record, despite what
            # the video's uploader/author might think about its accuracy,
            # seems like a better option
        channel_id = record['channel_id']
        if 'channel_title' in record:
            channel_title = record.pop('channel_title')
            try:
                if channel_title != channels[channel_id]:
                    update_channel(conn, channel_id, channel_title,
                                   channels[channel_id], verbosity_level_1)
                    channels[channel_id] = channel_title
            except KeyError:
                """The channel now has a different ID... it's a thing. One possible reason for this is large channels, belonging to large media companies, getting split off into smaller channels. That's what it looked like when I came across it. Only encountered this once in ~19k of my own records."""
                add_channel(conn, channel_id, channel_title,
                            verbosity_level_2)
        else:
            # Less than a handful of videos were not available on YouTube, but
            # were available through API, with channel id, but no channel title.
            # In Takeout, these had no channel title or id, but had regular
            # title/id. Very strange.
            if channel_id not in channels:
                add_channel(conn, channel_id)
        if 'relevant_topic_ids' in record:
            topics_list = record.pop('relevant_topic_ids')
            # NOTE(review): topics are only added for videos that already have
            # at least one entry in videos_topics -- confirm this is intended
            if existing_topics_tags.get(video_id):
                for topic in topics_list:
                    if topic not in existing_topics_tags[video_id]:
                        add_topic_to_video(conn, topic, video_id,
                                           verbosity_level_2)

        if update_video(conn, record, verbosity_level_3):
            updated += 1

        # Commit in batches of commit_interval processed records
        commit_interval_counter += 1
        if commit_interval_counter == commit_interval:
            conn.commit()
            commit_interval_counter = 0

    conn.commit()
    execute_query(conn, 'VACUUM')
    # Row factory is reset to None, not to whatever it was before the call
    conn.row_factory = None
    results = {
        'records_processed': records_passed,
        'records_updated': updated,
        'newly_inactive': newly_inactive,
        'newly_active': newly_active,
        'deleted_from_youtube': deleted
    }
    if verbosity_level_1:
        logger.info(json.dumps(results, indent=4))
        logger.info('\n' + '-' * 100 + f'\nUpdating finished')
def search(data: Connection, part_name: str) -> list[[Contact]]:
    """Return the contacts whose last name starts with ``part_name``.

    The match is done with ``l_name LIKE part_name + "%"``. A row factory
    that converts each (f_name, l_name, tel) row into a ``Contact`` is
    installed on ``data`` and stays installed after the call. The returned
    list is empty when nothing matches.
    """

    def build_contact(_cursor, row):
        f_name, l_name, tel = row
        return Contact(f_name, l_name, tel)

    data.row_factory = build_contact
    statement = "SELECT f_name, l_name,tel FROM contacts WHERE l_name LIKE ?"
    pattern = part_name + "%"
    return data.cursor().execute(statement, (pattern, )).fetchall()