def migrate_stock_data():
    mj_file = join(PROCESSED_DATA_DIR, 'stock_data_master.json')
    ks_file = join(PROCESSED_DATA_DIR, '20210201_partials_stock_data.json')
    with open(mj_file, 'r') as mjf, open(ks_file, 'r') as ksf:
        master_json = json.load(mjf)
        ks_json = json.load(ksf)

    symbols = set(master_json.keys()).union(set(ks_json.keys()))
    session = database.create_session()
    existing_rows = set([r.symbol for r in session.query(StockData).all()])

    for symbol in symbols:
        if symbol in existing_rows:
            continue

        price = deep_get(master_json, [symbol, 'PRICE'])
        key_stats = deep_get(ks_json, [symbol, 'KEY_STATS'])
        advanced_stats = deep_get(master_json, [symbol, 'ADVANCED_STATS'])
        cash_flow = deep_get(master_json, [symbol, 'CASH_FLOW', 'cashflow'], [])

        row = compact_object({
            'symbol': symbol,
            'price': price,
            'key_stats': key_stats,
            'advanced_stats': advanced_stats,
            'cash_flow': None if len(cash_flow) == 0 else cash_flow[0]
        })
        session.guarded_add(StockData(**row))
        session = database.recreate_session_contingent(session)
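# NOTE: `deep_get`, `compact_object`, and `database.recreate_session_contingent`
# above are project helpers not defined in this section. A minimal sketch of
# what the call sites imply for the first two (the real implementations live
# elsewhere in the repo):
def deep_get(obj, keys, default=None):
    """Walk nested dicts by key path, returning `default` on any miss."""
    for key in keys:
        if not isinstance(obj, dict) or key not in obj:
            return default
        obj = obj[key]
    return obj


def compact_object(obj):
    """Drop keys whose values are None so they don't overwrite DB defaults."""
    return {k: v for k, v in obj.items() if v is not None}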
def insert_tracks(tracks):
    session = database.create_session()

    try:
        artist_updates = {}
        artist_track_updates = {}

        for new_track_path, track in tracks.items():
            # Create new row
            track_metadata = track.get_metadata()
            db_row = {
                k: v for k, v in track_metadata.items()
                if k in ALL_TRACK_DB_COLS
            }
            db_row[TrackDBCols.FILE_PATH.value] = new_track_path

            title = extract_unformatted_title(db_row[TrackDBCols.TITLE.value])
            db_row[TrackDBCols.TITLE.value] = title

            try:
                # Persist row to DB
                session.add(Track(**db_row))
                session.commit()
            except Exception as e:
                handle(e)
                session.rollback()
                continue

            # Update artists
            comment = load_comment(track_metadata.get(TrackDBCols.COMMENT.value), '{}')
            artist_updates_result = DataManager.update_artists(session, comment)
            artist_updates[title] = artist_updates_result

            # Add artist tracks
            track_id = session.query(Track).filter_by(file_path=new_track_path).first().id
            successful_artist_ids = [
                a for a, s in artist_updates_result.items()
                if s != DBUpdateType.FAILURE.value
            ]
            artist_track_updates[title] = DataManager.insert_artist_tracks(
                session, track_id, successful_artist_ids)

        DataManager.print_database_operation_statuses('Artist updates', artist_updates)
        DataManager.print_database_operation_statuses('Artist track updates', artist_track_updates)

    except Exception as e:
        handle(e)
        session.rollback()
        raise e

    finally:
        session.close()
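# NOTE: `load_comment` is an assumed helper (used here and in
# find_artist_disparities below). The call sites imply it parses a
# JSON-encoded comment string into a dict, falling back to a default.
# A minimal sketch under that assumption:
import json

def load_comment(comment, default='{}'):
    try:
        return json.loads(comment)
    except (TypeError, ValueError):
        return json.loads(default)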
def sync_fields():
    session = database.create_session()

    try:
        tracks = session.query(Track).all()
        DataManager.sync_track_fields(tracks)
        session.commit()
    except Exception as e:
        handle(e, 'Top-level exception occurred while syncing track fields')
        session.rollback()
    finally:
        session.close()
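# NOTE: `handle` is an assumed error-reporting helper used throughout this
# section. A minimal sketch, assuming it prints an optional context message
# plus the exception traceback without re-raising:
import traceback

def handle(e, message=None):
    if message is not None:
        print(message)
    traceback.print_exception(type(e), e, e.__traceback__)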
def __init__(self, session=None):
    """ Initializes data manager and track data. """
    self.tracks = DataManager.load_tracks()
    self.camelot_map, self.collection_metadata = generate_camelot_map(self.tracks)
    self.session = session if session is not None else database.create_session()

    self.max_results = get_config_value(['HARMONIC_MIXING', 'MAX_RESULTS'])
    self.cutoff_threshold_score = get_config_value(['HARMONIC_MIXING', 'SCORE_THRESHOLD'])
    self.result_threshold = get_config_value(['HARMONIC_MIXING', 'RESULT_THRESHOLD'])

    TransitionMatch.db_session = self.session
    TransitionMatch.collection_metadata = self.collection_metadata
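# The HARMONIC_MIXING values read above via `get_config_value` are assumed to
# come from a config file shaped roughly like this (illustrative values only):
#
#   {
#       "HARMONIC_MIXING": {
#           "MAX_RESULTS": 50,
#           "SCORE_THRESHOLD": 0.75,
#           "RESULT_THRESHOLD": 10
#       }
#   }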
def ingest_tracks(input_dir, target_dir=PROCESSED_MUSIC_DIR):
    """ Ingest new tracks - extract tags, format fields, and create track table entries. """
    session = database.create_session()

    try:
        input_files = get_audio_files(input_dir)
        tracks_to_save = {}

        for f in input_files:
            old_path = join(input_dir, f)

            # Load track and read ID3 tags
            try:
                track = AudioFile(old_path)
            except Exception as e:
                handle(e, 'Couldn\'t read ID3 tags for %s' % old_path)
                continue

            # Verify requisite ID3 tags exist
            id3_data = track.get_tags()
            if not REQUIRED_ID3_TAGS.issubset(set(id3_data.keys())):
                print('Can\'t ingest %s due to missing requisite ID3 tags' % old_path)
                continue

            # Copy to target directory
            new_path = join(target_dir, f)
            try:
                print('\nCopying:\t%s\nto:\t\t%s' % (old_path, new_path))
                copyfile(old_path, new_path)
            except Exception as e:
                handle(e, 'Couldn\'t copy %s to target directory' % new_path)
                continue

            tracks_to_save[new_path] = track

        # Update database
        DataManager.insert_tracks(tracks_to_save)

    except Exception as e:
        handle(e)

    finally:
        session.close()
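# NOTE: `get_audio_files` is an assumed helper (also used by the pipeline
# __init__ below). A plausible sketch: list the file names in a directory
# that carry a known audio extension (the extension set is a guess):
from os import listdir
from os.path import isfile, join, splitext

AUDIO_EXTENSIONS = {'.mp3', '.flac', '.wav', '.aiff', '.m4a'}

def get_audio_files(input_dir):
    return [
        f for f in listdir(input_dir)
        if isfile(join(input_dir, f)) and splitext(f)[1].lower() in AUDIO_EXTENSIONS
    ]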
def find_artist_disparities():
    session = database.create_session()

    try:
        for track in session.query(Track).all():
            # Generate comment
            track_comment = track.comment
            if track_comment is None:
                try:
                    track_model = AudioFile(track.file_path)
                    track_metadata = track_model.get_metadata()
                    track_comment = track_metadata.get(TrackDBCols.COMMENT.value, '{}')
                except Exception:
                    track_comment = '{}'
            track_comment = load_comment(track_comment)

            # Extract artist names from comment
            artist_str = track_comment.get(ArtistFields.ARTISTS.value, '')
            remixer_str = track_comment.get(ArtistFields.REMIXERS.value, '')
            comment_artists = set([
                ca for ca in
                [a.strip() for a in artist_str.split(',')] +
                [r.strip() for r in remixer_str.split(',')]
                if not is_empty(ca)
            ])

            # Get artist names in DB
            artist_tracks = session.query(ArtistTrack).filter_by(track_id=track.id).all()
            artist_rows = set()
            for artist_track in artist_tracks:
                artist_row = session.query(Artist).filter_by(id=artist_track.artist_id).first()
                artist_rows.add(artist_row.name)

            # Find diff between comment and DB entries
            if len(comment_artists.difference(artist_rows)) > 0:
                print('Artist disparity for track %s' % track.title)
                print('Comment artist entry: %s' % str(comment_artists))
                print('DB artist entries: %s' % str(artist_rows))
                print('-------\n')

    except Exception as e:
        handle(e, 'Top-level exception occurred while finding artist disparities')
        session.rollback()

    finally:
        session.close()
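# NOTE: `is_empty` is an assumed string helper; a minimal sketch:
def is_empty(s):
    return s is None or len(s.strip()) == 0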
def __init__(self, record_type, source_dir=UNPROCESSED_DIR):
    self.record_type = record_type
    self.session = database.create_session()
    self.source_dir = source_dir
    self.track_files = get_audio_files(source_dir)
    self.cmd_overrides = {}
                fv_row = {
                    'track_id': track_id,
                    'features': {
                        smms.feature_name: smms.preprocess(feature_value)
                    }
                }
                session.guarded_add(FeatureValue(**fv_row))
            except Exception as e:
                handle(e)
                continue
    except Exception as e:
        handle(e)
        session.rollback()
        return
    finally:
        session.close()


if __name__ == '__main__':
    warnings.simplefilter('ignore')

    session = database.create_session()
    tracks = set([t for t in session.query(Track).all()])

    args = sys.argv
    run(set([int(t) for t in args[1:]]) if len(args) > 1 else set())
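# NOTE: `session.guarded_add` (used here and in migrate_stock_data above) is
# not a standard SQLAlchemy Session method; it's assumed to be a custom
# extension. A sketch of the pattern the name and call sites suggest:
def guarded_add(session, row):
    """Add and commit a single row, rolling back instead of raising on failure."""
    try:
        session.add(row)
        session.commit()
    except Exception as e:
        handle(e)
        session.rollback()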
def delete_tracks(track_ids):
    session = database.create_session()

    try:
        # Delete entries from artist_track tables first
        deletion_statuses, artist_ids_to_update = DataManager.delete_artist_tracks(
            session, track_ids)
        DataManager.print_database_operation_statuses(
            'Artist track deletion statuses', deletion_statuses)

        # Then, update artist track count column
        update_statuses = DataManager.update_artist_counts(session, artist_ids_to_update)
        DataManager.print_database_operation_statuses(
            'Artist track count update statuses', update_statuses)

        # Then, remove references from the ingestion pipeline tables
        tag_record_deletion_statuses = defaultdict(lambda: {})
        tag_record_tables = [
            ('Initial Record', InitialTagRecord),
            ('Post-MIK Record', PostMIKTagRecord),
            ('Post-RB Record', PostRekordboxTagRecord),
            ('Final Record', FinalTagRecord),
        ]
        for track_id in track_ids:
            for record_name, record_model in tag_record_tables:
                try:
                    tag_record = session.query(record_model).filter_by(
                        track_id=track_id).first()
                    session.delete(tag_record)
                    tag_record_deletion_statuses[record_name][
                        track_id] = DBUpdateType.DELETE.value
                except Exception as e:
                    handle(e)
                    tag_record_deletion_statuses[record_name][
                        track_id] = DBUpdateType.FAILURE.value
                    # Skip the remaining tag record tables for this track
                    break
        DataManager.print_database_operation_statuses(
            'Tag record update statuses', tag_record_deletion_statuses)

        # Finally, delete the tracks themselves
        track_deletion_statuses = {}
        for track_id in track_ids:
            try:
                track = session.query(Track).filter_by(id=track_id).first()
                session.delete(track)
                track_deletion_statuses[track_id] = DBUpdateType.DELETE.value
            except Exception as e:
                handle(e)
                track_deletion_statuses[track_id] = DBUpdateType.FAILURE.value
                continue
        DataManager.print_database_operation_statuses(
            'Track deletion statuses', track_deletion_statuses)

        print('Committing session')
        session.commit()

    except Exception as e:
        handle(e)
        print('Session not committed')

    finally:
        session.close()
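# NOTE: `DataManager.print_database_operation_statuses` is assumed to be a
# simple reporting helper. A minimal sketch of the behavior implied above:
def print_database_operation_statuses(header, statuses):
    print('\n%s:' % header)
    for key, status in statuses.items():
        print('%s: %s' % (key, status))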
def load_tracks():
    session = database.create_session()
    try:
        return session.query(Track).all()
    finally:
        session.close()
def __init__(self):
    """ Initialize the assistant. """
    self.session = database.create_session()
    self.transition_match_finder = TransitionMatchFinder(self.session)