def main():
    """Build a video catalog, derive pairs from it, and print the sorted result.

    Loads the configuration from ``..\\cfg.json``, creates a ``DataGatherer``
    from it, assigns the outputs of ``get_catalog`` and ``get_pairs`` to the
    gatherer's ``video_catalog`` and ``pairs`` attributes, and prints whatever
    ``sort_by_pairs()`` returns.

    Returns:
        None. The sorted catalog is only written to standard output.
    """
    # Sizes handed to get_catalog() and get_pairs() respectively.
    length = 500
    n_pairs = 50

    # NOTE(review): the path uses a Windows-style separator; confirm that
    # load_config is only expected to run where that resolves correctly.
    cfg = load_config('..\\cfg.json')
    dg = DataGatherer(cfg)

    dg.video_catalog = get_catalog(length)
    # The pairs are derived from the catalog that was just stored on dg.
    dg.pairs = get_pairs(dg.video_catalog, n_pairs)

    sorted_catalog = dg.sort_by_pairs()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(sorted_catalog)
db = create_engine('postgres://%s%s/%s' % (dbuser, dbhost, dbname)) con = None con = psycopg2.connect( database=dbname, host=dbhost, user=dbuser, password=dbpass) return con if __name__ == '__main__': con = get_db_conn() data_gather = DataGatherer() data_updater = EventUpdater() data_gather.set_target_file() data_gather.download_zip() data_gather.unzip_download() df = data_gather.get_csv_dataframe() df = data_updater.initial_df_clean(df) df = data_updater.get_states(df) df = data_updater.normalize_goldstein(df) df = data_updater.clean_df(df) df = data_updater.aggregate_data(df) df = data_updater.batch_update(df, con) data_gather.delete_recent_files()
def count_all_video(path):
    """Print how many entries the video catalog holds after loading ``path``.

    Reads raw pairs from ``path`` with ``dg.read_raw_pairs``, passes them to
    ``dg.fill_video_catalog`` and prints the length of
    ``dg.get_video_catalog()``.

    NOTE(review): ``dg`` is not a parameter here. It resolves to a
    module-level name which, in the visible code, is only bound inside the
    ``__main__`` guard, so calling this function from an importing module
    raises ``NameError`` unless ``dg`` is defined some other way.

    Args:
        path: Location handed to ``dg.read_raw_pairs``.
    """
    pairs = dg.read_raw_pairs(path)
    dg.fill_video_catalog(pairs)
    print('total videos: {}'.format(len(dg.get_video_catalog())))


def update_stats(dg, limit=None):
    """Delegate to ``dg.update_video_catalog`` with the given ``limit``.

    Args:
        dg: Object exposing ``update_video_catalog(limit=...)``.
        limit: Forwarded unchanged as the ``limit`` keyword argument.
    """
    dg.update_video_catalog(limit=limit)


def print_words(dg):
    """Print every entry of ``dg.get_all_words()`` in sorted key order.

    Each entry is written on its own line as ``<key> :  <value>``. That is the
    text the former Python 2 ``print k, / print ': ', / print words[k]``
    sequence produced: a space after the key, and two spaces after the colon.

    Args:
        dg: Object exposing ``get_all_words()``, which returns a mapping.
    """
    words = dg.get_all_words()
    for key in sorted(words):
        # One single-argument print(...) call works on Python 2 and 3 alike.
        print('%s :  %s' % (key, words[key]))


if __name__ == '__main__':
    cfg = load_config('..\\cfg.json')
    print('starting {} v.{}\n'.format(PROJECT_NAME, VERSION))
    dg = DataGatherer(cfg)

    # Sample titles whose rank is printed, one per line, in this order.
    sample_titles = (
        'Cyanide And Happiness- Can Of Paint',
        'Geoff Ramsey: Expert Parent - Drunk Tank Animated Adventures',
        'Raju Shrivastav - Bevda Train Main',
        'Hitler Rants Parodies Outro',
        'Man Goes Crazy with ROAD RAGE & A GUN',
    )
    for title in sample_titles:
        print(dg.calculate_title_rank(title, lambda x: x.rank1))

    #cl = Correlator()
    #print cl.calculate_c(dg.db_handler.get_all_videos(), lambda x: x.rank1, lambda x: x.rank1)
    #dg.update_views()