def update_metrics_all(self, db_conn, nonbib_conn, row_view_schema='ingest', start_offset=1, end_offset=-1):
    """Rebuild the metrics record for every row of the nonbib table.

    Streams nonbib rows in blocks (yield_per) so the whole table is never
    held in memory, converts each row to a metrics dict and saves it.

    :param db_conn: connection to the metrics database (passed to save/flush)
    :param nonbib_conn: connection to the nonbib/row-view database
    :param row_view_schema: postgres schema holding the nonbib row view
    :param start_offset: unused; kept for interface compatibility
    :param end_offset: unused; kept for interface compatibility
    """
    start_time = time.time()
    # MAX_ROWS <= 0 means "no limit"; otherwise stop after that many rows
    max_rows = self.config['MAX_ROWS']
    count = 0
    Session = sessionmaker(bind=nonbib_conn)
    session = Session()
    try:
        session.execute('set search_path to {}'.format(row_view_schema))
        # yield_per streams one block of rows from the database at a time
        for current_row in session.query(models.NonBibTable).yield_per(100):
            metrics_dict = self.row_view_to_metrics(current_row, nonbib_conn, row_view_schema)
            self.save(db_conn, metrics_dict)
            count += 1
            # >= so that exactly max_rows rows are processed (the old
            # `count > max_rows` test let one extra row through)
            if max_rows > 0 and count >= max_rows:
                break
            if count % 1000 == 0:
                self.logger.debug(
                    'metrics.py, metrics count = {}'.format(count))
        self.flush(db_conn)
    finally:
        # always release the nonbib session, even if a row conversion raises
        session.close()
    self.logger.debug(
        'metrics.py, update_metrics_all processed {} rows in {} seconds'.format(
            count, time.time() - start_time))
def update_metrics_bibcode( self, bibcode, db_conn, nonbib_conn, row_view_schema='nonbib'):
    """Refresh the metrics record for one bibcode from its nonbib row.

    Reads the current nonbib row for *bibcode*, converts it to a metrics
    record (merging into the existing record when one is present) and
    commits it to the metrics database.

    :param bibcode: bibcode whose metrics record should be updated
    :param db_conn: connection to the metrics database
    :param nonbib_conn: connection to the nonbib/row-view database
    :param row_view_schema: postgres schema holding the nonbib row view
    """
    session_factory = sessionmaker()
    metrics_session = session_factory(bind=db_conn)
    metrics_session.execute('set search_path to {}'.format('metrics'))

    nonbib_access = nonbib.NonBib(row_view_schema)
    nonbib_row = nonbib_access.get_by_bibcode(nonbib_conn, bibcode)

    existing = metrics_session.query(models.MetricsTable).filter(
        models.MetricsTable.bibcode == bibcode).first()
    fresh = self.row_view_to_metrics(nonbib_row, nonbib_conn,
                                     row_view_schema, existing)
    # merge updates the already-persisted record in place; add inserts new
    if existing:
        metrics_session.merge(fresh)
    else:
        metrics_session.add(fresh)
    metrics_session.commit()
    metrics_session.close()
    self.flush(db_conn)
def update_metrics_changed(self, db_conn, nonbib_conn, row_view_schema='ingest'):
    """Update the metrics record of every bibcode listed in the delta table.

    Iterates the NonBibDeltaTable of changed bibcodes, fetches the current
    nonbib row for each, converts it to a metrics record and commits it
    (merging into an existing record when one is present).

    :param db_conn: connection to the metrics database
    :param nonbib_conn: connection to the nonbib/row-view database
    :param row_view_schema: postgres schema holding the nonbib row view
    """
    Nonbib_Session = sessionmaker(bind=nonbib_conn)
    nonbib_sess = Nonbib_Session()
    nonbib_sess.execute('set search_path to {}'.format(row_view_schema))

    Metrics_Session = sessionmaker()
    metrics_sess = Metrics_Session(bind=db_conn)
    metrics_sess.execute('set search_path to {}'.format('metrics'))

    sql_sync = nonbib.NonBib(row_view_schema)
    count = 0
    # yield_per streams delta rows in blocks rather than loading them all
    for delta_row in nonbib_sess.query(
            models.NonBibDeltaTable).yield_per(100):
        row = sql_sync.get_by_bibcode(nonbib_conn, delta_row.bibcode)
        metrics_old = metrics_sess.query(models.MetricsTable).filter(
            models.MetricsTable.bibcode == delta_row.bibcode).first()
        metrics_new = self.row_view_to_metrics(row, nonbib_conn,
                                               row_view_schema, metrics_old)
        # merge updates the already-persisted record in place; add inserts new
        if metrics_old:
            metrics_sess.merge(metrics_new)
        else:
            metrics_sess.add(metrics_new)
        # per-row commit keeps partial progress if a later row fails
        metrics_sess.commit()
        # logs before the increment, so the first message reports count = 0
        if (count % 10000) == 0:
            self.logger.debug('delta count = {}, bibcode = {}'.format(
                count, delta_row.bibcode))
        count += 1
    nonbib_sess.close()
    metrics_sess.close()
    self.flush(db_conn)
def update_metrics_test(self, bibcode, row_view_schema='ingest'):
    """Test helper: rebuild and save the metrics record for one bibcode.

    NOTE(review): every call below disagrees in arity with how the same
    helpers are invoked by the sibling update methods in this file --
    this helper looks stale; confirm against the helper definitions
    before relying on it.
    """
    sql_sync = nonbib.NonBib(row_view_schema)
    # NOTE(review): sibling methods call get_by_bibcode(nonbib_conn, bibcode);
    # the single-argument call here may be out of date -- confirm.
    row_view_bibcode = sql_sync.get_by_bibcode(bibcode)
    # NOTE(review): sibling methods call row_view_to_metrics(row, nonbib_conn,
    # row_view_schema[, metrics_old]); passing sql_sync as the second argument
    # is inconsistent with that usage -- confirm.
    metrics_dict = self.row_view_to_metrics(row_view_bibcode, sql_sync)
    # NOTE(review): sibling methods call save(db_conn, metrics_dict) and
    # flush(db_conn); these shorter calls may be out of date -- confirm.
    self.save(metrics_dict)
    self.flush()