def __init__(self, schema_='metrics'):
    """Set up the logger, the metrics table object, write buffers and config.

    schema_ is the database schema name assigned to the metrics table.
    """
    self.logger = setup_logging('AdsDataSqlSync', 'INFO')

    # remember the schema and stamp it on a fresh table object
    self.schema = schema_
    metrics_table = models.MetricsTable()
    self.table = metrics_table
    metrics_table.schema = self.schema

    # used to buffer writes
    self.upserts, self.tmp_update_buffer = [], []
    self.tmp_count = 0

    # keep a private dict copy of whatever load_config() returns
    self.config = dict(load_config())
def __init__(self, schema_='metrics'):
    """Load the project config, then set up the logger, table and buffers.

    schema_ is the database schema name assigned to the metrics table.
    """
    # the project home is resolved from the parent of this file's directory
    this_dir = os.path.dirname(__file__)
    proj_home = os.path.realpath(os.path.join(this_dir, '../'))
    self.config = load_config(proj_home=proj_home)

    # individual logger for this file (instead of the shared 'ads-data'
    # app logger); level and stdout attachment are read from the config
    log_level = self.config.get('LOGGING_LEVEL', 'INFO')
    log_to_stdout = self.config.get('LOG_STDOUT', False)
    self.logger = setup_logging(
        __name__,
        proj_home=proj_home,
        level=log_level,
        attach_stdout=log_to_stdout)

    # remember the schema and stamp it on a fresh table object
    self.schema = schema_
    metrics_table = models.MetricsTable()
    self.table = metrics_table
    metrics_table.schema = self.schema

    # used to buffer writes
    self.upserts, self.tmp_update_buffer = [], []
    self.tmp_count = 0
def create_metrics_table(self, db_engine):
    """Create the schema named by self.schema and the metrics table in it.

    db_engine is used both to execute the CreateSchema statement and to
    create the table.
    """
    # the schema statement is issued before the table is created in it
    schema_name = self.schema
    db_engine.execute(CreateSchema(schema_name))

    # point the model's table definition at that schema, then create it
    table_def = models.MetricsTable().__table__
    table_def.schema = schema_name
    table_def.create(db_engine)

    self.logger.info('metrics.py, metrics table created')
def row_view_to_metrics(self, passed_row_view, nonbib_db_conn, row_view_schema='nonbib', m=None):
    """Convert the passed row view into a complete metrics record.

    Args:
        passed_row_view: row for one paper; the attributes bibcode, refereed,
            citations, reads, downloads, authors and reference are read.
        nonbib_db_conn: connection whose ``execute(sql, bibcode)`` yields one
            (refereed, reference count, bibcode) row per citing paper.
        row_view_schema: schema holding the RowViewM table that is queried.
            It is spliced into the SQL text, so it must not come from
            untrusted input.
        m: optional metrics object to populate; a new models.MetricsTable
            is created when omitted.

    Returns:
        The populated metrics object (``m`` itself when one was passed in).
    """
    if m is None:
        m = models.MetricsTable()

    # first do easy fields
    bibcode = passed_row_view.bibcode
    citations = passed_row_view.citations
    authors = passed_row_view.authors
    references = passed_row_view.reference

    m.bibcode = bibcode
    m.refereed = passed_row_view.refereed
    m.citations = citations
    m.reads = passed_row_view.reads
    m.downloads = passed_row_view.downloads
    m.citation_num = len(citations) if citations else 0
    # author_num is used as a divisor below, so it is never less than 1
    m.author_num = max(len(authors), 1) if authors else 1
    m.reference_num = len(references) if references else 0

    # the first four characters of a bibcode are read as the year
    pubyear = int(bibcode[:4])
    auth_norm = 1.0 / m.author_num

    # next deal with papers that cite the current one:
    # compute histogram, normalized values of citations
    # and create list of refereed citations
    citations_json_records = []
    refereed_citations = []
    citations_histogram = defaultdict(float)
    total_normalized_citations = 0.0

    if citations:
        # only the schema name is interpolated; bibcode is a bound parameter
        q = ('select refereed,array_length(reference,1),bibcode from {0}'
             '.RowViewM where bibcode in (select unnest(citations) from {0}'
             '.RowViewM where bibcode=%s);').format(row_view_schema)
        for row in nonbib_db_conn.execute(q, bibcode):
            # refereed may be a real boolean or a textual 't'/'true'
            citation_refereed = row[0] in (True, 't', 'true')
            len_citation_reference = int(row[1]) if row[1] else 0
            citation_bibcode = row[2]

            # each citation weighs 1/(its reference count), with the
            # reference count floored at 5
            citation_normalized_references = 1.0 / float(
                max(5, len_citation_reference))
            total_normalized_citations += citation_normalized_references

            citations_json_records.append({
                "bibcode": citation_bibcode.encode('utf-8'),
                "ref_norm": citation_normalized_references,
                "auth_norm": auth_norm,
                "pubyear": pubyear,
                "cityear": int(citation_bibcode[:4]),
            })
            if citation_refereed:
                refereed_citations.append(citation_bibcode)

            # add this citation's own weight to its year bucket; adding the
            # running total would over-count and make the histogram depend
            # on the order in which the database returns rows
            citations_histogram[
                citation_bibcode[:4]] += citation_normalized_references

    m.refereed_citations = refereed_citations
    m.refereed_citation_num = len(refereed_citations)

    # annual citations: averaged over the paper's age in years, counting
    # the publication year itself and never using less than one year
    now = datetime.now()
    resource_age = max(1.0, now.year - pubyear + 1)
    m.an_citations = float(m.citation_num) / float(resource_age)
    m.an_refereed_citations = float(
        m.refereed_citation_num) / float(resource_age)

    # normalized info
    m.rn_citations = total_normalized_citations
    m.rn_citation_data = citations_json_records
    m.rn_citations_hist = dict(citations_histogram)
    m.modtime = now.strftime('%Y-%m-%d %H:%M:%S')
    return m