示例#1
0
    def __init__(self, schema_='metrics'):
        """Set up the logger, the metrics table object, write buffers and config.

        schema_ is stored as the schema name and assigned to the table object.
        """
        self.logger = setup_logging('AdsDataSqlSync', 'INFO')

        # table object tagged with the requested schema
        metrics_table = models.MetricsTable()
        metrics_table.schema = schema_
        self.schema = schema_
        self.table = metrics_table

        # used to buffer writes
        self.upserts, self.tmp_update_buffer = [], []
        self.tmp_count = 0

        # start from an empty dict and merge in whatever load_config() returns
        settings = {}
        settings.update(load_config())
        self.config = settings
示例#2
0
    def __init__(self, schema_='metrics'):
        """Load config, configure a per-module logger and initialise state.

        schema_ is stored as the schema name and assigned to the table object.
        """
        # This file gets its own logger; the alternative would be the shared
        # app logger, logging.getLogger('ads-data').
        # Project home is the parent of the directory containing this file.
        here = os.path.dirname(__file__)
        proj_home = os.path.realpath(os.path.join(here, '../'))

        # config is loaded first because the logger options are read from it
        self.config = load_config(proj_home=proj_home)
        log_level = self.config.get('LOGGING_LEVEL', 'INFO')
        log_to_stdout = self.config.get('LOG_STDOUT', False)
        self.logger = setup_logging(__name__,
                                    proj_home=proj_home,
                                    level=log_level,
                                    attach_stdout=log_to_stdout)

        # table object tagged with the requested schema
        metrics_table = models.MetricsTable()
        metrics_table.schema = schema_
        self.schema = schema_
        self.table = metrics_table

        # used to buffer writes
        self.upserts, self.tmp_update_buffer = [], []
        self.tmp_count = 0
示例#3
0
 def create_metrics_table(self, db_engine):
     """Create the schema named by self.schema and the metrics table in it.

     db_engine is used both to execute the CreateSchema construct and to
     create the table.
     """
     schema_name = self.schema
     db_engine.execute(CreateSchema(schema_name))

     # point the model's table object at our schema before creating it
     sql_table = models.MetricsTable().__table__
     sql_table.schema = schema_name
     sql_table.create(db_engine)

     self.logger.info('metrics.py, metrics table created')
示例#4
0
    def row_view_to_metrics(self,
                            passed_row_view,
                            nonbib_db_conn,
                            row_view_schema='nonbib',
                            m=None):
        """Convert the passed row view into a complete metrics record.

        Args:
            passed_row_view: object exposing the attributes bibcode, refereed,
                citations, reads, downloads, authors and reference.
            nonbib_db_conn: connection whose execute(query, bibcode) yields
                one row per citing paper, indexed as
                (refereed, reference count, bibcode).  It is only used when
                the row view has citations.
            row_view_schema: schema holding the RowViewM relation.  It is
                concatenated into the SQL text, so it must be a trusted
                identifier and never user-supplied input.
            m: optional object to populate; a new models.MetricsTable is
                created when omitted.

        Returns:
            m, with the metrics attributes filled in.
        """
        if m is None:
            m = models.MetricsTable()

        # first do easy fields, copied straight from the row view
        bibcode = passed_row_view.bibcode
        citations = passed_row_view.citations
        m.bibcode = bibcode
        m.refereed = passed_row_view.refereed
        m.citations = citations
        m.reads = passed_row_view.reads
        m.downloads = passed_row_view.downloads

        m.citation_num = len(citations) if citations else 0
        # author_num is used as a divisor below, so it is never less than 1
        m.author_num = max(len(passed_row_view.authors),
                           1) if passed_row_view.authors else 1
        m.reference_num = len(
            passed_row_view.reference) if passed_row_view.reference else 0

        # the first four characters of a bibcode are read as its year
        pubyear = int(bibcode[:4])

        # next deal with papers that cite the current one:
        # compute histogram, normalized values of citations
        #  and create list of refereed citations
        normalized_reference = 0.0
        citations_json_records = []
        refereed_citations = []
        citations_histogram = defaultdict(float)
        if citations:
            q = ('select refereed,array_length(reference,1),bibcode from '
                 '{0}.RowViewM where bibcode in (select unnest(citations) '
                 'from {0}.RowViewM where bibcode=%s);').format(row_view_schema)
            result = nonbib_db_conn.execute(q, bibcode)
            auth_norm = 1.0 / m.author_num
            for row in result:
                # a missing/false flag counts as not refereed
                citation_refereed = row[0] in (True, 't', 'true')
                len_citation_reference = int(row[1]) if row[1] else 0
                citation_bibcode = row[2]

                # each citation weighs 1/(number of references of the citing
                # paper), with the reference count floored at 5
                citation_normalized_references = 1.0 / float(
                    max(5, len_citation_reference))
                normalized_reference += citation_normalized_references
                citations_json_records.append({
                    # NOTE(review): encode() yields bytes on Python 3 --
                    # confirm the consumer of rn_citation_data expects that
                    "bibcode": citation_bibcode.encode('utf-8'),
                    "ref_norm": citation_normalized_references,
                    "auth_norm": auth_norm,
                    "pubyear": pubyear,
                    "cityear": int(citation_bibcode[:4])
                })
                if citation_refereed:
                    refereed_citations.append(citation_bibcode)
                # Add only this citation's weight to its year bucket.  Adding
                # the running total instead would over-count and make the
                # histogram depend on the order rows come back in.
                citations_histogram[
                    citation_bibcode[:4]] += citation_normalized_references

        m.refereed_citations = refereed_citations
        m.refereed_citation_num = len(refereed_citations)

        # annual citations: counts divided by the paper's age in years,
        # where a paper published this year has age 1
        today = datetime.today()
        resource_age = max(1.0, today.year - pubyear + 1)
        m.an_citations = float(m.citation_num) / float(resource_age)
        m.an_refereed_citations = float(
            m.refereed_citation_num) / float(resource_age)

        # normalized info
        m.rn_citations = normalized_reference
        m.rn_citation_data = citations_json_records
        m.rn_citations_hist = dict(citations_histogram)
        m.modtime = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        return m