Exemplo n.º 1
0
 def mark_volume_ok(self, volume_db_id):
     """Flag one ``journal_volume`` row as crawled.

     Sets ``is_crawled=true`` on the row whose ``id`` equals
     ``volume_db_id`` and then prints a confirmation line.

     Args:
         volume_db_id: Value matched against ``journal_volume.id``.
     """
     cur = REMOTE_CONNS_POOL.new_db_cursor()
     try:
         # Bind the id as a query parameter (the %s style create_volume
         # already uses) instead of formatting it into the SQL text.
         cur.execute(
             "update journal_volume set is_crawled=true where id=%s",
             (volume_db_id,))
     finally:
         # Release the cursor even when the statement fails.
         cur.close()
     print('Mark volume {} ok!'.format(volume_db_id))
Exemplo n.º 2
0
 def get_db_volume_item(self, volume_link):
     """Look up the ``is_crawled`` flag of the rows stored for a link.

     Args:
         volume_link: Link compared against ``journal_volume.link``.

     Returns:
         The result of ``fetchall()`` for the query: one row per
         matching ``journal_volume`` record, holding ``is_crawled``.
     """
     cur = REMOTE_CONNS_POOL.new_db_cursor()
     try:
         # The link is bound as a parameter: formatting it between quotes
         # breaks the statement as soon as the link contains a "'" and
         # lets the link text alter the query.
         cur.execute(
             "select is_crawled from journal_volume where link = %s",
             (volume_link,))
         return cur.fetchall()
     finally:
         # Release the cursor on success and on failure alike.
         cur.close()
Exemplo n.º 3
0
 def get_unfinished_journals(self,
                             single_area_relation=True,
                             open_access=True,
                             limit=100,
                             volume_links_got=True):
     """Select journals of this publisher that are not fully crawled.

     Only rows with ``is_crawled_all_article=FALSE`` whose ``site_source``
     contains ``self.publisher_keyword`` are returned, ordered by ``id``.

     Args:
         single_area_relation: When truthy, require ``area_relation_cot=1``.
         open_access: When truthy, require ``open_access=true``.
         limit: Maximum number of rows. A negative value means "take
             ``abs(limit)`` rows in descending ``id`` order".
         volume_links_got: The string ``'no limit'`` disables this filter;
             otherwise its truthiness selects ``volume_links_got=true``
             or ``volume_links_got=false``.

     Returns:
         A one-entry dict mapping ``self.publisher_keyword`` to the rows
         returned by ``fetchall()``.
     """
     # Only fixed SQL fragments are assembled here; every caller-supplied
     # value is bound as a parameter further down.
     conditions = []
     if single_area_relation:
         conditions.append('area_relation_cot=1')
     if open_access:
         conditions.append('open_access=true')
     if volume_links_got != 'no limit':
         if volume_links_got:
             conditions.append('volume_links_got=true')
         else:
             conditions.append('volume_links_got=false')
     conditions.append('is_crawled_all_article=FALSE')
     conditions.append('site_source like %s')

     # A negative limit asks for the newest ids first.
     if limit < 0:
         limit = -limit
         direction = ' desc'
     else:
         direction = ''

     sql = ("select name,sjr_id,site_source,area_relation_cot,"
            "category_relation_cot,publisher,volume_links_got from journal "
            "WHERE " + ' and '.join(conditions) +
            " order by id" + direction + " limit %s")
     # The LIKE wildcards live inside the bound value, so the query text
     # contains no literal '%' besides the placeholders.
     params = ('%{}%'.format(self.publisher_keyword), limit)

     cur = REMOTE_CONNS_POOL.new_db_cursor()
     try:
         cur.execute(sql, params)
         data = cur.fetchall()
     finally:
         # Release the cursor even when the query fails.
         cur.close()
     return {self.publisher_keyword: data}
Exemplo n.º 4
0
def journals_of_specific_index(index_sjr_id,
                               single_area_relation,
                               index_name,
                               open_access,
                               limit=100):
    """Select not-yet-crawled journals that belong to one index entry.

    Journals are taken from ``journal`` where ``site_source`` contains
    ``lsevier``, ``ieee`` or ``springer``, ``is_crawled_all_article`` is
    FALSE and ``sjr_id`` appears in ``journal_<index_name>`` with
    ``<index_name>_id`` equal to ``index_sjr_id``. Rows are ordered by
    ``h_index``.

    Args:
        index_sjr_id: Value matched against ``<index_name>_id``.
        single_area_relation: When truthy, require ``area_relation_cot=1``.
        index_name: Name used to build the ``journal_<index_name>`` table
            and ``<index_name>_id`` column names. It is inserted into the
            SQL text, so it must be a trusted identifier.
        open_access: When truthy, require ``open_access=true``.
        limit: Maximum number of rows. A negative value means "take
            ``abs(limit)`` rows in descending ``h_index`` order".

    Returns:
        The rows returned by ``fetchall()``.
    """
    conditions = []
    if single_area_relation:
        conditions.append('area_relation_cot=1')
    if open_access:
        conditions.append('open_access=true')
    conditions.append('(site_source like %s or site_source like %s'
                      ' or site_source like %s)')
    conditions.append('is_crawled_all_article=FALSE')
    # Table and column names cannot be bound as parameters, so index_name
    # is the only value still formatted into the statement.
    conditions.append(
        'sjr_id IN (select journal_id from journal_{0} WHERE {0}_id=%s)'
        .format(index_name))

    # A negative limit asks for the highest h_index first.
    if limit < 0:
        limit = -limit
        direction = ' desc'
    else:
        direction = ''

    sql = ("select name,sjr_id,site_source,area_relation_cot,"
           "category_relation_cot,publisher,volume_links_got from journal "
           "WHERE " + ' and '.join(conditions) +
           " ORDER by h_index" + direction + " limit %s")
    # LIKE patterns are passed as values so the query text holds no
    # literal '%' that would clash with the %s placeholders.
    params = ('%lsevier%', '%ieee%', '%springer%', index_sjr_id, limit)

    cur = REMOTE_CONNS_POOL.new_db_cursor()
    try:
        cur.execute(sql, params)
        return cur.fetchall()
    finally:
        # The original never closed this cursor; release it in all cases.
        cur.close()
Exemplo n.º 5
0
 def get_unfinished_volume_links(self):
     """Return the volumes of the current journal still to be crawled.

     When ``self.JournalObj.volume_links_got`` is falsy the volume rows
     are created first via ``create_new_volumes()``. If ``self.just_init``
     is truthy nothing is queried and an empty list is returned.

     Returns:
         ``[]`` when ``self.just_init`` is truthy, otherwise the
         ``(link, id)`` rows of ``journal_volume`` for this journal's
         ``sjr_id`` with ``is_crawled=FALSE``, as given by ``fetchall()``.
     """
     if not self.JournalObj.volume_links_got:
         # First-time initialisation of this journal's volume rows.
         self.create_new_volumes()
     if self.just_init:
         return []
     cur = REMOTE_CONNS_POOL.new_db_cursor()
     try:
         # sjr_id is bound as a parameter rather than formatted in.
         cur.execute(
             'select link,id from journal_volume '
             'where journal_sjr_id=%s and is_crawled=FALSE',
             (self.JournalObj.sjr_id,))
         return cur.fetchall()
     finally:
         # The original never closed this cursor; release it in all cases.
         cur.close()
Exemplo n.º 6
0
 def create_new_volumes(self):
     """Store every collected volume link and mark the journal as done.

     Calls ``create_volume()`` for each entry of ``self.volume_links`` and
     then sets ``volume_links_got=TRUE`` on the ``journal`` row whose
     ``sjr_id`` equals ``self.JournalObj.sjr_id``. Progress is printed
     before and after.
     """
     print('Init volume_links of {}...'.format(self.JournalObj.name))
     if not self.volume_links:
         # Fetching the volume links failed (nothing collected): leave
         # volume_links_got unset on the journal so it is retried later.
         # The truthiness test also covers None, not only an empty list.
         return
     for volume_link in self.volume_links:
         self.create_volume(volume_link)
     cur = REMOTE_CONNS_POOL.new_db_cursor()
     try:
         # sjr_id is bound as a parameter rather than formatted in.
         cur.execute(
             'update journal set volume_links_got=TRUE where sjr_id=%s',
             (self.JournalObj.sjr_id,))
     finally:
         # Release the cursor even when the update fails.
         cur.close()
     print(' volume links created ok! <{}>'.format(self.JournalObj.name))
Exemplo n.º 7
0
 def create_volume(self, volume_link):
     """Insert one not-yet-crawled ``journal_volume`` row for a link.

     The row gets ``volume_link``, the current journal's ``sjr_id``,
     ``is_crawled=False`` and ``get_beijing_time()`` as ``create_time``.
     ``psycopg2.IntegrityError`` and ``psycopg2.OperationalError`` are
     reported with ``print`` and not re-raised.

     Args:
         volume_link: Link stored in ``journal_volume.link``.
     """
     # Stays None if obtaining the cursor itself fails; the original then
     # hit an UnboundLocalError on the trailing cur.close().
     cur = None
     try:
         cur = REMOTE_CONNS_POOL.new_db_cursor()
         cur.execute(
             "insert into journal_volume(link,journal_sjr_id,is_crawled,create_time)"
             "values(%s,%s,%s,%s)", (volume_link, self.JournalObj.sjr_id,
                                     False, get_beijing_time()))
         print('[Success]Save ok volume_link: {} !'.format(volume_link))
     except psycopg2.IntegrityError as e:
         print('[Error] in volume_link create:\n{} '.format(str(e)))
     except psycopg2.OperationalError as e:
         print('[Error] in volume_link create:\nserver conn error{}'.format(
             str(e)))
     finally:
         # Close on every path, including exceptions not handled above.
         if cur is not None:
             cur.close()
Exemplo n.º 8
0
 def mark_journal_ok(self):
     """Mark the current journal as having all its articles crawled.

     Sets ``is_crawled_all_article = true`` on the ``journal`` row whose
     ``sjr_id`` equals ``self.JournalObj.sjr_id``.
     """
     cur = REMOTE_CONNS_POOL.new_db_cursor()
     try:
         # sjr_id is bound as a parameter rather than formatted in.
         cur.execute(
             'update journal set is_crawled_all_article = true '
             'where sjr_id = %s',
             (self.JournalObj.sjr_id,))
     finally:
         # Release the cursor even when the update fails.
         cur.close()
Exemplo n.º 9
0
def categories_of_specific_area(area_sjr_id):
    """Return the categories that belong to one area.

    Args:
        area_sjr_id: Value matched against ``sjr_category.area_id``.

    Returns:
        The ``(name, sjr_id)`` rows of ``sjr_category`` for that area, as
        given by ``fetchall()``.
    """
    cur = REMOTE_CONNS_POOL.new_db_cursor()
    try:
        # The area id is bound as a parameter rather than formatted in.
        cur.execute(
            'select name,sjr_id from sjr_category WHERE area_id=%s',
            (area_sjr_id,))
        return cur.fetchall()
    finally:
        # The original never closed this cursor; release it in all cases.
        cur.close()
Exemplo n.º 10
0
 def __init__(self, major_keyword):
     """Open a database cursor and remember the keyword.

     Args:
         major_keyword: Stored unchanged as ``self.major_keyword``.
     """
     # Obtain the cursor from the shared pool first, then keep it on the
     # instance as ``self.cur``.
     pooled_cursor = REMOTE_CONNS_POOL.new_db_cursor()
     self.cur = pooled_cursor
     self.major_keyword = major_keyword