Пример #1
0
 def create_table(self):
     """Create the ``car_info`` table via ``DB_Maker``.

     The DDL declares an auto-incrementing integer primary key ``id`` and
     five ``varchar`` columns (``name``, ``image``, ``founded``, ``models``,
     ``website``). The statement is echoed to stdout before it is passed
     to ``DB_Maker.create_table_by_sql``.
     """
     maker = DB_Maker()
     ddl = """create table car_info (
                             [id]            integer PRIMARY KEY autoincrement,
                             [name]         varchar (10),
                             [image]      varchar (30),
                             [founded]      varchar (30),
                             [models]      varchar (30),
                             [website]      varchar (30)
                         )"""
     # Echo the exact statement that is about to be executed.
     print(ddl)
     maker.create_table_by_sql(sql=ddl)
Пример #2
0
    def correct_names(self):
        """Record alternative author-name spellings as aliases of a primary name.

        Steps:

        1. Load ``self.author_url_dic``, ``self.author_dic`` and the skip
           list from the JSON files under ``./data``.
        2. Build the candidate list: every key of ``self.author_url_dic``
           for which ``DB_Maker.is_kr`` is truthy, plus the ``smooth``-ed
           entries of ``./data/kr_hard_coding.txt``; de-duplicated and sorted.
        3. For each candidate that has a URL and is not in the skip list,
           load its page with ``BS`` and read the ``name primary`` span and
           all ``name secondary`` spans. Each secondary name that is
           non-empty and differs from its lower-cased form is mapped to the
           primary name in ``self.author_dic`` and added to the skip list.
        4. After every processed author, rewrite ``author_dic.json`` and
           ``skip_author.json``.

        A page without a ``name primary`` span is reported and skipped
        instead of raising ``AttributeError``; nothing is persisted for it.
        """
        with open('./data/author_url_dic.json', 'r') as f:
            self.author_url_dic = json.load(f)

        with open('./data/author_dic.json', 'r') as f:
            self.author_dic = json.load(f)

        with open('./data/skip_author.json', 'r') as f:
            skip = set(json.load(f))

        # Imported locally, as in the original code.
        from db_maker import DB_Maker
        db_maker = DB_Maker()

        # A set removes duplicates between the two candidate sources.
        candidates = {x for x in self.author_url_dic if db_maker.is_kr(x)}
        candidates.update(
            smooth(x) for x in get_file('./data/kr_hard_coding.txt'))
        candidates = sorted(candidates)

        total = len(candidates)
        print(total)

        for i, author in enumerate(candidates):
            print(i, '/', total)
            if author not in self.author_url_dic or author in skip:
                continue
            url = self.author_url_dic[author]
            html = BS(url)

            # find() yields None when the span is absent. Without this guard
            # a single unexpected page aborts the run, and because the author
            # never reaches the skip list every re-run fails at the same spot.
            primary_tag = html.find('span', {'class': 'name primary'})
            if primary_tag is None:
                print('no primary name found for', author)
                continue

            primary = smooth(primary_tag.text)
            secondary_list = [
                smooth(x.text)
                for x in html.find_all('span', {'class': 'name secondary'})
            ]

            print(primary, secondary_list)

            skip.add(primary)
            for name in secondary_list:
                # Ignore empty names and names identical to their
                # lower-cased form.
                if name and name != name.lower():
                    skip.add(name)
                    self.author_dic[name] = primary

            # Persist after every author so the files always reflect the
            # work done so far.
            with open('./data/author_dic.json', 'w') as f:
                json.dump(self.author_dic, f)
            with open('./data/skip_author.json', 'w') as f:
                json.dump(sorted(skip), f)
Пример #3
0
 def __init__(self):
     """Store the site URL, the HTTP request headers and a ``DB_Maker``."""
     self.url = "http://chengyu.haoshiwen.org"

     # Long header values are named first so the mapping below stays readable.
     accept_types = (
         'text/html,application/xhtml+xml,application/xml;q=0.9,'
         'image/webp,image/apng,*/*;q=0.8'
     )
     browser_agent = (
         'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
         '(KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
     )
     self.headers = {
         'Host': "chengyu.haoshiwen.org",
         'Connection': 'keep-alive',
         'Accept': accept_types,
         'user-agent': browser_agent,
     }

     self.db = DB_Maker()
Пример #4
0
 def make_data(self):
     """Scrape brand detail pages from chebiaow.com and insert them via ``DB_Maker``.

     For every element of ``au`` (defined outside this method; presumably
     the index letters of the site -- confirm) the page
     ``/logo/<element>.html`` is fetched. Each element matched by
     ``li .zq`` supplies an ``href`` to a detail page, from which the image
     ``src``, the name and three ``span`` texts are read and inserted with
     ``self.insert_sql``.

     Raises:
         IndexError: if a detail page lacks one of the expected elements.
         KeyError: if a matched index entry has no ``href`` attribute.
     """
     db = DB_Maker()
     for uppercase in au:
         index_url = "http://www.chebiaow.com/logo/{}.html".format(uppercase)
         index_page = requests.get(url=index_url, headers=self.headers)
         index_soup = BeautifulSoup(index_page.content, 'html.parser')

         for link in index_soup.select("li .zq"):
             detail_url = "http://www.chebiaow.com{}".format(link.attrs['href'])
             detail_page = requests.get(detail_url, headers=self.headers)
             detail_soup = BeautifulSoup(detail_page.content, 'html.parser')

             image = detail_soup.select(".xq-left>.img>img")[0].get("src")
             name = detail_soup.select(".xq-right>li>a")[0].get_text()

             # Run the span selector once per page instead of three times.
             # Positions 2, 4 and 6 are the ones stored as founded, models
             # and website.
             spans = detail_soup.select(".xq-right>li>span")
             founded = spans[2].get_text()
             models = spans[4].get_text()
             website = spans[6].get_text()

             db.insert(self.insert_sql, (name, image, founded, models, website))
Пример #5
0
import traceback
from updater import Updater
from db_maker import DB_Maker
from datetime import datetime
from utils import webhook
import json
import traceback


# Script entry point: for every conference known to the updater, fetch the
# years after the last recorded one up to the current year, then build the
# per-conference/year DB for each year that was fetched successfully.
# NOTE(review): the handler for the `try` below is not visible in this
# excerpt -- confirm it exists further down in the file.
if __name__ == '__main__':
    try:
        # Announce the start of the run through the project's webhook helper.
        webhook("Update start!")
        current_year = datetime.now().year
        my_updater = Updater()
        my_db_maker = DB_Maker()
        my_db_maker.load_model()

        # Maps a conference key to a year; the update resumes from year + 1.
        # NOTE(review): the file handle from open() is never closed
        # explicitly -- consider a `with` block.
        recent_year_dict = json.load(open('./data/recent_year_dict.json'))
        for conf, dblp in my_updater.get_conf2dblp().items():
            fromyear = recent_year_dict[conf] + 1
            toyear = current_year
            print(conf, fromyear, toyear)
            success_years = my_updater.update_conf(conf, dblp, fromyear, toyear)
            for year in success_years:
                # Retry until make_conf_year_db returns a truthy value.
                # NOTE(review): no retry limit or back-off -- this spins
                # forever if the call keeps returning a falsy value.
                while not my_db_maker.make_conf_year_db(conf, year):
                    pass
            #if len(success_years) == 0:
            #    webhook(conf + " is already updated")
        
        # For manual update
        # while not my_db_maker.make_conf_year_db('iclr', 2020):
Пример #6
0
 def query_data(self):
     """Run ``self.query_sql`` through ``DB_Maker.fetch_one`` and print the result."""
     row = DB_Maker().fetch_one(self.query_sql)
     print(row)