def create_table(self):
    """Create the ``car_info`` table used to store scraped car-brand rows.

    Columns: autoincrement ``id`` plus name, image, founded, models, website.
    The DDL is echoed to stdout before execution for easy debugging.
    """
    ddl = """create table car_info ( [id] integer PRIMARY KEY autoincrement, [name] varchar (10), [image] varchar (30), [founded] varchar (30), [models] varchar (30), [website] varchar (30) )"""
    print(ddl)
    maker = DB_Maker()
    maker.create_table_by_sql(sql=ddl)
def correct_names(self):
    """Normalize author name variants by resolving each candidate's canonical
    ("primary") name from its profile page and mapping secondary spellings to it.

    Reads ./data/author_url_dic.json, ./data/author_dic.json and
    ./data/skip_author.json; writes the latter two back as it goes so progress
    survives interruption.
    """
    # Load name -> profile-URL map and the existing alias dictionary.
    with open('./data/author_url_dic.json', 'r') as f:
        self.author_url_dic = json.load(f)
    with open('./data/author_dic.json', 'r') as f:
        self.author_dic = json.load(f)
    # Names already processed (or deliberately excluded) — skipped below.
    with open('./data/skip_author.json', 'r') as f:
        skip = set(json.load(f))
    from db_maker import DB_Maker
    db_maker = DB_Maker()
    candidates = []
    for x in self.author_url_dic.keys():
        # presumably is_kr() flags Korean-style names — TODO confirm;
        # the commented heuristic was: ('.' in x or '-' in x or len(x.split()) > 3)
        if db_maker.is_kr(x):
            candidates.append(x)
    # Manually curated extra candidates; smooth() appears to be a text
    # normalizer — defined elsewhere, verify its contract against this use.
    candidates += [smooth(x) for x in get_file('./data/kr_hard_coding.txt')]
    candidates = sorted(list(set(candidates)))
    print(len(candidates))
    for i, author in enumerate(candidates):
        print(i, '/', len(candidates))  # progress indicator
        # Skip hard-coded names with no URL, and anything already handled.
        if not (author in self.author_url_dic) or author in skip:
            continue
        url = self.author_url_dic[author]
        # BS() fetches and parses the profile page (helper defined elsewhere).
        html = BS(url)
        primary = smooth(html.find('span', {'class': 'name primary'}).text)
        secondary_list = [smooth(x.text) for x in html.find_all('span', {'class': 'name secondary'})]
        print(primary, secondary_list)
        skip.add(primary)
        for name in secondary_list:
            # Ignore empty strings and all-lowercase variants (likely not
            # proper display names — NOTE(review): confirm this heuristic).
            if name and name != name.lower():
                skip.add(name)
                self.author_dic[name] = primary
        # Checkpoint after every author so an interrupted run loses nothing.
        # NOTE(review): source formatting was ambiguous — these writes are
        # placed inside the author loop; confirm against the original file.
        with open('./data/author_dic.json', 'w') as f:
            json.dump(self.author_dic, f)
        with open('./data/skip_author.json', 'w') as f:
            json.dump(sorted(list(skip)), f)
def __init__(self):
    """Initialize the scraper: base URL, browser-like request headers,
    and a database handle."""
    base = "http://chengyu.haoshiwen.org"
    ua = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
          '(KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36')
    headers = {
        'Host': "chengyu.haoshiwen.org",
        'Connection': 'keep-alive',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'user-agent': ua,
    }
    self.url = base
    self.headers = headers
    self.db = DB_Maker()
def make_data(self):
    """Scrape car-brand logo pages from chebiaow.com (one index page per
    letter in ``au``) and insert one row per brand into the database.

    Relies on ``self.headers`` and ``self.insert_sql`` being set elsewhere.
    """
    db = DB_Maker()
    # `au` is a module-level iterable of index letters — defined elsewhere;
    # presumably uppercase A–Z, verify against the importing module.
    for uppercase in au:
        url = "http://www.chebiaow.com/logo/{}.html".format(uppercase)
        response = requests.get(url=url, headers=self.headers)
        soup = BeautifulSoup(response.content, 'html.parser')
        # Each <li> .zq element links to one brand's detail page.
        items = soup.select("li .zq")
        for item in items:
            url2 = "http://www.chebiaow.com{}".format(item.attrs['href'])
            response2 = requests.get(url2, headers=self.headers)
            soup2 = BeautifulSoup(response2.content, 'html.parser')
            # Positional selectors: fields are pulled from fixed spots in the
            # detail page layout — brittle if the site markup changes.
            image = soup2.select(".xq-left>.img>img")[0].get("src")
            name = soup2.select(".xq-right>li>a")[0].get_text()
            founded = soup2.select(".xq-right>li>span")[2].get_text()
            models = soup2.select(".xq-right>li>span")[4].get_text()
            website = soup2.select(".xq-right>li>span")[6].get_text()
            db.insert(self.insert_sql, (name, image, founded, models, website))
    pass
"""Entry point: pull newly-published conference years from DBLP and rebuild
the per-conference/per-year databases, notifying via webhook on start."""
import json
import traceback
from datetime import datetime

from db_maker import DB_Maker
from updater import Updater
from utils import webhook

if __name__ == '__main__':
    try:
        webhook("Update start!")
        current_year = datetime.now().year
        my_updater = Updater()
        my_db_maker = DB_Maker()
        my_db_maker.load_model()
        # Last successfully processed year per conference; update from the
        # following year up to the current one.
        with open('./data/recent_year_dict.json') as f:
            recent_year_dict = json.load(f)
        for conf, dblp in my_updater.get_conf2dblp().items():
            fromyear = recent_year_dict[conf] + 1
            toyear = current_year
            print(conf, fromyear, toyear)
            success_years = my_updater.update_conf(conf, dblp, fromyear, toyear)
            for year in success_years:
                # Retry until the DB build for this (conf, year) succeeds.
                while not my_db_maker.make_conf_year_db(conf, year):
                    pass
            # if len(success_years) == 0:
            #     webhook(conf + " is already updated")
        # For manual update:
        # while not my_db_maker.make_conf_year_db('iclr', 2020): pass
    except Exception:
        # Log the full stack trace; the doubled `traceback` import in the
        # original indicates a handler like this was intended.
        traceback.print_exc()
def query_data(self):
    """Execute ``self.query_sql`` and print the first row of the result."""
    maker = DB_Maker()
    row = maker.fetch_one(self.query_sql)
    print(row)