def get_risks_by_date(project_id, base_date):
    """Return all risk rows for project *project_id* added on or after *base_date*.

    NOTE(review): the query is built with %-interpolation and hand-quoted
    base_date — injection-prone; prefer a parameterized query if DBHelper
    supports one.  Also, the join is on p.code = r.code (not project_id) —
    presumably risks carry the project code; confirm against the schema.
    """
    query = (" SELECT r.*"
             " FROM risks r"
             " INNER JOIN projects p ON p.code = r.code"
             " WHERE p.id = %s"
             " AND r.added >= '%s'") % (project_id, base_date)
    return DBHelper().fetch(query)
def insert_recommendation(project_id, risk_id, base_date, distance, sample, steps, type):
    """Insert one row into the recommendations table.

    NOTE(review): values are %-interpolated into quoted SQL — injection-prone;
    prefer a parameterized query if DBHelper supports one.  The parameter
    name ``type`` shadows the builtin but cannot be renamed without breaking
    keyword callers.
    """
    statement = (
        " INSERT INTO recommendations"
        " (project_id, risk_id, base_date, distance, sample, steps, type)"
        " VALUES (%s, %s, '%s', %s, %s, %s, '%s');"
    ) % (project_id, risk_id, base_date, distance, sample, steps, type)
    DBHelper().execute(statement)
def get_requirements_by_date(project_id, base_date):
    """Return all requirement rows for *project_id* added on or after *base_date*.

    NOTE(review): %-interpolated SQL with hand-quoted date — injection-prone;
    prefer a parameterized query if DBHelper supports one.
    """
    query = (" SELECT r.*"
             " FROM requirements r"
             " INNER JOIN projects p ON p.id = r.project_id"
             " WHERE p.id = %s"
             " AND r.added >= '%s'") % (project_id, base_date)
    return DBHelper().fetch(query)
def get_risk_by_id(id):
    """Return the risk row with the given id, or None when no row matches."""
    rows = DBHelper().fetch(u"SELECT * FROM risks WHERE id=%s;" % (id))
    return rows[0] if rows else None
def execute_one() -> None:
    """Fetch, lock, and process a single import job; wait when none is available.

    Flow: get a job id, lock it (skipping with a random backoff when the lock
    is contested), then download and store the job's data.  On a processing
    error the lock timestamp is zeroed and the doc saved so another worker can
    retry.  Errors never propagate out of this function.
    """
    try:
        os.makedirs("./tmp/", exist_ok=True)
        db = DBHelper()
        job_id = db.get_import_job()
        if job_id is None:
            # No work queued — sleep before the caller polls again.
            print("no job, wait")
            time.sleep(const.NO_JOB_WAIT)
            return
        try:
            job_doc = db.lock_import_job(job_id)
        except Exception as e:
            # Another worker likely grabbed the job; back off a random
            # 10–400 ms so contending workers de-synchronize.
            log("unable to lock, skip: ", e)
            # random backoff time
            time.sleep(0.01 * random.randint(1, 40))
            return
        try:
            download(job_doc)
            add_to_db(job_doc, db)
        except Exception as e:
            log("execution error: ", e)
            # release lock so the job can be retried by any worker
            job_doc["lock_timestamp"] = 0
            job_doc.save()
    except Exception as e:
        log("unknown error: ", e)
    finally:
        # NOTE(review): only logs — no tmp files are actually deleted here;
        # confirm whether cleanup was intended.
        log("clean up tmp folder")
def get_xxx_detail(text, table_name):
    """Parse a restaurant-detail menu page (HTML in *text*) and persist each
    menu item into *table_name*.

    Walks the direct children of the "rstdtl-menu-lst" container: heading
    children update the current menu section type; every other child is one
    menu item (name, optional image, price, description).
    """
    dbHelp = DBHelper(user_config.db_host, user_config.db_port,
                      user_config.db_user, user_config.db_password,
                      user_config.db_database)
    soup = BeautifulSoup(text, "html.parser")
    # With attrs given and no name restriction, find() matches any tag
    # carrying that class; here we also pin name="div" to be safe.
    menu_lst_tag = soup.find(name="div", attrs={"class": "rstdtl-menu-lst"})
    # Only one level deep is needed — headings and items are direct children.
    menu_head_content_tags = menu_lst_tag.findChildren(recursive=False)
    menu_type = ""
    for menu_head_content_tag in menu_head_content_tags:
        if menu_head_content_tag.get("class")[0] == "rstdtl-menu-lst__heading":
            # A heading row: remember the section it introduces.
            menu_type = my_util.getTagText(menu_head_content_tag)
            continue
        info = {}
        info["type"] = menu_type
        info["name"] = my_util.getTagText(
            menu_head_content_tag.find(
                name="p", attrs={"class": "rstdtl-menu-lst__menu-title"}))
        img_tag = menu_head_content_tag.find(name="img")
        img_href = img_tag.get("src") if img_tag else ""
        if img_href != "":
            # Download the image; the stored name is the last URL path segment.
            info["img"] = down_file(img_href,
                                    img_href[img_href.rfind("/") + 1:])
        info["price"] = my_util.getTagText(
            menu_head_content_tag.find(
                name="p", attrs={"class": "rstdtl-menu-lst__price"}))
        info["description"] = my_util.getTagText(
            menu_head_content_tag.find(name="p",
                                       attrs={"class": "rstdtl-menu-lst__ex"}))
        save_data(table_name, info, dbHelp)
    dbHelp.closeDB()
def get_project_by_id(id):
    """Return the project row with the given id, or None when no row matches."""
    rows = DBHelper().fetch(u"SELECT * FROM projects WHERE id=%s;" % (id))
    return rows[0] if rows else None
def add_import_job(start_date_s: str, end_date_s: str) -> None:
    """Create one "import_job" document per day in [start_date_s, end_date_s].

    Dates are ISO strings (inclusive range).  Days that already have a
    document (keyed by ISO date) are skipped.
    """
    day = date.fromisoformat(start_date_s)
    end_day = date.fromisoformat(end_date_s)
    db = DBHelper()
    while day <= end_day:
        # The remote API wants "YYYY,M,D" (no zero padding).
        date_str = "{},{},{}".format(day.year, day.month, day.day)
        cmd = config.curl_command_template.format(date_str, date_str).strip()
        doc = {
            '_id': day.isoformat(),
            'curl_cmd': cmd,
            'finished': False,
            'lock_timestamp': 0,
            'work_node': None,
            'total_num': None,
            'import_num': None
        }
        if day.isoformat() not in db.client["import_job"]:
            print(day)
            db.client["import_job"].create_document(doc)
        day += timedelta(days=1)
def get_requirement_by_id(id):
    """Return the requirement row with the given id, or None when absent."""
    rows = DBHelper().fetch(
        u"SELECT * FROM requirements WHERE id=%s;" % (id))
    return rows[0] if rows else None
def get_project_by_rand():
    """Return one project row chosen uniformly by the DB, or None if the
    projects table is empty."""
    rows = DBHelper().fetch(
        u"SELECT * FROM projects ORDER BY RAND() LIMIT 1;")
    return rows[0] if rows else None
def delete_recommendations(distance, sample, steps, type):
    """Delete every recommendation row matching the given parameter tuple.

    Numeric columns are compared after CAST to DECIMAL(5,3) so that
    differently-formatted stored values still match.
    NOTE(review): %-interpolated SQL — injection-prone for the ``type`` string.
    """
    statement = (
        " DELETE "
        " FROM recommendations "
        " WHERE CAST(distance AS DECIMAL(5,3))=CAST(%s AS DECIMAL(5,3))"
        " AND CAST(sample AS DECIMAL(5,3))=CAST(%s AS DECIMAL(5,3))"
        " AND CAST(steps AS DECIMAL(5,3))=CAST(%s AS DECIMAL(5,3))"
        " AND type='%s';"
    ) % (distance, sample, steps, type)
    DBHelper().execute(statement)
def get_requirements_distance(req_a_id, req_b_id):
    """Return the stored distance row between two requirements, or None.

    The lookup is directional (a→b); callers wanting symmetry must query
    both orders themselves.
    """
    rows = DBHelper().fetch((u" SELECT * "
                             u" FROM requirements_distance "
                             u" WHERE req_a_id=%s "
                             u" AND req_b_id=%s;") % (req_a_id, req_b_id))
    return rows[0] if rows else None
def __init__(self):
    """Wire up the bot: DB access, the Bot client, and message routing.

    Registers ``self.handle`` for chat messages and ``self.on_callback``
    for inline-keyboard callback queries.  ``self.controls`` starts empty;
    presumably it holds per-chat UI state — confirm against the handlers.
    """
    self.db = DBHelper()
    self.bot = Bot()
    # per-chat control/state registry
    self.controls = {}
    # start the long-polling loop with our two handlers
    self.bot.message_loop(
        {
            'chat': self.handle,
            'callback_query': self.on_callback
        }
    )
def dump(filename: str, keep_auth: bool = False) -> None:
    """Write the design documents of every non-system database to *filename*
    as JSON, keyed by database name.

    System databases (names starting with '_') are always skipped; the
    '_design/auth' document is skipped unless *keep_auth* is true.
    """
    dbh = DBHelper()
    data = {}
    for db_name in (n for n in dbh.client.all_dbs() if n[0] != '_'):
        design_docs = dbh.client[db_name].design_documents()
        if not keep_auth:
            design_docs = [d for d in design_docs if d['id'] != '_design/auth']
        data[db_name] = design_docs
    with open(filename, 'w') as outfile:
        json.dump(data, outfile)
def get_risks_distance(risk_a_id, risk_b_id):
    """Return the stored distance row between two risks, or None.

    The lookup is directional (a→b), mirroring get_requirements_distance.
    """
    rows = DBHelper().fetch((u" SELECT * "
                             u" FROM risks_distance "
                             u" WHERE risk_a_id=%s "
                             u" AND risk_b_id=%s;") % (risk_a_id, risk_b_id))
    return rows[0] if rows else None
def get_project_by_rand():
    """Return one random project row with id < 18, or None if none qualify.

    NOTE(review): another get_project_by_rand without the id filter exists
    elsewhere in this codebase — the hard-coded 18 looks experiment-specific;
    confirm before reuse.
    """
    rows = DBHelper().fetch(u" SELECT *"
                            u" FROM projects"
                            u" WHERE id < 18"
                            u" ORDER BY rand()"
                            u" LIMIT 1")
    return rows[0] if rows else None
def get_projects_non_processed(distance, sample, steps, type):
    """Return all projects that do not yet have a recommendation for the
    given (distance, sample, steps, type) combination, ordered by id.

    Numeric columns are compared after CAST to DECIMAL(5,1).
    NOTE(review): %-interpolated SQL — injection-prone for the ``type`` string.
    """
    query = (
        " SELECT p.*"
        " FROM projects p"
        " WHERE p.id NOT IN("
        " SELECT project_id"
        " FROM recommendations"
        " WHERE CAST(distance AS DECIMAL(5,1)) = %s"
        " AND CAST(sample AS DECIMAL(5,1)) = %s"
        " AND CAST(steps AS DECIMAL(5,1)) = %s"
        " AND type = '%s'"
        " )"
        " ORDER BY p.id ASC"
    ) % (distance, sample, steps, type)
    return DBHelper().fetch(query)
def get_party_detail(links):
    """Fetch each course/party detail page in *links*, scrape its fields, and
    persist one row per page into the STORE_PARTY table.

    Pages that fail to download ("EOF"/"ERR" sentinel from get_html) are
    logged and skipped.
    """
    dbHelp = DBHelper(user_config.db_host, user_config.db_port,
                      user_config.db_user, user_config.db_password,
                      user_config.db_database)
    for link in links:
        info = {}
        info["link"] = link
        text = get_html(link)
        if text == "EOF" or text == "ERR":
            print("获取失败:" + link)
            continue
        soup = BeautifulSoup(text, "html.parser")
        # With attrs given and no name restriction, find() matches any tag
        # carrying that class; here the tag name is pinned as well.
        title_tag = soup.find(name="h3",
                              attrs={"class": "course-dtl__course-title"})
        info["name"] = my_util.getTagText(title_tag)
        img_div_tag = soup.find(name="div", attrs={"class": "course-dtl__img"})
        img_tag = img_div_tag.find(name="img") if img_div_tag else None
        img_href = img_tag.get("src") if img_tag else ""
        if img_href != "":
            # Download the image; the stored name is the last URL path segment.
            info["img"] = down_file(img_href,
                                    img_href[img_href.rfind("/") + 1:])
        desc_tag = soup.find(name="div", attrs={"class": "course-dtl__desc"})
        info["description"] = my_util.getTagText(desc_tag)
        table_tag = soup.find(
            name="table",
            attrs={"class": "c-table c-table--form course-dtl__data-table"})
        # Map Japanese row headers to our column names:
        # course price / number of dishes / allowed stay time / course content.
        info_map = {
            "コース料金": "price",
            "品数": "num",
            "滞在可能時間": "free_time",
            "コース内容": "content"
        }
        if table_tag:
            trs = table_tag.select("tbody tr")
            for tr in trs:
                th = tr.find(name="th")
                th_text = my_util.getTagText(th)
                if th_text in info_map:
                    info[info_map.get(th_text)] = my_util.getTagText(
                        tr.find(name="td"))
        save_data("STORE_PARTY", info, dbHelp)
    dbHelp.closeDB()
def harvest_twitter_tweet_process_meta_update() -> None:
    """One-off migration: replace old-style process_meta docs (those with a
    "locked" key) with the new {'lock_timestamp', 'processed'} structure.

    Prints a running count every 100 migrated documents.
    """
    db = DBHelper()
    migrated = 0
    for doc in db.client["harvest_twitter_tweet"]:
        if "locked" not in doc["process_meta"]:
            continue
        # old version of the schema — reset to the new lock structure
        doc["process_meta"] = {'lock_timestamp': 0, 'processed': False}
        doc.save()
        migrated += 1
        if migrated % 100 == 0:
            print(migrated)
    print("finished.", migrated)
def dump_all(dump_dir: str) -> None:
    """Download every non-system database's full contents (all docs, with
    attachments) into ``<dump_dir>/<db>.json`` using curl.

    Credentials are embedded in the URL via furl.  Raises Exception when any
    curl invocation exits non-zero.
    """
    dbh = DBHelper()
    dbs = [name for name in dbh.client.all_dbs() if name[0] != '_']
    url_obj = furl(config.couchdb_host)
    url_obj.username = config.couchdb_user
    url_obj.password = config.couchdb_auth_token
    url = url_obj.url
    for db in dbs:
        _url = url + db + "/_all_docs?include_docs=true&attachments=true"
        print(_url)
        out_path = os.path.join(dump_dir, db + ".json")
        # Pass argv as a list with shell=False: the previous shell-string
        # version broke (and was injectable) when the URL or dump path
        # contained quotes, spaces, or other shell metacharacters.
        exit_code = subprocess.call(["curl", _url, "-G", "-o", out_path])
        if exit_code != 0:
            raise Exception("unable to download")
def tweet_data_melb_time_update() -> None:
    """One-off migration: add a "created_at_melb_time" field (Melbourne local
    time, as a [Y, M, D, h, m, s] list) to tweet_data docs that lack it.

    Prints a running count every 100 migrated documents.
    """
    db = DBHelper()
    count = 0
    for doc in tqdm.tqdm(db.client["tweet_data"],
                         total=db.client["tweet_data"].doc_count()):
        if "created_at_melb_time" not in doc["data"]:
            # old version of the schema — derive the Melbourne timestamp
            # NOTE(review): 'time' shadows any imported time module inside
            # this loop; also .replace(tzinfo=utc) discards the offset parsed
            # by %z — harmless only if created_at is always +0000 (Twitter's
            # format); confirm.
            time = doc["data"]["created_at"]
            melb_time = datetime.strptime(time, '%a %b %d %H:%M:%S %z %Y')\
                .replace(tzinfo=timezone.utc)\
                .astimezone(pytz.timezone('Australia/Melbourne'))
            doc["data"]["created_at_melb_time"] = \
                [melb_time.year, melb_time.month, melb_time.day,
                 melb_time.hour, melb_time.minute, melb_time.second]
            doc.save()
            count += 1
            if count % 100 == 0:
                print(count)
    print("finished.", count)
from conf import TOKEN, DB_NAME
from db_helper import DBHelper

# Main-menu button labels (Uzbek): today / tomorrow / full calendar /
# regions / prayer.
BTN_TODAY, BTN_TOMORROW, BTN_MONTH, BTN_REGION, BTN_DUA = (
    '⌛️ Bgun', '⏳ Erta', "📅 To'liq taqvim", '🇺🇿 Mintaqalar', '🤲 Duo')

main_buttons = ReplyKeyboardMarkup(
    [[BTN_TODAY], [BTN_TOMORROW, BTN_MONTH], [BTN_REGION], [BTN_DUA]],
    resize_keyboard=True)

# Conversation states.
STATE_REGION = 1
STATE_CALENDAR = 2

# chat_id -> selected region (in-memory only).
user_region = dict()

db = DBHelper(DB_NAME)


def region_buttons():
    """Build a two-column inline keyboard of all regions from the DB.

    Returns a list of rows, each row holding up to two InlineKeyboardButton
    objects (callback_data = region id).
    """
    regions = db.get_regions()
    buttons = []
    row = []
    for region in regions:
        row.append(
            InlineKeyboardButton(region['name'], callback_data=region['id']))
        if len(row) == 2:
            buttons.append(row)
            row = []
    # BUG FIX: with an odd number of regions the trailing single-button row
    # was previously discarded — flush it so every region gets a button.
    if row:
        buttons.append(row)
    return buttons
import time
from db_helper import DBHelper
from data_center import DataCenter
from Utils import utils

if __name__ == "__main__":
    # Daily recorder loop: once per minute, check whether the calendar day
    # has rolled over since the last recorded day and, if so, record it.
    db = DBHelper()
    dc = DataCenter(db)
    last_day = ""
    while True:
        if last_day == "":
            # First iteration: seed from the most recent day already in the DB.
            last_day = db.get_max_day()
        day = str(utils.today())
        if day != last_day:
            dc.record(day)
            last_day = day
        time.sleep(60)
from google.cloud import language
from operator import attrgetter
from db_helper import DBHelper

# One-off script: classify the English description of every project that has
# no domain yet, and store the highest-confidence category name as its domain.
try:
    projects = DBHelper().fetch(
        u"SELECT * FROM projects WHERE domain IS NULL OR domain = '';")
    # Create the API client once, not per project — the original rebuilt it
    # on every loop iteration for no benefit.
    language_client = language.LanguageServiceClient()
    for p in projects:
        document = language.types.Document(
            content=p['description_en'],
            type=language.enums.Document.Type.PLAIN_TEXT)
        response = language_client.classify_text(document)
        categories = response.categories
        if categories:
            # Keep only the single most confident category.
            category = max(categories, key=attrgetter('confidence'))
            # NOTE(review): category.name is %-interpolated into SQL; it comes
            # from the external API — prefer a parameterized query if
            # DBHelper supports one.
            DBHelper().execute(
                u"UPDATE projects SET domain='%s' WHERE id=%s;" %
                (category.name, p['id']))
except Exception as ex:
    print(ex)
finally:
    print(u'classify done!')
def worker(thread_name, thread_idx):
    """Scraper worker thread: pages through search results in steps of
    THREAD_NUM starting at page ``thread_idx + 1``, persisting store details.

    Progress (count of handled items) is checkpointed to
    ``<thread_name>_err.txt`` on any fetch/processing error, and restored —
    page and in-page offset — on the next run.
    """
    print(thread_name)
    print(thread_idx)
    print("################")
    # page_idx starts from 1
    page_idx = thread_idx + 1
    handled_item = 0
    start_idx_in_page = 0
    # If a previous run failed, resume from the checkpointed item count.
    file_name = thread_name + "_err.txt"
    if os.path.exists(file_name):
        with open(file_name, "r") as f:
            line = f.readline()
            if line:
                handled_item = int(line)
                # Translate the flat item count back into a page number and
                # an offset within that page.
                start_idx_in_page = handled_item % PAGE_ITEM
                page_idx = page_idx + (handled_item // PAGE_ITEM) * THREAD_NUM
        os.remove(file_name)
    # Each thread owns its own database connection.
    dbHelp = DBHelper(user_config.db_host, user_config.db_port,
                      user_config.db_user, user_config.db_password,
                      user_config.db_database)
    while True:
        url = user_config.ginza_url + str(page_idx) + user_config.search_opt
        print(thread_name + " Get:" + url)
        text = get_html(url)
        if text == "EOF":
            # No more pages.
            break
        if text == "ERR":
            # Checkpoint progress so the next run can resume here.
            with open(file_name, "w") as f:
                f.write(str(handled_item))
            break
        links = parse_store_link(text, start_idx_in_page)
        result = get_detail_info(links, dbHelp)
        if result == -1:
            with open(file_name, "w") as f:
                f.write(str(handled_item))
            break
        handled_item += result
        page_idx += THREAD_NUM
        # BUG FIX: this previously assigned to an unused name 'start_idx',
        # so after resuming, every subsequent page kept skipping the first
        # start_idx_in_page items.  A fresh page starts at offset 0.
        start_idx_in_page = 0
    dbHelp.closeDB()
def get_all_projects():
    """Fetch every row from the projects table."""
    return DBHelper().fetch(u'SELECT * FROM projects;')
def update_project(project_id):
    """Assign a weighted-random methodology to the given project.

    The duplicated list entries give odds of 2:3:1 for
    TRADITIONAL : AGILE : HYBRID.
    """
    methodology = random.choice([
        'TRADITIONAL', 'TRADITIONAL', 'AGILE', 'AGILE', 'AGILE', 'HYBRID'
    ])
    DBHelper().execute(
        u"UPDATE projects SET methodology='%s' WHERE id=%s;" %
        (methodology, project_id))
import os, random
from dc_gan import *
from db_helper import DBHelper

# Auto-encoder pre-training script for the DC-GAN generator.
generator = Generator()
trainer = AutoEncoderTrainer(generator)

# Training hyper-parameters.
EPOCH_NUM = 100
BATCH_SIZE = 1
DIS_TRAIN_NUM = 1
GEN_TRAIN_NUM = 1
load_path = None
output_path = './result/auto_encoder'

# NOTE(review): here DBHelper is an image-dataset loader (path + a count/split
# argument), not the SQL helper of the same name used elsewhere — confirm.
dataset = DBHelper('./downloads/bing2/preprocessed_256', 2)

# if load_path is not None:
#     generator.load_weights('./%s/model_g' % load_path)
#     discriminator.load_weights('./%s/model_d' % load_path)

for epoch in range(EPOCH_NUM):
    ae_loss = 0
    iter_num = 0
    for x_batch in dataset.train_ds:
        data_batch = dataset.get_data(x_batch)
        ae_loss += trainer.train(data_batch)
        iter_num += 1
        # NOTE(review): breaks after the first batch of every epoch — looks
        # like a debug/smoke-test stub; confirm before a real training run.
        break
def update_project(project_id):
    """Assign a weighted-random size to the given project.

    The duplicated list entries give odds of 2:3:1 for
    SMALL : STANDARD : LARGE.
    """
    size = random.choice([
        'SMALL', 'SMALL', 'STANDARD', 'STANDARD', 'STANDARD', 'LARGE'
    ])
    DBHelper().execute(
        u"UPDATE projects SET size='%s' WHERE id=%s;" % (size, project_id))
def update_risk(code, risk_id):
    """Set the code column of the risk row identified by *risk_id*.

    NOTE(review): *code* is %-interpolated into quoted SQL — injection-prone;
    prefer a parameterized query if DBHelper supports one.
    """
    statement = u"UPDATE risks SET code='%s' WHERE id=%s;" % (code, risk_id)
    DBHelper().execute(statement)