def import_push_his(pushed_item_list, each_case):
    """Record pushed articles for one case and keep its push counter current.

    For every entry of ``pushed_item_list`` a row is inserted into
    ``smzdm_his`` and the ``count`` column of ``smzdm_case_info`` is rewritten
    with the running total, which starts at ``each_case[u'count']``.

    Args:
        pushed_item_list: iterable of mappings, each providing ``'article_id'``.
        each_case: mapping providing ``u'case_id'``, ``u'count'`` and
            ``u'e_mail'``.

    Any exception raised while writing a row is printed, logged through
    ``SMZDMDBLogger.error`` and followed by ``exit()``.
    """
    case_id = each_case[u'case_id']
    count = each_case[u'count']
    user_email = each_case[u'e_mail']

    with db.connection():
        for pushed in pushed_item_list:
            article_id = pushed['article_id']
            history_row = {
                'article_id': article_id,
                'case_id': case_id,
                'user_email': user_email,
                'inserted_timesort': int(time.time()),
            }
            try:
                count += 1
                db.insert(r'smzdm_his', **history_row)
                db.update(
                    r'update smzdm_case_info set count=? where case_id=?',
                    count,
                    case_id,
                )
                message = "Import pushed item push_id:{} articlel_id:{}".format(
                    case_id, article_id)
                SMZDMDBLogger.info(message)
            except Exception as err:
                # This failure cannot be tolerated, so we quit: if smzdm_his
                # cannot be updated, the same items would keep being pushed.
                traceback.print_exc()
                SMZDMDBLogger.error("ErrInfo:" + str(err))
                exit()
def import_db(item_dict):
    """Insert or refresh ``smzdm_item`` rows for each recognised channel.

    ``item_dict`` maps a channel name to an iterable of item mappings. Items
    under ``'youhui'`` and ``'haitao'`` have their comment, collection, worthy,
    unworthy and worthy-percentage columns refreshed when a row with the same
    ``article_id`` already exists; items under ``'faxian'`` only have the
    comment and collection columns refreshed. Items without an existing row are
    inserted as-is. Any other channel name is ignored.

    Exceptions are printed and logged as a warning rather than propagated.
    """
    exists_sql = r'select * from smzdm_item where article_id=?'

    # (update statement, item keys bound to its placeholders, in order)
    full_refresh = (
        r'update smzdm_item set article_comment=?, article_collection=?,article_worthy=?,article_unworthy=? ,worthy_percentage=? where article_id=?',
        ('article_comment', 'article_collection', 'article_worthy',
         'article_unworthy', 'worthy_percentage', 'article_id'),
    )
    counters_refresh = (
        r'update smzdm_item set article_comment=?, article_collection=? where article_id=?',
        ('article_comment', 'article_collection', 'article_id'),
    )
    refresh_by_channel = {
        'youhui': full_refresh,
        'haitao': full_refresh,
        'faxian': counters_refresh,
    }

    try:
        with db.connection():
            for name, item in item_dict.items():
                plan = refresh_by_channel.get(name)
                if plan is None:
                    continue
                update_sql, columns = plan
                for each in item:
                    if db.select_one(exists_sql, each['article_id']):
                        # Existing rows are refreshed on every fetch.
                        db.update(update_sql, *[each[col] for col in columns])
                    else:
                        db.insert('smzdm_item', **each)
        SMZDMDBLogger.info("Import db ok. ")
    except Exception as err:
        traceback.print_exc()
        SMZDMDBLogger.warning("ErrInfo: " + str(err))
def do_certspotter():
    """Query certspotter for the module-level ``domain`` and store the result.

    Returns:
        The crunched data when both the raw response and the crunched result
        pass ``verify``; ``False`` otherwise. Crunched data is passed to
        ``db.insert`` before being returned.
    """
    raw_response = certspotter.api().search(domain)
    if not verify(raw_response):
        logger.red('Failed to obtain data from %s' % logger.RED('certspotter'))
        return False

    logger.green(
        'Successfully validated %s response' % logger.GREEN('certspotter'))

    parsed = crunch.get_certspotter_data(raw_response)
    if not verify(parsed):
        return False

    db.insert(parsed)
    return parsed
def do_bufferoverrun():
    """Query bufferover.run for the module-level ``domain`` and store the result.

    Returns:
        The crunched data when both the raw response and the crunched result
        pass ``verify``; ``False`` otherwise. Crunched data is passed to
        ``db.insert`` before being returned.
    """
    raw_response = bufferoverrun.api().search(domain)
    if not verify(raw_response):
        logger.red(
            'Failed to obtain data from %s' % logger.RED('bufferover.run'))
        return False

    logger.green(
        'Successfully validated %s response' % logger.GREEN('bufferover.run'))

    parsed = crunch.get_bufferoverrun_data(raw_response)
    if not verify(parsed):
        return False

    db.insert(parsed)
    return parsed
def do_crtsh():
    """Query crt.sh for the module-level ``domain`` and store the result.

    Returns:
        The crunched data when both the raw response and the crunched result
        pass ``verify``; ``False`` otherwise. Crunched data is passed to
        ``db.insert`` before being returned.
    """
    # A dedicated API instance is not strictly required; it was kept in case
    # multiple domains were ever going to be supported.
    client = crtsh.api()

    # Per the original author's note, the search result is a tuple: index 0 is
    # the 'source' string and index 1 is the JSON blob.
    raw_response = client.search(domain)
    if not verify(raw_response):
        logger.red('Failed to obtain data from %s' % logger.RED('crt.sh'))
        return False

    logger.green('Successfully validated %s response' % logger.GREEN('crt.sh'))

    parsed = crunch.get_crtsh_data(raw_response)
    if not verify(parsed):
        return False

    db.insert(parsed)
    return parsed
def find_similarity():
    """Score each acronym against every full form recorded for the same text.

    Walks the module-level ``db_acronyms``; for each entry, every item of
    ``db_full_forms`` whose ``'acronym'`` equals the entry's stripped acronym
    is paired with it. The two contexts are passed to
    ``calculate_similarity`` and the four resulting scores are handed to
    ``insert`` together with the fields of both records.
    """
    # Declared as in the original; this function never assigns to it.
    global db_similarity

    for acronym_entry in db_acronyms:
        acronym_doc_id = acronym_entry['document_id']
        acronym_text = acronym_entry['acronym']['striped']
        acronym_ctx = acronym_entry['acronym']['context']

        for candidate in db_full_forms:
            if acronym_text != candidate['acronym']:
                continue

            candidate_doc_id = candidate['document_id']
            expansion = candidate['full_form']['full_form']
            expansion_ctx = candidate['full_form']['context']

            scores = calculate_similarity(acronym_ctx, expansion_ctx)
            insert(
                acronym_doc_id,
                acronym_text,
                acronym_ctx,
                candidate_doc_id,
                expansion,
                expansion_ctx,
                scores[OWN_COSINE_SIM],
                scores[SKLEARN_COSINE_SIM],
                scores[OWN_JACCARD_SIM],
                scores[SKLEARN_JACCARD_SIM],
            )