def export_users_schema_process():
    """Export the users schema to a CSV file named after the current dump date.

    The dump date is obtained from ``get_dump_date(MONGO, ARCHIVES_BASE_FOLDER)``.
    When ``is_dump_date_valid`` rejects it, the function returns without doing
    anything. Otherwise the CSV is written to
    ``ARCHIVES_BASE_FOLDER/<MONGO>-<USERS>-<dump_date>.csv`` through
    ``MongoDB.export_users_schema``.
    """
    dump_date = get_dump_date(MONGO, ARCHIVES_BASE_FOLDER)
    if not is_dump_date_valid(dump_date):
        return

    csv_name = f"{MONGO}-{USERS}-{dump_date}.csv"
    csv_path = os.path.join(ARCHIVES_BASE_FOLDER, csv_name)
    MongoDB().export_users_schema(csv_path, USERS_SCHEMA)
def restore_dump_process():
    """Restore the users BSON file of the current dump into MongoDB.

    The dump date is obtained from ``get_dump_date(MONGO, ARCHIVES_BASE_FOLDER)``.
    When ``is_dump_date_valid`` rejects it, the function returns without doing
    anything. Otherwise ``<USERS>.bson`` inside the folder returned by
    ``get_dump_folder_endpoint`` is handed to ``MongoDB.restore_db`` together
    with ``USERS_SCHEMA``.
    """
    dump_date = get_dump_date(MONGO, ARCHIVES_BASE_FOLDER)
    if not is_dump_date_valid(dump_date):
        return

    database = MongoDB()
    dump_folder = get_dump_folder_endpoint(ARCHIVES_BASE_FOLDER, MONGO, dump_date)
    bson_path = os.path.join(dump_folder, f'{USERS}.bson')
    database.restore_db(bson_path, USERS_SCHEMA)
def __init__():
    """Prepare the time window, output file name and database handles.

    Computes a millisecond timestamp 300000 ms (five minutes) before now,
    builds an ``inc_app_<YYYYmmddHHMMSS>.dat`` file name from the local time,
    opens two MongoDB clients (contents and topic databases) and a MySQL
    client, and creates a cursor on the MySQL client.
    """
    # Current epoch time in milliseconds, shifted back by 300000 ms.
    now_ms = int(time.time() * 1000)
    find_time = now_ms - 300000

    # Local-time stamp used as the suffix of the incremental file name.
    local_now = time.localtime()
    filetime = time.strftime("%Y%m%d%H%M%S", local_now)
    filename = 'inc_app_{}.dat'.format(filetime)

    # NOTE: appending to ./app_full.dat was disabled in the original code.
    mog_content = MongoDB(
        environment=environment,
        db_name=db_name_contents,
    ).client
    mog_topic = MongoDB(
        environment=environment,
        db_name=db_name_topic,
    ).client

    mys = MysqlDB(
        environment=environment,
        db_name=da_name_sql,
    ).getClient()
    cur = mys.cursor()
def mongodb(self) -> MongoDB:
    """Create, connect and register a new ``MongoDB`` instance.

    Returns:
        The newly connected ``MongoDB`` object, which is also appended to
        ``self._instances``.

    Raises:
        NoConnectionError: if ``self.has_mongodb()`` is false.
    """
    if self.has_mongodb():
        connection = MongoDB(self._mongodb_configuration)
        connection.connect()
        # Keep a reference to every instance handed out.
        self._instances.append(connection)
        return connection

    raise NoConnectionError('No MongoDB configuration')
def remove_duplicates_process():
    """Run ``MongoDB.remove_duplicates()`` when a valid dump date is available.

    The dump date is obtained from ``get_dump_date(MONGO, ARCHIVES_BASE_FOLDER)``.
    When ``is_dump_date_valid`` rejects it, the function returns without
    touching the database. The connection is always released, even if the
    operation raises.
    """
    dump_date = get_dump_date(MONGO, ARCHIVES_BASE_FOLDER)
    if not is_dump_date_valid(dump_date):
        return

    mongodb = MongoDB()
    mongodb.connect()
    try:
        mongodb.remove_duplicates()
    finally:
        # Disconnect on both the success and the failure path so a raised
        # exception does not leave the connection open.
        mongodb.disconnect()
def drop_database_process():
    """Run ``MongoDB.drop_database()`` when a valid dump date is available.

    The dump date is obtained from ``get_dump_date(MONGO, ARCHIVES_BASE_FOLDER)``.
    When ``is_dump_date_valid`` rejects it, the function returns without
    touching the database. The connection is always released, even if the
    operation raises.
    """
    dump_date = get_dump_date(MONGO, ARCHIVES_BASE_FOLDER)
    if not is_dump_date_valid(dump_date):
        return

    mongodb = MongoDB()
    mongodb.connect()
    try:
        mongodb.drop_database()
    finally:
        # Disconnect on both the success and the failure path so a raised
        # exception does not leave the connection open.
        mongodb.disconnect()
def remove_documents_with_null_values_process():
    """Run ``MongoDB.remove_documents_with_null_values()`` for a valid dump date.

    The dump date is obtained from ``get_dump_date(MONGO, ARCHIVES_BASE_FOLDER)``.
    When ``is_dump_date_valid`` rejects it, the function returns without
    touching the database. The connection is always released, even if the
    operation raises.
    """
    dump_date = get_dump_date(MONGO, ARCHIVES_BASE_FOLDER)
    if not is_dump_date_valid(dump_date):
        return

    mongodb = MongoDB()
    mongodb.connect()
    try:
        mongodb.remove_documents_with_null_values()
    finally:
        # Disconnect on both the success and the failure path so a raised
        # exception does not leave the connection open.
        mongodb.disconnect()
def start_requests(self):
    """Yield one China Daily search request per hot word awaiting articles.

    Opens a MongoDB client (stored on ``self.client``), reads every document
    of ``dailypops.hotword`` whose ``article_state`` is 0 and yields a
    ``scrapy.Request`` for the search URL built from its ``hotword`` field.
    The source document travels with the request under ``meta["data"]`` and
    responses are routed to ``self.parse``.
    """
    search_url = "http://newssearch.chinadaily.com.cn/rest/en/search?keywords={}&sort=dp&page=0&curType=story&type=&channel=&source="

    self.client = MongoDB(environment=environment, db_name=db_name).client
    pending_hotwords = self.client.dailypops.hotword.find({"article_state": 0})

    for hotword_doc in pending_hotwords:
        # A missing "hotword" field falls back to an empty keyword.
        keyword = hotword_doc.get("hotword", "")
        yield scrapy.Request(
            url=search_url.format(keyword),
            callback=self.parse,
            meta={"data": hotword_doc},
        )
import uvicorn from fastapi import FastAPI, Path, Query, HTTPException from starlette.responses import JSONResponse from typing import Optional from fastapi.middleware.cors import CORSMiddleware from database.mongodb import MongoDB from config.development import config from model.Tree import createShoptreeModel, updateShoptreeModel, createNameshoptreeModel, updateNameshoptreeModel mongo_config = config["mongo_config"] mongo_db = MongoDB( mongo_config["host"], mongo_config["port"], mongo_config["user"], mongo_config["password"], mongo_config["auth_db"], mongo_config["db"], mongo_config["collection"], ) mongo_db._connect() mongo_config_area = config["mongo_config_area"] mongo_db_area = MongoDB( mongo_config_area["host"], mongo_config_area["port"], mongo_config_area["user"], mongo_config_area["password"], mongo_config_area["auth_db"], mongo_config_area["db"], mongo_config_area["collection"],
#? { #? "host": "localhost", #? "port": 27017, #? "user": "******", #? "password": "******", #? "auth_db": "admin", #? "db": "bakerypj", #? "collection": "bakery", #? } mongo_db = MongoDB( mongo_config["host"], mongo_config["port"], mongo_config["user"], mongo_config["password"], mongo_config["auth_db"], mongo_config["db"], mongo_config["collection"], ) mongo_db._connect() #!เชื่อมต่อกับDBแล้ว app = FastAPI() #!ทำการใช้API app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], )
import uvicorn
from fastapi import FastAPI, Path, Query, HTTPException
from starlette.responses import JSONResponse
from typing import Optional
from fastapi.middleware.cors import CORSMiddleware
from database.mongodb import MongoDB
from config.development import config
from model.student import createStudentModel, updateStudentModel

# Connection settings for MongoDB, read from the development configuration.
mongo_config = config["mongo_config"]
mongo_db = MongoDB(  # Instantiate MongoDB and pass the settings positionally.
    mongo_config["host"],
    mongo_config["port"],
    mongo_config["user"],
    mongo_config["password"],
    mongo_config["auth_db"],
    mongo_config["db"],
    mongo_config["collection"],
)
mongo_db._connect()  # Connect to MongoDB.

app = FastAPI()
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive -- confirm this is intended outside of development.
app.add_middleware(  # Middleware: a layer that sits in between.
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
from starlette.responses import JSONResponse
from typing import Optional
from fastapi.middleware.cors import CORSMiddleware
from database.mongodb import MongoDB
from config.development import config
from model.student import createStudentModel, updateStudentModel

# Connection settings for MongoDB, read from the development configuration.
mongo_config = config["mongo_config"]
mongo_db = MongoDB(  # MongoDB is imported from mongodb.py.
    mongo_config["host"],
    mongo_config["port"],
    mongo_config["user"],
    mongo_config["password"],
    mongo_config["auth_db"],
    mongo_config["db"],
    mongo_config["collection"],
)
mongo_db._connect()  # Connect to MongoDB.

app = FastAPI()
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive -- confirm this is intended outside of development.
app.add_middleware(  # Middleware acts as an intermediary layer; it is part of the API.
    CORSMiddleware,
    allow_origins=["*"],  # "*" means everyone is allowed access.
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
mongo_config = config["mongo_config"] print("Mongo_config", mongo_config) # "host" :"location", # "port" : "27017", # "user" : "root", # "password" : "root", # "auth" : "admin", # "db" : "waterpark", # "collection" : "rides", mongo_db = MongoDB( mongo_config["host"], mongo_config["port"], mongo_config["user"], mongo_config["password"], mongo_config["auth_db"], mongo_config["db"], mongo_config["collection"], ) mongo_db._connect() # กับdb # use API app = FastAPI() app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"],
def search():
    """Scrape Google "related question" boxes for hot words awaiting questions.

    Reads up to 1000 documents with ``question_state == 0`` from
    ``dailypops.hotword``, starts a headless Chrome session, walks through the
    Google sign-in form, and then runs one Google search per hot word. Every
    question/answer pair found on the result page is written to
    ``dailypops.question`` keyed by ``question_id``; afterwards the hot word's
    ``question_state`` is set to 1.

    The browser is always shut down through ``driver.quit()``, including when
    the sign-in or any search step raises, so no Chrome process is left behind.
    """
    client = MongoDB(environment=environment, db_name=db_name).client
    zds = client.dailypops.hotword.find({"question_state": 0}).limit(1000)

    path = r'C:\Users\EDZ\Documents\WeChat Files\wodexinwolai\FileStorage\File\2019-05/chromedriver'
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    driver = webdriver.Chrome(executable_path=path, chrome_options=chrome_options)

    # Search URL template; the only placeholder is the ``q=`` query term.
    # It is loop-invariant, so it is defined once instead of per hot word.
    url_template = "https://www.google.com/search?biw=1536&bih=890&ei=ZomJXceaOtCbmAWyi7egCg&q={}&oq=commp&gs_l=psy-ab.3.1.0i67l3j0i10l7.8182.9766..12758...0.0..0.135.565.0j5......0....1..gws-wiz.......0.KfvdJE90Egw"

    try:
        # --- Google sign-in -------------------------------------------------
        begin_url = "https://www.google.com/"
        driver.get(begin_url)
        driver.find_element_by_id("gb_70").click()
        # SECURITY(review): the account and password are hard-coded below;
        # they should be moved to configuration or a secret store.
        driver.find_element_by_id("identifierId").send_keys(
            "*****@*****.**")
        driver.find_element_by_id("identifierNext").click()
        time.sleep(3)  # give the password page time to load
        driver.find_element_by_xpath("//input[@name='password']").send_keys(
            "jiexin88")
        driver.find_element_by_id("passwordNext").click()
        time.sleep(5)  # give the sign-in time to complete

        # --- One search per pending hot word --------------------------------
        for k in zds:
            print('参数', k)
            hotword = k.get("hotword", "")
            hotword_id = k.get("hotword_id", "")
            event_id = k.get("event_id", "")

            # Space-separated words are joined with "+" for the query string.
            hotword = hotword.split(" ")
            print('hotword_list', hotword)
            parms = '+'.join(hotword)
            print('parms', parms)
            url = url_template.format(parms)
            print('url_', url)

            driver.get(url)
            response = driver.page_source

            # Question titles and the matching answer containers are pulled
            # from the same "related-question-pair" blocks and paired by
            # position.
            html = etree.HTML(response)
            titles = html.xpath(
                '//div[@class="related-question-pair"]//div[@class="match-mod-horizontal-padding hide-focus-ring cbphWd"]//text()'
            )
            print('titles', titles)
            contents = Selector(text=response).xpath(
                '//div[@class="related-question-pair"]//div[@class="gy6Qzb kno-ahide"]'
            ).extract()

            for title, content in zip(titles, contents):
                con = Selector(text=content).xpath(
                    '//div[contains(@class,"mod")]//text()').extract()
                con = ' '.join(con)

                items = {}
                items['question_id'] = md5_(title + hotword_id)
                items['event_id'] = event_id
                items['hotword_id'] = hotword_id
                items['question'] = title
                items['answer'] = con
                items['source'] = ''
                # NOTE(review): release_time is a fixed date, not the scrape date.
                items['release_time'] = '2019-09-25'
                items['time_stamp'] = int(time.time())
                items['entity'] = []
                items['label'] = []
                items['static_page'] = 0
                items['nlp_state'] = 0
                print(items)
                # Third positional argument True -- presumably the legacy
                # pymongo ``upsert`` flag; confirm against the driver version.
                client.dailypops.question.update(
                    {'question_id': items['question_id']}, items, True)

            # Mark this hot word as processed.
            s1 = {'hotword_id': hotword_id}
            s2 = {'$set': {'question_state': 1}}
            client.dailypops.hotword.update(s1, s2)
            time.sleep(3)  # pause between searches
    finally:
        # Always terminate the browser and its driver process.
        driver.quit()