def __init__(self, base_url, start_url, domain_name):
    """Initialise crawler state; the DataBase is keyed by the start URL."""
    self.base_url = base_url
    self.start_url = start_url
    self.domain_name = domain_name
    # Hard cap on how many links the crawl will collect.
    self.max_count = 10
    # URLs already visited.
    self.crawled = set()
    self.db = DataBase(self.start_url)
def load_data(path, participant, name_seq, name_track, labels_folder, list_features=None):
    """Load one participant's feature sequence and its reference labels.

    Parameters
    ----------
    path : str
        Root folder of the dataset.
    participant : str
        Participant sub-folder name.
    name_seq : str
        Sequence (csv file) name without extension.
    name_track : str
        Label track passed to the reference loader.
    labels_folder : str
        Kept for interface compatibility; not used here.
    list_features : list of str, optional
        Feature columns to keep; empty/None selects every feature column.
        (Default changed from a mutable ``[]`` to ``None``; behaviour for
        callers is unchanged.)

    Returns
    -------
    tuple
        (data, labels, time, list_states, list_features)
    """
    data_base = pd.read_csv(path + 'xsens/allFeatures_csv/' + participant + '/' + name_seq + '.csv')
    ref_data = DataBase(path + '/', name_seq)

    # The first two columns are bookkeeping (index/timestamp), not features.
    list_all_features = list(data_base.columns.values)
    del list_all_features[0:2]

    time = data_base['timestamp']
    # Empty or None selection means "use every feature column".
    if not list_features:
        list_features = list_all_features

    labels, states = ref_data.load_labels_ref3A(time, name_track, participant, 1)
    # Bug fix: DataFrame.as_matrix() was removed in pandas 1.0;
    # to_numpy() is the supported replacement.
    data = data_base[list_features].to_numpy()

    # Unique states in a stable, sorted order.
    list_states = sorted(set(states))
    return data, labels, time, list_states, list_features
def load_data(path, name_seq, name_track, labels_folder):
    """Load a sequence's feature matrix and its ground-truth labels.

    Parameters
    ----------
    path : str
        Folder containing the ``data_csv`` sub-folder.
    name_seq : str
        Sequence (csv file) name without extension.
    name_track : str
        Label track passed to the reference loader.
    labels_folder : str
        Kept for interface compatibility; not used here.

    Returns
    -------
    tuple
        (data, labels, time, list_states, list_features)
    """
    data_base = pd.read_csv(path + '/data_csv/' + name_seq + '.csv')
    ref_data = DataBase(path + '/', name_seq)

    # The first two columns are bookkeeping (index/timestamps), not features.
    list_features = list(data_base.columns.values)
    del list_features[0:2]

    time = data_base['timestamps']
    labels, states = ref_data.load_labels_refGT(time, name_track, 'labels_3A')
    # Bug fix: DataFrame.as_matrix() was removed in pandas 1.0;
    # to_numpy() is the supported replacement.
    data = data_base[list_features].to_numpy()

    # Unique states in a stable, sorted order.
    list_states = sorted(set(states))
    return data, labels, time, list_states, list_features
class Processor:
    """Consumes items from the queue and persists them to the database.

    Work in progress: the data model and the set of supported protocols
    are still open questions. The loop terminates on SIGINT sent from
    the main function.
    """

    def __init__(self, data_queue):
        self.data_queue = data_queue
        self.data_base = DataBase()

    def process_data(self):
        """Blocking consume → normalize → store loop; exits on Ctrl-C."""
        try:
            while True:
                payload = self.data_queue.get()
                log.debug("Received data from queue")
                self.normalize_data(payload)
                log.debug("Writing data to the base...")
                self.data_base.write_data(payload)
                log.debug("Data writen")
        except KeyboardInterrupt:
            log.debug("Closing processor")
            sys.exit(0)

    def normalize_data(self, data):
        """Place holder for implementing additional data parsing"""
        time.sleep(2)
def run(self):
    """Bootstrap the database and launch the Tk home page."""
    db = DataBase()
    # create_table() reports its outcome; surface it on stdout.
    print(db.create_table())
    window = tk.Tk(className="Simoni")
    window.geometry("300x250")
    window.resizable(False, False)
    home = HomePage(master=window, lab=self._lab)
    home.mainloop()
class Table(metaclass=ABCMeta):
    """Abstract base class describing a table of the database.

    Provides shared read helpers; concrete tables must implement
    ``add`` and ``remove``.
    """

    DB_PATH = '../data/movie_ticket_system.sqlite'
    # Single shared connection used when no explicit db is supplied.
    DATABASE = DataBase.connect(DB_PATH)

    def __init__(self, db: DataBase = None):
        self.data_base = db or Table.DATABASE

    @abstractmethod
    def add(self, *args, **kwargs):
        # Bug fix: `raise NotImplemented` raises a TypeError because
        # NotImplemented is a constant, not an exception type;
        # NotImplementedError is the correct exception.
        raise NotImplementedError

    @abstractmethod
    def remove(self, *args, **kwargs):
        raise NotImplementedError

    def get_all(self, name: str):
        """Return every row of table *name*.

        NOTE(review): *name* is interpolated into the SQL text; it must
        come from trusted code, never from user input.
        """
        request = f""" SELECT * FROM {name} """
        return self.data_base.select_all(request)

    def get_by_field(self, name: str, field_pair: FieldPair):
        """Return one row of *name* where ``field_pair.field_name`` equals
        ``field_pair.field_value`` (value is passed as a bound parameter)."""
        request = f""" SELECT * FROM {name} WHERE {field_pair.field_name} = ? """
        return self.data_base.select_one(request, (field_pair.field_value,))
def main():
    """REPL: read commands from stdin and dispatch them to the database."""
    valid_commands = [
        'GET', 'SET', 'INCR', 'DEL', 'DELVALUE', 'MULTI', 'DISCARD', 'EXEC', 'DECR'
    ]
    error_msg = "Please enter a valid command from the following " \
        "['GET', 'SET', 'INCR', 'DEL', 'DELVALUE', 'MULTI', 'DISCARD', 'EXEC', 'DECR']"
    db = DataBase()
    while True:
        var = input()
        try:
            command = var.split()[0]
            if command in valid_commands:
                db.execute_command(var)
            else:
                # Bug fix: unrecognised commands used to be silently
                # ignored; now they get the same guidance as bad input.
                print(error_msg)
        except (IndexError, KeyError, ValueError):
            print(error_msg)
            continue
def browse_user(id_user):
    """Fetch a single user row by id.

    Security fix: the id is now passed as a bound query parameter
    instead of being interpolated with str.format, which allowed SQL
    injection.
    """
    browse_user_query = "select * from users where id_user = %s"
    db = DataBase()
    ps_connection = db.session()
    ps_cursor = ps_connection.cursor()
    ps_cursor.execute(browse_user_query, (id_user,))
    user = ps_cursor.fetchone()
    ps_cursor.close()
    return user
def bot_tag(payload, web_client, *args, **kwargs):
    """Reply in-thread with a randomly chosen lead user for the author."""
    # Author id: `user` for a fresh thread, `parent_user_id` for a reply.
    parent = payload.get('parent_user_id')
    author_user_id = payload.get('user', '') if parent is None else parent

    # Randomly chosen user for answering, matched on author_user_id's module.
    data_base = DataBase()
    lead_user = data_base.get_lead_user(user=author_user_id)
    data_base.end_work()

    thread_ts = payload.get('thread_ts')
    reply_data = {
        'thread_ts': payload.get('ts', '') if thread_ts is None else thread_ts,
        'lead_user': '' if lead_user is None else lead_user,
    }
    web_client.chat_postMessage(
        **(HelloMsg(payload.get('channel', '')).please_message(**reply_data)))
def main():
    """Render the index page with tomorrow's case/death predictions.

    Loads the pickled Process object produced by the scraper; when it is
    missing, 'Error' placeholders are rendered instead.
    """
    D = DB('Poland')
    last_update = D.get_last_record_date()
    # os.path.join is portable; the old '\\'-prefixed literal only worked
    # by accident on Windows.
    pickle_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'Process_Object.pickle')
    try:
        # `with` guarantees the file handle is closed even if unpickling fails.
        with open(pickle_path, 'rb') as pickle_file:
            Pl = pickle.load(pickle_file)
        new_cases_pred = Pl.cases_pred
        new_deaths_pred = Pl.deaths_pred
        tomorrow_date = datetime.date.today() + datetime.timedelta(days=1)
        # Bug fix: os.remove() was called with no argument (TypeError);
        # delete the consumed pickle explicitly.
        consumed = os.path.join(Pl.path, 'Process_Object.pickle')
        if os.path.isfile(consumed):
            os.remove(consumed)
    except FileNotFoundError:
        # Bug fix: `Pl` was referenced after this branch even though it
        # was never bound here; the cleanup now runs only on success.
        print('File doesnt exist')
        new_cases_pred = 'Error'
        new_deaths_pred = 'Error'
        tomorrow_date = 'Error'
    return render_template('index.html', **locals())
def module_check(payload, web_client):
    """Persist the module number a user picked and thank them."""
    # The chosen module is the last number embedded in the button text.
    button_text = payload['actions'][0]['text']['text']
    module_number = int(re.findall(r'\d+', button_text)[-1])

    # Store the selection keyed by the user's id.
    data_base = DataBase()
    data_base.update_module(user=payload['user']['id'], module=module_number)
    data_base.end_work()

    # Respond to the user confirming the selected module.
    web_client.chat_postMessage(
        **ThxMessage(payload['user']['id'])('modules', module_number))
def task_done(payload, web_client, *args, **kwargs):
    """Award points when someone other than the lead marks a task done."""
    lead_user = payload.get('item_user')
    check_user = payload.get('user')
    if check_user == lead_user:
        # Self-confirmation earns nothing: just the default alert.
        web_client.chat_postMessage(**AlertMessage(check_user)())
        return
    data_base = DataBase()
    data_base.update_points(user=lead_user)
    data_base.end_work()
    web_client.chat_postMessage(**AlertMessage(lead_user).success_message())
def insert_user(self):
    """Insert this user into the `users` table.

    Returns a status message (the Portuguese success/failure strings are
    part of the public contract and are kept unchanged).
    """
    from datetime import datetime
    data_base = DataBase()
    cursor = None
    try:
        cursor = data_base.connection.cursor()
        cursor.execute(
            """INSERT INTO users (name, last_name, email, face_encodings, created_in) VALUES (?,?,?,?,?)""",
            (self._name, self._last_name, self._email, self._face_encodings,
             datetime.today().strftime('%Y-%m-%d-%H:%M:%S')))
        data_base.connection.commit()
        return "Usuário cadastrado com sucesso!"
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer swallowed; the best-effort message is preserved.
        return "Ocorreu um erro na inserção do usuário"
    finally:
        # Bug fix: the cursor leaked whenever execute/commit raised.
        if cursor is not None:
            cursor.close()
def setUp(self):
    # Fresh DataBase instance before every test for isolation.
    self.database = DataBase()
class DataBaseTest(unittest.TestCase):
    """Unit tests for DataBase: storage operations, query commands and
    transaction (BEGIN/ROLLBACK/COMMIT) bookkeeping."""

    def setUp(self):
        # Fresh database per test.
        self.database = DataBase()

    # --- SET ------------------------------------------------------------
    def test_set(self):
        self.database.SET('A', '10')
        self.assertEqual(self.database._storage, {'A': '10'})

    def test_set_twice(self):
        self.database.SET('A', '10')
        self.database.SET('A', '10')
        self.assertEqual(self.database._storage, {'A': '10'})

    def test_set_two_values(self):
        self.database.SET('A', '10')
        self.database.SET('B', '10')
        self.assertEqual(self.database._storage, {'A': '10', 'B': '10'})

    def test_set_in_transaction(self):
        # Inside a transaction, SET records its inverse in the rollback cache.
        self.database._storage = {'A': '10'}
        self.database._transaction_number = 1
        self.database._rollback_cache[1] = []
        self.database.SET('B', '20')
        self.assertEqual(self.database._storage, {'A': '10', 'B': '20'})
        self.assertEqual(self.database._rollback_cache, {1: [('UNSET', 'B')]})

    def test_set_in_transaction_twice(self):
        self.database._storage = {'A': '10'}
        self.database._transaction_number = 1
        self.database._rollback_cache[1] = []
        self.database.SET('B', '20')
        self.database.SET('B', '20')
        self.assertEqual(self.database._storage, {'A': '10', 'B': '20'})
        self.assertEqual(self.database._rollback_cache,
                         {1: [('UNSET', 'B'), ('SET', 'B', '20')]})

    def test_set_in_transaction_rolling_back(self):
        # While a rollback is replaying, no new undo entries are recorded.
        self.database._storage = {'A': '10'}
        self.database._transaction_number = 1
        self.database._rollback_cache[1] = []
        self.database._rolling_back = True
        self.database.SET('B', '20')
        self.assertEqual(self.database._storage, {'A': '10', 'B': '20'})
        self.assertEqual(self.database._rollback_cache, {1: []})

    # --- GET (prints its result; capture stdout) -------------------------
    def test_get(self):
        self.database._storage = {'A': '10'}
        capturedOutput = StringIO()
        sys.stdout = capturedOutput
        self.database.GET('A')
        sys.stdout = sys.__stdout__
        self.assertEqual(capturedOutput.getvalue(), '10\n')

    def test_get_null(self):
        capturedOutput = StringIO()
        sys.stdout = capturedOutput
        self.database.GET('A')
        sys.stdout = sys.__stdout__
        self.assertEqual(capturedOutput.getvalue(), 'NULL\n')

    # --- UNSET -----------------------------------------------------------
    def test_unset(self):
        self.database._storage = {'A': '10'}
        self.database.UNSET('A')
        self.assertEqual(self.database._storage, {})

    def test_unset_no_key(self):
        self.database._storage = {}
        self.database.UNSET('A')
        self.assertEqual(self.database._storage, {})

    def test_unset_in_transaction(self):
        self.database._storage = {'A': '10'}
        self.database._transaction_number = 1
        self.database._rollback_cache[1] = []
        self.database.UNSET('A')
        self.assertEqual(self.database._storage, {})
        self.assertEqual(self.database._rollback_cache, {1: [('SET', 'A', '10')]})

    def test_unset_in_transaction_twice(self):
        # A second UNSET of a missing key adds no further undo entry.
        self.database._storage = {'A': '10'}
        self.database._transaction_number = 1
        self.database._rollback_cache[1] = []
        self.database.UNSET('A')
        self.database.UNSET('A')
        self.assertEqual(self.database._storage, {})
        self.assertEqual(self.database._rollback_cache, {1: [('SET', 'A', '10')]})

    def test_unset_in_transaction_rolling_back(self):
        self.database._storage = {'A': '10'}
        self.database._transaction_number = 1
        self.database._rollback_cache[1] = []
        self.database._rolling_back = True
        self.database.UNSET('A')
        self.assertEqual(self.database._storage, {})
        self.assertEqual(self.database._rollback_cache, {1: []})

    # --- COUNTS (prints how many keys hold a value) ----------------------
    def test_counts(self):
        self.database._storage = {'A': '10'}
        capturedOutput = StringIO()
        sys.stdout = capturedOutput
        self.database.COUNTS('10')
        sys.stdout = sys.__stdout__
        self.assertEqual(capturedOutput.getvalue(), '1\n')

    def test_counts_no_value(self):
        self.database._storage = {'A': '11'}
        capturedOutput = StringIO()
        sys.stdout = capturedOutput
        self.database.COUNTS('10')
        sys.stdout = sys.__stdout__
        self.assertEqual(capturedOutput.getvalue(), '0\n')

    def test_counts_two_values(self):
        self.database._storage = {'A': '10', 'B': '10'}
        capturedOutput = StringIO()
        sys.stdout = capturedOutput
        self.database.COUNTS('10')
        sys.stdout = sys.__stdout__
        self.assertEqual(capturedOutput.getvalue(), '2\n')

    # --- FIND (prints keys holding a value, space-separated) -------------
    def test_find(self):
        self.database._storage = {'A': '10'}
        capturedOutput = StringIO()
        sys.stdout = capturedOutput
        self.database.FIND('10')
        sys.stdout = sys.__stdout__
        self.assertEqual(capturedOutput.getvalue(), 'A\n')

    def test_find_no_value(self):
        self.database._storage = {'A': '11'}
        capturedOutput = StringIO()
        sys.stdout = capturedOutput
        self.database.FIND('10')
        sys.stdout = sys.__stdout__
        self.assertEqual(capturedOutput.getvalue(), '\n')

    def test_find_two_values(self):
        self.database._storage = {'A': '10', 'B': '10'}
        capturedOutput = StringIO()
        sys.stdout = capturedOutput
        self.database.FIND('10')
        sys.stdout = sys.__stdout__
        self.assertEqual(capturedOutput.getvalue(), 'A B\n')

    # --- Transactions ----------------------------------------------------
    def test_begin(self):
        self.assertEqual(self.database._transaction_number, 0)
        self.assertEqual(self.database._rollback_cache, {})
        self.database.BEGIN()
        self.assertEqual(self.database._transaction_number, 1)
        self.assertEqual(self.database._rollback_cache, {1: []})

    def test_begin_twice(self):
        # Nested BEGINs stack: each gets its own undo list.
        self.assertEqual(self.database._transaction_number, 0)
        self.assertEqual(self.database._rollback_cache, {})
        self.database.BEGIN()
        self.assertEqual(self.database._transaction_number, 1)
        self.assertEqual(self.database._rollback_cache, {1: []})
        self.database.BEGIN()
        self.assertEqual(self.database._transaction_number, 2)
        self.assertEqual(self.database._rollback_cache, {1: [], 2: []})

    def test_rollback(self):
        self.assertEqual(self.database._transaction_number, 0)
        self.assertEqual(self.database._rollback_cache, {})
        self.assertEqual(self.database._rolling_back, False)
        self.database._transaction_number = 1
        self.database._storage = {'A': '10', 'B': '10'}
        self.database._rollback_cache = {1: [('UNSET', 'B')]}
        self.database.ROLLBACK()
        self.assertEqual(self.database._transaction_number, 0)
        self.assertEqual(self.database._rollback_cache, {})
        self.assertEqual(self.database._rolling_back, False)
        self.assertEqual(self.database._storage, {'A': '10'})

    def test_rollback_same_key_edited_twice(self):
        # Undo entries replay in order, so a key edited twice still ends
        # at its pre-transaction value.
        self.assertEqual(self.database._transaction_number, 0)
        self.assertEqual(self.database._rollback_cache, {})
        self.assertEqual(self.database._rolling_back, False)
        self.database._transaction_number = 1
        self.database._storage = {'A': '10'}
        self.database._rollback_cache = {
            1: [('UNSET', 'B'), ('SET', 'B', '10')]
        }
        self.database.ROLLBACK()
        self.assertEqual(self.database._transaction_number, 0)
        self.assertEqual(self.database._rollback_cache, {})
        self.assertEqual(self.database._rolling_back, False)
        self.assertEqual(self.database._storage, {'A': '10'})

    def test_rollback_nested(self):
        # Only the innermost transaction is undone; the outer one survives.
        self.assertEqual(self.database._transaction_number, 0)
        self.assertEqual(self.database._rollback_cache, {})
        self.assertEqual(self.database._rolling_back, False)
        self.database._transaction_number = 2
        self.database._storage = {'A': '10', 'B': '10'}
        self.database._rollback_cache = {
            1: [('UNSET', 'B')],
            2: [('SET', 'C', '3')]
        }
        self.database.ROLLBACK()
        self.assertEqual(self.database._transaction_number, 1)
        self.assertEqual(self.database._rollback_cache, {1: [('UNSET', 'B')]})
        self.assertEqual(self.database._rolling_back, False)
        self.assertEqual(self.database._storage, {
            'A': '10',
            'C': '3',
            'B': '10'
        })

    def test_commit(self):
        # COMMIT drops all undo information without touching storage.
        self.database._transaction_number = 1
        self.database._rollback_cache = {1: [('UNSET', 'B')]}
        self.database.COMMIT()
        self.assertEqual(self.database._transaction_number, 0)
        self.assertEqual(self.database._rollback_cache, {})
def check_id(cls, idx: int):
    """Return *idx* unchanged after verifying it is a valid 1-based id."""
    size = DataBase.get_size()
    # Reject non-positive ids and ids beyond the current table size.
    if idx <= 0 or size < idx:
        raise ValueError("Table does not contain current id")
    return idx
def reason(form):
    # Run forward-chaining inference over the symptoms/allergies ticked on
    # *form* and return the rendered conclusion string. (Python 2 code.)
    # Encode each checked box as a distinct symptom/allergy id; unchecked
    # boxes multiply to 0 and are stripped below.
    msymptom = [form.biti.data*1, form.bisai.data*2, form.fashao.data*3
                , form.toutong.data*4
                , form.jirousuantong.data*5
                , form.kesou.data*6
                ]
    mallegy = [form.wmhjgm.data*1
               , form.dyxajfgm.data*2
               , form.ymsfgm.data*3]
    while 0 in msymptom:
        msymptom.remove(0)
    while 0 in mallegy:
        mallegy.remove(0)
    print msymptom
    print mallegy
    # Reset working memory and seed it with the user's facts.
    DataBase.clear()
    CnclsBase.clear()
    DataBase.ask(msymptom,mallegy)
    DataBase.insert()
    # Inference: fire rules while new facts remain.
    while DataBase.newdatanum()>0:
        print "newdata还有数据条数:" + str(DataBase.newdatanum())
        fact = DataBase.pickone();  # take one fact and reason over it
        for i in range(KnowledgeBase.num()):
            rule = KnowledgeBase.pickone(i);
            if rule[0]==fact[0]:
                print "get conclusion:"+rule[1]
                # Confidence computation.
                fea_fact = float(fact[1])
                fea_rule = float(rule[2])
                fea_concl = fea_rule*max(0,fea_fact)
                # Route the derived statement.
                if rule[1].encode('UTF-8') in ConclusionBase.param:
                    # It is a final conclusion.
                    # print "put into conclusion"
                    CnclsBase.putone((rule[1],fea_concl))  # store in the conclusion base
                else:
                    # It is an intermediate inference.
                    # print "put into newdata"
                    DataBase.nputone((rule[1],fea_concl))  # push back as a new fact
        # Remove the fact that has just been consumed.
        DataBase.ndeleteone(fact)
    # Merge duplicate conclusions: earlier inference may have derived the
    # same conclusion with different confidences; combine them.
    for j in ConclusionBase.param:
        kind = CnclsBase.pickkind(j)
        if len(kind)==0:
            break;
        else:
            tmp = float(kind[0][2])
            for i in range(len(kind)-1):
                nex = float(kind[i+1][2])
                if tmp*nex<0:
                    tmp = (tmp+nex)/(1-min(abs(tmp),abs(nex)))
                elif (tmp<0 and nex<0):
                    tmp = tmp+nex+tmp*nex
                else:
                    tmp = tmp+nex-tmp*nex
            CnclsBase.deletekind(kind[0][1])
            CnclsBase.putone((kind[0][1],tmp))
    # Print the conclusions together with their explanations.
    for i in range(CnclsBase.num()):  # check every conclusion
        clu = CnclsBase.pickone(i);
        for j in CnclsBase.interpretor:  # scan explanation rows for a match
            if j[0].decode('UTF-8')==clu:  # conclusion matched
                print 'conclusion matched'
                print clu
                symptoms = []
                for symptom in j[1:len(j)]:
                    if DataBase.checksymptom(symptom):
                        symptoms.append(symptom)
                CnclsBase.update((clu,symptoms))
    string = CnclsBase.show()
    return string
import base64
import cv2
import os, sys
import numpy
import time
import io
import requests
import string
import random
from data_base import DataBase
from bson.objectid import ObjectId
import matplotlib
import matplotlib.pyplot as plt

# Module-level setup: snapshot the Brand and Model collections.
# NOTE(review): create_collection() appears to be used here to switch the
# active collection, not only to create it -- confirm against DataBase.
dbclass = DataBase()
dbclass.see_collection()
dbclass.create_collection("Brand")
r1 = dbclass.see_all_post()  # all brand documents
dbclass.create_collection("Model")
r2 = dbclass.see_all_post()  # all model documents
#dbclass.create_collection("Stat")
#r3 = dbclass.see_all_post()
#dbclass.del_all_post()


def func_stat():
    # For every brand document, collect its model posts and store them
    # back onto the brand under the "list" field.
    for u in r1:
        dbclass.create_collection("Model")
        posts_ = dbclass.find_many_post({"brend_id":ObjectId(u['_id'])})
        dbclass.create_collection("Brand")
        dbclass.upd_post(ObjectId(u['_id']), {"list":posts_ })
        #print (posts_, len(posts_))
def __init__(self, data_queue):
    # Queue this object consumes from; DataBase is the sink for
    # processed items.
    self.data_queue = data_queue
    self.data_base = DataBase()
for participant, nbr in zip(list_participant, range(len(list_participant))): path_data = path + '/' + participant + '/data_csv/' print('Loading: ' + participant) list_files = os.listdir(path_data) list_files.sort() for file in list_files: name_seq = os.path.splitext(file)[0] info_participant.append(participant) info_sequences.append(name_seq) data_base = pd.read_csv(path_data + file) ref_data = DataBase(path + '/' + participant, name_seq) list_features = list(data_base.columns.values) del list_features[0:2] dim_features = np.ones(len(list_features)) time = data_base['timestamps'] labels, states = ref_data.load_labels_refGT( time, name_track, 'labels_3A') real_labels.append(labels) data_win2.append(data_base[list_features].as_matrix()) timestamps.append(time) for state in states:
from data_base import DataBase
from question_base import QuestionBase
from help_question_base import HelpQuestionBase
from aiogram import Bot, Dispatcher
from aiogram.contrib.fsm_storage.memory import MemoryStorage
from aiogram.dispatcher.filters.state import State, StatesGroup
from secret import TOKEN

# Global switch presumably used elsewhere to toggle test behaviour.
is_test = True

# Bot plumbing: in-memory FSM storage (state is lost on restart).
bot = Bot(TOKEN)
storage = MemoryStorage()
dispatcher = Dispatcher(bot, storage=storage)
print('bot ready')

# Persistent stores.
users_db = DataBase('users.db')
lessons_db = DataBase('lessons.db')
questions_base = QuestionBase('question_base.data')
help_questions_base = HelpQuestionBase('help_questions_base.data')


class Lessons(StatesGroup):
    """FSM states for the lesson-taking flow."""
    group = State()
    lesson = State()
    answer = State()
    message = State()


class Message(StatesGroup):
    """FSM states for the admin/user messaging flow."""
    admin = State()
    question = State()
    user = State()
def __init__(self):
    # NOTE(review): this and the two methods below belong to an argument
    # parser class whose header is outside this chunk.
    self._parser = argparse.ArgumentParser(description="PyTorch Tutorial")
    self._parser.add_argument("--train", action="store_true")
    self._parser.add_argument("--test", action="store_true")
    self._args = self._parser.parse_args()

def is_train(self):
    # True when --train was passed on the command line.
    return self._args.train

def is_test(self):
    # True when --test was passed on the command line.
    return self._args.test

if __name__ == "__main__":
    arg_parser = ArgParser()
    data_base = DataBase("./data")
    NET_PATH="./cifar_net.pth"
    if arg_parser.is_train():
        net = Net()
        criterion = nn.CrossEntropyLoss()
        # Hyper-parameters follow the standard CIFAR tutorial settings.
        optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
        trainer = Trainer().\
            set_optimizer(optimizer).\
            set_num_epochs(2).\
            set_train_loader(data_base.train_loader()).\
            set_net(net).\
            set_criterion(criterion)
        trainer.train()
        # Persist the trained weights for a later --test run.
        torch.save(net.state_dict(), NET_PATH)
def add_new_member(payload, web_client, *args, **kwargs):
    """Greet and register channel members.

    A regular new member is greeted and stored; when the bot itself is
    the one that joined, every existing member is greeted and stored.
    """
    user_id = payload['user']
    if user_id == cfg.bot_info['user_id']:
        # The bot was added. [need to specify correct bot_user_id if config.py]
        answer = web_client.conversations_members(channel=payload['channel'])
        print(f'join action return data: {answer}')
        members = answer.get('members')
        if members is not None:
            data_base = DataBase()
            for member in members:
                if member == cfg.bot_info['user_id']:
                    continue
                web_client.chat_postMessage(**HelloMsg(member)())
                web_client.chat_postMessage(**ModuleMsg(member)())
                data_base.add_user(user=member)
            data_base.end_work()
    else:
        # A regular user joined: greet them and store their record.
        web_client.chat_postMessage(**HelloMsg(user_id)())
        web_client.chat_postMessage(**ModuleMsg(user_id)())
        data_base = DataBase()
        data_base.add_user(**payload)
        data_base.end_work()
i = 0
# NOTE(review): the path and file list are hard-coded to a single debug
# sequence, overriding the per-participant loop -- looks like leftover
# debugging; kept as-is to preserve behaviour.
for nbr, participant in enumerate(list_participant):
    path_data = '/home/amalaise/Documents/These/experiments/ANDY_DATASET/AndyData-lab-onePerson/xsens/allFeatures_csv/Participant_909/'
    print('Loading: ' + participant)
    list_files = ['Participant_909_Setup_A_Seq_3_Trial_4.csv']
    for file in list_files:
        name_seq = os.path.splitext(file)[0]
        info_participant.append(participant)
        info_sequences.append(name_seq)
        ref_data = DataBase(path + '/' + participant, name_seq)
        data, labels, time, list_s, list_features = tools.load_data(
            path, participant, name_seq, name_track, labels_folder)
        data_base = pd.DataFrame(data, columns=list_features)
        real_labels.append(labels)
        # Bug fix: DataFrame.as_matrix() was removed in pandas 1.0;
        # to_numpy() is the supported replacement.
        data_win2.append(data_base[best_features].to_numpy())
#!/usr/bin/env python # -*- coding: UTF-8 -*- import chardet from data_base import DataBase from knowledge_base import KnowledgeBase from conclusion_base import ConclusionBase #初始化数据库,插入用户数据 DataBase = DataBase() KnowledgeBase = KnowledgeBase() CnclsBase = ConclusionBase() KnowledgeBase.insert() def reason(form): msymptom = [form.biti.data*1, form.bisai.data*2, form.fashao.data*3 , form.toutong.data*4 , form.jirousuantong.data*5 , form.kesou.data*6 ] mallegy = [form.wmhjgm.data*1 , form.dyxajfgm.data*2 , form.ymsfgm.data*3] while 0 in msymptom: msymptom.remove(0) while 0 in mallegy: mallegy.remove(0) print msymptom print mallegy DataBase.clear() CnclsBase.clear() DataBase.ask(msymptom,mallegy) DataBase.insert()
labels = []
list_seq = []
name_track = config[config_type]["name_track"]
# Load every participant's sequences: features come from the mvnx files,
# reference labels from the per-sequence .labels.csv files.
for participant in list_participant:
    path_seq = path_data + participant + '/'
    print('Loading: ' + participant)
    list_files = os.listdir(path_seq)
    list_files.sort()
    for file in list_files:
        name_seq = os.path.splitext(file)[0]
        data_base = DataBase(path_seq, name_seq)
        data_base.load_mvnx_data(path_seq)
        data, time, list_features, dim_features = tools.load_data_from_dataBase(
            data_base, config)
        file_label = path_labels + participant + '/' + os.path.splitext(
            name_seq)[0] + '.labels.csv'
        # GT=1: load the ground-truth reference track.
        real_labels, list_states = tools.load_labels_ref(time, file_label,
                                                         name_track, GT=1)
        # Prepend the timestamp column to the feature matrix.
        time = np.expand_dims(time, axis=1)
        all_data = np.concatenate((time, data), axis=1)
        df = pd.DataFrame(all_data, index=range(len(time)))
        list_features.insert(0, 'timestamp')
def add_in_vocabulary(user_id, initial_word: str, result_word: str):
    """Store a translation pair (initial -> result) in the user's vocabulary."""
    DataBase().add_word(user_id, initial_word, result_word)
def show_term(): win32gui.ShowWindow(the_program_to_hide, win32con.SW_RESTORE) def hide_term(): win32gui.ShowWindow(the_program_to_hide, win32con.SW_HIDE) if __name__ == '__main__': scrap_time = '11:15' update_time = '18:30' today = datetime.datetime.today() today_ = today.strftime('%d.%m.%Y') keys = ['New cases', 'New deaths'] W = DB('Poland') last_day_db = W.get_last_record_date() message = 'Wait for a next scrap...' while (True): print(message) hour_now = datetime.datetime.now().hour min_now = datetime.datetime.now().minute time_now = str(hour_now) + ':' + str(min_now) try: if time_now >= scrap_time and W.get_last_record_date() != today_: os.system('cls') D = DR('Poland') D.show_raport() W.insert(D) Pl = P(W)
version="beta 0.0.2", description=""" Доброго времени суток! Мой сервис предоставляет следующие методы: - **/add**: добавляет запрос в базу; - **/stat**: выдает статистику по запросу за временной интервал (кол-во объявлений); - **/top5**: выдает статистику по запросу за временной интервал (топ 5 объявлений). """) db = DataBase() # ------------------------------------------------------- # Add request from requirement # The new record in "Requests" is formed # Then add record in the "Logs" table # After birth our request will be resending once per hour # ------------------------------------------------------- @app.post("/add", tags=['Add request'], description=""" На вход подается поисковый запрос (формат задан). Необходимо указать 'phrase' - поисковую фразу и 'region' - регион. Пример ввода: {"phrase": "Iphone 12 Max Pro", "region": "moskva"}
class Spider:
    """Small crawler that stores discovered links and page HTML through DataBase."""

    def __init__(self, base_url, start_url, domain_name):
        self.base_url = base_url
        self.start_url = start_url
        self.domain_name = domain_name
        # Hard cap on how many links the crawl will collect.
        self.max_count = 10
        self.crawled = set()
        self.db = DataBase(self.start_url)

    def get_response(self, url):
        """Return the HTTP status code for *url*, or False on any error."""
        try:
            response = requests.get(url)
            return response.status_code
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt still works.
            return False

    def get_html(self, url):
        """Return prettified HTML for *url*, or '' on any error."""
        try:
            html_body = requests.get(url)
            soup = BeautifulSoup(html_body.text, features="lxml")
            return soup.prettify()
        except Exception:
            return ''

    def gather_links(self, page_url):
        """Collect the links on *page_url* that belong to self.domain_name."""
        html_string = ''
        try:
            response = urlopen(page_url)
            # Only parse HTML responses.
            if 'text/html' in response.getheader('Content-Type'):
                soup = BeautifulSoup(response)
                html_string = soup.prettify()
            finder = LinkFinder(self.base_url, page_url)
            finder.feed(html_string)
        except Exception as e:
            print(str(e))
            return set()
        urls = finder.page_links()
        result = set()
        for link in urls:
            if self.domain_name in link:
                result.add(link)
        return result

    def get_domain_name(self, url):
        """Return the last two labels of the URL's host, e.g. 'example.com'.

        A single-label host (e.g. 'localhost') is returned as-is; '' on error.
        """
        try:
            parts = urlparse(url).netloc.split('.')
            if len(parts) < 2:
                return parts[-1]
            # Bug fix: `parts` is already a list -- the original called
            # .split('.') on it again, raising AttributeError, which the
            # bare except turned into '' for every normal URL.
            return parts[-2] + '.' + parts[-1]
        except Exception:
            return ''

    def remove_other_links(self, links):
        """Return a copy of *links* without URLs outside self.domain_name."""
        result = copy.deepcopy(links)
        for url in links:
            domain = self.get_domain_name(url)
            if self.domain_name not in domain:
                result.remove(url)
        return result

    def get_link_from_db_for_crawling(self):
        """Return the first uncrawled link from the DB, or None when empty."""
        data = self.db.get_uncrawled_links()
        for row in data:
            return row[0]
        return None

    def add_links_to_db(self):
        """Grow the frontier (recursively) until max_count links are stored."""
        if self.db.link_count() < self.max_count:
            url = self.get_link_from_db_for_crawling()
            links = self.gather_links(url)
            self.db.set_link_to_crawled(url)
            for link in links:
                self.db.add_link(link)
                if self.db.link_count() >= self.max_count:
                    return True
        if self.db.link_count() < self.max_count:
            crawled_cursor = self.db.execute_query(
                "select count(*) from links_html where crawled = 'no'")
            for row in crawled_cursor:
                crawled_links = row[0]
            print(crawled_links)
            if crawled_links > 0:
                # Still work to do: recurse until the cap or the frontier
                # is exhausted.
                return self.add_links_to_db()
            print('no more links to crawl')
            return True
        return True

    def add_html(self, link):
        """Fetch *link* and persist its HTML together with the status code."""
        status = self.get_response(link)
        html = str(self.get_html(link))
        self.db.add_html_db(html, link, status)
        return True

    def crawl_and_store(self):
        """Drive the crawl: fill the frontier, then fetch every pending link."""
        self.add_links_to_db()
        pending_count = self.db.get_number_of_pending_links()
        while pending_count > 0:
            link = self.db.get_link_from_db()
            print("crawling link : " + str(link))
            print('pending links:' + str(pending_count - 1))
            self.add_html(link)
            pending_count = self.db.get_number_of_pending_links()
        return True