def __init__(self):
    """Initialise crawl state, the file logger and the MongoDB handle.

    Creates ``../logs`` if needed, attaches a file handler writing to
    ``../logs/indexer.log`` to the ``urls_extractor`` logger and opens a
    client for the local MongoDB instance.

    Raises:
        Exception: whatever ``MongoClient`` raises. The failure is logged
            and re-raised, because there is no database handle to continue
            with.
    """
    self.parser = MessageParser()
    # URLs that have already been processed
    self.visited_links = set()
    # URLs discovered so far that still have to be processed
    self.links_to_visit = set()
    # In-memory index and per-document lengths, filled before being stored
    self.index = {}
    self.doc_lengths = {}
    self.database_empty = True

    # create logger; exist_ok avoids the check-then-create race
    os.makedirs('../logs', exist_ok=True)
    self.logger = logging.getLogger("urls_extractor")
    self.logger.setLevel(logging.INFO)
    fh = logging.FileHandler("../logs/indexer.log")
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    self.logger.addHandler(fh)

    try:
        client = MongoClient("mongodb://127.0.0.1:27017/")
    except Exception:
        # Previously swallowed by a bare ``except``, after which ``client``
        # was unbound and the next line failed with a NameError.
        self.logger.exception('Could not connect to MongoDB')
        raise
    self.logger.info('Connected to MongoDB successfully!!')
    self.database = client.TelegramIndexerDB  # client.TelegramMusicIndexerDB
def treat_data_from_receive(self, data):
    """Forward a query upstream, relay the reply and cache its records.

    The raw query is sent over UDP to ``self._next_dns_addr`` with a
    5 second timeout. On a socket error a REFUSED message is sent to the
    client instead. On success the upstream reply is relayed to the client
    unchanged and every record in its answer, authority and additional
    sections is added to the cache.

    :param data: raw bytes of the client's DNS query.
    :return: None.
    """
    try:
        sender = socket.socket(AF_INET, SOCK_DGRAM)
        try:
            sender.connect((self._next_dns_addr, self._port))
            sender.settimeout(5)
            sender.send(data)
            data = sender.recv(1024)
        finally:
            # Release the descriptor on timeouts and refusals too;
            # previously it was only closed on the success path.
            sender.close()
    except socket.error:
        # NOTE(review): the refusal is flagged as MessageType.QUERY, not
        # ANSWER - confirm this is intended.
        answer = MessageParser.to_bytes(
            self._message_parser.transaction_id,
            MessageType.QUERY,
            self._message_parser.opcode,
            HaveStatus.NO,
            self._message_parser.recursion_required,
            self._message_parser.recursion_available,
            RCode.REFUSED,
            self._message_parser.message[12:],
            answers_num=0,
            answers=[])
        self._dns_listener.sendto(answer, self._client_addr)
        return

    self._dns_listener.sendto(data, self._client_addr)

    answer_info = MessageParser()
    answer_info.from_bytes(data)
    for container in [
            answer_info.answers,
            answer_info.resources_rights,
            answer_info.additional_resources
    ]:
        for resource in container:
            self._cache.add_record(resource.ttl,
                                   resource.name.decode(),
                                   resource.resource_type,
                                   resource.resource_data,
                                   resource.resource_class)
    self._client_addr = None
def __init__(self, next_dns_addr, cash_file_name='cache.txt'):
    """Prepare the resolver: cache, listening socket and upstream address.

    :param next_dns_addr: address of the DNS server queries are forwarded to.
    :param cash_file_name: file name handed to ``Cache``.
    """
    # Plain state first
    self._next_dns_addr = next_dns_addr
    self._client_addr = None
    self._port = 53

    # Collaborators, created in the same relative order as before
    self._cache = Cache(cash_file_name)
    self._dns_listener = Resolver.bind_server()
    self._message_parser = MessageParser()
def __init__(self, host, server_port):
    """Create the TCP socket, remember the server endpoint and start running.

    Construction does not return until ``self.run()`` returns.
    """
    self.host = host
    self.server_port = server_port

    # TCP socket used for the connection to the server
    tcp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    self.connection = tcp_socket

    self.parser = MessageParser()
    self.run()
class Client:
    """Chat client: reads commands from stdin and sends them as JSON."""

    def __init__(self, host, server_port):
        """Create the socket, store the endpoint and start the client loop.

        Construction does not return until ``run()`` returns.
        """
        # Set up the socket connection to the server
        self.connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.host = host
        self.server_port = server_port
        self.parser = MessageParser()
        self.run()

    @staticmethod
    def _parse_command(line):
        """Split ``"<request> <content>"`` into the payload dictionary.

        The content is everything after the first space, or ``''`` when the
        line holds only a request word.
        """
        request, _, content = line.partition(' ')
        return {'request': request, 'content': content}

    def run(self):
        """Connect, start the receiver and forward user input until logout.

        :return: the result of ``disconnect()`` once ``logout`` was sent.
        """
        # Initiate the connection to the server and receiver
        self.connection.connect((self.host, self.server_port))
        print("-------- : Connected to host: {}".format(self.host))
        MessageReceiver(self, self.connection)

        while True:
            command = self._parse_command(input())
            self.send_payload(json.dumps(command))
            if command['request'] == 'logout':
                return self.disconnect()

    def disconnect(self):
        """Close the connection and return 0."""
        self.connection.close()
        print("Disconnected")
        return 0

    def receive_message(self, message):
        """Hand an incoming message to the parser."""
        self.parser.parse(message)

    def send_payload(self, data):
        """Send the whole encoded payload to the server.

        ``sendall`` is used because ``send`` may transmit only part of the
        buffer.
        """
        self.connection.sendall(data.encode())
# Receive loop for EAP frames; runs until ``self.shut_down`` becomes true.
#
# Each pass yields with ``time.sleep(0)``, reads one packed message from
# ``self.eap_socket`` and skips empty reads. The message is parsed with
# ``MessageParser.ethernet_parse`` into ``(eap, dst_mac)``; a
# ``MessageParseError`` is logged as a warning and the message is dropped.
# ``is_eapol`` is true only for ``EapolStartMessage`` instances. When
# ``self.auth_callback`` is set it is called with
# ``(str(eap.src_mac), eap, is_eapol)``.
#
# NOTE(review): this definition is whitespace-collapsed onto one physical
# line. It is not recoverable from the text whether
# ``self.authenticator_mac = dst_mac`` runs for every parsed message or only
# inside the ``isinstance(eap, EapolStartMessage)`` branch - confirm against
# the original file before restoring the line breaks.
def receive_eap_messages(self): """receive eap messages from supplicant.""" while not self.shut_down: time.sleep(0) self.logger.debug("Waiting for eap messages") packed_message = self.eap_socket.receive() if not packed_message: continue self.logger.debug("Received packed_message: %s", str(packed_message)) try: eap, dst_mac = MessageParser.ethernet_parse(packed_message) except MessageParseError as exception: self.logger.warning( "MessageParser.ethernet_parse threw exception.\n" " packed_message: '%s'.\n" " exception: '%s'.", packed_message, exception) continue self.logger.debug("Received eap message: %s" % (str(eap))) is_eapol = False if isinstance(eap, EapolStartMessage): is_eapol = True self.authenticator_mac = dst_mac if self.auth_callback: self.auth_callback(str(eap.src_mac), eap, is_eapol) self.logger.info('Done receiving EAP messages')
class UserParser:
    """Parse every message file below a user's directory and save the result."""

    def __init__(self, parser_output_dir):
        """
        :param parser_output_dir: directory (joined onto the current working
            directory) in which the per-user output file is written.
        """
        self.message_parser = MessageParser()
        self.parser_output_dir = parser_output_dir

    def parse_user(self, user_directory):
        """Parse all folders of one user and save them as ``<user>.gz``.

        The saved object is ``{user_name: {folder_name: {message_key:
        parsed_message}}}`` and is handed to
        ``save_to_disk_util.save_to_disk``.
        """
        user_name = os.path.basename(user_directory)
        user_contents = {}
        for folder in UserParser.__list_folders(user_directory):
            folder_name = os.path.basename(folder)
            user_contents[folder_name] = self.__parse_user_folder(folder)

        save_path = os.path.join(os.getcwd(), self.parser_output_dir,
                                 user_name + ".gz")
        save_to_disk_util.save_to_disk(save_path, {user_name: user_contents})

    def __parse_user_folder(self, user_folder):
        """Return ``{"<subdir>/<filename>": parsed_message}`` for one folder.

        Files that raise ``UnicodeDecodeError`` while being parsed are
        reported on stdout and skipped.
        """
        messages = {}
        pattern = os.path.join(user_folder, "**")
        for message_path in glob.glob(pattern, recursive=True):
            if os.path.isdir(message_path):
                continue
            subdirectory_path = UserParser.__get_subdirectory_path(
                message_path, user_folder)
            message_key = (subdirectory_path + "/"
                           + os.path.basename(message_path))
            try:
                messages[message_key] = self.message_parser.parse_message(
                    message_path, user_folder)
            except UnicodeDecodeError:
                print("Could not parse '" + message_path + "'\n")
        return messages

    @staticmethod
    def __get_subdirectory_path(full_path, base_dir):
        """Return the directory of ``full_path`` with the ``base_dir`` prefix cut.

        Only a leading ``base_dir`` is removed. The previous
        ``str.replace`` removed every occurrence, so base ``"a"`` turned
        ``"a/data"`` into ``"/dt"``.
        """
        directory = os.path.dirname(full_path)
        if base_dir and directory.startswith(base_dir):
            return directory[len(base_dir):]
        return directory

    @staticmethod
    def __list_folders(directory):
        """Return the paths of the immediate sub-directories of ``directory``."""
        candidates = (os.path.join(directory, entry)
                      for entry in os.listdir(directory))
        return [path for path in candidates if os.path.isdir(path)]
def __init__(self):
    """Create the query parser, the MongoDB handle and the search logger.

    Raises:
        Exception: whatever ``MongoClient`` raises. The failure is reported
            and re-raised, because there is no database handle to continue
            with.
    """
    self.parser = MessageParser()

    try:
        client = MongoClient("mongodb://127.0.0.1:27017/")
    except Exception:
        # Previously swallowed by a bare ``except``, after which ``client``
        # was unbound and the next line failed with a NameError.
        print('Could not connect to MongoDB')
        raise
    print('Connected to MongoDB successfully!!')
    self.database = client.TelegramIndexerDB  # client.TelegramMusicIndexerDB

    # define separate logger for searcher and bot logger
    os.makedirs('../../logs', exist_ok=True)
    self.logger = logging.getLogger("searcher")
    self.logger.setLevel(logging.INFO)
    fh = logging.FileHandler("../../logs/user_search.log")
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    self.logger.addHandler(fh)
class SMTPMailer(object):
    """Send a parsed message to each of its recipients over SMTP."""

    def __init__(self, contents):
        """Wrap the raw message ``contents`` in a ``MessageParser``."""
        self.message = MessageParser(contents)

    def send(self, settings):
        """Connect, authenticate and send the message to every recipient.

        :param settings: mapping with the keys ``smtp_server``, ``use_port``,
            ``smtp_tls``, ``smtp_username`` and ``smtp_password``.

        Errors raised while sending are printed and end the loop. Errors
        from connecting or logging in propagate to the caller. The
        connection is closed in every case.
        """
        smtp = smtplib.SMTP()
        try:
            smtp.set_debuglevel(0)
            smtp.connect(settings["smtp_server"], settings["use_port"])
            smtp.ehlo()
            if settings["smtp_tls"]:
                smtp.starttls()
                # the session must be re-identified after STARTTLS
                smtp.ehlo()
            # NOTE(review): assumes login happens with or without TLS -
            # confirm against the original indentation.
            smtp.login(settings["smtp_username"], settings["smtp_password"])

            self.message.begin_parsing()
            try:
                for recipient in self.message.recipients:
                    smtp.sendmail(self.message.sender, recipient,
                                  self.message.body)
            except Exception as error:
                # call form prints the same text on Python 2 and Python 3
                print(error)
        finally:
            # previously skipped when connect()/login() raised
            smtp.close()
def start_listening(self):
    """Serve DNS queries until the loop is interrupted, then save the cache.

    A keyboard listener wired to ``self.on_press`` is started first. Each
    datagram is parsed with a fresh ``MessageParser``. A query answered
    entirely from the cache gets a locally built reply; the first query with
    no cached data (other than a PTR query in a multi-question message) sends
    the whole datagram to ``treat_data_from_receive``. The cache is
    serialized when the loop exits for any reason.
    """
    key_listener = keyboard.Listener(on_press=self.on_press)
    key_listener.start()
    try:
        while True:
            self._message_parser = MessageParser()
            self._cache.update_cache()
            data, self._client_addr = self._dns_listener.recvfrom(1024)
            self._message_parser.from_bytes(data)

            answers = []
            for query in self._message_parser.queries:
                found = self.try_find_info(query, answers)
                is_extra_ptr = (
                    query.query_type == ResourceType.PTR
                    and self._message_parser.questions_num > 1)
                if is_extra_ptr or found:
                    continue
                # Nothing cached for this query: forward upstream
                self.treat_data_from_receive(data)
                break
            else:
                # Loop finished without forwarding, so the cache was enough
                if len(answers) > 0:
                    parsed = self._message_parser
                    answer = MessageParser.to_bytes(
                        parsed.transaction_id,
                        MessageType.ANSWER,
                        parsed.opcode,
                        HaveStatus.NO,
                        parsed.recursion_required,
                        parsed.recursion_available,
                        RCode.NO_ERROR,
                        parsed.message[12:],
                        questions_num=parsed.questions_num,
                        answers_num=len(answers),
                        answers=answers)
                    self._dns_listener.sendto(answer, self._client_addr)
                    self._client_addr = None
    finally:
        sys.stderr.write('\nThe server was stopped. '
                         'All useful data will be serialize\n')
        self._cache.serialize_cache()
def __init__(
        self,
        group_id,
        token,
        table_name,
        room_list,
        service_account_file,
        vk_bot: str = "bot",
):
    """Build the bot: VK session, long poll, Google connectors and parser.

    :param group_id: VK group id passed to ``VkBotLongPoll``.
    :param token: access token passed to ``vk_api.VkApi``.
    :param table_name: table name passed to ``GoogleSheets``.
    :param room_list: stored as ``self.room_list``.
    :param service_account_file: credentials file passed to ``GoogleSheets``
        and ``GoogleDriveConnector``.
    :param vk_bot: stored as ``self.vk_bot``.
    """
    log = Logger('app').logger
    self.logger = log
    log.info('Start of bot initialization')

    self.vk_bot = vk_bot
    self.group_id = group_id

    # VK connection objects
    session = vk_api.VkApi(token=token)
    self.vk_session = session
    self.long_poll = VkBotLongPoll(session, group_id)
    self.vk = session.get_api()

    # Constants taken from module-level configuration
    self.master_id = GOSHA_ID
    self.admins_ids = GOSHA_ID
    self.vk_link = VK_LINK

    self.room_list = room_list
    self.table_name = table_name

    # Google connectors and keyboard
    self.gs = GoogleSheets(table_name, service_account_file)
    self.gd = GoogleDriveConnector(service_account_file)
    self.keyboard = Keyboard()
    log.info("Completion of bot initialization")

    log.info('Start of answers download')
    self.mp = MessageParser(self.vk, self.gs)
    self.data_to_upload = {'room': [], 'fullname': [], 'id': []}
    log.info("Completion of answer download")
class testMessageParser(unittest.TestCase):
    """Checks ``MessageParser`` against the fixture stored in ``tests.txt``."""

    def setUp(self):
        """Parse the fixture once per test and keep the parser and result."""
        # ``with`` closes the fixture file; it used to stay open
        with open("tests.txt", 'r') as f:
            data = f.read()
        self.mp = MessageParser(data)
        self.parse_result = self.mp.begin_parsing()

    def testBeginParser(self):
        """``begin_parsing`` reports success."""
        self.assertEqual(self.parse_result, True)

    def testParseSender(self):
        """The sender address is extracted."""
        self.assertEqual(self.mp.sender, "*****@*****.**")

    def testParseRecipients(self):
        """All three recipients are extracted, in order."""
        self.assertEqual(self.mp.recipients,
                         ["*****@*****.**", "*****@*****.**",
                          "*****@*****.**"])

    def testParseBody(self):
        """The body is extracted with its trailing newline."""
        self.assertEqual(self.mp.body, "This is the mail\n")
def webhook():
    """Handle one webhook call: parse it, set state and return a JSON reply.

    The request body is parsed as JSON (forced, silent on errors) and turned
    into a message object by ``MessageParser.parse_message_event``. After
    ``set_state()`` the message handles itself, and the result is returned
    with an ``application/json`` content type.
    """
    payload = request.get_json(silent=True, force=True)
    print("received message: {}".format(payload))

    # parse message
    message = MessageParser().parse_message_event(payload)

    # set state dependent on parameters
    message.set_state()
    print("state set to: {}".format(message.get_state()))

    # handle message dependent on state
    result = message.handle_message()
    print("returning msg: {}".format(result))

    reply = make_response(json.dumps(result))
    reply.headers["Content-Type"] = "application/json"
    return reply
def data(self):
    """Return ``self._data_type.data()`` parsed by ``MessageParser.eap_parse``.

    ``MessageParser`` is imported inside the function, and ``None`` is passed
    as the second argument of ``eap_parse``.
    """
    from message_parser import MessageParser

    parse = MessageParser.eap_parse
    return parse(self._data_type.data(), None)
class ConnectionProtocol(asyncio.Protocol):
    """asyncio protocol that answers each received message with a response."""

    def __init__(self):
        self._message_parser = MessageParser()
        self._logger = Logger()
        self._tcp_transport = None
        self._message = None

    def connection_made(self, transport: transports.Transport) -> None:
        """Keep the transport and log the new connection."""
        self._tcp_transport = transport
        self._logger.log_connection(self._get_sender())

    def _get_sender(self) -> str:
        """Return the first element of the transport's ``peername``."""
        return self._tcp_transport.get_extra_info('peername')[0]

    def _send_response(self, response: Response) -> None:
        """Serialise ``response``, write it to the peer and log it."""
        message = self._message_parser.get_response_message(response)
        self._tcp_transport.write(message)
        self._logger.log_response(message, self._get_sender())

    @staticmethod
    def _check_for_secret_change(request: Request) -> bool:
        """Return True when the request code is ``DIFFIE_HELLMAN``."""
        return request.code == RequestTypes.DIFFIE_HELLMAN

    def _change_secret_for_victim(self, address: str) -> None:
        """Load the victim stored for ``address`` and switch to its secret."""
        victim = Database().get_victim_by_address(address)
        self._message_parser.change_secret(victim.secret)

    def _respond(self) -> None:
        """Parse the stored message, dispatch it and send the response.

        Raises:
            InvalidRequestError: when parsing the request raises
                ``CryptoError``.
        """
        sender = self._get_sender()
        try:
            request = self._message_parser.get_request(sender, self._message)
        except CryptoError as error:
            raise InvalidRequestError() from error

        if self._check_for_secret_change(request):
            self._change_secret_for_victim(sender)

        response = RequestDispatcher(request).dispatch()
        self._send_response(response)

    def _send_error_message(self) -> None:
        """Write the parser's error message to the peer."""
        message = self._message_parser.get_error_message()
        self._tcp_transport.write(message)

    def _close_connection(self) -> None:
        """Log the close and close the transport."""
        self._logger.log_connection_close(self._get_sender())
        self._tcp_transport.close()

    def data_received(self, data: bytes) -> None:
        """Store and log the incoming data, then respond to it.

        On ``UnicodeDecodeError`` while logging, or ``InvalidRequestError``
        while responding, an error message is sent and the connection is
        closed.
        """
        try:
            self._message = data
            self._logger.log_message(self._get_sender(), self._message)
        except UnicodeDecodeError:
            self._logger.log_invalid_encoding(self._get_sender())
            self._send_error_message()
            self._close_connection()
            # Stop here: the transport was just closed, so falling through
            # to _respond() would answer on a dead connection.
            return

        try:
            self._respond()
        except InvalidRequestError:
            self._logger.log_invalid_message(self._get_sender(),
                                             self._message)
            self._send_error_message()
            self._close_connection()
def __init__(self):
    """Create the parser and logger; no transport or message is held yet."""
    # Filled in later, once a connection exists and data has arrived
    self._tcp_transport = None
    self._message = None

    self._message_parser = MessageParser()
    self._logger = Logger()
class Search:
    """Query the ``Index`` and ``DocLengths`` collections stored in MongoDB."""

    def __init__(self):
        """Create the query parser, the MongoDB handle and the search logger.

        Raises:
            Exception: whatever ``MongoClient`` raises. The failure is
                reported and re-raised, because there is no database handle
                to continue with.
        """
        self.parser = MessageParser()

        try:
            client = MongoClient("mongodb://127.0.0.1:27017/")
        except Exception:
            # Previously swallowed by a bare ``except``, after which
            # ``client`` was unbound and the next line raised NameError.
            print('Could not connect to MongoDB')
            raise
        print('Connected to MongoDB successfully!!')
        self.database = client.TelegramIndexerDB  # client.TelegramMusicIndexerDB

        # define separate logger for searcher and bot logger
        os.makedirs('../../logs', exist_ok=True)
        self.logger = logging.getLogger("searcher")
        self.logger.setLevel(logging.INFO)
        fh = logging.FileHandler("../../logs/user_search.log")
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        fh.setFormatter(formatter)
        self.logger.addHandler(fh)

    def search(self, query):
        """Parse ``query`` and return document ids ranked by Okapi score.

        ``boolean_retrieval`` is the available unranked alternative.
        """
        query_terms, _links = self.parser.parse_message(query)
        return self.okapi_scoring(query_terms)

    def boolean_retrieval(self, query):
        """Return the documents that contain every term of ``query``.

        :param query: mapping whose keys are the query terms.
        :return: list of document ids (unordered); ``[]`` for an empty query.
        """
        postings = []
        for term in query.keys():
            posting = []
            for record in self.database.Index.find({'key': term}):
                posting += record['postings']
            # extract document info only
            postings.append([entry[0] for entry in posting])

        if not postings:
            return []
        return list(set.intersection(*map(set, postings)))

    def okapi_scoring(self, query, k1=1.2, b=0.75):
        """Rank documents for ``query`` with an Okapi-BM25-style score.

        :param query: mapping whose keys are the query terms.
        :param k1: term-frequency saturation parameter.
        :param b: length-normalisation parameter.
        :return: document ids, best score first.
        """
        scores = Counter()
        # NOTE(review): N counts records of the Index collection and n_docs
        # is len(postings) - 1; both are kept as found - confirm they match
        # the intended BM25 definitions. ``Collection.count`` is also absent
        # from recent PyMongo releases - confirm the installed version.
        N = self.database.Index.count()
        avgdl = 100  # constant for all documents, not calculated

        for term in query.keys():
            # extract postings lists from Index
            postings = []
            for record in self.database.Index.find({'key': term}):
                postings += record['postings']
            if not postings:
                continue  # ignore absent terms

            n_docs = len(postings) - 1
            idf = math.log10((N - n_docs + 0.5) / (n_docs + 0.5))
            for posting in postings:
                doc_id = posting[0]
                doc_tf = posting[1]
                doc_len = 0
                for record in self.database.DocLengths.find(
                        {'doc_url': doc_id}):
                    doc_len = record['length']
                if not doc_len:
                    doc_len = 0
                score = idf * doc_tf * (k1 + 1) / (
                    doc_tf + k1 * (1 - b + b * (doc_len / avgdl)))
                scores[doc_id] += score

        # sort according to the score value
        return [doc_url for doc_url, _ in scores.most_common()]
class Resolver:
    """Caching DNS forwarder listening on a UDP socket."""

    def __init__(self, next_dns_addr, cash_file_name='cache.txt'):
        """
        :param next_dns_addr: address of the DNS server that queries are
            forwarded to.
        :param cash_file_name: file name handed to ``Cache``.
        """
        self._port = 53
        self._cache = Cache(cash_file_name)
        self._dns_listener = Resolver.bind_server()
        self._next_dns_addr = next_dns_addr
        self._message_parser = MessageParser()
        self._client_addr = None

    @staticmethod
    def bind_server(addr='localhost', port=53):
        """Return a UDP socket bound to ``(addr, port)``."""
        server = socket.socket(AF_INET, SOCK_DGRAM)
        server.bind((addr, port))
        return server

    def on_press(self, key):
        """Keyboard callback: close the listening socket when 'c' is pressed.

        Keys without a ``char`` attribute are ignored.
        """
        if getattr(key, 'char', None) != 'c':
            return
        try:
            self._dns_listener.close()
        except socket.error:
            # best effort: the callback must not raise in the listener thread
            pass

    def try_find_info(self, query, answers):
        """Append cached records matching ``query`` to ``answers``.

        A record matches when its address equals the decoded query name and
        its resource type equals the query type.

        :return: True when at least one record was appended.
        """
        domain_addr = query.name.decode()
        new_answers = [
            record for record in self._cache.records
            if domain_addr == record.address
            and query.query_type == record.resource_type
        ]
        answers.extend(new_answers)
        return len(new_answers) > 0

    def start_listening(self):
        """Serve DNS queries until interrupted, then serialize the cache.

        A query answered entirely from the cache gets a locally built reply.
        The first query with no cached data (other than a PTR query in a
        multi-question message) sends the whole datagram to
        ``treat_data_from_receive``.
        """
        lis = keyboard.Listener(on_press=self.on_press)
        lis.start()
        try:
            while True:
                self._message_parser = MessageParser()
                self._cache.update_cache()
                data, self._client_addr = self._dns_listener.recvfrom(1024)
                self._message_parser.from_bytes(data)

                have_all_data = True
                answers = []
                for query in self._message_parser.queries:
                    have_info = self.try_find_info(query, answers)
                    if query.query_type == ResourceType.PTR and \
                            self._message_parser.questions_num > 1:
                        continue
                    elif not have_info:
                        self.treat_data_from_receive(data)
                        have_all_data = False
                        break

                if have_all_data and answers:
                    answer = MessageParser.to_bytes(
                        self._message_parser.transaction_id,
                        MessageType.ANSWER,
                        self._message_parser.opcode,
                        HaveStatus.NO,
                        self._message_parser.recursion_required,
                        self._message_parser.recursion_available,
                        RCode.NO_ERROR,
                        self._message_parser.message[12:],
                        questions_num=self._message_parser.questions_num,
                        answers_num=len(answers),
                        answers=answers)
                    self._dns_listener.sendto(answer, self._client_addr)
                    self._client_addr = None
        finally:
            sys.stderr.write('\nThe server was stopped. '
                             'All useful data will be serialize\n')
            self._cache.serialize_cache()

    def treat_data_from_receive(self, data):
        """Forward a query upstream, relay the reply and cache its records.

        On a socket error (including the 5 second timeout) a REFUSED message
        is sent to the client instead.

        :param data: raw bytes of the client's DNS query.
        """
        try:
            sender = socket.socket(AF_INET, SOCK_DGRAM)
            try:
                sender.connect((self._next_dns_addr, self._port))
                sender.settimeout(5)
                sender.send(data)
                data = sender.recv(1024)
            finally:
                # Release the descriptor on timeouts and refusals too;
                # previously it was only closed on the success path.
                sender.close()
        except socket.error:
            # NOTE(review): the refusal is flagged as MessageType.QUERY, not
            # ANSWER - confirm this is intended.
            answer = MessageParser.to_bytes(
                self._message_parser.transaction_id,
                MessageType.QUERY,
                self._message_parser.opcode,
                HaveStatus.NO,
                self._message_parser.recursion_required,
                self._message_parser.recursion_available,
                RCode.REFUSED,
                self._message_parser.message[12:],
                answers_num=0,
                answers=[])
            self._dns_listener.sendto(answer, self._client_addr)
            return

        self._dns_listener.sendto(data, self._client_addr)

        answer_info = MessageParser()
        answer_info.from_bytes(data)
        for container in [
                answer_info.answers,
                answer_info.resources_rights,
                answer_info.additional_resources
        ]:
            for resource in container:
                self._cache.add_record(resource.ttl,
                                       resource.name.decode(),
                                       resource.resource_type,
                                       resource.resource_data,
                                       resource.resource_class)
        self._client_addr = None
def __init__(self, contents):
    """Wrap the raw message ``contents`` in a ``MessageParser``."""
    parsed_message = MessageParser(contents)
    self.message = parsed_message
from message_parser import MessageParser
from naive_bayes import NaiveBayes
from non_naive_bayes import NonNaiveBayes

__author__ = 'danylofitel'

# Input locations: feature list plus glob patterns for both corpora
features_filename = "C:\\spam\\features_short.txt"
training_directory = "C:\\spam\\training\\*.txt"
testing_directory = "C:\\spam\\testing\\*.txt"

# Turn both corpora into feature vectors; index 0 is iterated below as the
# legitimate messages and index 1 as the spam messages.
parser = MessageParser(features_filename)
training_set = parser.extract_feature_vectors(training_directory)
testing_set = parser.extract_feature_vectors(testing_directory)

bayesian_classifier = NonNaiveBayes(
    parser.feature_count(),
    training_set[0],
    training_set[1],
)

# Sizes of the two test classes and of the whole test set
N_L = len(testing_set[0])
N_S = len(testing_set[1])
N = N_L + N_S

# false positives: legitimate messages classified as spam
N_L_S = 0
for legitimate in testing_set[0]:
    N_L_S += 1 if bayesian_classifier.is_spam(legitimate) else 0

# false negatives: spam messages classified as legitimate
N_S_L = 0
for spam in testing_set[1]:
    N_S_L += 0 if bayesian_classifier.is_spam(spam) else 1
def setUp(self):
    """Parse the ``tests.txt`` fixture and keep the parser and its result."""
    # ``with`` closes the fixture file; it used to stay open
    with open("tests.txt", 'r') as f:
        data = f.read()
    self.mp = MessageParser(data)
    self.parse_result = self.mp.begin_parsing()
# VkBot - VK group bot backed by Google Sheets and Google Drive.
#
# Overview of the methods defined below (taken from the visible code):
#   __init__              builds the vk_api session, the long poll, the
#                         GoogleSheets / GoogleDriveConnector helpers, the
#                         Keyboard and the MessageParser.
#   write_about_exception messages both self.master_id and the affected user,
#                         then logs the active exception.
#   write_events          logs GROUP_JOIN, GROUP_LEAVE, MESSAGE_NEW and
#                         MESSAGE_REPLY events.
#   write_msg             sends one message through vk.messages.send with a
#                         random id.
#   send_msg_about_duty   endless loop: between 19.00.00 and 19.10.00 local
#                         time it messages the ids of each duty room, then
#                         sleeps 600 seconds and calls update_data().
#   update_data           uploads pending links, refreshes Google Sheets data
#                         and reloads the answer texts held on self.mp.
#   send_answer           write_msg wrapper that reports failures.
#   upload_links          writes queued room/id/fullname entries through
#                         gs.add_student and resets the queue.
#   get_events            main loop over long_poll.listen(): master commands,
#                         replies to the question / duty / invoice prompts,
#                         and generic commands resolved via mp.get_msg_type.
#
# NOTE(review): the class is whitespace-collapsed onto a few physical lines.
# The nesting of the try/except/else and if/else chains in get_events cannot
# be recovered reliably from this text - restore the formatting from the
# original file rather than re-deriving it.
class VkBot: def __init__( self, group_id, token, table_name, room_list, service_account_file, vk_bot: str = "bot", ): """Creates a new bot with parameters from the setup file""" self.logger = Logger('app').logger self.logger.info('Start of bot initialization') self.vk_bot = vk_bot self.group_id = group_id self.vk_session = vk_api.VkApi(token=token) self.long_poll = VkBotLongPoll(self.vk_session, group_id) self.vk = self.vk_session.get_api() self.master_id = GOSHA_ID self.admins_ids = GOSHA_ID self.vk_link = VK_LINK self.room_list = room_list self.table_name = table_name self.gs = GoogleSheets(table_name, service_account_file) self.gd = GoogleDriveConnector(service_account_file) self.keyboard = Keyboard() self.logger.info("Completion of bot initialization") self.logger.info('Start of answers download') self.mp = MessageParser(self.vk, self.gs) self.data_to_upload = {'room': [], 'fullname': [], 'id': []} self.logger.info("Completion of answer download") def write_about_exception(self, event): """Sends an exception message to the user and developer. :param event: Event that caused the exception. :return: None. """ self.write_msg( self.master_id, 'Что-то пошло не так с командой {} от пользователя {}.'.format( event.obj.text, ' '.join(self.mp.get_full_name(event.obj.from_id))), self.keyboard.user_keyboard) self.write_msg( event.obj.peer_id, 'Что-то пошло не так, попробуйте повторить попытку позднее.', self.keyboard.user_keyboard) self.logger.exception('Exception:') def write_events(self, event): """Specifies the type of event to write to the file. :param event: The event that you want to write to a file. :param time: Time when the event occurred. :return: None. 
""" if event.type == VkBotEventType.GROUP_JOIN: self.logger.info('New user group membership') elif event.type == VkBotEventType.GROUP_LEAVE: self.logger.info('Leaving a group') elif event.type == VkBotEventType.MESSAGE_NEW: self.logger.info('New incoming message: "{}" from user {}'.format( event.obj.text, ' '.join(self.mp.get_full_name(event.obj.peer_id)))) self.logger.info(event.obj) elif event.type == VkBotEventType.MESSAGE_REPLY: self.logger.info('New outgoing message "{}.." for user {}'.format( event.obj.text[:20], ' '.join(self.mp.get_full_name(event.obj.peer_id)))) def write_msg(self, peer_id, message, keyboard): """Sends a message to the user. :param peer_id: Id of the user to send the message to. :param message: Text of the message to send. :return: None """ self.vk.messages.send(peer_id=peer_id, random_id=get_random_id(), message=message, keyboard=keyboard) def send_msg_about_duty(self): """"Sends a message to students who are on duty at a certain time today.""" self.logger.info('Duty thread is started') while True: if '19.00.00' <= time.strftime("%H.%M.%S", time.localtime()) <= '19.10.00': rooms = self.gs.get_duty_room() if rooms: self.logger.info('Duty rooms {} detected'.format(rooms)) for room in rooms: ids = self.gs.get_duty_ids_by_room(room) for id in ids: self.write_msg( int(id), 'Сегодня дежурит {} комната.'.format(room), self.keyboard.user_keyboard) else: self.logger.info('Duty rooms not detected') # if self.update_time['begin'] <= time.strftime("%H.%M.%S", time.localtime()) <= self.update_time['end']: time.sleep(600) self.update_data() def update_data(self): """Updates information from Google Sheets""" self.logger.info('Start of data update') self.upload_links() self.gs.update_data() self.mp.about_commandant = self.gs.get_answer_text('ABOUT_MILENA')[0] self.mp.about_castellan = self.gs.get_answer_text('ABOUT_MARGO')[0] self.mp.about_gym = self.gs.get_answer_text('ABOUT_GYM')[0] self.mp.about_study_room = self.gs.get_answer_text( 
'ABOUT_STUDY_ROOM')[0] self.mp.about_guests = self.gs.get_answer_text('ABOUT_GUESTS')[0] self.mp.about_shower = self.gs.get_answer_text('ABOUT_SHOWER')[0] self.mp.about_laundry = self.gs.get_answer_text('ABOUT_LAUNDRY')[0] self.mp.about_duty = self.gs.get_answer_text('REMINDER_ABOUT_DUTY')[0] self.mp.question = self.gs.get_answer_text('ABOUT_STUDSOVET')[0] self.mp.about_bot = self.gs.get_answer_text('ABOUT_BOT')[0] self.mp.parting = self.gs.get_answer_text('PARTING') self.mp.opportunities = self.gs.get_answer_text('OPPORTUNITIES')[0] self.mp.rude_commands = self.gs.get_answer_text('RUDE_COMMANDS') self.mp.good_room = self.gs.get_answer_text('GOOD_ROOM')[0] self.mp.bad_room = self.gs.get_answer_text('BAD_ROOM')[0] self.mp.unknown_commands = self.gs.get_answer_text('UNKNOWN_COMMANDS') self.mp.topical = self.gs.get_answer_text('TOPICAL')[0] self.mp.about_invoice = self.gs.get_answer_text('ABOUT_INVOICE')[0] self.logger.info('Completion of data update') def send_answer(self, event, msg, keyboard): try: self.write_msg(event.obj.peer_id, msg, keyboard) except Exception: self.write_about_exception(event) self.logger.exception('Answer Exception') def upload_links(self): self.logger.info('Start of uploading links') if len(self.data_to_upload['room']) > 0: try: for i in range(len(self.data_to_upload['room'])): self.gs.add_student(self.data_to_upload['room'][i], self.data_to_upload['id'][i], self.data_to_upload['fullname'][i]) self.write_msg(self.data_to_upload['id'][i], self.mp.good_room, self.keyboard.user_keyboard) except Exception: self.logger.exception('Links are not uploaded') else: self.logger.info('Links uploaded') self.data_to_upload = {'room': [], 'fullname': [], 'id': []} self.gs.links_dataframe = pd.DataFrame( self.gs.sheet_links.get_all_records()) def get_events(self): self.write_msg(self.master_id, 'Бот запущен', self.keyboard.user_keyboard) self.logger.info('Main thread is started') for event in self.long_poll.listen(): if event.type == 
VkBotEventType.MESSAGE_NEW: self.write_events(event) request = event.obj.text.upper() forwarded_request = event.obj.text is_answered = False if event.obj.peer_id == self.master_id: if request.startswith('НАПИШИ'): try: room = forwarded_request.split()[1] mes = ' '.join(forwarded_request.split()[2:]) ids = self.gs.get_duty_ids_by_room(int(room)) # print(ids) for id in ids: self.write_msg(int(id), mes, self.keyboard.user_keyboard) except Exception: self.write_about_exception(event) else: if ids: self.write_msg(self.master_id, 'Сделано', self.keyboard.user_keyboard) else: self.write_msg(self.master_id, 'Никого не оказалось', self.keyboard.user_keyboard) is_answered = True if request.startswith("ОБНОВИ"): try: self.write_msg( self.master_id, 'Это займет некоторое время, подождите', self.keyboard.user_keyboard) self.update_data() except Exception: self.write_msg( self.master_id, 'Данные не обновлены, повторите попытку позднее', self.keyboard.user_keyboard) self.logger.exception('Information is not updated') else: self.write_msg(self.master_id, 'Данные успешно обновлены', self.keyboard.user_keyboard) self.logger.info('Information updated') is_answered = True if event.obj.reply_message: # print(difflib.SequenceMatcher(None, self.about_studsovet, event.obj.reply_message['text']).ratio()) # print(self.about_studsovet) # print(event.obj.reply_message['text']) if difflib.SequenceMatcher( None, self.mp.question, event.obj.reply_message['text']).ratio() >= 0.99: try: self.write_msg( event.obj.peer_id, 'Спасибо за вопрос, я передал его.', self.keyboard.user_keyboard) self.write_msg( self.master_id, '{} спросил:\n{} \nСсылка на страницу: {}{}'. 
format( ' '.join( self.mp.get_full_name( event.obj.from_id)), forwarded_request, self.vk_link, event.obj.peer_id), self.keyboard.user_keyboard) except Exception: self.write_about_exception(event) is_answered = True elif difflib.SequenceMatcher( None, self.mp.about_duty, event.obj.reply_message['text']).ratio() >= 0.99: if request.isdigit() and int( request) in self.room_list: if len(self.data_to_upload['room']) == len( self.data_to_upload['id']) == len( self.data_to_upload['fullname']): self.data_to_upload['room'].append(request) self.data_to_upload['id'].append( event.obj.peer_id) self.data_to_upload['fullname'].append( ' '.join( self.mp.get_full_name( event.obj.from_id))) self.logger.info('Link is added') if len(self.data_to_upload['room']) >= 5: self.upload_links() else: self.logger.info( 'Current length of links is {}. Data is not loaded' .format( len(self.data_to_upload['room']))) self.write_msg( event.obj.peer_id, 'Ты ввел корректную комнату.\n\nОжидай добавления в базу :)', self.keyboard.user_keyboard) else: self.write_msg( self.master_id, 'Нарушение структуры данных для загрузки', self.keyboard.user_keyboard) else: try: self.write_msg(event.obj.from_id, self.mp.bad_room, self.keyboard.user_keyboard) except Exception: self.write_about_exception(event) is_answered = True elif event.obj.reply_message[ 'text'] == self.mp.about_invoice: try: if event.obj.attachments: for i in event.obj.attachments: from_id = event.obj.from_id name = self.mp.get_full_name(from_id) if i['type'] == 'doc' or i[ 'type'] == 'photo': if i['type'] == 'doc': ext = i['doc']['ext'] url = i['doc']['url'] filename = "{} {}.{}".format( name[1], name[0], ext) else: url = self.mp.get_max_image_url( i['photo']['sizes']) filename = "{} {}.jpg".format( name[1], name[0]) if self.gd.upload_invoice( url, filename): self.write_msg( from_id, "Твой чек успешно загружен", self.keyboard.user_keyboard) else: self.write_msg( from_id, "Твой чек не загружен. 
Повтори попытку позднее", self.keyboard.user_keyboard) except Exception: self.write_about_exception(event) self.logger.exception('Invoice is not loaded') is_answered = True if not is_answered: command = self.mp.get_msg_type(request) if command == 'duty': self.logger.info('Command "Дежурство" detected' ) # reminder about duty room = self.gs.get_room_by_id(event.obj.from_id) # print(room) if room: floor = room // 100 if floor < 2: floor = 2 # print(floor) try: date = self.gs.get_duty_date_by_room_number( room) # print('date', date) duty_rooms = self.gs.get_duty_room() # print(duty_rooms) # if duty_rooms: for room in duty_rooms: # print(type(room)) # print(type(floor)) if room // 100 == floor: if date: # print(room) self.write_msg( event.obj.peer_id, 'Сегодня дежурит {} комната.\n\n' 'Следующее дежурство твоей команты будет {}.' .format(room, date), self.keyboard.user_keyboard) is_answered = True else: # print(room) self.write_msg( event.obj.peer_id, 'Сегодня дежурит {} комната.\n\n' 'Следующее дежурство твоей комнаты будет в следующем месяце.' 
.format(room), self.keyboard.user_keyboard) is_answered = True except Exception: self.write_about_exception(event) if not is_answered: self.write_msg( event.obj.peer_id, 'Пока данных нет, но они скоро появятся!', self.keyboard.user_keyboard) else: try: self.write_msg(event.obj.peer_id, self.mp.about_duty, self.keyboard.user_keyboard) except Exception: self.write_about_exception(event) else: self.logger.info( 'Command "{}" detected'.format(command)) try: self.send_answer( event, self.mp.get_answer_by_msg_type(command), self.keyboard.user_keyboard) except Exception: self.write_about_exception(event) # # elif command == 'invoice': # invoice # self.logger.info('Command "Чек" detected') # # if event.obj.fwd_messages: # # from_id = event.obj.fwd_messages[0]['from_id'] # # name = self.mp.get_full_name(from_id) # # # # # if event.obj.fwd_messages[0]['attachments'][0]['type'] == 'doc': # # # url = event.obj.fwd_messages[0]['attachments'][0]['doc']['url'] # # # ext = event.obj.fwd_messages[0]['attachments'][0]['doc']['ext'] # # # # # # filename = "{} {}.{}".format(name[1], name[0], ext) # # # # # # elif event.obj.fwd_messages[0]['attachments'][0]['type'] == 'photo': # # # # # # url = self.mp.get_max_image_url([msg['photo']['sizes'] for msg in event.obj.fwd_messages[0]['attachments']]) # # # filename = "{} {}.jpg".format(name[1], name[0]) # # # # # # self.gd.upload_invoice(url, filename) # # else: # self.write_msg(event.obj.peer_id, self.about_invoice, self.keyboard.user_keyboard) # elif event.type == VkBotEventType.MESSAGE_REPLY: self.write_events(event) elif event.type == VkBotEventType.GROUP_JOIN: self.write_events(event)
class TelegramIndexer:
    """Build an inverted index over messages and persist it to MongoDB.

    Messages are indexed in memory by :meth:`index_one_url` and flushed to the
    ``Index`` and ``DocLengths`` collections by :meth:`dump_index`.

    In-memory layout:
        index:        word -> [total_frequency, (msg_url, frequency), ...]
        doc_lengths:  msg_url -> number of distinct words in that message
    """

    def __init__(self):
        """Create empty in-memory state, set up file logging and open the DB.

        Raises:
            Exception: whatever ``MongoClient`` raises when the client cannot
                be created; the error is logged and re-raised instead of
                surfacing later as an unbound ``client`` variable.
        """
        self.parser = MessageParser()
        # URLs whose messages have already been indexed.
        self.visited_links = set()
        # URLs discovered inside messages that have not been indexed yet.
        self.links_to_visit = set()
        self.index = {}
        self.doc_lengths = {}
        # Stays True until dump_index() finds a record in the Index collection.
        self.database_empty = True

        # create logger
        os.makedirs('../logs', exist_ok=True)
        self.logger = logging.getLogger("urls_extractor")
        self.logger.setLevel(logging.INFO)
        # getLogger() returns the same object for the same name, so attach the
        # file handler only once; otherwise every new indexer instance would
        # duplicate each log line.
        has_file_handler = any(
            isinstance(handler, logging.FileHandler)
            for handler in self.logger.handlers
        )
        if not has_file_handler:
            fh = logging.FileHandler("../logs/indexer.log")
            fh.setFormatter(logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
            self.logger.addHandler(fh)

        try:
            client = MongoClient("mongodb://127.0.0.1:27017/")
        except Exception:
            self.logger.exception('Could not connect to MongoDB')
            raise
        self.logger.info('Connected to MongoDB successfully!!')
        self.database = client.TelegramIndexerDB  # alternative: client.TelegramMusicIndexerDB

    def index_one_url(self, url, messages):
        """Add every message of one URL to the in-memory index.

        Args:
            url: the URL the messages were collected from.
            messages: mapping of message URL -> message text. An empty (or
                missing) mapping is logged and skipped without marking the
                URL as visited.

        Side effects:
            Updates ``index``, ``doc_lengths``, ``visited_links`` and
            ``links_to_visit``.
        """
        if url in self.visited_links:
            self.links_to_visit.discard(url)
            self.logger.info(f'Url {url} was already indexed. Move on to another url')
            return
        if not messages:
            self.links_to_visit.discard(url)
            self.logger.info(f'Message list is empty for url {url}')
            return

        self.logger.info(f'Indexing messages from url {url}')
        for msg_url, msg_text in messages.items():
            words, links = self.parser.parse_message(msg_text)
            self.links_to_visit.update(links)
            # Number of distinct words: an approximation of the real length.
            self.doc_lengths[msg_url] = len(words)

            # msg_freq is how often the word occurs in this particular message.
            for word, msg_freq in words.items():
                entry = self.index.setdefault(word, [0])
                entry[0] += msg_freq
                entry.append((msg_url, msg_freq))

        self.visited_links.add(url)
        # Never queue something that has already been indexed (including url).
        self.links_to_visit -= self.visited_links

    def dump_index(self):
        """Flush the in-memory index and document lengths to MongoDB.

        When the ``Index`` collection holds no records, everything is inserted
        as new documents. Otherwise postings are merged into the stored ones
        (a posting for the same message URL is overwritten) and document
        lengths are inserted or updated in ``DocLengths``.

        The in-memory ``index`` and ``doc_lengths`` are emptied afterwards in
        both cases, so data already written is not merged again by the next
        call.
        """
        self.logger.info('Index is being dumped to DB')

        # Only probe the database while it is still believed to be empty.
        if self.database_empty and self.database.Index.find_one() is not None:
            self.database_empty = False

        if self.database_empty:
            self._insert_all_as_new()
        else:
            self._merge_index()
            self._merge_doc_lengths()

        # The local buffers have been persisted; start over empty.
        self.index = {}
        self.doc_lengths = {}

    def _insert_all_as_new(self):
        """Insert every buffered word and document length as a new record."""
        self.logger.info(f'Database was empty, writing {len(self.index)} new items')
        for word, postings in self.index.items():
            try:
                self.database.Index.insert_one(
                    {'key': word, 'frequency': postings[0], 'postings': postings[1:]}
                )
            except Exception:
                self.logger.exception('Unable to add new items to Index in database')

        for msg_url, doc_len in self.doc_lengths.items():
            try:
                self.database.DocLengths.insert_one(
                    {'doc_url': msg_url, 'length': doc_len}
                )
            except Exception:
                self.logger.exception('Unable to add new items to DocLengths in database')

    def _merge_index(self):
        """Merge buffered postings into the records of the Index collection."""
        self.logger.info(f'Updating Index in database with {len(self.index)} new items')
        for word, postings in self.index.items():
            stored = self.database.Index.find_one({'key': word})
            if stored is None:
                self.database.Index.insert_one(
                    {'key': word, 'frequency': postings[0], 'postings': postings[1:]}
                )
                continue

            self.logger.info('Changing existing postings')
            # Key by message URL so that a re-indexed message replaces its old
            # posting instead of being counted twice.
            merged = {doc_url: freq for doc_url, freq in stored['postings']}
            merged.update(postings[1:])
            new_values = {"$set": {
                'frequency': sum(merged.values()),
                'postings': [[doc_url, freq] for doc_url, freq in merged.items()],
            }}
            try:
                self.database.Index.update_one({'key': word}, new_values)
                self.logger.info('Postings changed successfully')
            except Exception:
                self.logger.exception(
                    'Postings were not changes, error while writing to database')

    def _merge_doc_lengths(self):
        """Insert or update buffered document lengths in DocLengths."""
        self.logger.info('Updating DocLengths in the database')
        for doc_url, doc_len in self.doc_lengths.items():
            if self.database.DocLengths.find_one({'doc_url': doc_url}) is None:
                self.database.DocLengths.insert_one(
                    {'doc_url': doc_url, 'length': doc_len}
                )
                continue

            self.logger.info('Changing doc lenths')
            try:
                self.database.DocLengths.update_one(
                    {'doc_url': doc_url}, {"$set": {'length': doc_len}})
                self.logger.info('DocLengths changed successfully')
            except Exception:
                self.logger.exception(
                    'DocLengths were not changes, error while writing to database')
import logging
from typing import Dict

from vk_api import VkApi, ApiError

from database_models import ChatData, KarmaUpdate, UserInfo
from message_parser import MessageParser

# Module-wide collaborators shared by the handler functions below.
logger = logging.getLogger('handlers')
message_parser = MessageParser(command_symbol='/')
vk = None  # VK API handle; stays None until set_token() is called


def set_token(token: str):
    """Open a VK session with *token* and publish its API handle as ``vk``."""
    global vk
    vk = VkApi(token=token).get_api()


def get_user_id(user_data: str) -> str:
    """Return the digits found before the first ``|`` in *user_data*.

    Every digit character in the part of the string preceding the first
    ``|`` (or in the whole string when there is none) is kept, in order.

    NOTE(review): when that part contains no digits the integer ``0`` is
    returned, not a string, despite the ``-> str`` annotation.
    """
    head = user_data.partition('|')[0]
    digits = ''.join(filter(str.isdigit, head))
    return digits or 0


def show_stats(message: Dict, chat_data: ChatData):
    """Return a placeholder text when the chat has no karma statistics.

    NOTE(review): as visible here, nothing is returned (``None``) when
    statistics exist, and *message* is unused — confirm against the full file.
    """
    stats = KarmaUpdate.get_statistics(chat_data)
    if not len(stats):
        return 'No karma here'
def __init__(self, parser_output_dir):
    """Remember the output location and create a fresh message parser.

    Args:
        parser_output_dir: stored unchanged on ``self.parser_output_dir``;
            presumably the directory parser results are written to —
            confirm against the methods that use it.
    """
    self.parser_output_dir = parser_output_dir
    self.message_parser = MessageParser()
def handle(self):
    """Answer one DNS query and optionally print debug information.

    Command-line options are read from ``sys.argv`` on every call:
    ``-d/--debug`` (debug level), ``-c/--cache`` (cache file),
    ``-f/--filename`` (local configuration file) and ``-s/--server``
    (address of the foreign DNS server). ``-h/--help`` is accepted by the
    option parser but has no dedicated branch here.

    ``self.request`` is used as a ``(message, socket)`` pair; the reply built
    by ``MessageParser`` is sent to ``self.client_address`` through that
    socket.

    Raises:
        SystemExit: with status 1, after printing the usage text, when an
            option is unknown or the debug level is not an integer.
    """
    debug_level = 0
    cache_file = 'cache.txt'
    local_file = 'dnsrelay.txt'
    foreign_server = '10.3.9.4'
    help_doc = ('usage: dnsrelay [OPTION]...\n'
                ' -h, --help\t帮助文档\n'
                ' -d, --debug=LEVEL\t调试等级1或2\n'
                ' -c, --cache=PATH\t指定缓存文件路径\n'
                ' -f, --filename=PATH\t指定配置文件路径\n'
                ' -s, --server=IPADDR\t外部DNS服务器的IP地址')

    # Read the command-line arguments.
    try:
        opts, _ = getopt.getopt(
            sys.argv[1:], 'hd:c:f:s:',
            ['help', 'debug=', 'cache=', 'filename=', 'server='])
        for opt, arg in opts:
            if opt in ('-d', '--debug'):
                debug_level = int(arg)
            elif opt in ('-c', '--cache'):
                cache_file = arg
            elif opt in ('-f', '--filename'):
                local_file = arg
            elif opt in ('-s', '--server'):
                foreign_server = arg
    except (getopt.GetoptError, ValueError):
        # ValueError: a non-numeric debug level is a usage error as well.
        print(help_doc)
        sys.exit(1)

    query_msg = self.request[0]   # the received message
    query_sock = self.request[1]  # the socket used to send the reply
    # Parse the query and build the reply message.
    parser = MessageParser(query_msg, cache_file, local_file, foreign_server)

    # Build the debug text.
    debug_info = ''
    if debug_level == 1:
        debug_info = ''.join([
            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            '\tClient: %s' % self.client_address[0],
            '\t%s' % parser.queryMsg['question']['QNAME'],
        ])
    elif debug_level == 2:
        debug_info = ''.join([
            '****************************FROM****************************\n',
            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            '\tClient: %s\n' % self.client_address[0],
            '****************************QUERY***************************\n',
            'MESSAGE: %s\n' % parser.msg,
            '***************************HEADER***************************\n',
            # NOTE(review): the query header prints the ID taken from the
            # response message — confirm this is intended.
            'ID: %s\n' % parser.respMsg['header']['ID'],
            'QDCOUNT: %d\t' % parser.queryMsg['header']['QDCOUNT'],
            'ANCOUNT: %d\t' % parser.queryMsg['header']['ANCOUNT'],
            'NSCOUNT: %d\t' % parser.queryMsg['header']['NSCOUNT'],
            'ARCOUNT: %d\n' % parser.queryMsg['header']['ARCOUNT'],
            '**************************QUESTION**************************\n',
            'QNAME: %s\n' % parser.queryMsg['question']['QNAME'],
            'QTYPE: %d\t' % parser.queryMsg['question']['QTYPE'],
            'QCLASS: %d\n' % parser.queryMsg['question']['QCLASS'],
            '***************************RESPONSE*************************\n',
            'MESSAGE: %s\n' % parser.resp,
            '***************************HEADER***************************\n',
            'ID: %s\n' % parser.respMsg['header']['ID'],
            'QDCOUNT: %d' % parser.respMsg['header']['QDCOUNT'],
            '\tANCOUNT: %d' % parser.respMsg['header']['ANCOUNT'],
            '\tNSCOUNT: %d' % parser.respMsg['header']['NSCOUNT'],
            '\tARCOUNT: %d\n' % parser.respMsg['header']['ARCOUNT'],
            '***************************ANSWER***************************\n',
            'ANAME: %s\n' % parser.respMsg['answer']['ANAME'],
            'ATYPE: %d\t' % parser.respMsg['answer']['ATYPE'],
            'ACLASS: %d\n' % parser.respMsg['answer']['ACLASS'],
            'TTL: %d\n' % parser.respMsg['answer']['TTL'],
            'RDLENGTH: %d\n' % parser.respMsg['answer']['RDLENGTH'],
            'RDATA: %s\n\n\n' % parser.respMsg['answer']['RDATA'],
        ])

    # Debug output is printed only for answers of type 1 or 28.
    if debug_level > 0:
        if parser.respMsg['answer']['ATYPE'] in (1, 28):
            print(debug_info)

    query_sock.sendto(parser.resp, self.client_address)  # send the reply back