Пример #1
0
 def __update_synonym(self, verb, parent_verb, synonym_id):
     """Build an UPDATE for one ``synonyms_dictionary`` row and submit it.

     The row is selected by ``synonym_id``; ``synonym`` is set to ``verb``
     and ``synonym_parent`` to ``parent_verb``. The statement is handed to
     ``self.transaction_bldr``. Any exception is printed and the
     cursor/connection pair is reassigned from ``self.get_cursor()``.
     """
     try:
         # NOTE(review): values are interpolated into the SQL text as-is, so
         # a single quote inside ``verb``/``parent_verb`` breaks the literal.
         assignments = f"synonym = '{verb}', synonym_parent = '{parent_verb}'"
         statement = f"UPDATE synonyms_dictionary SET {assignments} WHERE synonym_id = {synonym_id};"
         print_magenta(f"update => {statement}")
         self.transaction_bldr(statement)
     except Exception as err:
         print_red(f"cannot update message on id {synonym_id}, {err!s}")
         self.cursor, self.connection = self.get_cursor()
Пример #2
0
 def __update_message(self, event_id, text, sender_id, fallback_name,
                      timestamp, conversation_id, event_type):
     """Build an UPDATE for one ``hangouts_chat`` row and submit it.

     The row is selected by ``event_id``. Single quotes in ``text`` are
     replaced with double quotes before the value is embedded in the
     statement, which is then handed to ``self.transaction_bldr``. Any
     exception is printed and the cursor/connection pair is reassigned
     from ``self.get_cursor()``.
     """
     try:
         quoted_text = text.replace("'", '"')
         assignments = ", ".join((
             f"event_id = '{event_id}'",
             f"text = '{quoted_text}'",
             f"sender_id = {sender_id}",
             f"fallback_name = '{fallback_name}'",
             f"timestamp = '{timestamp}'",
             f"conversation_id = '{conversation_id}'",
             f"type = {event_type}",
         ))
         statement = f"UPDATE hangouts_chat SET {assignments} WHERE event_id = '{event_id}';"
         print_magenta(f"update => {statement}")
         self.transaction_bldr(statement)
     except Exception as err:
         print_red(f"cannot update message on id {event_id}, {err!s}")
         self.cursor, self.connection = self.get_cursor()
Пример #3
0
 def __update_message(self, guid, sender_name, timestamp_ms, message_type,
                      content, photos_list):
     """Build an UPDATE for one ``facebook_chat`` row and submit it.

     The row is selected by ``guid``. Single quotes in ``content`` are
     replaced with double quotes before the value is embedded in the
     statement, which is then handed to ``self.transaction_bldr``. Any
     exception is printed and the cursor/connection pair is reassigned
     from ``self.get_cursor()``.
     """
     try:
         quoted_content = content.replace("'", '"')
         assignments = ", ".join((
             f"guid = '{guid}'",
             f"sender_name = '{sender_name}'",
             f"timestamp_ms = {timestamp_ms}",
             f"type = '{message_type}'",
             f"text = '{quoted_content}'",
             f"photos_list = '{photos_list}'",
         ))
         statement = f"UPDATE facebook_chat SET {assignments} WHERE guid = '{guid}';"
         print_magenta(f"update => {statement}")
         self.transaction_bldr(statement)
     except Exception as err:
         print_red(f"cannot update message on id {guid}, {err!s}")
         self.cursor, self.connection = self.get_cursor()
Пример #4
0
 def __update_translation(self, translate, non_translate, language,
                          translate_language, translate_id):
     """Build an UPDATE for one ``translator`` row and submit it.

     The row is selected by ``translate_id``. Single quotes in
     ``translate`` and ``non_translate`` are replaced with double quotes
     before the values are embedded in the statement, which is then handed
     to ``self.transaction_bldr``. Any exception is printed and the
     cursor/connection pair is reassigned from ``self.get_cursor()``.
     """
     try:
         quoted_translate = translate.replace("'", '"')
         quoted_source = non_translate.replace("'", '"')
         assignments = ", ".join((
             f"translate = '{quoted_translate}'",
             f"non_translate = '{quoted_source}'",
             f"language = '{language}'",
             f"translate_language = '{translate_language}'",
         ))
         statement = f"UPDATE translator SET {assignments} WHERE translate_id = {translate_id};"
         print_magenta(f"update => {statement}")
         self.transaction_bldr(statement)
     except Exception as err:
         print_red(f"cannot update message on id {translate_id}, {err!s}")
         self.cursor, self.connection = self.get_cursor()
Пример #5
0
 def decompress_folder(self, compress_folder):
     """Decompress every file in ``compress_folder`` into one DataFrame.

     Each path returned by ``self._get_dir_files(compress_folder)`` is
     passed through ``self.decompress_file(file, ".json")`` and
     ``self.json_to_csv(..., True)``; the second element of the latter's
     result is taken as that file's DataFrame. Files that raise are
     reported with ``print_red`` and skipped.

     Returns:
         The per-file frames stacked row-wise with a fresh index, or an
         empty ``DataFrame`` when no file could be processed.
     """
     parts = []
     for file in self._get_dir_files(compress_folder):
         try:
             j = self.decompress_file(file, ".json")
             _csv, part_dataset = self.json_to_csv(j, True)
         except Exception as e:
             print_red(f"cannot decompress file {file}, {e}")
         else:
             parts.append(part_dataset)
     # ``DataFrame.join`` returns a new frame, so the previous
     # ``dataset.join(part)`` call discarded every part; collect and
     # concatenate instead. ``pd.concat`` rejects an empty list.
     if not parts:
         return pd.DataFrame()
     return pd.concat(parts, ignore_index=True)
Пример #6
0
 def __set_synonym(self, verb, parent_verb):
     """Build an INSERT for ``synonyms_dictionary`` and submit it.

     ``verb`` is written to ``synonym`` and ``parent_verb`` to
     ``synonym_parent``; the statement is handed to
     ``self.transaction_bldr``. Any exception is printed and the
     cursor/connection pair is reassigned from ``self.get_cursor()``.
     """
     try:
         # NOTE(review): values are embedded between double quotes without
         # any escaping, so a double quote in either value breaks the SQL.
         statement = f'INSERT INTO synonyms_dictionary(synonym, synonym_parent) VALUES ("{verb}","{parent_verb}")'
         print_cyan(f"set => {statement}")
         self.transaction_bldr(statement)
     except Exception as err:
         print_red(f"cannot update message on id {verb}, {err!s}")
         self.cursor, self.connection = self.get_cursor()
Пример #7
0
 def insert_or_replace_comment(self, comment_id, parent_id, parent, comment,
                               subreddit, time, score):
     """Build an UPDATE for the ``reddit_comments`` row of ``parent_id``.

     All columns of the row whose ``parent_id`` matches are overwritten
     with the given values (``time`` is stored as ``int(time)`` in the
     ``unix`` column). The statement is handed to
     ``self.transaction_bldr``. Any exception is printed and the
     cursor/connection pair is reassigned from ``self.get_cursor()``.
     """
     try:
         # The previous template used ``?`` placeholders, which
         # ``str.format`` ignores, so no value was ever substituted. Use
         # ``"{}"`` placeholders, the same quoting ``insert_parent`` uses.
         # NOTE(review): values are not escaped; this relies on callers
         # having removed double quotes from the text beforehand.
         query = (
             'UPDATE reddit_comments SET parent_id = "{}", comment_id = "{}", '
             'parent = "{}", comment = "{}", subreddit = "{}", unix = "{}", '
             'score = "{}" WHERE parent_id = "{}";'
         ).format(parent_id, comment_id, parent, comment, subreddit,
                  int(time), score, parent_id)
         print_magenta(f"update => {query}")
         self.transaction_bldr(query)
     except Exception as e:
         print_red(f"cannot update comment on id {comment_id}, {str(e)}")
         self.cursor, self.connection = self.get_cursor()
Пример #8
0
 def __set_translation(self, translate, non_translate, language,
                       translate_language):
     """Build an INSERT for the ``translator`` table and submit it.

     Double quotes in ``translate`` and ``non_translate`` are replaced with
     single quotes because every value is embedded between double quotes.
     The statement is handed to ``self.transaction_bldr``. Any exception is
     printed and the cursor/connection pair is reassigned from
     ``self.get_cursor()``.
     """
     try:
         translate = translate.replace('"', "'")
         non_translate = non_translate.replace('"', "'")
         statement = (
             "INSERT INTO translator(translate, non_translate, language, translate_language) "
             f'VALUES ("{translate}","{non_translate}","{language}","{translate_language}")'
         )
         print_cyan(f"set => {statement}")
         self.transaction_bldr(statement)
     except Exception as err:
         print_red(f"cannot update message on id {translate}, {err!s}")
         self.cursor, self.connection = self.get_cursor()
Пример #9
0
 def __set_message(self, guid, sender_name, timestamp_ms, message_type,
                   content, photos_list):
     """Build an INSERT for the ``facebook_chat`` table and submit it.

     Values are written positionally in the order guid, content,
     sender_name, timestamp_ms, message_type, photos_list. Double quotes in
     ``content`` are replaced with single quotes because every value is
     embedded between double quotes. The statement is handed to
     ``self.transaction_bldr``. Any exception is printed and the
     cursor/connection pair is reassigned from ``self.get_cursor()``.
     """
     try:
         quoted_content = content.replace('"', "'")
         fields = (guid, quoted_content, sender_name, timestamp_ms,
                   message_type, photos_list)
         rendered = ",".join(f'"{field}"' for field in fields)
         statement = f"INSERT INTO facebook_chat VALUES ({rendered})"
         print_cyan(f"set => {statement}")
         self.transaction_bldr(statement)
     except Exception as err:
         print_red(f"cannot update message on id {guid}, {err!s}")
         self.cursor, self.connection = self.get_cursor()
Пример #10
0
 def __set_message(self, event_id, text, sender_id, fallback_name,
                   timestamp, conversation_id, event_type):
     """Build an INSERT for the ``hangouts_chat`` table and submit it.

     Values are written positionally in the order event_id, text,
     int(sender_id), fallback_name, int(timestamp), conversation_id,
     event_type. Double quotes in ``text`` are replaced with single quotes
     because every value is embedded between double quotes. The statement
     is handed to ``self.transaction_bldr``. Any exception is printed and
     the cursor/connection pair is reassigned from ``self.get_cursor()``.
     """
     try:
         quoted_text = text.replace('"', "'")
         fields = (event_id, quoted_text, int(sender_id), fallback_name,
                   int(timestamp), conversation_id, event_type)
         rendered = ",".join(f'"{field}"' for field in fields)
         statement = f"INSERT INTO hangouts_chat VALUES ({rendered})"
         print_cyan(f"set => {statement}")
         self.transaction_bldr(statement)
     except Exception as err:
         print_red(f"cannot update message on id {event_id}, {err!s}")
         self.cursor, self.connection = self.get_cursor()
Пример #11
0
 def __update_message(self, question_id, question, document_id, document_title,
                      sentence_id, sentence, label):
     """Build an UPDATE for one ``wiki_questions_and_answers`` row.

     The row is selected by ``document_id``, ``question`` and ``sentence``.
     Single quotes in string ``question``/``sentence`` values are replaced
     with double quotes because the values are embedded between single
     quotes. The statement is handed to ``self.transaction_bldr``. Any
     exception is printed and the cursor/connection pair is reassigned
     from ``self.get_cursor()``.
     """
     try:
         if isinstance(question, str):
             question = question.replace("'", '"')
         if isinstance(sentence, str):
             # Was ``replace('"', '"')``, a no-op that left apostrophes in
             # place and broke the single-quoted literal.
             sentence = sentence.replace("'", '"')
         # The WHERE clause used to reference an undefined ``answer`` name
         # (NameError on every call); this table's text column is
         # ``sentence``.
         # NOTE(review): the filter compares against the quote-replaced
         # text - confirm this matches how rows are stored.
         query = f"UPDATE wiki_questions_and_answers SET question_id = '{question_id}', question = '{question}', document_id = '{document_id}', document_title = '{document_title}', sentence_id = '{sentence_id}', sentence = '{sentence}', label = '{label}' WHERE document_id = '{document_id}' AND question = '{question}' AND sentence = '{sentence}';"
         print_magenta(f"update => {query}")
         self.transaction_bldr(query)
     except Exception as e:
         print_red(f"cannot update message on id {document_id}, {str(e)}")
         self.cursor, self.connection = self.get_cursor()
Пример #12
0
 def __update_message(self, article_title, question, answer,
                      difficulty_from_questioner, difficulty_from_answerer,
                      article_file):
     """Build an UPDATE for one ``questions_and_answers`` row.

     The row is selected by ``article_title``, ``question`` and ``answer``.
     Single quotes in string ``question``/``answer`` values are replaced
     with double quotes because the values are embedded between single
     quotes. The statement is handed to ``self.transaction_bldr``. Any
     exception is printed and the cursor/connection pair is reassigned
     from ``self.get_cursor()``.
     """
     try:
         if isinstance(question, str):
             question = question.replace("'", '"')
         if isinstance(answer, str):
             # Was ``replace('"', '"')``, a no-op: an apostrophe in the
             # answer terminated the single-quoted SQL literal early.
             answer = answer.replace("'", '"')
         query = f"UPDATE questions_and_answers SET article_title = '{article_title}', question = '{question}', answer = '{answer}', difficulty_from_questioner = '{difficulty_from_questioner}', difficulty_from_answerer = '{difficulty_from_answerer}', article_file = '{article_file}' WHERE article_title = '{article_title}' AND question = '{question}' AND answer = '{answer}';"
         print_magenta(f"update => {query}")
         self.transaction_bldr(query)
     except Exception as e:
         print_red(f"cannot update message on id {article_title}, {str(e)}")
         self.cursor, self.connection = self.get_cursor()
Пример #13
0
 def __set_message(self, question_id, question, document_id, document_title,
                   sentence_id, sentence, label):
     """Build an INSERT for ``wiki_questions_and_answers`` and submit it.

     Values are written positionally in parameter order. When ``question``
     or ``sentence`` is exactly a ``str``, its double quotes are replaced
     with single quotes because every value is embedded between double
     quotes. The statement is handed to ``self.transaction_bldr``. Any
     exception is printed and the cursor/connection pair is reassigned
     from ``self.get_cursor()``.
     """
     try:
         clean_question = question.replace('"', "'") if type(question) is str else question
         clean_sentence = sentence.replace('"', "'") if type(sentence) is str else sentence
         fields = (question_id, clean_question, document_id, document_title,
                   sentence_id, clean_sentence, label)
         rendered = ",".join(f'"{field}"' for field in fields)
         statement = f"INSERT INTO wiki_questions_and_answers VALUES ({rendered})"
         print_cyan(f"set => {statement}")
         self.transaction_bldr(statement)
     except Exception as err:
         print_red(f"cannot update message on id {document_id}, {err!s}")
         self.cursor, self.connection = self.get_cursor()
Пример #14
0
 def __find_message(self, pid):
     """Return the ``text`` of the ``imessage_chat`` row whose guid is ``pid``.

     Returns:
         The first column of the first matching row, or ``False`` when no
         row matches or the lookup raises. On an exception the error is
         printed and the cursor/connection pair is reassigned from
         ``self.get_cursor()``.
     """
     try:
         # Double embedded single quotes so the value cannot terminate the
         # SQL string literal early (syntax error or injection).
         guid_literal = str(pid).replace("'", "''")
         query = f"SELECT text FROM imessage_chat WHERE guid = '{guid_literal}' LIMIT 1"
         if self.cursor is None:
             self.cursor, self.connection = self.get_cursor()
         self.cursor.execute(query)
         row = self.cursor.fetchone()
         return row[0] if row is not None else False
     except Exception as e:
         print_red(f"cannot find message {str(e)}")
         self.cursor, self.connection = self.get_cursor()
         return False
Пример #15
0
    def __update_message(self, ROWID, guid, text, handle_id, service, account,
                         date, date_read, date_delivered, is_delivered,
                         is_finished, is_emote, is_from_me, is_empty,
                         is_delayed, is_auto_reply, is_prepared, is_read,
                         is_system_message, is_sent, has_dd_results,
                         cache_has_attachments, item_type, group_title,
                         is_expirable, message_source, ck_record_id,
                         destination_caller, is_spam):
        """Build an UPDATE for one ``imessage_chat`` row and submit it.

        The row is selected by ``guid``. Single quotes in ``text`` are
        replaced with double quotes; numeric and flag columns are coerced
        with ``int()`` while the statement is assembled. The statement is
        handed to ``self.transaction_bldr``. Any exception is printed and
        the cursor/connection pair is reassigned from ``self.get_cursor()``.
        """
        try:
            quoted_text = text.replace("'", '"')
            # One entry per column, in the order they appear in the SET list.
            assignments = ", ".join((
                f"ROWID = {ROWID}",
                f"guid = '{guid}'",
                f"text = '{quoted_text}'",
                f"handle_id = {int(handle_id)}",
                f"`date` = {int(date)}",
                f"date_read = {int(date_read)}",
                f"date_delivered = {int(date_delivered)}",
                f"is_delivered = {int(is_delivered)}",
                f"is_finished = {int(is_finished)}",
                f"is_emote = {int(is_emote)}",
                f"is_from_me = {int(is_from_me)}",
                f"is_empty = {int(is_empty)}",
                f"is_delayed = {int(is_delayed)}",
                f"is_auto_reply = {int(is_auto_reply)}",
                f"is_prepared = {int(is_prepared)}",
                f"is_read = {int(is_read)}",
                f"is_system_message = {int(is_system_message)}",
                f"is_sent = {int(is_sent)}",
                f"has_dd_results = {int(has_dd_results)}",
                f"is_spam = {int(is_spam)}",
                f"cache_has_attachments = {int(cache_has_attachments)}",
                f"item_type = {int(item_type)}",
                f"group_title = '{group_title}'",
                f"is_expirable = {int(is_expirable)}",
                f"message_source = {int(message_source)}",
                f"destination_caller_id = '{destination_caller}'",
                f"ck_record_id = '{ck_record_id}'",
                f"account = '{account}'",
                f"service = '{service}'",
            ))
            statement = f"UPDATE imessage_chat SET {assignments} WHERE guid = '{guid}';"

            print_magenta(f"update => {statement}")
            self.transaction_bldr(statement)
        except Exception as err:
            print_red(f"cannot update message on id {guid}, {err!s}")
            self.cursor, self.connection = self.get_cursor()
Пример #16
0
 def __set_message(self, article_title, question, answer,
                   difficulty_from_questioner, difficulty_from_answerer,
                   article_file):
     """Build an INSERT for ``questions_and_answers`` and submit it.

     Values are written positionally in parameter order. When ``question``
     or ``answer`` is exactly a ``str``, its double quotes are replaced
     with single quotes because every value is embedded between double
     quotes. The statement is handed to ``self.transaction_bldr``. Any
     exception is printed and the cursor/connection pair is reassigned
     from ``self.get_cursor()``.
     """
     try:
         clean_question = question.replace('"', "'") if type(question) is str else question
         clean_answer = answer.replace('"', "'") if type(answer) is str else answer
         fields = (article_title, clean_question, clean_answer,
                   difficulty_from_questioner, difficulty_from_answerer,
                   article_file)
         rendered = ",".join(f'"{field}"' for field in fields)
         statement = f"INSERT INTO questions_and_answers VALUES ({rendered})"
         print_cyan(f"set => {statement}")
         self.transaction_bldr(statement)
     except Exception as err:
         print_red(f"cannot update message on id {article_title}, {err!s}")
         self.cursor, self.connection = self.get_cursor()
Пример #17
0
 def __find_message(self, article_name, question, answer):
     """Look up a ``questions_and_answers`` row by title, question and answer.

     Returns:
         The ``answer`` column (first selected column) of the first
         matching row, or ``False`` when no row matches or the lookup
         raises. On an exception the error is printed and the
         cursor/connection pair is reassigned from ``self.get_cursor()``.
     """
     try:
         # Questions and answers are free text; double embedded single
         # quotes so an apostrophe cannot terminate the SQL literal early.
         title_literal, question_literal, answer_literal = (
             str(value).replace("'", "''")
             for value in (article_name, question, answer))
         query = (
             "SELECT answer, question FROM questions_and_answers "
             f"WHERE article_title = '{title_literal}' "
             f"AND question = '{question_literal}' "
             f"AND answer = '{answer_literal}' LIMIT 1"
         )
         if self.cursor is None:
             self.cursor, self.connection = self.get_cursor()
         self.cursor.execute(query)
         row = self.cursor.fetchone()
         return row[0] if row is not None else False
     except Exception as e:
         print_red(f"cannot find message {str(e)}")
         self.cursor, self.connection = self.get_cursor()
         return False
Пример #18
0
 def download_dataset(self, url=dataset_source_url):
     """Download every dataset listed for ``url`` into ``data/packed/``.

     The names come from ``self.__get_dataset(url)``. A dataset whose
     target path already exists is reported and skipped; otherwise it is
     fetched from ``url`` + name with ``report_hook`` as the progress
     callback. Failures are printed per dataset and do not stop the loop.
     """
     for dataset in self.__get_dataset(url):
         destination = f"data/packed/{dataset}"
         target = Path(destination)
         try:
             if target.exists():
                 print_red(f"{dataset} already exists!")
                 continue
             print_blue(f"downloading file {dataset}")
             urllib2.urlretrieve(f"{url}{dataset}",
                                 destination,
                                 reporthook=report_hook)
             print_green(f"{dataset} file downloaded successfully")
         except Exception as err:
             print_red(
                 f"cannot download data from url {url}{dataset}, {err!s}")
Пример #19
0
 def __find_synonym(self, verb):
     """Return the ``synonym_id`` of the ``synonyms_dictionary`` row for ``verb``.

     Returns:
         The first column of the first matching row, or ``False`` when no
         row matches or the lookup raises. On an exception the error is
         printed and the cursor/connection pair is reassigned from
         ``self.get_cursor()``.
     """
     try:
         # Words such as "don't" contain an apostrophe; double it so the
         # value cannot terminate the SQL string literal early.
         verb_literal = str(verb).replace("'", "''")
         query = f"SELECT synonym_id FROM synonyms_dictionary WHERE synonym = '{verb_literal}' LIMIT 1"
         if self.cursor is None:
             self.cursor, self.connection = self.get_cursor()
         self.cursor.execute(query)
         row = self.cursor.fetchone()
         return row[0] if row is not None else False
     except Exception as e:
         print_red(f"cannot find synonym {str(e)}")
         self.cursor, self.connection = self.get_cursor()
         return False
Пример #20
0
 def __find_message(self, document_id):
     """Return the ``sentence`` of a ``wiki_questions_and_answers`` row.

     The row is looked up by ``document_id``.

     Returns:
         The first column of the first matching row, or ``False`` when no
         row matches or the lookup raises. On an exception the error is
         printed and the cursor/connection pair is reassigned from
         ``self.get_cursor()``.
     """
     try:
         # Double embedded single quotes so the value cannot terminate the
         # SQL string literal early (syntax error or injection).
         id_literal = str(document_id).replace("'", "''")
         query = f"SELECT sentence FROM wiki_questions_and_answers WHERE document_id = '{id_literal}' LIMIT 1"
         if self.cursor is None:
             self.cursor, self.connection = self.get_cursor()
         self.cursor.execute(query)
         row = self.cursor.fetchone()
         return row[0] if row is not None else False
     except Exception as e:
         print_red(f"cannot find message {str(e)}")
         self.cursor, self.connection = self.get_cursor()
         return False
Пример #21
0
 def __find_parent(self, pid):
     """Return the ``comment`` text of the ``reddit_comments`` row with id ``pid``.

     The row is looked up by its ``comment_id`` column.

     Returns:
         The first column of the first matching row, or ``False`` when no
         row matches or the lookup raises. On an exception the error is
         printed and the cursor/connection pair is reassigned from
         ``self.get_cursor()``.
     """
     try:
         # Double embedded single quotes so the value cannot terminate the
         # SQL string literal early (syntax error or injection).
         id_literal = str(pid).replace("'", "''")
         query = f"SELECT comment FROM reddit_comments WHERE comment_id = '{id_literal}' LIMIT 1"
         if self.cursor is None:
             self.cursor, self.connection = self.get_cursor()
         self.cursor.execute(query)
         row = self.cursor.fetchone()
         return row[0] if row is not None else False
     except Exception as e:
         print_red(f"cannot find parent {str(e)}")
         self.cursor, self.connection = self.get_cursor()
         return False
Пример #22
0
 def insert_parent(self, has_parent, parent_id, comment_id, parent, comment,
                   subreddit, time, score):
     """Build an INSERT for the ``reddit_comments`` table and submit it.

     When ``has_parent`` is truthy the ``parent`` column is included;
     otherwise it is left out of the column list. ``time`` is stored as
     ``int(time)`` in the ``unix`` column. The statement is handed to
     ``self.transaction_bldr``. Any exception is printed and the
     cursor/connection pair is reassigned from ``self.get_cursor()``.
     """
     try:
         if has_parent:
             columns = ("parent_id", "comment_id", "parent", "comment",
                        "subreddit", "unix", "score")
             values = (parent_id, comment_id, parent, comment, subreddit,
                       int(time), score)
         else:
             columns = ("parent_id", "comment_id", "comment", "subreddit",
                        "unix", "score")
             values = (parent_id, comment_id, comment, subreddit, int(time),
                       score)
         column_list = ", ".join(columns)
         value_list = ", ".join(f'"{value}"' for value in values)
         statement = f"INSERT INTO reddit_comments ({column_list}) VALUES ({value_list})"
         print_cyan(f"insert => {statement}")
         self.transaction_bldr(statement)
     except Exception as err:
         print_red(
             f"cannot insert parent comment of id {comment_id}, {err!s}")
         self.cursor, self.connection = self.get_cursor()
Пример #23
0
 def __find_translation(self, translate, non_translate):
     """Return the ``translate_id`` of a matching ``translator`` row.

     Single quotes in both arguments are replaced with double quotes
     before they are embedded in the query.

     Returns:
         The first column of the first matching row, or ``False`` when no
         row matches or the lookup raises. On an exception the error is
         printed and the cursor/connection pair is reassigned from
         ``self.get_cursor()``.
     """
     try:
         translated = translate.replace("'", '"')
         untranslated = non_translate.replace("'", '"')
         statement = (
             "SELECT translate_id FROM translator "
             f"WHERE translate = '{translated}' AND non_translate = '{untranslated}' LIMIT 1"
         )
         if self.cursor is None:
             self.cursor, self.connection = self.get_cursor()
         self.cursor.execute(statement)
         row = self.cursor.fetchone()
         return False if row is None else row[0]
     except Exception as err:
         print_red(f"cannot find synonym {err!s}")
         self.cursor, self.connection = self.get_cursor()
         return False
Пример #24
0
 def __set_message(self, ROWID, guid, text, handle_id, service, account,
                   date, date_read, date_delivered, is_delivered,
                   is_finished, is_emote, is_from_me, is_empty, is_delayed,
                   is_auto_reply, is_prepared, is_read, is_system_message,
                   is_sent, has_dd_results, cache_has_attachments, item_type,
                   group_title, is_expirable, message_source, ck_record_id,
                   destination_caller, is_spam):
     """Build an INSERT for the ``imessage_chat`` table and submit it.

     Values are written positionally: guid, text, the integer-coerced
     columns, group_title, two more integer columns, destination_caller,
     ck_record_id, service, account and finally ROWID. Double quotes in
     ``text`` are replaced with single quotes because every value is
     embedded between double quotes. The statement is handed to
     ``self.transaction_bldr``. Any exception is printed and the
     cursor/connection pair is reassigned from ``self.get_cursor()``.
     """
     insert_template = """INSERT INTO imessage_chat VALUES ("{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}")"""
     try:
         quoted_text = text.replace('"', "'")
         # Contiguous run of columns that are coerced with int(), in the
         # order the table expects them.
         leading_ints = [int(value) for value in (
             handle_id, date, date_read, date_delivered, is_delivered,
             is_finished, is_emote, is_from_me, is_empty, is_delayed,
             is_auto_reply, is_prepared, is_read, is_system_message, is_sent,
             has_dd_results, is_spam, cache_has_attachments, item_type)]
         row = (guid, quoted_text, *leading_ints, group_title,
                int(is_expirable), int(message_source), destination_caller,
                ck_record_id, service, account, ROWID)
         statement = insert_template.format(*row)
         print_cyan(f"set => {statement}")
         self.transaction_bldr(statement)
     except Exception as err:
         print_red(f"cannot update message on id {guid}, {err!s}")
         self.cursor, self.connection = self.get_cursor()
Пример #25
0
    def set_values_to_db(self):
        """Read line-delimited JSON files and store qualifying comments.

        Every entry returned by ``self._get_dir_files(self.destination_folder)``
        with a ``.json`` suffix is opened and parsed one line at a time;
        other entries are reported and skipped. A row is considered for
        storage only when it has a comment id and its ``score`` is at
        least 2:

        * if ``self.__find_score(parent_id)`` returns a truthy score, the
          row is written with ``insert_or_replace_comment`` when its own
          score is higher and ``self.__acceptable(body)`` is truthy;
        * otherwise, when ``self.__acceptable(body)`` is truthy, it is
          written with ``insert_parent``, flagged as having a parent when
          ``self.__find_parent(parent_id)`` returned a truthy value.

        ``display_rows`` and ``clean_rows`` are called after every line.
        """
        row_counter = 0
        paired_rows = 0
        f_list = self._get_dir_files(self.destination_folder)
        dir_path = Path(__file__).parent.parent.parent.parent
        for f_name in f_list:
            if f_name.suffix != '.json':
                print_red(f"file of name {f_name} is not a json file")
                continue
            file = f"{dir_path}/{f_name}"
            print_blue(file)

            with open(file, buffering=1000) as f:
                for data in f:
                    element = json.loads(data)
                    row_counter += 1
                    print_blue(element)
                    parent_id = element['parent_id']
                    body = self.__format_data(element['body'])
                    created_utc = element['created_utc']
                    score = element['score']
                    # Reset per row so a row lacking both keys can never
                    # reuse the previous row's id. 'id' is read last and
                    # therefore wins when both keys are present.
                    comment_id = None
                    for key in ('name', 'id'):
                        try:
                            comment_id = element[key]
                        except KeyError as e:
                            print_yellow(
                                f"comment id by {key} do not exists, {e}")
                    subreddit = element['subreddit']
                    parent_data = self.__find_parent(parent_id)
                    print_green(
                        f"parent_id => {parent_id}, body => {body}, created_utc => {created_utc}, comment_id => {comment_id}, subreddit => {subreddit}, parent_data => {parent_data}"
                    )
                    # Rows without any id cannot be stored meaningfully.
                    if comment_id is not None and score >= 2:
                        comment_score = self.__find_score(parent_id)
                        if comment_score:
                            if score > comment_score and self.__acceptable(body):
                                self.insert_or_replace_comment(
                                    comment_id, parent_id, parent_data,
                                    body, subreddit, created_utc,
                                    score)
                        elif self.__acceptable(body):
                            # insert_parent takes (has_parent, parent_id,
                            # comment_id, ...); the ids used to be passed
                            # positionally in the opposite order, so pass
                            # them by keyword.
                            if parent_data:
                                self.insert_parent(
                                    True, parent_id=parent_id,
                                    comment_id=comment_id,
                                    parent=parent_data, comment=body,
                                    subreddit=subreddit, time=created_utc,
                                    score=score)
                                paired_rows += 1
                            else:
                                self.insert_parent(
                                    False, parent_id=parent_id,
                                    comment_id=comment_id, parent=None,
                                    comment=body, subreddit=subreddit,
                                    time=created_utc, score=score)
                    self.display_rows(row_counter, data, paired_rows)
                    self.clean_rows(row_counter, data)