Exemplo n.º 1
0
 def run(self):
     """Index every file found under the read source folder.

     Walks ``self.read_source.folder`` and stores one ``File`` record per
     file: size, file name, path relative to the source folder and the
     creation/modify/access timestamps.  When ``self.calculate_hash`` is
     set, the SHA-256 and MD5 digests are stored as well.  Everything is
     committed once at the end.

     The top-level ``.report`` directory is skipped, together with
     everything below it.  The original test ``root == '.report'`` only
     matched when the walked folder itself was literally ``'.report'``,
     because ``os.walk`` yields directory paths prefixed with the folder
     being walked.
     """
     folder = self.read_source.folder

     def walk_files():
         # Yield (directory, file name) pairs, pruning the report folder.
         for root, dirs, files in os.walk(folder):
             if os.path.relpath(root, folder) == '.report':
                 dirs[:] = []  # do not descend into the report tree
                 continue
             for name in files:
                 yield root, name

     def timestamp_or_none(getter, path):
         # A timestamp that cannot be read or converted is stored as None.
         try:
             return datetime.fromtimestamp(getter(path))
         except (OSError, OverflowError, ValueError):
             return None

     # First pass only counts, so progress can be reported against a total
     # without keeping every path in memory.
     n_files = sum(1 for _ in walk_files())
     i = 0
     for root, f in walk_files():
         path = os.path.join(root, f)
         file_ = File()
         file_.size = os.path.getsize(path)
         file_.filename = f
         file_.extracted_path = os.path.relpath(path, folder)
         file_.creation_time = timestamp_or_none(os.path.getctime, path)
         file_.modify_time = timestamp_or_none(os.path.getmtime, path)
         file_.access_time = timestamp_or_none(os.path.getatime, path)
         if self.calculate_hash:
             file_.sha256 = self.sha256(path)
             file_.md5 = self.md5(path)
         file_.deleted_state = 'Intact'
         self.add(file_)
         i += 1
         progress(i, n_files)
     self.commit()
Exemplo n.º 2
0
    def parse_user_acccounts(self):
        """Import every ``UserAccount`` model found in the XML report.

        For each model the ``deleted_state`` attribute and the text of the
        ``Name``, ``Username``, ``Password`` and ``ServiceType`` fields are
        copied onto a new ``UserAccount``.  A missing field or value is
        stored as ``None``.  Nothing is printed or committed when the
        report holds no such model.
        """
        ns = self.namespace

        def text_of(element, name):
            # Text of <field name=...>/<value>, or None if either is absent.
            # The field is compared with None explicitly: truth-testing an
            # Element depends on its child count and is deprecated.
            field = element.find(f"{ns}field[@name='{name}']")
            value = field.find(f"{ns}value") if field is not None else None
            return value.text if value is not None else None

        user_accounts_el = self.root.findall(
            f".//{ns}model[@type='UserAccount']")
        if not user_accounts_el:
            return

        n = len(user_accounts_el)
        print("\nLendo contas de usuário")
        for i, user_account_el in enumerate(user_accounts_el):
            progress(i, n)
            user_account = UserAccount()
            user_account.deleted_state = user_account_el.attrib[
                'deleted_state']
            user_account.name = text_of(user_account_el, 'Name')
            user_account.username = text_of(user_account_el, 'Username')
            user_account.password = text_of(user_account_el, 'Password')
            user_account.service_type = text_of(user_account_el,
                                                'ServiceType')
            self.add(user_account)
        self.commit()
Exemplo n.º 3
0
    def run(self):
        """Read every chat file in parallel.

        The file names returned by ``self.getChatsFilename()`` are stored in
        ``self.lista`` and each one is handed to ``chat_worker`` through a
        pool of ``config_manager.data['n_workers']`` processes.  Progress is
        reported as results arrive, in completion order.

        If iterating the results raises, the pool is terminated before the
        exception propagates; previously its workers were left running.
        """
        self.lista = self.getChatsFilename()
        n = len(self.lista)
        print("Lendo chats")

        # One job description per chat file, built lazily as the pool
        # consumes them.
        procs = ({
            'read_source_id': self.read_source.id,
            'exp': self.exp,
            'chats_path': self.chats_path,
            'att_path': self.att_path,
            'filename': f
        } for f in self.lista)

        pool = Pool(processes=config_manager.data['n_workers'])
        try:
            for i, _ in enumerate(pool.imap_unordered(chat_worker, procs)):
                progress(i, n)
        except BaseException:
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
Exemplo n.º 4
0
    def parse_sms(self):
        """Import the SMS models of the XML report, with their parties.

        Each child of the ``SMS`` model type becomes an ``Sms`` carrying its
        ``deleted_state`` attribute, parsed ``TimeStamp`` and the text of
        the ``Folder``, ``Status`` and ``Body`` fields.  Every entry of the
        ``Parties`` multi-model field becomes an ``SmsPart`` attached to the
        message.  Missing fields are stored as ``None``.  Nothing is printed
        or committed when the model type is absent or has no children.
        """
        ns = self.namespace

        def value_of(element, name):
            # <value> child of the named <field>, or None if either is
            # absent.  Compared with None explicitly: truth-testing an
            # Element depends on its child count and is deprecated.
            field = element.find(f"{ns}field[@name='{name}']")
            return field.find(f"{ns}value") if field is not None else None

        def text_of(element, name):
            value = value_of(element, name)
            return value.text if value is not None else None

        smss_el = self.root.find(f".//{ns}modelType[@type='SMS']")
        if smss_el is None or len(smss_el) == 0:
            return

        n = len(smss_el)
        print("\nLendo SMS")
        for i, sms_el in enumerate(smss_el):
            progress(i, n)
            sms = Sms()
            sms.deleted_state = sms_el.attrib['deleted_state']

            timestamp = value_of(sms_el, 'TimeStamp')
            sms.timestamp = (parser.parse(timestamp.text)
                             if timestamp is not None else None)
            sms.folder = text_of(sms_el, 'Folder')
            sms.status = text_of(sms_el, 'Status')
            sms.body = text_of(sms_el, 'Body')

            parts_el = sms_el.find(f"{ns}multiModelField[@name='Parties']")
            if parts_el is not None:
                for part_el in parts_el:
                    part = SmsPart()
                    part.deleted_state = part_el.attrib['deleted_state']
                    part.identifier = text_of(part_el, 'Identifier')
                    part.name = text_of(part_el, 'Name')
                    part.role = text_of(part_el, 'Role')
                    self.add(part)
                    sms.parties.append(part)
            self.add(sms)
        self.commit()
Exemplo n.º 5
0
    def parse_call(self):
        """Import the call models of the XML report, with their parties.

        Each child of the ``Call`` model type becomes a ``Call`` carrying
        its ``deleted_state`` attribute, the ``Type`` text, the parsed
        ``TimeStamp`` and the ``Duration`` converted by ``duration_parse``.
        Every entry of the ``Parties`` multi-model field becomes a
        ``CallPart`` attached to the call.  Missing fields are stored as
        ``None``.  Nothing is printed or committed when the model type is
        absent or has no children.
        """
        ns = self.namespace

        def value_of(element, name):
            # <value> child of the named <field>, or None if either is
            # absent.  Compared with None explicitly: truth-testing an
            # Element depends on its child count and is deprecated.
            field = element.find(f"{ns}field[@name='{name}']")
            return field.find(f"{ns}value") if field is not None else None

        def text_of(element, name):
            value = value_of(element, name)
            return value.text if value is not None else None

        calls_el = self.root.find(f".//{ns}modelType[@type='Call']")
        if calls_el is None or len(calls_el) == 0:
            return

        n = len(calls_el)
        print("\nLendo chamadas")
        for i, call_el in enumerate(calls_el):
            progress(i, n)
            call = Call()
            call.deleted_state = call_el.attrib['deleted_state']
            call.type_ = text_of(call_el, 'Type')

            timestamp = value_of(call_el, 'TimeStamp')
            call.timestamp = (parser.parse(timestamp.text)
                              if timestamp is not None else None)

            duration = value_of(call_el, 'Duration')
            call.duration = (duration_parse(duration.text)
                             if duration is not None else None)

            parts_el = call_el.find(f"{ns}multiModelField[@name='Parties']")
            if parts_el is not None:
                for part_el in parts_el:
                    part = CallPart()
                    part.deleted_state = part_el.attrib['deleted_state']
                    part.identifier = text_of(part_el, 'Identifier')
                    part.name = text_of(part_el, 'Name')
                    part.role = text_of(part_el, 'Role')
                    self.add(part)
                    call.parties.append(part)
            self.add(call)
        self.commit()
Exemplo n.º 6
0
    def parse_files(self):
        """Import the ``file`` entries listed under ``taggedFiles``.

        Each entry becomes a ``File`` populated from its ``path`` and
        ``size`` attributes, its three named timestamps and the named
        ``item`` nodes (local path, hashes, tags, content type).  When the
        ``deleted`` attribute is missing the state is stored as
        ``"Unknow"``.  Missing nodes are stored as ``None``.  Nothing is
        printed or committed when there are no entries.
        """
        ns = self.namespace

        def date_of(file_el, name):
            node = file_el.find(f".//{ns}timestamp[@name='{name}']")
            return parser.parse(node.text) if node is not None else None

        def item_of(file_el, name):
            node = file_el.find(f".//{ns}item[@name='{name}']")
            return node.text if node is not None else None

        tagged_fiels_el = self.root.find(f".//{ns}taggedFiles")
        # Compared with None explicitly: truth-testing an Element depends on
        # its child count and is deprecated.
        files_el = (tagged_fiels_el.findall(f"{ns}file")
                    if tagged_fiels_el is not None else None)
        if not files_el:
            return

        n = len(files_el)
        print("\nLendo arquivos")
        for i, file_el in enumerate(files_el):
            progress(i, n)
            file_ = File()

            # Only a missing attribute is tolerated here; the former bare
            # ``except`` hid every other error as well.
            file_.deleted_state = file_el.attrib.get('deleted', "Unknow")
            file_.original_path = file_el.attrib['path']
            file_.size = file_el.attrib['size']

            file_.creation_time = date_of(file_el, 'CreationTime')
            file_.modify_time = date_of(file_el, 'ModifyTime')
            file_.access_time = date_of(file_el, 'AccessTime')

            file_.extracted_path = item_of(file_el, 'Local Path')
            file_.sha256 = item_of(file_el, 'SHA256')
            file_.md5 = item_of(file_el, 'MD5')
            file_.type_ = item_of(file_el, 'Tags')
            file_.content_type = item_of(file_el, 'ContentType')

            self.add(file_)
        self.commit()
Exemplo n.º 7
0
 def run(self):
     """Process the message attachments of this read source in parallel.

     Collects the ids of all ``File`` rows that belong to this read source
     and have a ``message_id``, splits them into chunks of 100 and hands
     each chunk to ``worker`` through a process pool.  Progress is counted
     in chunks.

     If iterating the results raises, the pool is terminated before the
     exception propagates; previously its workers were left running.
     """
     print("Excluindo entradas de arquivos duplicadas no DB")
     # ``!= None`` is kept on purpose: this is presumably a SQLAlchemy
     # column expression, where ``is not None`` would not build a filter.
     attachments = db_session.query(File.id).filter(
         File.message_id != None,
         File.read_source_id == self.read_source.id).all()
     chunks_ = list(chunks(attachments, 100))
     n = len(chunks_)

     pool = Pool(processes=config_manager.data['n_workers'])
     try:
         for i, _ in enumerate(pool.imap_unordered(worker, chunks_)):
             progress(i, n)
     except BaseException:
         pool.terminate()
         raise
     else:
         pool.close()
     finally:
         pool.join()
Exemplo n.º 8
0
    def render_chats(self):
        """Render every chat selected by the report bundle, in parallel.

        Chat ids are obtained by passing a ``Chat.id`` query through
        ``self.report_bundle.filter``.  Each id is handed to ``chat_worker``
        together with the report bundle, using a process pool.

        If iterating the results raises, the pool is terminated before the
        exception propagates; previously its workers were left running.
        """
        print("Renderizando chats")

        query = db_session.query(Chat.id)
        chats = self.report_bundle.filter(Chat, query).all()
        n = len(chats)
        # Each row is a one-column result; item[0] is the chat id.
        procs = ({'report_bundle': self.report_bundle, 'chat_id': item[0]}
                 for item in chats)

        pool = Pool(processes=config_manager.data['n_workers'])
        try:
            for i, _ in enumerate(pool.imap_unordered(chat_worker, procs)):
                progress(i, n)
        except BaseException:
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
Exemplo n.º 9
0
 def calculate_hashes(self):
     """Hash every file under the attachments folder, in parallel.

     Rebuilds ``self.hash_dict`` from the pairs returned by
     ``hash_worker``: the first element of each pair is the key and the
     second the value.  All regular files below
     ``self.attachments_folder`` are processed, at any depth.

     If iterating the results raises, the pool is terminated before the
     exception propagates; previously its workers were left running.
     """
     self.hash_dict = {}
     print("Calculando hash dos arquivos na pasta de anexos")
     base = Path(self.attachments_folder)
     files = [entry for entry in base.glob('**/*') if entry.is_file()]
     n = len(files)

     pool = Pool(processes=config_manager.data['n_workers'])
     try:
         for i, data in enumerate(pool.imap_unordered(hash_worker, files)):
             progress(i, n)
             self.hash_dict[data[0]] = data[1]
     except BaseException:
         pool.terminate()
         raise
     else:
         pool.close()
     finally:
         pool.join()
     print("Hashes calculados")
 def run(self):
     """Prepare the working folders, then read every chat file in turn.

     Creates ``self.chats_path`` and ``self.att_path`` when they do not
     exist, asks the operator to move the exported files into them, and
     then calls ``self.read_chat`` for each name returned by
     ``self.getChatsFilename()`` (kept in ``self.lista``).
     """
     for folder in (self.chats_path, self.att_path):
         if not os.path.exists(folder):
             os.mkdir(folder)

     instruction = (
         f"Mova os arquivos que tem o texto das mensagens para a pasta "
         f"'{self.chats_path}' e os anexos para a pasta '{self.att_path}'")
     instruct_continue(instruction)

     self.lista = self.getChatsFilename()
     print("Lendo chats...")
     total = len(self.lista)
     for position, chat_file in enumerate(self.lista):
         print(f"\nLendo conversa '{chat_file}'")
         progress(position, total)
         self.read_chat(chat_file)
0
    def run(self):
        """Import chats and messages from ``ChatStorage.sqlite``.

        Loads the ``ZWAMESSAGE``, ``ZWACHATSESSION``, ``ZWAPROFILEPUSHNAME``,
        ``ZWAMEDIAITEM`` and ``ZWAGROUPMEMBER`` tables into data frames kept
        on ``self``, then walks the messages ordered by ``ZCHATSESSION``,
        creating a chat whenever the session changes and one ``Message``
        per row.  Everything is committed once at the end.

        Changes from the previous version:

        * the first chat is taken from the *sorted* frame; it used to come
          from the unsorted one, so it could differ from the first session
          actually iterated;
        * an empty message table returns early instead of raising
          ``IndexError``;
        * the last chat is passed to ``self.add`` after the loop; before,
          only chats followed by another session were re-added.
        """
        source_folder = Path(self.read_source.folder)
        self.path_sqlite = source_folder / "ChatStorage.sqlite"
        self.attachments_folder = source_folder / "Media"
        self.conn = sqlite3.connect(str(self.path_sqlite))
        self.df_messages = pd.read_sql("SELECT * FROM ZWAMESSAGE", self.conn)
        self.df_chat_session = pd.read_sql("SELECT * FROM ZWACHATSESSION",
                                           self.conn)
        self.df_profile_push_name = pd.read_sql(
            "SELECT * FROM ZWAPROFILEPUSHNAME", self.conn)
        self.df_media_item = pd.read_sql("SELECT * FROM ZWAMEDIAITEM",
                                         self.conn)
        self.df_group_member = pd.read_sql("SELECT * FROM ZWAGROUPMEMBER",
                                           self.conn)

        if self.df_messages.empty:
            return

        messages = self.df_messages.sort_values(by=['ZCHATSESSION'])
        current_chat = messages.iloc[0]['ZCHATSESSION']
        chat_id, chat_name = self.__get_chat_id_name(current_chat)
        chat = self.add_chat(chat_id, chat_name)
        n = messages.shape[0]

        for i, (_, row) in enumerate(messages.iterrows()):
            progress(i, n)
            if current_chat != row['ZCHATSESSION']:
                # Session changed: store the finished chat, start the next.
                self.add(chat)
                chat_id, chat_name = self.__get_chat_id_name(
                    row['ZCHATSESSION'])
                chat = self.add_chat(chat_id, chat_name)
                current_chat = row['ZCHATSESSION']
            message = Message()
            jid, from_name = self.__get_from_name(row['ZFROMJID'],
                                                  row['ZGROUPMEMBER'])
            message.from_ = self.add_participant(jid, from_name)
            if message.from_ and message.from_ not in chat.participants:
                chat.participants.append(message.from_)
            message.timestamp = convert_timestamp(row['ZMESSAGEDATE'])
            message.read_source = self.read_source
            message.chat_id = chat.id
            message.body = row['ZTEXT']
            if row['ZMEDIAITEM']:
                res = self.add_file(row['ZMEDIAITEM'])
                if res:
                    file, title = res
                    file.message = message
                    # A message with media shows the media title as body.
                    message.body = title
                    self.add(file)
            self.add(message)
        self.add(chat)
        self.commit()
Exemplo n.º 12
0
 def parse_chats(self):
     """Import the chat models of the XML report using a worker pool.

     Each child of the ``Chat`` model type is handed to ``chat_worker``
     together with the read source id and the XML namespace.  Nothing is
     done when the model type is absent or has no children.

     If iterating the results raises, the pool is terminated before the
     exception propagates; previously its workers were left running.
     """
     chats_el = self.root.find(
         f".//{self.namespace}modelType[@type='Chat']")
     # Explicit checks instead of ``if chats_el``: truth-testing an Element
     # depends on its child count and is deprecated.
     if chats_el is None or len(chats_el) == 0:
         return

     n = len(chats_el)
     print("Lendo chats")
     procs = ({
         'read_source_id': self.read_source.id,
         'namespace': self.namespace,
         'chat_el': chat_el
     } for chat_el in chats_el)

     pool = Pool(processes=config_manager.data['n_workers'])
     try:
         for i, _ in enumerate(pool.imap_unordered(chat_worker, procs)):
             progress(i, n)
     except BaseException:
         pool.terminate()
         raise
     else:
         pool.close()
     finally:
         pool.join()
Exemplo n.º 13
0
    def parse_contact(self):
        """Import the contact models of the XML report, with their entries.

        Each child of the ``Contact`` model type becomes a ``Contact``
        carrying its ``deleted_state`` attribute and the text of the
        ``Name`` and ``Source`` fields.  Every item of the ``Entries``
        multi-model field becomes a ``ContactEntry`` (``Category`` and
        ``Value``) attached to the contact.  Missing fields are stored as
        ``None``.  Nothing is printed or committed when the model type is
        absent or has no children.
        """
        ns = self.namespace

        def text_of(element, name):
            # Text of <field name=...>/<value>, or None if either is absent.
            # Compared with None explicitly: truth-testing an Element
            # depends on its child count and is deprecated.
            field = element.find(f"{ns}field[@name='{name}']")
            value = field.find(f"{ns}value") if field is not None else None
            return value.text if value is not None else None

        contacts_el = self.root.find(f".//{ns}modelType[@type='Contact']")
        if contacts_el is None or len(contacts_el) == 0:
            return

        n = len(contacts_el)
        print("\nLendo Contatos")
        for i, contact_el in enumerate(contacts_el):
            progress(i, n)
            contact = Contact()
            contact.deleted_state = contact_el.attrib['deleted_state']
            contact.name = text_of(contact_el, 'Name')
            contact.source = text_of(contact_el, 'Source')

            entries_el = contact_el.find(
                f"{ns}multiModelField[@name='Entries']")
            if entries_el is not None:
                for entry_el in entries_el:
                    entry = ContactEntry()
                    entry.deleted_state = entry_el.attrib['deleted_state']
                    entry.category = text_of(entry_el, 'Category')
                    entry.value = text_of(entry_el, 'Value')
                    self.add(entry)
                    contact.entries.append(entry)
            self.add(contact)
        self.commit()
Exemplo n.º 14
0
 def parse_files(self):
     """Import the ``file`` entries under ``taggedFiles`` with a pool.

     The entries are split into chunks of 50 and each chunk is handed to
     ``files_worker`` together with the read source id and the XML
     namespace.  Progress is counted in chunks.  Nothing is done when
     there are no entries.

     If iterating the results raises, the pool is terminated before the
     exception propagates; previously its workers were left running.
     """
     tagged_fiels_el = self.root.find(f".//{self.namespace}taggedFiles")
     # Compared with None explicitly: truth-testing an Element depends on
     # its child count and is deprecated.
     files_el = (tagged_fiels_el.findall(f"{self.namespace}file")
                 if tagged_fiels_el is not None else None)
     if not files_el:
         return

     chunks_ = list(chunks(files_el, 50))
     n = len(chunks_)
     print("\nLendo arquivos")
     procs = ({
         'read_source_id': self.read_source.id,
         'namespace': self.namespace,
         'chunk': chunk
     } for chunk in chunks_)

     pool = Pool(processes=config_manager.data['n_workers'])
     try:
         for i, _ in enumerate(pool.imap_unordered(files_worker, procs)):
             progress(i, n)
     except BaseException:
         pool.terminate()
         raise
     else:
         pool.close()
     finally:
         pool.join()
Exemplo n.º 15
0
    def run(self):
        """Generate thumbnails for the videos of this read source.

        Does nothing when ``config_manager.data['thumbnails']['video']`` is
        falsy.  Otherwise a ``ThumbsGenerator`` writing to the
        ``sinf_thumbs`` folder inside the read source folder is configured
        from the ``thumbnails`` settings, and the ids of all video files
        with an extracted path are handed to ``worker`` in chunks of two
        through a process pool.

        If iterating the results raises, the pool is terminated before the
        exception propagates; previously its workers were left running.
        """
        thumbs_config = config_manager.data['thumbnails']
        if not thumbs_config['video']:
            return

        folder = Path(self.read_source.folder) / "sinf_thumbs"
        thumbs_generator = ThumbsGenerator(folder)
        thumbs_generator.set_config(
            n_rows=thumbs_config['n_rows'],
            n_cols=thumbs_config['n_cols'],
            thumb_size=thumbs_config['image_thumb_size'],
            extension=thumbs_config['extension'])

        # ``!= None`` is kept on purpose: this is presumably a SQLAlchemy
        # column expression, where ``is not None`` would not build a filter.
        files = db_session.query(File.id).filter_by(type_='video').filter(
            File.extracted_path != None,
            File.read_source == self.read_source).all()

        print("\nGerando thumbs dos vídeos")
        chunks_ = list(chunks(files, 2))
        n = len(chunks_)
        procs = ((thumbs_generator, chunk) for chunk in chunks_)

        pool = Pool(processes=config_manager.data['n_workers'])
        try:
            for i, _ in enumerate(pool.imap_unordered(worker, procs)):
                progress(i, n)
        except BaseException:
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
Exemplo n.º 16
0
 def parse_chats(self):
     """Store every message yielded by ``self.get_messages()``.

     For each message dict the chat is obtained through ``self.add_chat``,
     the sender through ``self.add_participant``, and a ``Message`` marked
     ``'Intact'`` is created with its body and timestamp.  Each attachment
     dict becomes a ``File`` whose extracted path is made relative to the
     read source folder.  Progress is reported against the row count of
     ``self.messages`` and a single commit is issued at the end.
     """
     total = self.messages.shape[0]
     print("Lendo mensagens do sqlite")

     def store_attachment(info):
         # Build, register and return the File for one attachment dict.
         stored = File()
         stored.filename = info['name']
         stored.content_type = info['mime_type']
         stored.size = info['size']
         stored.extracted_path = os.path.relpath(
             info['extracted_path'], self.read_source.folder)
         stored.meta_data = info['caption']
         self.add(stored)
         return stored

     for position, record in enumerate(self.get_messages()):
         progress(position, total)
         chat = self.add_chat(record['chat'])
         message = Message()
         message.from_ = self.add_participant(
             record['from']['identifier'], record['from']['name'])
         message.body = record['body']
         message.deleted_state = 'Intact'
         message.timestamp = record['timestamp']
         message.chat_id = chat.id
         for info in record['attachments']:
             message.attachments.append(store_attachment(info))
         self.add(message)
     self.commit()
Exemplo n.º 17
0
    def run(self):
        """Import messages ordered by thread, one chat per ``thread_key``.

        Rows of ``self.table_messages`` are read ordered by ``thread_key``
        and ``timestamp_ms``.  A chat is created whenever the thread key
        changes and one ``Message`` is stored per row; the sender is decoded
        from the JSON in the ``sender`` column when present.  A timestamp
        that cannot be converted is stored as ``None``.

        Changes from the previous version:

        * an empty result returns early instead of raising ``IndexError``;
        * the last chat is added to the session after the loop; before,
          only chats followed by another thread were re-added;
        * the timestamp conversion no longer uses a bare ``except``.
        """
        self.__read_database()
        stm = self.table_messages.select().order_by(
            sa.asc(self.table_messages.c.thread_key),
            sa.asc(self.table_messages.c.timestamp_ms))
        res_messages = self.conn.execute(stm).fetchall()
        if not res_messages:
            return

        current_thread_key = res_messages[0]['thread_key']
        # The thread key doubles as the chat name here.
        chat = self.add_chat(current_thread_key, current_thread_key)
        n = len(res_messages)
        for i, m in enumerate(res_messages):
            progress(i, n)
            if current_thread_key != m['thread_key']:
                db_session.add(chat)
                chat = self.add_chat(m['thread_key'], m['thread_key'])
                current_thread_key = m['thread_key']
            message = Message()
            sender = json.loads(
                m['sender']) if m['sender'] is not None else None
            if sender:
                message.from_ = self.add_participant(sender['user_key'],
                                                     sender['name'])
                if message.from_ not in chat.participants:
                    chat.participants.append(message.from_)
            try:
                # Stored in milliseconds; fromtimestamp expects seconds.
                timestamp = datetime.fromtimestamp(
                    int(m['timestamp_ms']) / 1000)
            except (TypeError, ValueError, OverflowError, OSError):
                timestamp = None
            message.timestamp = timestamp
            message.read_source = self.read_source
            message.chat_id = chat.id
            message.body = m['text']
            db_session.add(message)
        db_session.add(chat)
        db_session.commit()
Exemplo n.º 18
0
    def run(self):
        """Import messages joined with their threads, one chat per thread.

        ``self.table_messages`` is outer-joined with ``self.table_threads``
        on ``thread_key`` and read ordered by ``thread_key`` and
        ``timestamp``.  A chat is created whenever the thread key changes
        and one ``Message`` is stored per row, with ``snippet`` as body.  A
        timestamp that cannot be converted is stored as ``None``.

        Changes from the previous version:

        * an empty result returns early instead of raising ``IndexError``;
        * the last chat is added to the session after the loop; before,
          only chats followed by another thread were re-added;
        * the timestamp conversion no longer uses a bare ``except``.
        """
        self.__read_database()
        joined = sa.join(
            self.table_messages,
            self.table_threads,
            self.table_messages.c.thread_key ==
            self.table_threads.c.thread_key,
            isouter=True)
        stm = select(self.cols).select_from(joined).order_by(
            sa.asc(self.table_messages.c.thread_key),
            sa.asc(self.table_messages.c.timestamp))
        res_messages = self.conn.execute(stm).fetchall()
        if not res_messages:
            return

        first = res_messages[0]
        chat = self.add_chat(first['thread_key'], first['thread_name'])
        current_thread_key = first['thread_key']
        n = len(res_messages)
        for i, m in enumerate(res_messages):
            progress(i, n)
            if current_thread_key != m['thread_key']:
                db_session.add(chat)
                chat = self.add_chat(m['thread_key'], m['thread_name'])
                current_thread_key = m['thread_key']
            message = Message()
            message.from_ = self.add_participant(m['user_id'], m['sender'])
            if message.from_ not in chat.participants:
                chat.participants.append(message.from_)
            try:
                # Stored in milliseconds; fromtimestamp expects seconds.
                timestamp = datetime.fromtimestamp(int(m['timestamp']) / 1000)
            except (TypeError, ValueError, OverflowError, OSError):
                timestamp = None
            message.timestamp = timestamp
            message.read_source = self.read_source
            message.chat_id = chat.id
            message.body = m['snippet']
            db_session.add(message)
        db_session.add(chat)
        db_session.commit()
Exemplo n.º 19
0
    def parse_chats(self):
        """Import the chat models of the XML report with their contents.

        Each child of the ``Chat`` model type becomes a ``Chat`` with its
        identifier, name, source and start/last-activity times.  Entries of
        the ``Participants`` field are resolved through
        ``self.add_participant``.  Every entry of the ``Messages`` field
        becomes a ``Message`` (body, timestamp, sender), and every entry of
        a message's ``Attachments`` field becomes a ``File``.  Nothing is
        printed or committed when the model type is absent or has no
        children.

        Changes from the previous version:

        * elements are compared with ``None`` or measured with ``len``
          instead of being truth-tested, which is deprecated;
        * a chat without a ``Participants`` field and a message without a
          ``From`` field no longer raise.
        """
        ns = self.namespace

        def value_of(element, name):
            # <value> child of the named <field>, or None if either is
            # absent.
            field = element.find(f"{ns}field[@name='{name}']")
            return field.find(f"{ns}value") if field is not None else None

        def text_of(element, name, default=None):
            # ``default`` applies only when the <value> node is missing.
            value = value_of(element, name)
            return value.text if value is not None else default

        def date_of(element, name):
            value = value_of(element, name)
            return parser.parse(value.text) if value is not None else None

        def models_of(element, name):
            # Children of <multiModelField name=...>; empty when absent.
            container = element.find(f"{ns}multiModelField[@name='{name}']")
            return list(container) if container is not None else []

        def build_attachment(attachment_el):
            attachment = File()
            attachment.deleted_state = attachment_el.attrib['deleted_state']
            attachment.filename = text_of(attachment_el, 'Filename')
            attachment.extracted_path = text_of(
                attachment_el, 'attachment_extracted_path')
            attachment.content_type = text_of(attachment_el, 'ContentType')
            attachment.meta_data = text_of(attachment_el, 'MetaData')
            return attachment

        def build_message(message_el):
            message = Message()
            message.deleted_state = message_el.attrib['deleted_state']
            message.body = text_of(message_el, 'Body')
            message.timestamp = date_of(message_el, 'TimeStamp')

            from_field = message_el.find(f"{ns}modelField[@name='From']")
            from_el = (from_field.find(f"{ns}model")
                       if from_field is not None else None)
            # As before, a sender model without any field is ignored.
            if from_el is not None and len(from_el):
                message.from_ = self.add_participant(
                    text_of(from_el, 'Identifier'),
                    text_of(from_el, 'Name'))

            for attachment_el in models_of(message_el, 'Attachments'):
                attachment = build_attachment(attachment_el)
                self.add(attachment)
                message.attachments.append(attachment)
            return message

        chats_el = self.root.find(f".//{ns}modelType[@type='Chat']")
        if chats_el is None or len(chats_el) == 0:
            return

        n = len(chats_el)
        print("Lendo chats")
        for i, chat_el in enumerate(chats_el):
            progress(i, n)
            chat = Chat()
            chat.deleted_state = chat_el.attrib['deleted_state']
            chat.identifier = text_of(chat_el, 'Id', "")
            chat.name = text_of(chat_el, 'Name', "")
            chat.source = text_of(chat_el, 'Source', "")
            chat.start_time = date_of(chat_el, 'StartTime')
            chat.last_activity = date_of(chat_el, 'LastActivity')

            for participant_el in models_of(chat_el, 'Participants'):
                participant = self.add_participant(
                    text_of(participant_el, 'Identifier'),
                    text_of(participant_el, 'Name'))
                if participant not in chat.participants:
                    chat.participants.append(participant)

            messages_el = models_of(chat_el, 'Messages')
            if not messages_el:
                # NOTE(review): as in the original, a chat without messages
                # is never passed to self.add -- confirm this is intended.
                continue
            for message_el in messages_el:
                message = build_message(message_el)
                self.add(message)
                chat.messages.append(message)
            self.add(chat)
        self.commit()