Пример #1
0
 def _redirect_messages_to_save(self, data, slip):
     """Queue every message listed in a downloaded response for retrieval.

     The response body is decoded and parsed as JSON. For each entry under
     its "messages" key, the message's ``sizeEstimate`` is added to
     ``self.file_size_bytes`` and a download slip targeting
     ``<message id>.txt`` (after file-name sanitizing) is put on
     ``self.content_downloader``.

     Args:
         data: Readable response object; ``read()`` must return bytes
             containing a JSON document.
         slip: The slip associated with ``data``. Not used by this method;
             kept so the signature stays unchanged.
     """
     json_data = json.loads(data.read().decode())
     if "messages" in json_data:
         for message in json_data["messages"]:
             download_uri = self.get_message_uri(message)
             raw_title = message["id"] + ".txt"
             filetitle = Common.safe_file_name(raw_title)
             self.file_size_bytes += int(message["sizeEstimate"])
             # Compare the raw name against the sanitized one. The previous
             # check compared the sanitized name with itself, so the warning
             # could never be emitted.
             if raw_title != filetitle:
                 self.project.log(
                     "exception", "Normalized '{}' to '{}'".format(
                         raw_title, filetitle), "warning", True)
             self.content_downloader.put(
                 Downloader.DownloadSlip(download_uri, message, filetitle,
                                         "snippet"))
Пример #2
0
    def sync(self):
        """Run one acquisition pass over ``self.threads``.

        A metadata downloader is always created. In "full" mode two more
        downloaders are created: ``self.d`` (callback
        ``_redirect_messages_to_save``) and ``self.content_downloader``
        (callback ``_save_raw_mail``). Every thread is queued for a
        "metadata" request, and in full mode also for a "minimal" request.
        In full mode ``self.d`` runs to completion first, then the optional
        prompt is shown, then the content downloader runs. The metadata
        downloader always runs last. Elapsed wall-clock time is logged at
        the end.
        """
        started = datetime.now()

        def build_downloader(callback):
            # All downloaders share everything except the save callback.
            return Downloader.Downloader(
                self.project, self.oauth_provider.http_intercept, callback,
                self.oauth_provider.get_auth_header, self.project.threads)

        # Placeholders; replaced by real instances only in full mode.
        self.d = Downloader.Downloader
        self.content_downloader = Downloader.Downloader
        self.meta_downloader = build_downloader(self._save_metadata)

        full_mode = self.project.args.mode == "full"
        if not full_mode:
            self.project.log("transaction", "Metadata acquisition initiated",
                             "info", True)
        else:
            self.project.log("transaction", "Full acquisition initiated",
                             "info", True)
            self.d = build_downloader(self._redirect_messages_to_save)
            self.content_downloader = build_downloader(self._save_raw_mail)

        self.initialize_items()
        queued_message = "Total threads queued for acquisition: {}".format(
            len(self.threads))
        self.project.log("transaction", queued_message, "info", True)
        self.metadata()

        for thread in self.threads:
            self.project.log("transaction",
                             'Calculating "{}"'.format(thread['snippet']),
                             "info", True)
            savepath = ""
            if full_mode:
                minimal_uri = self.get_thread_uri(thread, "minimal")
                minimal_slip = Downloader.DownloadSlip(minimal_uri, thread,
                                                       savepath, 'id')
                self.d.put(minimal_slip)

            meta_uri = self.get_thread_uri(thread, "metadata")
            meta_slip = Downloader.DownloadSlip(meta_uri, thread, savepath,
                                                'id')
            self.meta_downloader.put(meta_slip)

        if full_mode:
            # Running this downloader accumulates self.file_size_bytes via
            # its callback, so the total can be reported afterwards.
            self.d.start()
            self.d.wait_for_complete()
            total_size = Common.sizeof_fmt(self.file_size_bytes, "B")
            self.project.log(
                "transaction",
                "Total size of mail to be acquired is {}".format(total_size),
                "highlight", True)
            self.mbox_dir = os.path.join(self.project.acquisition_dir, "mbox")
            os.makedirs(self.mbox_dir, exist_ok=True)

        if self.project.args.prompt:
            IO.get("Press ENTER to begin acquisition...")

        if full_mode:
            self.content_downloader.start()
            self.content_downloader.wait_for_complete()

        self.meta_downloader.start()
        self.meta_downloader.wait_for_complete()

        elapsed = datetime.now() - started
        self.project.log("transaction",
                         "Acquisition completed in {}".format(str(elapsed)),
                         "highlight", True)
Пример #3
0
    def sync(self):
        """Run one acquisition pass over ``self.files``.

        For every file the local data and metadata paths are computed
        (trashed files go under the "trash" / "trash_metadata" folders of
        the acquisition directory), and the file's metadata is written via
        ``self._save_file``. In "full" mode each file is additionally
        queued for download unless a local copy already exists whose MD5
        matches the remote ``md5Checksum``. Every file with a download URI
        is recorded in ``self.verification``.

        In metadata mode no downloader is created and nothing is started.
        ``self.verify()`` is called at the end in both modes, and the
        elapsed wall-clock time is logged.
        """
        d1 = datetime.now()
        full_mode = self.project.args.mode == "full"
        # Only full mode needs a downloader; metadata is saved synchronously.
        d = None
        if full_mode:
            self.project.log("transaction", "Full acquisition initiated",
                             "info", True)
            d = Downloader.Downloader(self.project,
                                      self.oauth_provider.http_intercept,
                                      self._save_file,
                                      self.oauth_provider.get_auth_header,
                                      self.project.threads)
        else:
            self.project.log("transaction", "Metadata acquisition initiated",
                             "info", True)

        self.initialize_items()
        cnt = len(self.files)
        self.project.log("transaction",
                         "Total items queued for acquisition: " + str(cnt),
                         "info", True)
        self.metadata()

        trash_folder = os.path.join(self.project.acquisition_dir, "trash")
        trash_metadata_folder = os.path.join(self.project.acquisition_dir,
                                             "trash_metadata")

        for file in self.files:
            self.project.log("transaction", "Calculating " + file['title'],
                             "info", True)
            download_uri = self._get_download_url(file)
            parentmap = self._get_parent_mapping(file, self.files)

            filetitle = self._get_file_name(file)
            if filetitle != file['title']:
                self.project.log(
                    "exception", "Normalized '" + file['title'] + "' to '" +
                    filetitle + "'", "warning", True)

            # Trashed items are kept apart from live data and metadata.
            if file['labels']['trashed']:
                data_root = trash_folder
                metadata_root = trash_metadata_folder
            else:
                data_root = self.project.project_folders["data"]
                metadata_root = self.project.project_folders["metadata"]

            save_download_path = Common.assert_path(
                os.path.normpath(
                    os.path.join(data_root, parentmap, filetitle)),
                self.project)
            save_metadata_path = Common.assert_path(
                os.path.normpath(
                    os.path.join(metadata_root, parentmap,
                                 filetitle + ".json")), self.project)

            if full_mode and save_download_path:
                v = {
                    "remote_file": os.path.join(parentmap, file['title']),
                    "local_file": save_download_path
                }
                has_remote_hash = 'md5Checksum' in file
                if has_remote_hash:
                    v['remote_hash'] = file['md5Checksum']

                download_file = True
                if os.path.isfile(save_download_path):
                    if has_remote_hash:
                        # Close the handle deterministically; it used to be
                        # left open for every file that was hashed.
                        with open(save_download_path, 'rb') as local_copy:
                            file_hash = Common.hashfile(
                                local_copy, hashlib.md5())
                        if file_hash == file['md5Checksum']:
                            download_file = False
                            self.project.log(
                                "exception",
                                "Local and remote hash matches for " +
                                file['title'] + " ... Skipping download",
                                "warning", True)
                        else:
                            self.project.log(
                                "exception",
                                "Local and remote hash differs for " +
                                file['title'] +
                                " ... Queuing for download", "critical",
                                True)
                    else:
                        self.project.log(
                            "exception",
                            "No hash information for file ' " +
                            file['title'] + "'", "warning", True)

                if download_file and download_uri:
                    self.project.log(
                        "transaction",
                        "Queueing " + file['title'] + " for download...",
                        "info", True)
                    d.put(
                        Downloader.DownloadSlip(download_uri, file,
                                                save_download_path,
                                                'title'))
                    if 'fileSize' in file:
                        self.file_size_bytes += int(file['fileSize'])

                # If it's a file we can add it to verification file
                if download_uri:
                    self.verification.append(v)

            if save_metadata_path:
                self._save_file(
                    json.dumps(file, sort_keys=True, indent=4),
                    Downloader.DownloadSlip(download_uri, file,
                                            save_metadata_path, 'title'),
                    False)

        self.project.log(
            "transaction", "Total size of files to be acquired is {}".format(
                Common.sizeof_fmt(self.file_size_bytes, "B")), "highlight",
            True)

        if self.project.args.prompt:
            IO.get("Press ENTER to begin acquisition...")

        # Nothing was queued in metadata mode, so there is no downloader to
        # run. The old code called start() on the Downloader class itself.
        if d is not None:
            d.start()
            d.wait_for_complete()
        d2 = datetime.now()
        delt = d2 - d1
        self.verify()
        self.project.log("transaction",
                         "Acquisition completed in {}".format(str(delt)),
                         "highlight", True)
Пример #4
0
    def sync(self):
        """Run one acquisition pass over ``self.files``.

        A single downloader handles both kinds of request. Every entry that
        is not a directory is queued for a metadata download, and in "full"
        mode also for a content download. The content URI is passed as a
        zero-argument callable rather than a string. The ``bytes`` field of
        each entry, when present, is added to ``self.file_size_bytes``.
        After the optional prompt the downloader is run to completion and
        the elapsed wall-clock time is logged.
        """
        started = datetime.now()
        downloader = Downloader.Downloader(
            self.project, self.oauth_provider.http_intercept,
            self._save_file, self.oauth_provider.get_auth_header,
            self.project.threads)

        if self.project.args.mode != "full":
            self.project.log("transaction", "Metadata acquisition initiated",
                             "info", True)
        else:
            self.project.log("transaction", "Full acquisition initiated",
                             "info", True)

        self.initialize_items()
        self.project.log(
            "transaction",
            "Total items queued for acquisition: {}".format(len(self.files)),
            "info", True)
        self.metadata()

        for file in self.files:
            self.project.log("transaction", "Calculating " + file['path'],
                             "info", True)

            # Explicit comparison kept on purpose: only entries whose
            # 'is_dir' compares equal to False are treated as files.
            if file['is_dir'] == False:
                # Default argument binds the current entry to the callable.
                download_uri = lambda f=file: self._get_download_uri(f)
                endpoint = self.oauth_provider.config['API_ENDPOINT']
                metadata_download_uri = (
                    endpoint + '/metadata/auto' + file['path'])
                parentmap = self._get_parent_mapping(file)
                filetitle = self._get_file_name(file)
                orig = os.path.basename(file['path'])
                queue_message = "Queueing {} for download...".format(orig)

                if filetitle != orig:
                    self.project.log(
                        "exception",
                        "Normalized '{}' to '{}'".format(orig, filetitle),
                        "warning", True)

                if 'bytes' in file:
                    self.file_size_bytes += int(file['bytes'])

                metadata_target = os.path.normpath(
                    os.path.join(self.project.project_folders['metadata'],
                                 parentmap, filetitle + ".json"))
                save_metadata_path = Common.assert_path(metadata_target,
                                                        self.project)
                if save_metadata_path:
                    self.project.log("transaction", queue_message, "info",
                                     True)
                    downloader.put(
                        Downloader.DownloadSlip(metadata_download_uri, file,
                                                save_metadata_path, 'path'))

                if self.project.args.mode == "full":
                    data_target = os.path.normpath(
                        os.path.join(self.project.project_folders['data'],
                                     parentmap, filetitle))
                    save_download_path = Common.assert_path(data_target,
                                                            self.project)
                    if save_download_path:
                        self.project.log("transaction", queue_message,
                                         "info", True)
                        downloader.put(
                            Downloader.DownloadSlip(download_uri, file,
                                                    save_download_path,
                                                    'path'))

        total_size = Common.sizeof_fmt(self.file_size_bytes, "B")
        self.project.log(
            "transaction",
            "Total size of files to be acquired is {}".format(total_size),
            "highlight", True)
        if self.project.args.prompt:
            IO.get("Press ENTER to begin acquisition...")

        downloader.start()
        downloader.wait_for_complete()
        elapsed = datetime.now() - started

        self.project.log("transaction",
                         "Acquisition completed in {}".format(str(elapsed)),
                         "highlight", True)