def sync(self):
    """Run a Dropbox acquisition pass.

    Queues a metadata download for every non-directory entry and, when the
    run mode is "full", the file content as well; then drains the download
    queue and logs the elapsed wall-clock time.
    """
    d1 = datetime.now()
    # A single downloader services both metadata and content slips.
    d = Downloader.Downloader(self.project, self.oauth_provider.http_intercept,
                              self._save_file, self.oauth_provider.get_auth_header,
                              self.project.threads)
    if self.project.args.mode == "full":
        self.project.log("transaction", "Full acquisition initiated", "info", True)
    else:
        self.project.log("transaction", "Metadata acquisition initiated", "info", True)
    self.initialize_items()
    cnt = len(self.files)
    self.project.log("transaction", "Total items queued for acquisition: " + str(cnt), "info", True)
    self.metadata()
    for file in self.files:
        self.project.log("transaction", "Calculating " + file['path'], "info", True)
        # Only files are queued; directories are skipped.
        if file['is_dir'] == False:
            # Bind the current item as a default argument so each slip resolves
            # its own download URI (avoids the late-binding closure pitfall).
            # Presumably the downloader calls this at fetch time — TODO confirm.
            download_uri = lambda f=file: self._get_download_uri(f)
            metadata_download_uri = self.oauth_provider.config['API_ENDPOINT'] + '/metadata/auto' + file['path']
            parentmap = self._get_parent_mapping(file)
            filetitle = self._get_file_name(file)
            orig = os.path.basename(file['path'])
            if filetitle != orig:
                # The on-disk name was sanitized; record the mapping.
                self.project.log("exception", "Normalized '{}' to '{}'".format(orig, filetitle), "warning", True)
            if 'bytes' in file:
                self.file_size_bytes += int(file['bytes'])
            # assert_path appears to return a falsy value when the path is
            # unusable — TODO confirm against Common.assert_path.
            save_metadata_path = Common.assert_path(
                os.path.normpath(os.path.join(os.path.join(self.project.project_folders['metadata'], parentmap), filetitle + ".json")),
                self.project)
            if save_metadata_path:
                self.project.log("transaction", "Queueing {} for download...".format(orig), "info", True)
                d.put(Downloader.DownloadSlip(metadata_download_uri, file, save_metadata_path, 'path'))
            if self.project.args.mode == "full":
                save_download_path = Common.assert_path(
                    os.path.normpath(os.path.join(os.path.join(self.project.project_folders['data'], parentmap), filetitle)),
                    self.project)
                if save_download_path:
                    self.project.log("transaction", "Queueing {} for download...".format(orig), "info", True)
                    d.put(Downloader.DownloadSlip(download_uri, file, save_download_path, 'path'))
    self.project.log("transaction", "Total size of files to be acquired is {}".format(Common.sizeof_fmt(self.file_size_bytes, "B")), "highlight", True)
    if self.project.args.prompt:
        IO.get("Press ENTER to begin acquisition...")
    # Block until every queued slip has been processed.
    d.start()
    d.wait_for_complete()
    d2 = datetime.now()
    delt = d2 - d1
    self.project.log("transaction", "Acquisition completed in {}".format(str(delt)), "highlight", True)
def sync(self):
    """Run a Gmail acquisition pass.

    Always downloads per-thread metadata; in "full" mode additionally
    downloads thread listings and raw mail content via two extra
    downloader queues.
    """
    d1 = datetime.now()
    # Placeholders: bind the class itself so the attributes always exist;
    # real instances are only constructed in "full" mode below.
    self.d = Downloader.Downloader
    self.content_downloader = Downloader.Downloader
    # Thread metadata is fetched in every mode.
    self.meta_downloader = Downloader.Downloader(self.project, self.oauth_provider.http_intercept,
                                                 self._save_metadata, self.oauth_provider.get_auth_header,
                                                 self.project.threads)
    if self.project.args.mode == "full":
        self.project.log("transaction", "Full acquisition initiated", "info", True)
        self.d = Downloader.Downloader(self.project, self.oauth_provider.http_intercept,
                                       self._redirect_messages_to_save, self.oauth_provider.get_auth_header,
                                       self.project.threads)
        # NOTE(review): presumably _redirect_messages_to_save enqueues the
        # individual messages onto content_downloader — confirm in the handler.
        self.content_downloader = Downloader.Downloader(self.project, self.oauth_provider.http_intercept,
                                                        self._save_raw_mail, self.oauth_provider.get_auth_header,
                                                        self.project.threads)
    else:
        self.project.log("transaction", "Metadata acquisition initiated", "info", True)
    self.initialize_items()
    cnt = len(self.threads)
    self.project.log("transaction", "Total threads queued for acquisition: {}".format(cnt), "info", True)
    self.metadata()
    for thread in self.threads:
        self.project.log("transaction", 'Calculating "{}"'.format(thread['snippet']), "info", True)
        savepath = ""  # empty here; presumably resolved later by the save callbacks — TODO confirm
        if self.project.args.mode == "full":
            download_uri = self.get_thread_uri(thread, "minimal")
            self.d.put(Downloader.DownloadSlip(download_uri, thread, savepath, 'id'))
        # Metadata is queued in every mode.
        meta_uri = self.get_thread_uri(thread, "metadata")
        self.meta_downloader.put(Downloader.DownloadSlip(meta_uri, thread, savepath, 'id'))
    if self.project.args.mode == "full":
        # Drain the thread-listing queue before content acquisition starts.
        self.d.start()
        self.d.wait_for_complete()
    self.project.log("transaction", "Total size of mail to be acquired is {}".format(Common.sizeof_fmt(self.file_size_bytes, "B")), "highlight", True)
    self.mbox_dir = os.path.join(self.project.acquisition_dir, "mbox")
    os.makedirs(self.mbox_dir, exist_ok=True)
    if self.project.args.prompt:
        IO.get("Press ENTER to begin acquisition...")
    if self.project.args.mode == "full":
        self.content_downloader.start()
        self.content_downloader.wait_for_complete()
    self.meta_downloader.start()
    self.meta_downloader.wait_for_complete()
    d2 = datetime.now()
    delt = d2 - d1
    self.project.log("transaction", "Acquisition completed in {}".format(str(delt)), "highlight", True)
def log(self, type, message, level, stdout=False):
    """Record *message* in the project logs.

    Args:
        type: Log category; "exception" entries additionally go to the
            exception logger.  (The name shadows the builtin but is kept
            for backward compatibility with existing callers.)
        message: Text to record.
        level: Severity name: 'info', 'warning', 'error', 'critical' or
            'highlight' (an info-level alias used for emphasis).
        stdout: When True, also echo the message to the console via IO.put.
    """
    # Map the project's level names onto stdlib logging numeric levels.
    levels = {
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'critical': logging.CRITICAL,
        'highlight': logging.INFO,
    }
    # Fix: an unknown level name used to raise KeyError; default to INFO.
    numeric_level = levels.get(level, logging.INFO)
    if stdout:
        IO.put(message, level)
    if type.lower() == "exception":
        self.exception_logger.log(numeric_level, message)
    # Every event — exceptions included — also lands in the transaction log.
    self.transaction_logger.log(numeric_level, message)
def get_access_token(self, client_id, client_secret):
    """Run the interactive OAuth2 authorization-code flow.

    Opens the provider's consent page in a browser, prompts the user to
    paste the authorization code back, exchanges the code for tokens at
    the token endpoint, and persists the parsed result under "OAUTH".

    Args:
        client_id: OAuth application client id.
        client_secret: OAuth application client secret.
    """
    response_type = 'code'
    query_string = {}
    # Provider-specific consent-page parameters.  For Google,
    # access_type=offline + approval_prompt=force requests a refresh token.
    if self.provider == "google":
        query_string = ({'redirect_uri': self.config['REDIRECT_URI'],
                         'response_type': response_type,
                         'client_id': client_id,
                         'scope': self.project.config['OAUTH_SCOPE'],
                         'approval_prompt': 'force',
                         'access_type': 'offline'})
    elif self.provider == "dropbox":
        query_string = ({'response_type': response_type, 'client_id': client_id})
    params = urllib.parse.urlencode(query_string)
    step1 = self.config['OAUTH_ENDPOINT'] + '?' + params
    Common.launch_browser(step1)
    code = IO.get("Authorization Code:")
    # Exchange the one-time authorization code for access/refresh tokens.
    query_string = ({'code': code,
                     'grant_type': 'authorization_code',
                     'client_id': client_id,
                     'client_secret': client_secret})
    if self.provider == "google":
        query_string['scope'] = ''  # NOTE(review): empty scope on the token request — confirm this is intentional
        query_string['redirect_uri'] = self.config['REDIRECT_URI']
    params = urllib.parse.urlencode(query_string)
    response = Common.webrequest(self.config['TOKEN_ENDPOINT'],
                                 {'content-type': 'application/x-www-form-urlencoded;charset=utf-8'},
                                 self.http_intercept, params)
    json_response = json.loads(response)
    self.parse_token(json_response)
    self.project.save("OAUTH", self.oauth)
def authorize(self):
    """Establish OAuth2 credentials, prompting for app setup when needed.

    When no stored token of type self.key_to_the_kingdom exists, runs the
    authorization-code flow — first walking the user through creating an
    OAuth application if CLIENT_ID/CLIENT_SECRET are missing.  Otherwise
    refreshes the stored token.  Persists the OAUTH state either way.
    """
    self.project.log(
        "transaction", "Initiating OAUTH2 Protocol with " + self.config['TOKEN_ENDPOINT'], "info", True)
    key_exists = self.oauth.get(self.key_to_the_kingdom)
    if not key_exists:
        self.project.log(
            "transaction", "No valid {} found...".format(self.key_to_the_kingdom), "warning", True)
        c_id = self.project.config.get("CLIENT_ID")
        c_secret = self.project.config.get("CLIENT_SECRET")
        if not c_id or not c_secret:
            # First run: the user must register an OAuth application and
            # provide its credentials interactively.
            self.project.log(
                "transaction", "No CLIENT_ID or CLIENT_SECRET. Asking for user input", "warning", True)
            IO.put("You must configure your account for OAUTH 2.0")
            IO.put("Please visit {}".format(self.config["OAUTH_DASHBOARD"]))
            IO.put("& Create an OAUTH 2 API Application")
            Common.launch_browser(self.config['OAUTH_DASHBOARD'])
            client_id = IO.get("{}:".format(self.config["CLIENT_ID_ALIAS"]))
            client_secret = IO.get("{}:".format(self.config["CLIENT_SECRET_ALIAS"]))
            self.project.save("CLIENT_ID", client_id)
            self.project.save("CLIENT_SECRET", client_secret)
            # NOTE(review): the client secret is written to the log in plaintext here.
            self.project.log(
                "transaction", "Received {} and {} from user ({}) ({})".format(
                    self.config['CLIENT_ID_ALIAS'], self.config['CLIENT_SECRET_ALIAS'],
                    client_id, client_secret), "info", True)
            self.get_access_token(client_id, client_secret)
        else:
            self.get_access_token(self.project.config['CLIENT_ID'], self.project.config['CLIENT_SECRET'])
    else:
        # A token is already on file: refresh instead of re-authorizing.
        self.refresh(self.project.config['CLIENT_ID'], self.project.config['CLIENT_SECRET'])
    self.project.save("OAUTH", self.oauth)
    self.project.log("transaction", "Authorization completed", "info", True)
def get_access_token(self, client_id, client_secret):
    """Run the interactive OAuth2 authorization-code flow and persist the token.

    Sends the user to the provider's consent page, accepts the pasted
    authorization code, exchanges it at the token endpoint, and saves the
    parsed OAUTH state on the project.
    """
    # Step 1: build the consent-page query for this provider.
    auth_query = {}
    if self.provider == "google":
        auth_query = {
            'redirect_uri': self.config['REDIRECT_URI'],
            'response_type': 'code',
            'client_id': client_id,
            'scope': self.project.config['OAUTH_SCOPE'],
            'approval_prompt': 'force',
            'access_type': 'offline',
        }
    elif self.provider == "dropbox":
        auth_query = {'response_type': 'code', 'client_id': client_id}
    Common.launch_browser(self.config['OAUTH_ENDPOINT'] + '?' + urllib.parse.urlencode(auth_query))
    # Step 2: the user pastes the one-time authorization code back in.
    code = IO.get("Authorization Code:")
    # Step 3: exchange the code for access/refresh tokens.
    token_query = {
        'code': code,
        'grant_type': 'authorization_code',
        'client_id': client_id,
        'client_secret': client_secret,
    }
    if self.provider == "google":
        token_query['scope'] = ''
        token_query['redirect_uri'] = self.config['REDIRECT_URI']
    response = Common.webrequest(
        self.config['TOKEN_ENDPOINT'],
        {'content-type': 'application/x-www-form-urlencoded;charset=utf-8'},
        self.http_intercept,
        urllib.parse.urlencode(token_query))
    self.parse_token(json.loads(response))
    self.project.save("OAUTH", self.oauth)
def authorize(self):
    """Ensure a usable OAuth2 token exists, prompting for app setup when needed.

    Refreshes an already-stored token; otherwise runs the full
    authorization-code flow, asking the user to create an OAuth
    application first when no CLIENT_ID/CLIENT_SECRET are configured.
    """
    self.project.log("transaction", "Initiating OAUTH2 Protocol with " + self.config['TOKEN_ENDPOINT'], "info", True)
    if self.oauth.get(self.key_to_the_kingdom):
        # A token is already on file — just refresh it.
        self.refresh(self.project.config['CLIENT_ID'], self.project.config['CLIENT_SECRET'])
    else:
        self.project.log("transaction", "No valid {} found...".format(self.key_to_the_kingdom), "warning", True)
        configured_id = self.project.config.get("CLIENT_ID")
        configured_secret = self.project.config.get("CLIENT_SECRET")
        if configured_id and configured_secret:
            # App credentials already configured: run the code flow with them.
            self.get_access_token(self.project.config['CLIENT_ID'], self.project.config['CLIENT_SECRET'])
        else:
            # No app credentials yet: walk the user through creating them.
            self.project.log("transaction", "No CLIENT_ID or CLIENT_SECRET. Asking for user input", "warning", True)
            IO.put("You must configure your account for OAUTH 2.0")
            IO.put("Please visit {}".format(self.config["OAUTH_DASHBOARD"]))
            IO.put("& Create an OAUTH 2 API Application")
            Common.launch_browser(self.config['OAUTH_DASHBOARD'])
            client_id = IO.get("{}:".format(self.config["CLIENT_ID_ALIAS"]))
            client_secret = IO.get("{}:".format(self.config["CLIENT_SECRET_ALIAS"]))
            self.project.save("CLIENT_ID", client_id)
            self.project.save("CLIENT_SECRET", client_secret)
            # NOTE(review): the client secret is logged in plaintext here.
            self.project.log("transaction", "Received {} and {} from user ({}) ({})".format(
                self.config['CLIENT_ID_ALIAS'], self.config['CLIENT_SECRET_ALIAS'],
                client_id, client_secret), "info", True)
            self.get_access_token(client_id, client_secret)
    self.project.save("OAUTH", self.oauth)
    self.project.log("transaction", "Authorization completed", "info", True)
def launch_browser(url):
    """Open *url* in the user's default web browser.

    Falls back to printing the URL for manual copy/paste when the
    browser either raises or reports failure.

    Args:
        url: The address to open.
    """
    IO.put("Attempting to launch {} in a browser.".format(url))
    try:
        # Fix: webbrowser.open returns False — without raising — when no
        # usable browser exists; the old bare `except:` missed that case
        # (and also swallowed KeyboardInterrupt/SystemExit).
        opened = webbrowser.open(url)
    except Exception:
        opened = False
    if not opened:
        IO.put("Please visit the following URL to continue: {}".format(url))
def sync(self):
    """Run a Gmail acquisition pass.

    Thread metadata is downloaded in every mode; "full" mode additionally
    pulls thread listings and raw mail content through two dedicated
    downloader queues, draining each queue in sequence.
    """
    d1 = datetime.now()
    # Bind the class itself as a placeholder so both attributes exist in
    # metadata-only mode; real instances are created only for "full" runs.
    self.d = Downloader.Downloader
    self.content_downloader = Downloader.Downloader
    # The metadata queue is always instantiated.
    self.meta_downloader = Downloader.Downloader(self.project, self.oauth_provider.http_intercept,
                                                 self._save_metadata, self.oauth_provider.get_auth_header,
                                                 self.project.threads)
    if self.project.args.mode == "full":
        self.project.log("transaction", "Full acquisition initiated", "info", True)
        self.d = Downloader.Downloader(self.project, self.oauth_provider.http_intercept,
                                       self._redirect_messages_to_save, self.oauth_provider.get_auth_header,
                                       self.project.threads)
        # NOTE(review): _redirect_messages_to_save presumably feeds the
        # content downloader below — confirm in that handler.
        self.content_downloader = Downloader.Downloader(self.project, self.oauth_provider.http_intercept,
                                                        self._save_raw_mail, self.oauth_provider.get_auth_header,
                                                        self.project.threads)
    else:
        self.project.log("transaction", "Metadata acquisition initiated", "info", True)
    self.initialize_items()
    cnt = len(self.threads)
    self.project.log(
        "transaction", "Total threads queued for acquisition: {}".format(cnt), "info", True)
    self.metadata()
    for thread in self.threads:
        self.project.log("transaction", 'Calculating "{}"'.format(thread['snippet']), "info", True)
        savepath = ""  # left empty; presumably filled in by the save callbacks — TODO confirm
        if self.project.args.mode == "full":
            download_uri = self.get_thread_uri(thread, "minimal")
            self.d.put(Downloader.DownloadSlip(download_uri, thread, savepath, 'id'))
        # Every thread gets a metadata slip regardless of mode.
        meta_uri = self.get_thread_uri(thread, "metadata")
        self.meta_downloader.put(Downloader.DownloadSlip(meta_uri, thread, savepath, 'id'))
    if self.project.args.mode == "full":
        # Drain the thread-listing queue first.
        self.d.start()
        self.d.wait_for_complete()
    self.project.log(
        "transaction", "Total size of mail to be acquired is {}".format(
            Common.sizeof_fmt(self.file_size_bytes, "B")), "highlight", True)
    self.mbox_dir = os.path.join(self.project.acquisition_dir, "mbox")
    os.makedirs(self.mbox_dir, exist_ok=True)
    if self.project.args.prompt:
        IO.get("Press ENTER to begin acquisition...")
    if self.project.args.mode == "full":
        self.content_downloader.start()
        self.content_downloader.wait_for_complete()
    self.meta_downloader.start()
    self.meta_downloader.wait_for_complete()
    d2 = datetime.now()
    delt = d2 - d1
    self.project.log("transaction", "Acquisition completed in {}".format(str(delt)), "highlight", True)
def metadata(self):
    """Write a CSV inventory of self.files into the project working dir.

    Produces FileList_<timestamp>.csv with one row per file.  Missing
    fields are emitted as the literal string 'None'; list-valued fields
    (parents, ownerNames) are comma-joined inside a single quoted cell.
    """
    self.project.log("transaction", "Generating metadata CSV File...", "info", True)
    if not self.files:
        self.initialize_items()
    fname = Common.timely_filename("FileList", ".csv")
    metadata_file = os.path.join(self.project.working_dir, fname)
    IO.put("Writing CSV File '{}'".format(metadata_file))
    # Fix: the original header ended with "ownerNames{}" — a leftover
    # str.format placeholder that was written verbatim into the CSV.
    columns = ("id,title,fileExtension,fileSize,createdDate,modifiedDate,modifiedByMeDate,md5Checksum,"
               "kind,version,parents,restricted,hidden,trashed,starred,viewed,markedViewedByMeDate,lastViewedByMeDate,"
               "lastModifyingUserName,writersCanShare,sharedWithMeDate,sharingUser,sharingUserEmail,ownerNames\n")

    def raw(item, key):
        # Value as-is, or 'None' when the key is absent.
        return item[key] if key in item else 'None'

    def quoted(item, key):
        # Double-quoted cell (the value may contain commas).
        return '"' + item[key] + '"' if key in item else 'None'

    def rep(item, key):
        # repr()'d cell, matching the original normalization.
        return repr(item[key]) if key in item else 'None'

    def label(item, key):
        # Label flags live under the nested 'labels' dict.
        return repr(item['labels'][key]) if 'labels' in item else 'None'

    def joined(values):
        # Comma-join a list inside one quoted cell; 'None' when empty.
        return '"' + ",".join(str(v) for v in values) + '"' if values else 'None'

    # Fix: 'with' guarantees the handle is closed even if a row raises.
    with open(metadata_file, "w") as f:
        f.write(columns)
        for i in self.files:
            row = [
                rep(i, 'id'),
                quoted(i, 'title'),
                rep(i, 'fileExtension'),
                raw(i, 'fileSize'),
                raw(i, 'createdDate'),
                raw(i, 'modifiedDate'),
                raw(i, 'modifiedByMeDate'),
                quoted(i, 'md5Checksum'),
                rep(i, 'kind'),
                raw(i, 'version'),
                joined([p['id'] for p in i.get('parents', [])]),
                label(i, 'restricted'),
                label(i, 'hidden'),
                label(i, 'trashed'),
                label(i, 'starred'),
                label(i, 'viewed'),
                raw(i, 'markedViewedByMeDate'),
                raw(i, 'lastViewedByMeDate'),
                quoted(i, 'lastModifyingUserName'),
                raw(i, 'writersCanShare'),
                raw(i, 'sharedWithMeDate'),
                'None' if 'sharingUser' not in i else '"' + i['sharingUser']['displayName'] + '"',
                'None' if 'sharingUser' not in i else '"' + i['sharingUser']['emailAddress'] + '"',
                joined(i.get('ownerNames', [])),
            ]
            f.write(",".join(str(r) for r in row) + '\n')
def sync(self):
    """Run a Google Drive acquisition pass.

    Walks self.files, mirroring each item's metadata (and, in "full"
    mode, its content) into the project folder tree; trashed items are
    diverted into trash/ and trash_metadata/.  Content downloads are
    skipped when an existing local copy matches the remote md5Checksum.
    """
    d1 = datetime.now()
    # Placeholder: only "full" mode constructs an actual downloader instance.
    d = Downloader.Downloader
    if self.project.args.mode == "full":
        self.project.log("transaction", "Full acquisition initiated", "info", True)
        d = Downloader.Downloader(self.project, self.oauth_provider.http_intercept,
                                  self._save_file, self.oauth_provider.get_auth_header,
                                  self.project.threads)
    else:
        self.project.log("transaction", "Metadata acquisition initiated", "info", True)
    self.initialize_items()
    cnt = len(self.files)
    self.project.log("transaction", "Total items queued for acquisition: " + str(cnt), "info", True)
    self.metadata()
    # Trashed items get their own mirror trees, kept apart from live data.
    trash_folder = os.path.join(self.project.acquisition_dir, "trash")
    trash_metadata_folder = os.path.join(self.project.acquisition_dir, "trash_metadata")
    for file in self.files:
        self.project.log("transaction", "Calculating " + file['title'], "info", True)
        download_uri = self._get_download_url(file)
        parentmap = self._get_parent_mapping(file, self.files)
        filetitle = self._get_file_name(file)
        if filetitle != file['title']:
            # The on-disk name was sanitized; record the mapping.
            self.project.log("exception", "Normalized '" + file['title'] + "' to '" + filetitle + "'", "warning", True)
        if file['labels']['trashed'] == True:
            save_download_path = os.path.normpath(os.path.join(trash_folder, parentmap, filetitle))
            save_metadata_path = os.path.normpath(os.path.join(trash_metadata_folder, parentmap, filetitle + '.json'))
        else:
            save_download_path = os.path.normpath(os.path.join(self.project.project_folders["data"], parentmap, filetitle))
            save_metadata_path = os.path.normpath(os.path.join(self.project.project_folders["metadata"], parentmap, filetitle + ".json"))
        save_download_path = Common.assert_path(save_download_path, self.project)
        save_metadata_path = Common.assert_path(save_metadata_path, self.project)
        if self.project.args.mode == "full" and save_download_path:
            v = {"remote_file": os.path.join(parentmap, file['title']),
                 "local_file": save_download_path}
            download_file = True
            if 'md5Checksum' in file:
                v['remote_hash'] = file['md5Checksum']
            if os.path.isfile(save_download_path):
                if 'md5Checksum' in file:
                    # Fix: the handle passed to hashfile used to be leaked;
                    # close it deterministically.
                    with open(save_download_path, 'rb') as local_copy:
                        file_hash = Common.hashfile(local_copy, hashlib.md5())
                    if file_hash == file['md5Checksum']:
                        download_file = False
                        self.project.log("exception", "Local and remote hash matches for " + file['title'] + " ... Skipping download", "warning", True)
                    else:
                        self.project.log("exception", "Local and remote hash differs for " + file['title'] + " ... Queuing for download", "critical", True)
                else:
                    self.project.log("exception", "No hash information for file ' " + file['title'] + "'", "warning", True)
            if download_file and download_uri:
                self.project.log("transaction", "Queueing " + file['title'] + " for download...", "info", True)
                d.put(Downloader.DownloadSlip(download_uri, file, save_download_path, 'title'))
                if 'fileSize' in file:
                    self.file_size_bytes += int(file['fileSize'])
            # Anything with a content URI participates in post-run verification.
            if download_uri:
                self.verification.append(v)
        if save_metadata_path:
            # Metadata JSON is written synchronously (not queued) in both modes.
            self._save_file(json.dumps(file, sort_keys=True, indent=4),
                            Downloader.DownloadSlip(download_uri, file, save_metadata_path, 'title'),
                            False)
    self.project.log("transaction", "Total size of files to be acquired is {}".format(
        Common.sizeof_fmt(self.file_size_bytes, "B")), "highlight", True)
    if self.project.args.prompt:
        IO.get("Press ENTER to begin acquisition...")
    # Fix: in metadata-only mode `d` is still the Downloader *class* (never
    # instantiated), so d.start()/d.wait_for_complete() raised TypeError.
    # Guard them with the same mode check the Gmail sync uses.
    if self.project.args.mode == "full":
        d.start()
        d.wait_for_complete()
    d2 = datetime.now()
    delt = d2 - d1
    self.verify()
    self.project.log("transaction", "Acquisition completed in {}".format(str(delt)), "highlight", True)
def metadata(self):
    """Write a CSV inventory of self.files into the project working dir.

    One row per file; absent fields become the literal string 'None',
    and list-valued fields (parents, ownerNames) are comma-joined inside
    one quoted cell.
    """
    self.project.log("transaction", "Generating metadata CSV File...", "info", True)
    if not self.files:
        self.initialize_items()
    fname = Common.timely_filename("FileList", ".csv")
    metadata_file = os.path.join(self.project.working_dir, fname)
    IO.put("Writing CSV File '{}'".format(metadata_file))
    f = open(metadata_file, "w")  # NOTE(review): not closed on exception — consider `with`
    # NOTE(review): the trailing "{}" after ownerNames looks like a leftover
    # str.format placeholder; it is written verbatim into the header row.
    columns = ("id,title,fileExtension,fileSize,createdDate,modifiedDate,modifiedByMeDate,md5Checksum,"
               "kind,version,parents,restricted,hidden,trashed,starred,viewed,markedViewedByMeDate,lastViewedByMeDate,"
               "lastModifyingUserName,writersCanShare,sharedWithMeDate,sharingUser,sharingUserEmail,ownerNames{}\n")
    f.write(columns)
    for i in self.files:
        row2 = []
        # Data normalization: 'None' when absent, quoted when commas may
        # appear in the value, repr()'d otherwise.
        row2.append('None' if 'id' not in i else repr(i['id']))
        row2.append('None' if 'title' not in i else '"' + i['title'] + '"')
        row2.append('None' if 'fileExtension' not in i else repr(i['fileExtension']))
        row2.append('None' if 'fileSize' not in i else i['fileSize'])
        row2.append('None' if 'createdDate' not in i else i['createdDate'])
        row2.append('None' if 'modifiedDate' not in i else i['modifiedDate'])
        row2.append('None' if 'modifiedByMeDate' not in i else i['modifiedByMeDate'])
        row2.append('None' if 'md5Checksum' not in i else '"' + i['md5Checksum'] + '"')
        row2.append('None' if 'kind' not in i else repr(i['kind']))
        row2.append('None' if 'version' not in i else i['version'])
        if 'parents' not in i or len(i['parents']) == 0:
            row2.append('None')
        else:
            # Comma-join all parent ids inside one quoted cell.
            parStr = '"'
            for p in i['parents']:
                parStr = parStr + str(p['id']) + ','
            parStr = parStr[:len(parStr) - 1]  # drop the trailing comma
            parStr = parStr + '"'
            row2.append(parStr)
        # Label flags live under the nested 'labels' dict.
        row2.append('None' if 'labels' not in i else repr(i['labels']['restricted']))
        row2.append('None' if 'labels' not in i else repr(i['labels']['hidden']))
        row2.append('None' if 'labels' not in i else repr(i['labels']['trashed']))
        row2.append('None' if 'labels' not in i else repr(i['labels']['starred']))
        row2.append('None' if 'labels' not in i else repr(i['labels']['viewed']))
        row2.append('None' if 'markedViewedByMeDate' not in i else i['markedViewedByMeDate'])
        row2.append('None' if 'lastViewedByMeDate' not in i else i['lastViewedByMeDate'])
        row2.append('None' if 'lastModifyingUserName' not in i else '"' + i['lastModifyingUserName'] + '"')
        row2.append('None' if 'writersCanShare' not in i else i['writersCanShare'])
        row2.append('None' if 'sharedWithMeDate' not in i else i['sharedWithMeDate'])
        row2.append('None' if 'sharingUser' not in i else '"' + i['sharingUser']['displayName'] + '"')
        row2.append('None' if 'sharingUser' not in i else '"' + i['sharingUser']['emailAddress'] + '"')
        if 'ownerNames' not in i or len(i['ownerNames']) == 0:
            row2.append('None')
        else:
            # Comma-join all owner names inside one quoted cell.
            ownStr = '"'
            for o in i['ownerNames']:
                ownStr = ownStr + str(o) + ','
            ownStr = ownStr[:len(ownStr) - 1]  # drop the trailing comma
            ownStr = ownStr + '"'
            row2.append(ownStr)
        # Assemble and emit the CSV row.
        rowStr = ""
        for r in row2:
            rowStr = rowStr + str(r) + ","
        rowStr = rowStr[:len(rowStr) - 1]
        f.write(rowStr + '\n')
    f.close()
def sync(self):
    """Run a Google Drive acquisition pass (metadata always, content in "full" mode)."""
    d1 = datetime.now()
    # Placeholder: bound to the class here; replaced by an instance only in "full" mode.
    d = Downloader.Downloader
    if self.project.args.mode == "full":
        self.project.log("transaction", "Full acquisition initiated", "info", True)
        d = Downloader.Downloader(self.project, self.oauth_provider.http_intercept,
                                  self._save_file, self.oauth_provider.get_auth_header,
                                  self.project.threads)
    else:
        self.project.log("transaction", "Metadata acquisition initiated", "info", True)
    self.initialize_items()
    cnt = len(self.files)
    self.project.log("transaction", "Total items queued for acquisition: " + str(cnt), "info", True)
    self.metadata()
    # Trashed items are mirrored into separate trash/ and trash_metadata/ trees.
    trash_folder = os.path.join(self.project.acquisition_dir, "trash")
    trash_metadata_folder = os.path.join(self.project.acquisition_dir, "trash_metadata")
    for file in self.files:
        self.project.log("transaction", "Calculating " + file['title'], "info", True)
        download_uri = self._get_download_url(file)
        parentmap = self._get_parent_mapping(file, self.files)
        filetitle = self._get_file_name(file)
        if filetitle != file['title']:
            # The on-disk name was sanitized; record the mapping.
            self.project.log("exception", "Normalized '" + file['title'] + "' to '" + filetitle + "'", "warning", True)
        if file['labels']['trashed'] == True:
            save_download_path = os.path.join(trash_folder, parentmap)
            save_metadata_path = os.path.join(trash_metadata_folder, parentmap)
            save_download_path = os.path.normpath(os.path.join(save_download_path, filetitle))
            save_metadata_path = os.path.normpath(os.path.join(save_metadata_path, filetitle + '.json'))
        else:
            save_download_path = os.path.normpath(os.path.join(os.path.join(self.project.project_folders["data"], parentmap), filetitle))
            save_metadata_path = os.path.normpath(os.path.join(os.path.join(self.project.project_folders["metadata"], parentmap), filetitle + ".json"))
        save_download_path = Common.assert_path(save_download_path, self.project)
        save_metadata_path = Common.assert_path(save_metadata_path, self.project)
        if self.project.args.mode == "full":
            if save_download_path:
                v = {"remote_file": os.path.join(parentmap, file['title']),
                     "local_file": save_download_path}
                download_file = True
                if 'md5Checksum' in file:
                    v['remote_hash'] = file['md5Checksum']
                if os.path.isfile(save_download_path):
                    if 'md5Checksum' in file:
                        # NOTE(review): this handle is never closed; wrap in `with`.
                        file_hash = Common.hashfile(open(save_download_path, 'rb'), hashlib.md5())
                        if file_hash == file['md5Checksum']:
                            # Local copy already matches the remote hash.
                            download_file = False
                            self.project.log("exception", "Local and remote hash matches for " + file['title'] + " ... Skipping download", "warning", True)
                        else:
                            self.project.log("exception", "Local and remote hash differs for " + file['title'] + " ... Queuing for download", "critical", True)
                    else:
                        self.project.log("exception", "No hash information for file ' " + file['title'] + "'", "warning", True)
                if download_file and download_uri:
                    self.project.log("transaction", "Queueing " + file['title'] + " for download...", "info", True)
                    d.put(Downloader.DownloadSlip(download_uri, file, save_download_path, 'title'))
                    if 'fileSize' in file:
                        self.file_size_bytes += int(file['fileSize'])
                # Anything with a content URI is recorded for post-run verification.
                if download_uri:
                    self.verification.append(v)
        if save_metadata_path:
            # Metadata JSON is written synchronously in both modes.
            self._save_file(json.dumps(file, sort_keys=True, indent=4),
                            Downloader.DownloadSlip(download_uri, file, save_metadata_path, 'title'),
                            False)
    self.project.log("transaction", "Total size of files to be acquired is {}".format(
        Common.sizeof_fmt(self.file_size_bytes, "B")), "highlight", True)
    if self.project.args.prompt:
        IO.get("Press ENTER to begin acquisition...")
    # NOTE(review): in metadata-only mode `d` is still the class object, so
    # these calls raise TypeError; the Gmail sync guards them with a mode
    # check — this one probably should too.
    d.start()
    d.wait_for_complete()
    d2 = datetime.now()
    delt = d2 - d1
    self.verify()
    self.project.log("transaction", "Acquisition completed in {}".format(str(delt)), "highlight", True)
def __init__(self, args):
    """Set up the project workspace: directory tree, config file and loggers.

    Args:
        args: Parsed CLI namespace; project_dir, service and threads are read.
    """
    working_dir = args.project_dir
    project_name = args.service
    threads = args.threads
    self.args = args
    self.name = project_name
    self.threads = threads
    self.working_dir = os.path.join(working_dir, self.name)
    self.acquisition_dir = os.path.join(self.working_dir, "acquisition")
    if os.path.exists(self.working_dir):
        IO.put("Resuming project in " + self.working_dir, "highlight")
    else:
        os.makedirs(self.working_dir, exist_ok=True)
        IO.put("Initializing project in " + self.working_dir, "highlight")
    # NOTE(review): self.project_folders is not created in this method, so it
    # is presumably a class-level dict (shared across instances) — confirm,
    # and consider making it per-instance.
    self.project_folders["data"] = os.path.join(self.acquisition_dir, "data")
    self.project_folders["logs"] = os.path.join(self.working_dir, "logs")
    self.project_folders["metadata"] = os.path.join(self.acquisition_dir, "metadata")
    self.config_file = os.path.join(self.working_dir, "config.cfg")
    # Renamed from `f`, which was later shadowed by the config file handle.
    for folder in self.project_folders:
        IO.put("{} path is {}".format(folder, self.project_folders[folder]))
        if not os.path.exists(self.project_folders[folder]):
            # Fix: the "{}" placeholder was never substituted — the message
            # used to print a literal "{}".
            IO.put("{} directory not found, creating from scratch.".format(folder), "warn")
            os.makedirs(self.project_folders[folder], exist_ok=True)
    IO.put("Config file is " + self.config_file)
    if not os.path.isfile(self.config_file):
        IO.put("Config file not found, creating default config file", "warn")
        with open(self.config_file, 'w') as cfg:
            cfg.write(DefaultConfigs.defaults)
    self.config = ConfigLoader.ConfigLoader()
    self.config.from_file(self.config_file)
    self.transaction_log = os.path.join(self.project_folders["logs"], "transaction.log")
    self.exception_log = os.path.join(self.project_folders["logs"], "exception.log")
    # Two separately-named loggers so exception and transaction streams can
    # be routed to their own files.
    self.transaction_logger = logging.getLogger(project_name + "_t")
    self.exception_logger = logging.getLogger(project_name + "_e")
    self.transaction_logger.setLevel(logging.INFO)  # was the magic number 20
    self.exception_logger.setLevel(logging.INFO)
    tfh = FileHandler(self.transaction_log)
    efh = FileHandler(self.exception_log)
    fmt = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fmt.converter = time.gmtime  # log timestamps in UTC
    tfh.setFormatter(fmt)
    efh.setFormatter(fmt)
    self.transaction_logger.addHandler(tfh)
    self.exception_logger.addHandler(efh)
def __init__(self, args):
    """Initialize the project workspace: directory tree, config file and loggers.

    Args:
        args: Parsed CLI namespace; project_dir, service and threads are read.
    """
    # Unpack the CLI arguments used repeatedly below.
    working_dir = args.project_dir
    project_name = args.service
    threads = args.threads
    self.args = args
    self.name = project_name
    self.threads = threads
    self.working_dir = os.path.join(working_dir, self.name)
    self.acquisition_dir = os.path.join(self.working_dir, "acquisition")
    if os.path.exists(self.working_dir):
        IO.put("Resuming project in " + self.working_dir, "highlight")
    else:
        os.makedirs(self.working_dir, exist_ok=True)
        IO.put("Initializing project in " + self.working_dir, "highlight")
    # NOTE(review): self.project_folders is never created in this method, so it
    # is presumably a class-level dict (shared across instances) — confirm.
    self.project_folders["data"] = os.path.join(self.acquisition_dir, "data")
    self.project_folders["logs"] = os.path.join(self.working_dir, "logs")
    self.project_folders["metadata"] = os.path.join(
        self.acquisition_dir, "metadata")
    #self.project_folders["trash"] = os.path.join(self.acquisition_dir, "trash")
    #self.project_folders["trash_metadata"] = os.path.join(self.acquisition_dir, "trash_metadata")
    self.config_file = os.path.join(self.working_dir, "config.cfg")
    for f in self.project_folders:
        IO.put("{} path is {}".format(f, self.project_folders[f]))
        if not os.path.exists(self.project_folders[f]):
            # NOTE(review): the "{}" placeholder is never .format()-ed, so this
            # prints a literal "{}"; also loop variable `f` is reused as a
            # file handle in the `with` block below.
            IO.put("{} directory not found, creating from scratch.", "warn")
            os.makedirs(self.project_folders[f], exist_ok=True)
    IO.put("Config file is " + self.config_file)
    if not os.path.isfile(self.config_file):
        IO.put("Config file not found, creating default config file", "warn")
        with open(self.config_file, 'w') as f:
            f.write(DefaultConfigs.defaults)
    self.config = ConfigLoader.ConfigLoader()
    self.config.from_file(self.config_file)
    self.transaction_log = os.path.join(self.project_folders["logs"], "transaction.log")
    self.exception_log = os.path.join(self.project_folders["logs"], "exception.log")
    # Two separately-named loggers so exception and transaction streams can
    # be routed to their own files.
    self.transaction_logger = logging.getLogger(project_name + "_t")
    self.exception_logger = logging.getLogger(project_name + "_e")
    self.transaction_logger.setLevel(20)  # 20 == logging.INFO
    self.exception_logger.setLevel(20)
    tfh = FileHandler(self.transaction_log)
    efh = FileHandler(self.exception_log)
    fmt = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fmt.converter = time.gmtime  # log timestamps in UTC
    tfh.setFormatter(fmt)
    efh.setFormatter(fmt)
    self.transaction_logger.addHandler(tfh)
    self.exception_logger.addHandler(efh)
def sync(self):
    """Run a Dropbox acquisition pass.

    Queues a metadata download for every non-directory entry and, in
    "full" mode, its content as well; then drains the queue and logs the
    elapsed time.
    """
    started = datetime.now()
    downloader = Downloader.Downloader(self.project, self.oauth_provider.http_intercept,
                                       self._save_file, self.oauth_provider.get_auth_header,
                                       self.project.threads)
    if self.project.args.mode == "full":
        self.project.log("transaction", "Full acquisition initiated", "info", True)
    else:
        self.project.log("transaction", "Metadata acquisition initiated", "info", True)
    self.initialize_items()
    self.project.log("transaction", "Total items queued for acquisition: " + str(len(self.files)), "info", True)
    self.metadata()
    for entry in self.files:
        self.project.log("transaction", "Calculating " + entry['path'], "info", True)
        # Skip directories (kept as an explicit != False comparison to
        # mirror the original's exact semantics).
        if entry['is_dir'] != False:
            continue
        # Bind the current entry via a default argument so each slip
        # resolves its own content URI (avoids late-binding closures).
        fetch_content = lambda f=entry: self._get_download_uri(f)
        meta_uri = self.oauth_provider.config['API_ENDPOINT'] + '/metadata/auto' + entry['path']
        parent_dir = self._get_parent_mapping(entry)
        safe_name = self._get_file_name(entry)
        original_name = os.path.basename(entry['path'])
        if safe_name != original_name:
            self.project.log("exception", "Normalized '{}' to '{}'".format(original_name, safe_name), "warning", True)
        if 'bytes' in entry:
            self.file_size_bytes += int(entry['bytes'])
        meta_target = Common.assert_path(
            os.path.normpath(os.path.join(self.project.project_folders['metadata'], parent_dir, safe_name + ".json")),
            self.project)
        if meta_target:
            self.project.log("transaction", "Queueing {} for download...".format(original_name), "info", True)
            downloader.put(Downloader.DownloadSlip(meta_uri, entry, meta_target, 'path'))
        if self.project.args.mode == "full":
            data_target = Common.assert_path(
                os.path.normpath(os.path.join(self.project.project_folders['data'], parent_dir, safe_name)),
                self.project)
            if data_target:
                self.project.log("transaction", "Queueing {} for download...".format(original_name), "info", True)
                downloader.put(Downloader.DownloadSlip(fetch_content, entry, data_target, 'path'))
    self.project.log("transaction", "Total size of files to be acquired is {}".format(
        Common.sizeof_fmt(self.file_size_bytes, "B")), "highlight", True)
    if self.project.args.prompt:
        IO.get("Press ENTER to begin acquisition...")
    # Block until every queued slip has been processed.
    downloader.start()
    downloader.wait_for_complete()
    self.project.log("transaction", "Acquisition completed in {}".format(str(datetime.now() - started)), "highlight", True)