def upload_data_to_kaggle(self):
    """Upload every configured output folder and create a Kaggle dataset.

    For each folder in ``self.config.output_folder`` the direct children are
    processed: regular files are uploaded as they are; sub-directories are
    archived into a temporary directory and the archive is uploaded, but only
    when ``self.kaggle_dirmode`` is ``'zip'`` or ``'tar'`` (otherwise they are
    skipped). The collected upload tokens are passed to
    ``self.prepare_dataset`` and the result is POSTed to
    ``/datasets/create/new``.

    Uploads that fail (``upload_file_to_kaggle`` returns ``None``) are left
    out of the dataset instead of being submitted with an empty token.
    """
    files = []
    for output_folder in self.config.output_folder:
        t.log_message('Uploading an output folder to the Kaggle: ' + output_folder)
        for item in os.listdir(output_folder):
            path = os.path.join(output_folder, item)
            token = None
            if os.path.isfile(path):
                token = self.upload_file_to_kaggle(path)
            elif os.path.isdir(path) and self.kaggle_dirmode in ('zip', 'tar'):
                # The archive lives in a throw-away directory that is removed
                # as soon as the upload attempt is over, even on error.
                with tempfile.TemporaryDirectory() as temp_dir:
                    dir_name = os.path.basename(path)
                    archive_path = shutil.make_archive(
                        os.path.join(temp_dir, dir_name),
                        self.kaggle_dirmode,
                        path)
                    token = self.upload_file_to_kaggle(archive_path)
            # upload_file_to_kaggle() reports failure with None; such entries
            # must not reach the dataset description.
            if token is not None:
                files.append({'token': token})
        t.log_message(output_folder + ' - uploaded.')
    dataset = self.prepare_dataset(files)
    self.kaggle_api_call(resource='/datasets/create/new',
                         method='POST',
                         body=dataset)
def upload_file_to_kaggle(self, file_path: str):
    """Upload a single file to Kaggle and return its upload token.

    The method first asks the Kaggle API for an upload slot (the request path
    carries the file size and its modification time as an integer timestamp),
    then PUTs the file content to the ``createUrl`` from the response.

    Args:
        file_path: Path of the file to upload.

    Returns:
        The ``token`` value from the Kaggle response when the PUT answered
        with status 200 or 201, otherwise ``None``. Any exception is logged
        and also results in ``None`` (best-effort upload).
    """
    file_token = None
    try:
        file_name = os.path.basename(file_path)
        content_length = os.path.getsize(file_path)
        last_modified_date_utc = int(os.path.getmtime(file_path))
        kaggle_response = self.kaggle_api_call(
            resource='/datasets/upload/file/' + str(content_length) + '/' +
            str(last_modified_date_utc),
            method='POST',
            post_params=[('fileName', file_name)])
        kaggle_data = json.loads(kaggle_response.data.decode('utf8'))
        create_url = kaggle_data['createUrl']

        retries = Retry(total=10, backoff_factor=0.5)
        adapter = HTTPAdapter(max_retries=retries)
        # Both the session and the file are context-managed so the connection
        # pool and the file handle are released even when the PUT fails.
        with requests.Session() as session, \
                io.open(file_path, 'rb', buffering=0) as fp:
            session.mount('http://', adapter)
            session.mount('https://', adapter)
            response = session.put(create_url, data=io.BufferedReader(fp))

        if response.status_code in (200, 201):
            file_token = kaggle_data['token']
        if file_token is None:
            t.log_message('Upload unsuccessful: ' + file_path)
    except Exception as error:
        # Deliberately broad: a failed upload must not abort the whole run.
        t.log_message('Upload failed: ' + file_path + '\n' + str(error))
    return file_token
def start_auth_telegram(self, client_config):
    """Authorise Google access, asking for the verification code via Telegram.

    Args:
        client_config: Path of the client configuration file handed to
            ``GoogleAuth.LoadClientConfigFile``.

    Returns:
        The ``GoogleAuth`` object after the authorisation steps have run, or
        ``None`` when ``self.telegram_bot`` is not set.
    """
    bot = self.telegram_bot
    if bot is None:
        t.log_message('telegram bot is None. Telegram auth canceled.')
        return None

    auth = GoogleAuth()
    auth.LoadClientConfigFile(client_config_file=client_config)
    if auth.flow is None:
        auth.GetFlow()
    auth.flow.redirect_uri = OOB_CALLBACK_URN

    # The link is sent up front; the code is only requested below if needed.
    invitation = (
        'Please go to the following link in your browser and send me a Google verification code. \nAuth url: '
        + auth.GetAuthUrl())
    bot.send_message(invitation)

    save_credentials = auth.settings.get('save_credentials')
    if auth.credentials is None and save_credentials:
        auth.LoadCredentials()

    code = None
    dirty = False
    if auth.credentials is None:
        # Nothing stored: a fresh verification code is required.
        code = bot.get_code()
        dirty = True
    elif auth.access_token_expired:
        if auth.credentials.refresh_token is None:
            code = bot.get_code()
        else:
            auth.Refresh()
        # Credentials changed either way, so they are saved again below.
        dirty = True

    if code is not None:
        auth.Auth(code)
    if dirty and save_credentials:
        auth.SaveCredentials()
    return auth
def run(self, run_file_list: list):
    """Execute each script of *run_file_list* with ``python``, one by one.

    Missing files are logged and skipped. Every script is started without a
    shell and with the path passed as a single argument, so paths containing
    spaces or shell metacharacters are executed literally.

    Args:
        run_file_list: Paths of the Python scripts to execute, in order.
    """
    import subprocess

    for item in run_file_list:
        if not os.path.exists(item):
            t.log_message('ERROR. File not found: ' + item)
            continue
        t.log_message("Executing file: " + item)
        try:
            # check=False: like os.system(), a failing script does not stop
            # the remaining ones from running.
            subprocess.run(['python', item], check=False)
        except OSError as error:
            # Raised when the interpreter itself cannot be started.
            t.log_message('ERROR. Could not execute: ' + item + '\n' + str(error))
def send_output_to_gdrive(self, output_folders: list, drive_folders: list):
    """Upload every output folder into every given Google Drive folder.

    The upload is cancelled (with a log message) when ``self.gauth`` is
    ``None`` while ``self.config.gdrive_folders`` is set.

    Args:
        output_folders: Local folders to upload.
        drive_folders: Drive folders that each receive all output folders.
    """
    not_authorised = self.gauth is None
    if not_authorised and self.config.gdrive_folders is not None:
        t.log_message('GoogleDrive is unauthorised. Upload canceled.')
        return None

    drive = GoogleDrive(self.gauth)
    t.log_message('Uploading an output folders to the Google Drive')
    # Drive folders form the outer loop: one destination is filled completely
    # before the next one starts.
    for destination in drive_folders:
        for source in output_folders:
            self.upload_to_drive(source, destination, drive)
def download_list(self, url_list: list):
    """Download every URL of *url_list* and optionally unpack archives.

    A download is unpacked with ``shutil.unpack_archive`` (into the current
    working directory) when ``self.config.extract_archives`` is truthy and
    the downloaded file name ends with a suffix that ``shutil`` can unpack
    (``.zip``, ``.tar``, ``.tar.gz``, ``.tgz``, ...). Failures are logged per
    URL and do not stop the remaining downloads.

    Args:
        url_list: URLs to download, in order.
    """
    # Real file suffixes known to shutil. The archive *format names*
    # ('gztar', 'bztar', ...) are not file extensions and would miss
    # compressed tarballs entirely.
    archive_suffixes = tuple(
        suffix
        for _, suffixes, _ in shutil.get_unpack_formats()
        for suffix in suffixes
    )
    for item in url_list:
        t.log_message('Downloading: ' + item)
        try:
            download = Download(item, retries=5)
            download.download()
            path = os.path.abspath(download.download_path)
            if self.config.extract_archives and path.endswith(archive_suffixes):
                shutil.unpack_archive(path)
        except Exception as e:
            # Deliberately broad: one broken download must not abort the rest.
            t.log_message("ERROR. Download: " + item + ' FAILED.\n' + str(e))
def build_workspace(self):
    """Run all configured workspace steps in order.

    Does nothing when ``self.config`` is ``None``. Otherwise it obtains the
    Google authorisation and then, for every configured section: downloads
    the datasets, clones the repositories, runs the script files, uploads the
    output to Google Drive and uploads the output to Kaggle. It finally logs
    completion and notifies the Telegram bot when one is set.
    """
    if self.config is None:
        return

    self.gauth = self.get_gauth()

    if self.config.dataset_list is not None:
        self.download_list(self.config.dataset_list)
    if self.config.repos is not None:
        self.clone_repos(self.config.repos)
    if self.config.script_files is not None:
        self.run(self.config.script_files)
    if self.config.gdrive_folders is not None:
        self.send_output_to_gdrive(self.config.output_folder,
                                   self.config.gdrive_folders)
    if self.config.kaggle is not None:
        # The directory mode must be applied BEFORE the upload, otherwise
        # the configured value has no effect on this run.
        if 'dirmode' in self.config.kaggle:
            self.kaggle_dirmode = self.config.kaggle['dirmode']
        self.upload_data_to_kaggle()

    t.log_message('Done.')
    if self.telegram_bot is not None:
        self.telegram_bot.send_message('Workspace build done.')
def get_gauth(self):
    """Return an authorised ``GoogleAuth`` object, or ``None``.

    Resolution order:

    1. If a ``drive_credentials`` file exists next to this module, load the
       client configuration and those credentials and return the result.
    2. Otherwise (or if loading failed), and only when
       ``self.config.gdrive_folders`` is set, run a new authorisation: via
       Telegram when both ``self.config.telegram_channels`` and
       ``self.telegram_bot`` are available, else via the command line. The
       new credentials are saved to the ``drive_credentials`` file.

    Returns:
        The ``GoogleAuth`` object, or ``None`` when Google Drive is not
        configured or the authorisation failed (the error is logged).
    """
    package_path = os.path.dirname(__file__)
    client_config = os.path.join(package_path, 'client_secrets.json')
    credentials_file = os.path.join(package_path, 'drive_credentials')

    if os.path.exists(credentials_file):
        try:
            gauth = GoogleAuth()
            gauth.LoadClientConfigFile(client_config_file=client_config)
            gauth.LoadCredentialsFile(credentials_file=credentials_file)
            return gauth
        except Exception as e:
            # Stored credentials unusable: fall through to a fresh auth.
            t.log_message(str(e))

    # No Drive folders configured means nothing needs authorisation. Without
    # this guard a missing `telegram_channels` alone would start the
    # interactive command line flow.
    if self.config.gdrive_folders is None:
        return None

    use_telegram = (self.config.telegram_channels is not None
                    and self.telegram_bot is not None)
    try:
        if use_telegram:
            gauth = self.start_auth_telegram(client_config=client_config)
        else:
            gauth = GoogleAuth()
            gauth.LoadClientConfigFile(client_config_file=client_config)
            gauth.CommandLineAuth()
        gauth.SaveCredentialsFile(credentials_file=credentials_file)
        return gauth
    except Exception as e:
        t.log_message(str(e))
        return None
def clone_repos(self, repos: dict):
    """Clone every repository described in *repos* into its own folder.

    Args:
        repos: Maps a target folder name to a description that provides
            ``'url'`` and optionally ``'branch'``. An existing target folder
            is removed before cloning; an entry without ``'url'`` is logged
            as an error and skipped.
    """
    for target_dir, spec in repos.items():
        if 'url' not in spec:
            t.log_message('ERROR. URL not found for a repo: ' + target_dir)
            continue

        source_url: str = spec['url']
        wanted_branch = spec['branch'] if 'branch' in spec else None

        # Always start from a clean checkout.
        if os.path.exists(target_dir):
            shutil.rmtree(target_dir)

        clone_args = {'url': source_url, 'to_path': target_dir}
        if wanted_branch is None:
            t.log_message('Cloning repo: ' + source_url + ', to the folder: ' +
                          target_dir)
        else:
            t.log_message('Cloning repo: ' + source_url + ', branch: ' +
                          wanted_branch + ', to the folder: ' + target_dir)
            clone_args['branch'] = wanted_branch
        Repo.clone_from(**clone_args)