def Download_the_file(Id: str, Mod_time: str, name: str, mimetype: str):
    # Here we change names and such, so that they are of the correct format for
    # a file name. slugify is great at this.
    name_no_spaces = slugify(name)
    Mod_time_no_weird_symbols = sub(r':|\.', '_', Mod_time)

    # Here we establish which format each file should be saved to disk in,
    # based on its mimetype.
    if mimetype == 'application/vnd.google-apps.spreadsheet':
        mimetype_to_export_file_as = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    elif mimetype == 'application/vnd.google-apps.document':
        mimetype_to_export_file_as = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    elif mimetype == 'application/vnd.google-apps.presentation':
        mimetype_to_export_file_as = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
    else:
        mimetype_to_export_file_as = 'text/plain'

    # I am not sure why this workflow looks the way it does. Future me - you're
    # on your own and you know as much as I do.
    request = DRIVE.files().export(fileId=Id, mimeType=mimetype_to_export_file_as)
    file_name = f'{name_no_spaces}_{Mod_time_no_weird_symbols}.goo'
    print(f'Trying to download: {file_name}')
    fh = FileIO(file_name, 'wb')
    downloader = http.MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print(f'Downloading: {file_name}')
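# Editorial usage sketch (hypothetical, not from the original source): the
# function above assumes a module-level DRIVE service built elsewhere with
# googleapiclient.discovery.build('drive', 'v3', ...). A caller might feed it
# metadata returned by a Drive v3 files().list() call:
#
#     for f in DRIVE.files().list(
#             fields='files(id, name, mimeType, modifiedTime)'
#         ).execute().get('files', []):
#         Download_the_file(f['id'], f['modifiedTime'], f['name'], f['mimeType'])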
def download_file_from_cloud(self, file_id, path):
    """Download a Drive file's content to the local filesystem.

    :param file_id: ID of the Drive file that will be downloaded.
    :type file_id: str
    :param path: where the file is written
    :type path: str
    :return: True if the download succeeded, False otherwise
    """
    self.connect()
    if self.internet_on():
        local_fd = open(path + "commands.csv", "wb")
        request = self.drive.auth.service.files().get_media(fileId=file_id)
        media_request = http.MediaIoBaseDownload(local_fd, request)
        while True:
            try:
                download_progress, done = media_request.next_chunk()
            except errors.HttpError as error:
                print('An error occurred: %s' % error)
                return False
            if download_progress:
                print('Download Progress: %d%%' % int(download_progress.progress() * 100))
            if done:
                print('Download Complete')
                return True
    else:
        return False
def __getreportdataraw(self, report_id):
    request = self.__api.reports().get(reportId=report_id)
    result = APIRequest(request).execute()
    et = 0
    retry_attempts = 0
    max_wait_time = 500
    while True:
        if result['isReportReady']:
            request = self.__api.reports().getFile(reportId=report_id,
                                                   reportFragment=0)
            data = StringIO()
            downloader = httpMediaHandler.MediaIoBaseDownload(
                data, request, chunksize=2**20 * 20)  # 20Mb chunks
            done = False
            while not done:
                unused_status, done = downloader.next_chunk()
            data.seek(0)
            return data
        wait_time = min(max_wait_time, 2**retry_attempts)
        retry_attempts += 1
        time.sleep(wait_time)
        et += wait_time
        if et >= DSConnector._DS_TIMEOUT:
            raise DSAPITimeOut('DS API Request Timeout (files.get())')
        request = self.__api.reports().get(reportId=report_id)
        result = APIRequest(request).execute()
def direct_download_file(service, report_id, file_id):
    """Downloads a report file to disk."""
    # Retrieve the file metadata.
    report_file = service.files().get(
        reportId=report_id, fileId=file_id).execute()

    if report_file['status'] == 'REPORT_AVAILABLE':
        # Prepare a local file to download the report contents to.
        out_file = io.FileIO(generate_file_name(report_file), mode='wb')

        # Create a get request.
        request = service.files().get_media(reportId=report_id, fileId=file_id)

        # Create a media downloader instance.
        # Optional: adjust the chunk size used when downloading the file.
        downloader = http.MediaIoBaseDownload(out_file, request,
                                              chunksize=CHUNK_SIZE)

        # Execute the get request and download the file.
        download_finished = False
        while not download_finished:
            _, download_finished = downloader.next_chunk()

        print('File %s downloaded to %s' % (report_file['id'],
                                            os.path.realpath(out_file.name)))
def download_file(self):
    CHUNK_SIZE = 32 * 1024 * 1024
    request = self.service.files().get(reportId=self.report_id,
                                       fileId=self.file['id'])
    report_file = request.execute()
    file_name = report_file['fileName'] or report_file['id']
    if report_file['format'] == 'CSV':
        extension = '.csv'
    else:
        extension = '.xml'
    file_name = file_name + extension
    if report_file['status'] == 'REPORT_AVAILABLE':
        out_file = FileIO(file_name, mode='wb')
        request = self.service.files().get_media(reportId=self.report_id,
                                                 fileId=self.file['id'])
        downloader = http.MediaIoBaseDownload(out_file, request,
                                              chunksize=CHUNK_SIZE)
        download_finished = False
        while not download_finished:
            _, download_finished = downloader.next_chunk()
    self.file_name = file_name
    return file_name
def __getreportdataraw(self, report_id, file_id, chunk_size):
    request = self.__api.files().get(reportId=report_id, fileId=file_id)
    result = APIRequest(request).execute()
    et = 0
    retry_attempts = 0
    max_wait_time = 500
    while True:
        if result['status'] == 'REPORT_AVAILABLE':
            request = self.__api.files().get_media(reportId=report_id,
                                                   fileId=file_id)
            data = StringIO()
            downloader = httpMediaHandler.MediaIoBaseDownload(
                data, request, chunksize=chunk_size)
            done = False
            while not done:
                unused_status, done = downloader.next_chunk(num_retries=4)
            data.seek(0)
            return data
        wait_time = min(max_wait_time, 2**retry_attempts)
        retry_attempts += 1
        time.sleep(wait_time)
        et += wait_time
        if et >= DCMConnector._DCM_TIMEOUT:
            raise DCMAPITimeOut('DCM API Request Timeout (files.get())')
        request = self.__api.files().get(reportId=report_id, fileId=file_id)
        result = APIRequest(request).execute()
def format_doc(file_id):
    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run.
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    # Open services for the APIs.
    drive_service = build('drive', 'v3', credentials=creds)
    docs_service = build('docs', 'v1', credentials=creds)

    request = drive_service.files().export_media(fileId=file_id,
                                                 mimeType='text/plain')
    fh = io.BytesIO()
    downloader = http.MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))

    # Convert the downloaded bytes to a string.
    return fh.getvalue().decode("UTF-8")
def main(argv):
    if len(argv) > 1:
        raise app.UsageError(f'Unused command line arguments: {argv[1:]}')
    if FLAGS.file_id is None:
        raise app.UsageError('Please specify --file_id.')
    if FLAGS.output_file is None:
        raise app.UsageError('Please specify --output_file.')

    SCOPES = 'https://www.googleapis.com/auth/drive.readonly'
    store = file.Storage(FLAGS.auth_cache_json_file)
    creds = store.get()
    if not creds or creds.invalid:
        if not FLAGS.client_id_json_file:
            raise app.UsageError('Please set --client_id_json_file.')
        flow = client.flow_from_clientsecrets(FLAGS.client_id_json_file,
                                              SCOPES, cache=None)
        creds = tools.run_flow(flow, store,
                               flags=tools.argparser.parse_args(args=[]))
    DRIVE = discovery.build('drive', 'v3', http=creds.authorize(Http()))

    request = DRIVE.files().get_media(fileId=FLAGS.file_id)
    fh = io.BytesIO()
    downloader = http.MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        logging.info("Downloaded %d%%." % int(status.progress() * 100))

    with open(FLAGS.output_file, 'wb') as f:
        f.write(fh.getvalue())
def _download_io(self, src_uri, io_obj):
    bucket_name, object_name = parse_gcs_uri(src_uri)
    # Chunked file download
    req = self.api_client.objects().get_media(bucket=bucket_name,
                                              object=object_name)
    downloader = google_http.MediaIoBaseDownload(io_obj, req)
    done = False
    while not done:
        try:
            status, done = downloader.next_chunk()
        except google_errors.HttpError as e:
            # Error code 416 (request range not satisfiable)
            # implies we're trying to download a file of size 0
            if e.resp.status == 416:
                break
            raise
        if status:
            log.debug("Download %d%%." % int(status.progress() * 100))
    log.debug("Download Complete for %s", src_uri)
    return io_obj
def download(self, bucket, object_name):
    """Download an object from a bucket.

    Args:
        bucket (str): The name of the bucket to read from.
        object_name (str): The name of the object to read.

    Returns:
        str: The contents of the object.
    """
    verb_arguments = {
        'bucket': bucket,
        'object': object_name}
    media_request = self._build_request('get_media', verb_arguments)
    media_request.http = self.http

    file_content = ''
    out_stream = StringIO.StringIO()
    try:
        downloader = http.MediaIoBaseDownload(out_stream, media_request)
        done = False
        while not done:
            _, done = downloader.next_chunk(num_retries=self._num_retries)
        file_content = out_stream.getvalue()
    finally:
        out_stream.close()
    return file_content
def download_file(self, file_id):
    """Download a Drive file's content to the local filesystem.

    The file is written to the current directory under the name reported
    by the Drive API.

    Args:
        file_id: ID of the Drive file that will be downloaded.
    """
    file = self.service.files().get(fileId=file_id).execute()
    file_name = file['name']
    print('Name:', file_name)
    # print('MIME type:', file['mimeType'])
    local_fd = open(file_name, "wb")
    request = self.service.files().get_media(fileId=file_id)
    media_request = http.MediaIoBaseDownload(local_fd, request)
    while True:
        try:
            download_progress, done = media_request.next_chunk()
        except errors.HttpError as error:
            print('An error occurred:', error)
            return
        if download_progress:
            print('Download Progress:', int(download_progress.progress() * 100))
        if done:
            print('Download Complete')
            local_fd.close()
            return
def get_text_file(self, full_bucket_path):
    """Gets a text file object as a string.

    Args:
        full_bucket_path (str): The full path of the bucket object.

    Returns:
        str: The object's content as a string.

    Raises:
        HttpError: HttpError is raised if the call to the
            GCP storage API fails.
    """
    file_content = ''
    storage_service = self.service
    bucket, object_path = get_bucket_and_path_from(full_bucket_path)
    media_request = (storage_service.objects().get_media(
        bucket=bucket, object=object_path))
    out_stream = StringIO.StringIO()
    try:
        downloader = http.MediaIoBaseDownload(out_stream, media_request)
        done = False
        while not done:
            _, done = downloader.next_chunk()
        file_content = out_stream.getvalue()
        out_stream.close()
    except errors.HttpError as http_error:
        LOGGER.error('Unable to download file: %s', http_error)
        raise http_error
    return file_content
def export(self, spreadsheet_id, fformat):
    import io
    request = self.driveService.files().export(
        fileId=spreadsheet_id, mimeType=fformat.value.split(':')[0])
    fh = io.FileIO(spreadsheet_id + fformat.value.split(':')[1], 'wb')
    downloader = ghttp.MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
def _open_read(
    self,
    remote_path,
    local_path=None,
    delete_file_on_close=True,
    chunksize=None,
    chunk_callback=lambda _: False,
):
    """Downloads the object contents to local file system.

    Optionally stops after the first chunk for which chunk_callback
    returns True.
    """
    chunksize = chunksize or self.chunksize
    bucket, obj = self._path_to_bucket_and_key(remote_path)

    tmp_file_path = local_path or get_local_tempfile(os.path.basename(remote_path))
    with open(tmp_file_path, "wb") as fp:
        # We can't return the tempfile reference because of a bug in python:
        # http://bugs.python.org/issue18879
        if delete_file_on_close:
            return_fp = _DeleteOnCloseFile(tmp_file_path, "r")
        else:
            return_fp = fp

        # Special case empty files because chunk-based downloading doesn't work.
        result = self.client.objects().get(bucket=bucket, object=obj).execute()
        if int(result["size"]) == 0:
            return return_fp

        request = self.client.objects().get_media(bucket=bucket, object=obj)
        downloader = http.MediaIoBaseDownload(fp, request, chunksize=chunksize)

        attempts = 0
        done = False
        while not done:
            error = None
            try:
                _, done = downloader.next_chunk()
                if chunk_callback(fp):
                    done = True
            except errors.HttpError as err:
                error = err
                if err.resp.status < 500:
                    raise
                logger.warning("Error downloading file, retrying", exc_info=True)
            except RETRYABLE_ERRORS as err:
                logger.warning("Error downloading file, retrying", exc_info=True)
                error = err

            if error:
                attempts += 1
                if attempts >= NUM_RETRIES:
                    raise error
            else:
                attempts = 0

    return return_fp
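# Editorial usage sketch (hypothetical, not from the original source): the
# chunk_callback hook above lets a caller stop the download early, e.g. to
# fetch only the first chunk of a large object and inspect its header:
#
#     fp = target._open_read(
#         "gs://my-bucket/big.csv",
#         chunk_callback=lambda f: f.tell() > 0,  # True once the first chunk landed
#     )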
async def download(self, buf):
    util = getUtility(IGCloudBlobStore)
    if not hasattr(self, '_uri'):
        url = self._upload_file_id
    else:
        url = self._uri
    req = util._service.objects().get_media(bucket=util.bucket, object=url)
    downloader = http.MediaIoBaseDownload(buf, req, chunksize=CHUNK_SIZE)
    return downloader
def _do_download(self, file_id, file_obj):
    request = self._service.files().get_media(fileId=file_id,
                                              supportsTeamDrives=True)
    downloader = gah.MediaIoBaseDownload(file_obj, request,
                                         chunksize=self.chunk_size)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        logger.info("Progress = %d%%" % int(status.progress() * 100))
def get_object(self, bucket, filename, out_file):
    from googleapiclient import http
    service = self.create_service()
    req = service.objects().get_media(bucket=bucket, object=filename)
    downloader = http.MediaIoBaseDownload(out_file, req)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        logging.info("file {} Download {}%.".format(
            filename, int(status.progress() * 100)))
    return out_file
def downloadById(fileID, outputName):
    # Handles all chunking and downloads.
    chunkable_bytes_python_object = io.BytesIO()
    unexecuted_request = DRIVE.files().get_media(fileId=fileID)
    downloader = http.MediaIoBaseDownload(chunkable_bytes_python_object,
                                          unexecuted_request)
    print("Downloading: " + outputName)
    while True:
        state, completion_flag = downloader.next_chunk()
        print(f"{outputName}:{state.progress() * 100}% Completed")
        if completion_flag:
            break
    with open(outputName, "wb") as f:
        f.write(chunkable_bytes_python_object.getbuffer())
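# Editorial note (not from the original source): downloadById buffers the whole
# file in memory before writing it out, so peak memory use grows with file size.
# A hypothetical streaming variant, under the same assumption that DRIVE is an
# authorized Drive v3 service, writes each chunk straight to disk instead:
def downloadById_streaming(fileID, outputName):
    request = DRIVE.files().get_media(fileId=fileID)
    with open(outputName, "wb") as f:
        downloader = http.MediaIoBaseDownload(f, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print(f"{outputName}: {status.progress() * 100:.0f}% Completed")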
def download_file(file_id: str, destination: Path, service):
    # From the Google Drive docs, lightly edited.
    # Only for images -- docs and sheets require a different treatment.
    print(f"Downloading {file_id} to {destination}")
    destination.parent.mkdir(exist_ok=True, parents=True)
    request = service.files().get_media(fileId=file_id)
    fh = destination.open(mode="wb")
    downloader = http.MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
def get_object(self, filename, out_file):
    service = self.__create_service()

    # Use get_media instead of get to get the actual contents of the object.
    # http://g.co/dev/resources/api-libraries/documentation/storage/v1/python/latest/storage_v1.objects.html#get_media
    req = service.objects().get_media(bucket=self.BUCKET_NAME, object=filename)

    downloader = http.MediaIoBaseDownload(out_file, req)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Download {}%.".format(int(status.progress() * 100)))
    return out_file
def export(self, spreadsheet_id, fformat, filename=None):
    import io
    fformat = getattr(fformat, 'value', fformat)
    request = self.driveService.files().export(
        fileId=spreadsheet_id, mimeType=fformat.split(':')[0])
    ifilename = (spreadsheet_id + fformat.split(':')[1]
                 if filename is None else filename)
    fh = io.FileIO(ifilename, 'wb')
    downloader = ghttp.MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
def download_large_object(bucket, gckey, localfile):
    service = create_service()
    req = service.objects().get_media(bucket=bucket, object=gckey)
    with open(localfile, 'wb') as f:
        downloader = http.MediaIoBaseDownload(f, req, chunksize=1024 * 1024)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            if status:
                print "Download %d%%." % int(status.progress() * 100)
        print "Download Complete!"
def _download_file(service: object, file_id: str, file_name: str,
                   download_path: str):
    """Download files to the specified path."""
    request = service.files().get_media(fileId=file_id)
    fh = io.BytesIO()
    downloader = http.MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
    with open(f'{download_path}/{file_name}', 'wb') as f:
        f.write(fh.getvalue())
def open(self, path, delay=False):
    request = self.service.files().get_media(
        fileId=self.items[path[1:]][0])
    fh = io.BytesIO()
    try:
        downloader = http.MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
    except Exception:
        # Swallow download errors and return whatever has been fetched so far.
        pass
    return fh
def process_file(service, fieldmap, report_config, file_id, report_time):
    report_id = report_config['report_id']
    stream_name = report_config['stream_name']
    stream_alias = report_config['stream_alias']

    request = service.files().get_media(reportId=report_id, fileId=file_id)

    line_state = {
        'headers_line': False,
        'past_headers': False,
        'count': 0
    }
    report_id_int = int(report_id)

    def line_transform(line):
        if (not line_state['past_headers'] and not line_state['headers_line']
                and line == 'Report Fields'):
            line_state['headers_line'] = True
            return
        if line_state['headers_line']:
            line_state['headers_line'] = False
            line_state['past_headers'] = True
            return
        if line_state['past_headers']:
            row = parse_line(line)
            # Skip the report grand total line.
            if row[0] == 'Grand Total:':
                return
            obj = {}
            for i in range(len(fieldmap)):
                field = fieldmap[i]
                obj[field['name']] = transform_field(field['type'], row[i])
            obj[SINGER_REPORT_FIELD] = report_time
            obj[REPORT_ID_FIELD] = report_id_int
            singer.write_record(stream_name, obj, stream_alias=stream_alias)
            line_state['count'] += 1

    stream = StreamFunc(line_transform)
    downloader = http.MediaIoBaseDownload(stream, request, chunksize=CHUNK_SIZE)
    download_finished = False
    while not download_finished:
        _, download_finished = downloader.next_chunk()

    with singer.metrics.record_counter(stream_name) as counter:
        counter.increment(line_state['count'])
def download(self, path, chunksize=None, chunk_callback=lambda _: False):
    """Downloads the object contents to local file system.

    Optionally stops after the first chunk for which chunk_callback
    returns True.
    """
    chunksize = chunksize or self.chunksize
    bucket, obj = self._path_to_bucket_and_key(path)

    with tempfile.NamedTemporaryFile(delete=False) as fp:
        # We can't return the tempfile reference because of a bug in python:
        # http://bugs.python.org/issue18879
        return_fp = _DeleteOnCloseFile(fp.name, 'r')

        # Special case empty files because chunk-based downloading doesn't work.
        result = self.client.objects().get(bucket=bucket, object=obj).execute()
        if int(result['size']) == 0:
            return return_fp

        request = self.client.objects().get_media(bucket=bucket, object=obj)
        downloader = http.MediaIoBaseDownload(fp, request, chunksize=chunksize)

        attempts = 0
        done = False
        while not done:
            error = None
            try:
                _, done = downloader.next_chunk()
                if chunk_callback(fp):
                    done = True
            except errors.HttpError as err:
                error = err
                if err.resp.status < 500:
                    raise
                logger.warning('Error downloading file, retrying', exc_info=True)
            except RETRYABLE_ERRORS as err:
                logger.warning('Error downloading file, retrying', exc_info=True)
                error = err

            if error:
                attempts += 1
                if attempts >= NUM_RETRIES:
                    raise error
            else:
                attempts = 0

    return return_fp
def get_object(self):
    print "FUNCTION: get_object"
    Logger.log_writer("FUNCTION: get_object")
    req = self.gcs_client.objects().get_media(bucket=self.bucket,
                                              object=self.filename)
    out_file = io.BytesIO()
    downloader = http.MediaIoBaseDownload(out_file, req)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Download {}%.".format(int(status.progress() * 100)))
    coordinates = out_file.getvalue().split(",")[:2]
    out_file.close()
    return coordinates
def read_data_chunk(self, report_data: dict, chunk: int = 16384) -> bytes:
    report_id = report_data['id']
    file_id = report_data['report_file']['id']
    request = self.service().files().get_media(reportId=report_id,
                                               fileId=file_id)

    # Create a media downloader instance and fetch only the first chunk
    # (of at most `chunk` bytes) of the report file.
    out_file = io.BytesIO()
    downloader = http.MediaIoBaseDownload(out_file, request, chunksize=chunk)
    downloader.next_chunk()
    return out_file.getvalue()
def load_all(self):
    latest_file = self._get_latest_file()
    service = self.get_service()
    req = service.objects().get_media(
        bucket=SupportedExchange.GS_DBM_BUCKET,
        object=latest_file['name'])
    out_file = StringIO()
    downloader = http.MediaIoBaseDownload(out_file, req)
    done = False
    while not done:
        status, done = downloader.next_chunk()
    return json.loads(out_file.getvalue())
def get_object(service, bucket, filename, out_file):
    try:
        req = service.objects().get_media(bucket=bucket, object=filename)
        downloader = http.MediaIoBaseDownload(out_file, req)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print "Status: " + str(status) + ", Download {}%.".format(
                int(status.progress() * 100))
        return out_file
    except Exception as exc:
        print str(exc)