def test_media_io_base_download(self):
    # Verify MediaIoBaseDownload fetches a 5-byte payload in two 3-byte
    # chunks and tracks progress/total-size/done state correctly.
    self.request.http = HttpMockSequence([
        ({'status': '200', 'content-range': '0-2/5'}, '123'),
        ({'status': '200', 'content-range': '3-4/5'}, '45'),
    ])
    self.assertEqual(True, self.request.http.follow_redirects)
    download = MediaIoBaseDownload(
        fd=self.fd, request=self.request, chunksize=3)
    # Constructor wires fd/chunksize/uri and starts with no progress.
    self.assertEqual(self.fd, download._fd)
    self.assertEqual(3, download._chunksize)
    self.assertEqual(0, download._progress)
    self.assertEqual(None, download._total_size)
    self.assertEqual(False, download._done)
    self.assertEqual(self.request.uri, download._uri)
    # First chunk: 3 of 5 bytes, not done yet.
    status, done = download.next_chunk()
    self.assertEqual(self.fd.getvalue(), '123')
    self.assertEqual(False, done)
    self.assertEqual(3, download._progress)
    self.assertEqual(5, download._total_size)
    self.assertEqual(3, status.resumable_progress)
    # Second chunk completes the download.
    status, done = download.next_chunk()
    self.assertEqual(self.fd.getvalue(), '12345')
    self.assertEqual(True, done)
    self.assertEqual(5, download._progress)
    self.assertEqual(5, download._total_size)
def test_media_io_base_download_retries_5xx(self):
    # Verify next_chunk(num_retries=3) retries 5xx responses with
    # exponential backoff before each successful chunk.
    self.request.http = HttpMockSequence([
        ({'status': '500'}, ''),
        ({'status': '500'}, ''),
        ({'status': '500'}, ''),
        ({'status': '200', 'content-range': '0-2/5'}, '123'),
        ({'status': '503'}, ''),
        ({'status': '503'}, ''),
        ({'status': '503'}, ''),
        ({'status': '200', 'content-range': '3-4/5'}, '45'),
    ])
    download = MediaIoBaseDownload(
        fd=self.fd, request=self.request, chunksize=3)
    self.assertEqual(self.fd, download._fd)
    self.assertEqual(3, download._chunksize)
    self.assertEqual(0, download._progress)
    self.assertEqual(None, download._total_size)
    self.assertEqual(False, download._done)
    self.assertEqual(self.request.uri, download._uri)

    # Set time.sleep and random.random stubs.
    sleeptimes = []
    download._sleep = lambda x: sleeptimes.append(x)
    download._rand = lambda: 10

    status, done = download.next_chunk(num_retries=3)

    # Check for exponential backoff using the rand function above.
    self.assertEqual([20, 40, 80], sleeptimes)
    self.assertEqual(self.fd.getvalue(), '123')
    self.assertEqual(False, done)
    self.assertEqual(3, download._progress)
    self.assertEqual(5, download._total_size)
    self.assertEqual(3, status.resumable_progress)

    # Reset time.sleep stub.
    del sleeptimes[0:len(sleeptimes)]

    status, done = download.next_chunk(num_retries=3)

    # Check for exponential backoff using the rand function above.
    self.assertEqual([20, 40, 80], sleeptimes)
    self.assertEqual(self.fd.getvalue(), '12345')
    self.assertEqual(True, done)
    self.assertEqual(5, download._progress)
    self.assertEqual(5, download._total_size)
def pull(bucket, local_file, metadata_only=False):
    # Download the GCS object named after local_file's basename into
    # local_file, retrying chunk failures up to JBoxGS.MAX_RETRIES times.
    # Python 2 only (`except X, err` syntax below).
    objname = os.path.basename(local_file)
    k = None
    try:
        k = JBoxGS.connect().objects().get(bucket=bucket, object=objname).execute()
    except HttpError as err:
        # Only a missing object is tolerated; any other error propagates.
        if err._get_reason() != 'Not Found':
            raise(err)
        else:
            return None
    if not metadata_only:
        req = JBoxGS.connect().objects().get_media(bucket=bucket, object=objname)
        fh = open(local_file, "wb")
        downloader = MediaIoBaseDownload(fh, req, chunksize=JBoxGS.CHUNK_SIZE*1024*1024)
        done = False
        num_retries = 0
        while not done:
            try:
                _, done = downloader.next_chunk()
            except HttpError, err:
                num_retries += 1
                # Too many failures: drop the partial file and re-raise.
                if num_retries > JBoxGS.MAX_RETRIES:
                    fh.close()
                    os.remove(local_file)
                    raise
                if err.resp.status in JBoxGS.RETRYABLE_ERRORS:
                    # Exponential backoff with jitter, capped at MAX_BACKOFF.
                    backoff = min(JBoxGS.BACKOFF_FACTOR ** (num_retries - 1),
                                  JBoxGS.MAX_BACKOFF)
                    sleep(backoff + random())
                else:
                    sleep(JBoxGS.SLEEP_TIME)
            except:
                # NOTE(review): SOURCE appears truncated here — this bare
                # `except:` has no visible body; the remainder of the
                # function is not present in the file as seen.
def download_file(service, item, download_folder='./data/', overwrite=False):
    """Fetch one Drive item to disk; return True iff a new copy was written.

    An existing file with a matching md5 is kept (returns False); a checksum
    mismatch after download raises.
    """
    if not is_downloadable(item):
        return False
    local_path = download_folder + item['path']
    if os.path.isfile(local_path) and not overwrite:
        # Keep an intact local copy; re-download only if it is corrupt.
        if file_md5(local_path) == item['md5Checksum']:
            return False
        print("Corrupt file '%s'" % local_path)
    mkdir_p(os.path.dirname(local_path))
    with open(local_path, "wb") as destination:
        media_request = service.files().get_media(fileId=item['id'])
        downloader = MediaIoBaseDownload(destination, media_request)
        finished = False
        while not finished:
            _, finished = downloader.next_chunk()
    # Validate what landed on disk against the expected checksum.
    if file_md5(local_path) != item['md5Checksum']:
        raise Exception("Download for '%s' failed, wrong checksum" % local_path)
    return True
def get(service):
    """Prompt for an object name, fetch its metadata and payload from GCS,
    decrypt the payload, and write the plaintext to a local file.

    NOTE(review): Python 2 only (print statements, raw_input). `decrypt`
    and `key` are presumably defined elsewhere in this module — confirm.
    """
    # User can be prompted to input the file name (using raw_input) that
    # needs to be downloaded.
    file_name = raw_input("Enter file name to be downloaded:")
    try:
        # Get Metadata
        req = service.objects().get(
            bucket=_BUCKET_NAME,
            object=file_name,
            fields='bucket,name,metadata(my-key)',
        )
        resp = req.execute()
        print json.dumps(resp, indent=2)
        # Get Payload Data
        req = service.objects().get_media(
            bucket=_BUCKET_NAME,
            object=file_name,
        )
        # The Bytes I/O object may be replaced with any io.Base instance.
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, req, chunksize=1024 * 1024)
        # show progress at download
        done = False
        while not done:
            status, done = downloader.next_chunk()
            if status:
                print 'Download %d%%.' % int(status.progress() * 100)
        print 'Download Complete'
        # fh.getvalue() contains the downloaded content; decrypt it and
        # save the plaintext to the local machine.
        fo = decrypt(fh.getvalue(), key)
        fi = open(file_name, 'wb')
        fi.write(fo)
        print json.dumps(resp, indent=2)
    except client.AccessTokenRefreshError:
        print ("Error in the credentials")
def __init__(self, row: List[str], drive_service):
    """Build an entry from one spreadsheet row.

    Row layout (by index): 0 = number, 1 = content URL or a plain content
    type, 4 = summary URL, 7 = repetition-material URL. When row[1] is a
    Drive URL, the document is exported as plain text into self.content.
    """
    self.number = int(row[0])
    content_url = urlparse(row[1])
    summary_url = urlparse(row[4])
    repetition_material = urlparse(row[7])
    # A URL counts as present only when it carries a scheme (http/https/...).
    if len(summary_url.scheme) > 0:
        self.summary_url = summary_url.geturl()
    else:
        self.summary_url = None
    if len(repetition_material.scheme) > 0:
        self.repetition_material = repetition_material.geturl()
    else:
        self.repetition_material = None
    if len(content_url.scheme) > 0:
        # Drive links come as either .../d/<id>/... or .../open?id=<id>.
        file_id = content_url.path.split('/')[-1]
        if file_id == 'open':
            # BUG FIX: parse_qs returns a dict of *lists*; take the first
            # 'id' value instead of passing the whole list as fileId.
            file_id = parse_qs(content_url.query)['id'][0]
        request = drive_service.files().export_media(fileId=file_id,
                                                     mimeType='text/plain')
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while done is False:
            status, done = downloader.next_chunk()
        self.content = fh.getvalue().decode('utf-8')
        self.content_type = None
    else:
        # Not a URL: row[1] is the literal content type; no content fetched.
        self.content = None
        self.content_type = row[1]
def pull(bucket, local_file, metadata_only=False):
    """Fetch the GCS object named after local_file's basename.

    Returns the object's metadata dict, or None when the object does not
    exist. Unless metadata_only is set, the payload is also written to
    local_file; a partial file is removed if the download does not finish.
    """
    object_name = os.path.basename(local_file)
    metadata = None
    try:
        metadata = JBoxGS.connect().objects().get(bucket=bucket, object=object_name).execute()
    except HttpError as err:
        # A missing object is reported as None; every other error is fatal.
        if err._get_reason() != 'Not Found':
            raise err
        return None
    if metadata_only:
        return metadata
    media_request = JBoxGS.connect().objects().get_media(bucket=bucket, object=object_name)
    out_file = open(local_file, "wb")
    downloader = MediaIoBaseDownload(out_file, media_request, chunksize=1024 * 1024)
    finished = False
    try:
        while not finished:
            _, finished = downloader.next_chunk()
    finally:
        # Always close; drop the partial file when the loop exited early.
        out_file.close()
        if not finished:
            os.remove(local_file)
    return metadata
def download(service, bucketName, objectName, filename):
    """
    Download a GCS object to a local file, printing in-place progress.
    Python 2 only (print statements, file() builtin, `except X, err`).

    :type service: Resource
    :type bucketName: basestring
    :type objectName: basestring
    :type filename: basestring
    """
    print 'Building download request...'
    f = file(filename, 'w')
    request = service.objects().get_media(bucket=bucketName, object=objectName)
    media = MediaIoBaseDownload(f, request, chunksize=CHUNKSIZE)
    print 'Downloading bucket: %s object: %s to file: %s' % (bucketName, objectName, filename)
    progressless_iters = 0
    done = False
    while not done:
        error = None
        try:
            progress, done = media.next_chunk()
            if progress:
                print_with_carriage_return(
                    'Download %d%%.' % int(progress.progress() * 100))
        except HttpError, err:
            error = err
            # Client errors (4xx) are not retryable.
            if err.resp.status < 500:
                raise
        except RETRYABLE_ERRORS, err:
            error = err
            # NOTE(review): SOURCE appears truncated here — `error` is
            # captured but the progressless-iteration/backoff handling that
            # would normally follow is not visible in the file as seen.
def load_config(self):
    """Find the album's config file on Drive and parse it into self._config.

    Falls back to an empty config when the file is missing or the listing
    fails; guarantees the "albums" and "pictures" keys exist.
    """
    try:
        listing = self._gdrive.files().list(
            corpora="user",
            q="parents='%s' and name='%s'" % (self._album_id, GdriveAlbum.CONFIG_FILE),
            pageSize=1,
            spaces='drive').execute()
    except Error:
        listing = {}
    matches = listing.get('files', [])
    if not matches:
        self._config_file_id = ''
        self._config = {}
        # TODO new config with default values
        return
    self._config_file_id = matches[0]["id"]
    request = self._gdrive.files().get_media(fileId=self._config_file_id)
    raw = io.BytesIO()
    downloader = MediaIoBaseDownload(raw, request)
    finished = False
    while not finished:
        _, finished = downloader.next_chunk()
    config = json.loads(raw.getvalue())
    # Ensure both top-level collections exist.
    config.setdefault("albums", [])
    config.setdefault("pictures", [])
    self._config = config
def download(self, mime_type=None):
    """Download this file's content from Google Drive.

    Args:
        mime_type: the mime type of the file to download. When None, a
            suitable export type is derived from the file's own mimeType.
            see here:
            https://developers.google.com/drive/v3/web/manage-downloads#downloading_google_documents

    Returns:
        The content of the file (also cached on self.content).
    """
    if mime_type is not None:
        download_type = mime_type
    else:
        download_type = MimeTypes.get_download_type(self.meta_data['mimeType'])
    export_request = self.service.files().export_media(
        fileId=self.meta_data['id'], mimeType=download_type)
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, export_request)
    finished = False
    while not finished:
        _, finished = downloader.next_chunk()
    buffer.seek(0)
    self.content = buffer.read()
    return self.content
def download_file(self, file_id, write_path, page_num=None, print_details=True, output_type=None):
    """Download a Drive file to write_path, or into memory for spreadsheets.

    Google Sheets are exported as CSV via the docs export URL (page_num
    selects the sheet gid) and, when output_type is 'list' or 'dataframe',
    returned in memory instead of written to disk. Every other file type is
    streamed to write_path with MediaIoBaseDownload.
    NOTE(review): Python 2 only (print statement near the end).
    """
    file_metadata = self._files.get(fileId=file_id, fields='name, id, mimeType, modifiedTime, size').execute(num_retries=self._max_retries)
    file_title = file_metadata['name']
    # Convert the RFC3339 UTC timestamp to naive Singapore local time.
    modified_date = datetime.strptime(str(file_metadata['modifiedTime']), '%Y-%m-%dT%H:%M:%S.%fZ').replace(tzinfo=utc).astimezone(timezone('Asia/Singapore')).replace(tzinfo=None)
    return_data = None
    if file_metadata['mimeType'] == 'application/vnd.google-apps.spreadsheet':
        # Sheets cannot be fetched with get_media; use the CSV export URL.
        assert page_num is not None
        download_url = 'https://docs.google.com/spreadsheets/d/%s/export?format=csv&gid=%i' % (file_id, page_num)
        resp, content = self._service._http.request(download_url)
        if resp.status == 200:
            if output_type is not None:
                assert output_type in ('dataframe', 'list')
                from io import BytesIO
                with BytesIO(content) as file_buffer:
                    if output_type == 'list':
                        import unicodecsv as csv
                        return_data = list(csv.reader(file_buffer))
                    elif output_type == 'dataframe':
                        import pandas as pd
                        return_data = pd.read_csv(file_buffer)
            else:
                with open(write_path, 'wb') as write_file:
                    write_file.write(content)
            logging_string = '[Drive] Downloaded %s [%s]. Last Modified: %s' % (file_title, file_id, modified_date)
        else:
            raise HttpError(resp, content)
    else:
        # Regular files: stream in chunks straight to disk.
        request = self._files.get_media(fileId=file_id)
        with open(write_path, 'wb') as write_file:
            downloader = MediaIoBaseDownload(write_file, request)
            done = False
            while done is False:
                status, done = downloader.next_chunk()
        file_size = humanize.naturalsize(int(file_metadata['size']))
        logging_string = '[Drive] Downloaded %s [%s] (%s). Last Modified: %s' % (file_title, file_id, file_size, modified_date)
    if print_details:
        print '\t' + logging_string
    if self._logger is not None:
        self._logger.info(logging_string)
    return return_data
def drive_pull_media(drive_fid, service):
    """Retrieve content of a Google Drive file.

    Streams the file identified by *drive_fid* chunk by chunk into the file
    descriptor held by the module-level ``args`` namespace (an inherited
    Lustre fd), logging progress for every chunk.

    Args:
        drive_fid: Google Drive file ID to download.
        service: authorized Drive API service object.
    """
    logger = logging.getLogger(__name__)
    # NOTE(review): ``args`` is not a parameter — this relies on a
    # module-level (argparse) namespace being in scope; confirm.
    # Open a Python file based on inherited Lustre file descriptor
    with os.fdopen(args.fd, 'wb') as lustre_file:
        # Get a file content by Google fileID
        request = service.files().get_media(fileId=drive_fid)
        downloader = MediaIoBaseDownload(lustre_file, request,
                                         chunksize=GAPI_MEDIA_IO_CHUNK_SIZE)
        # Fix: single download loop. The original issued one extra
        # next_chunk() before the loop whose progress was never logged.
        done = False
        while not done:
            status, done = downloader.next_chunk()
            if status:
                logger.debug("Download %d%%", int(status.progress() * 100))
def download_file(self, local_file_path, bucket_name, storage_file_path):
    """Download a GCS object to a local file, printing per-chunk progress.

    Args:
        local_file_path: destination path on local disk.
        bucket_name: GCS bucket holding the object.
        storage_file_path: object name (path) within the bucket.
    """
    base_name = os.path.basename(local_file_path)
    request = self.service.objects().get_media(bucket=bucket_name,
                                               object=storage_file_path)
    # Fix: use open() in a context manager instead of the Python-2-only
    # file() builtin, and guarantee the handle is closed afterwards
    # (the original never closed it).
    with open(local_file_path, 'wb') as f:
        media = MediaIoBaseDownload(f, request)
        done = False
        while not done:
            progress, done = media.next_chunk()
            if progress:
                print('{0} is download {1}/100'.format(
                    base_name, int(100 * progress.progress())))
def stream(self, uri, file=None):
    """Download the Drive file behind *uri* into a rewound temporary file.

    Returns a readable temp-file object, or None when no file is found.
    """
    file = file or self._get(uri, fields='files(id)')
    if not file:
        return None
    spool = tempfile.TemporaryFile()
    media_request = self.service.files().get_media(fileId=file.id)
    downloader = MediaIoBaseDownload(spool, media_request)
    finished = False
    while not finished:
        _, finished = downloader.next_chunk()
    spool.seek(0)
    return spool
def main():
    """Download one hard-coded Drive file to faw.csv, printing progress."""
    drive_service = driveClient()
    file_id = '1pE6ZovOBy4koVwWvKwE_Pt-7whVsYRKj'
    media_request = drive_service.files().get_media(fileId=file_id)
    sink = io.FileIO('faw.csv', 'wb')
    downloader = MediaIoBaseDownload(sink, media_request)
    finished = False
    while not finished:
        status, finished = downloader.next_chunk()
        print(f'Download {int(status.progress() * 100)}')
def get_file(name, id):
    """Download the Drive file *id* and return its raw bytes.

    *name* is accepted for interface compatibility but not used here.
    """
    import io
    download_service = discovery.build('drive', 'v3', developerKey=_dev_key,
                                       http=decorator.http())
    media_request = download_service.files().get_media(fileId=id)
    sink = io.BytesIO()
    downloader = MediaIoBaseDownload(sink, media_request)
    finished = False
    while not finished:
        _, finished = downloader.next_chunk()
    sink.seek(0)
    return sink.read()
def get_contents_to_fileobj(self, key, fileobj_to_store_to):
    """Stream the object stored under *key* into the given file object.

    Returns the object's metadata; chunk progress is logged at debug level.
    """
    key = self.format_key_for_backend(key)
    self.log.debug("Starting to fetch the contents of: %r to %r", key, fileobj_to_store_to)
    with self._object_client(not_found=key) as clob:
        media_request = clob.get_media(bucket=self.bucket_name, object=key)
        download = MediaIoBaseDownload(fileobj_to_store_to, media_request,
                                       chunksize=CHUNK_SIZE)
        finished = False
        while not finished:
            progress, finished = download.next_chunk()
            if progress:
                self.log.debug("Download of %r: %d%%", key, progress.progress() * 100)
        return self._metadata_for_key(clob, key)
def test_media_io_base_download_retries_connection_errors(self):
    # Verify next_chunk() retries transient connection errors: the mock
    # fails 3 times before serving the payload, and num_retries=3 absorbs
    # all failures so the download still completes.
    self.request.http = HttpMockWithErrors(
        3, {'status': '200', 'content-range': '0-2/3'}, b'123')
    download = MediaIoBaseDownload(
        fd=self.fd, request=self.request, chunksize=3)
    download._sleep = lambda _x: 0  # do nothing
    download._rand = lambda: 10
    status, done = download.next_chunk(num_retries=3)
    self.assertEqual(self.fd.getvalue(), b'123')
    self.assertEqual(True, done)
def _open(self, name, mode):
    """Return the GCS object *name* as a rewound BytesIO (read-only backend)."""
    if mode != "rb":
        raise ValueError("rb is the only acceptable mode for this backend")
    # @@@ reading files from GCS is extremely inefficient; fix me
    # however, for small files, who cares right? ;-)
    media_request = self.client.objects().get_media(bucket=self.bucket, object=name)
    payload = io.BytesIO()
    media = MediaIoBaseDownload(payload, media_request)
    finished = False
    while not finished:
        _, finished = media.next_chunk()
    payload.seek(0)
    return payload
def read_file(file_id):
    """Download a Drive file's content and return it as bytes.

    Args:
        file_id: Google Drive file ID.

    Returns:
        The complete downloaded content from the in-memory buffer.
    """
    drive_service = create_drive_service()
    request = drive_service.files().get_media(fileId=file_id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    # Fixes: removed the unused `data = ""` local and dead commented-out
    # upload code; the parenthesized single-argument print below works
    # under both Python 2 and 3 (the original used the Py2-only statement).
    while not done:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
    return fh.getvalue()
def download_file(service, file_id, file_path):
    """Download file to provided path

    :param service: Google Drive service object
    :param str file_id: ID of the file to download
    :param str file_path: path to download the file to
    """
    media_request = service.files().get_media(fileId=file_id)
    with open(file_path, mode='wb') as out:
        downloader = MediaIoBaseDownload(out, media_request)
        finished = False
        while not finished:
            _, finished = downloader.next_chunk()
def test_media_io_base_download_handle_redirects(self):
    # A 200 response carrying a content-location header must update the
    # downloader's target URI so later chunks go to the new location.
    self.request.http = HttpMockSequence([
        ({'status': '200',
          'content-location': 'https://secure.example.net/lion'}, ''),
        ({'status': '200', 'content-range': '0-2/5'}, 'abc'),
    ])
    download = MediaIoBaseDownload(
        fd=self.fd, request=self.request, chunksize=3)
    status, done = download.next_chunk()
    self.assertEqual('https://secure.example.net/lion', download._uri)
def test_media_io_base_download_handle_4xx(self):
    # A 4xx response is not retryable: next_chunk() must raise HttpError.
    self.request.http = HttpMockSequence([
        ({'status': '400'}, ''),
    ])
    download = MediaIoBaseDownload(
        fd=self.fd, request=self.request, chunksize=3)
    try:
        status, done = download.next_chunk()
        self.fail('Should raise an exception')
    except HttpError:
        pass

    # Even after raising an exception we can pick up where we left off.
    self.request.http = HttpMockSequence([
        ({'status': '200', 'content-range': '0-2/5'}, '123'),
    ])
    status, done = download.next_chunk()
    self.assertEqual(self.fd.getvalue(), '123')
def download_object(self, bucket_name, object_name, write_path):
    """
    Downloads object in chunks.

    :param bucket_name: Bucket identifier.
    :type bucket_name: string
    :param object_name: Can take string representation of object resource
        or list denoting path to object on GCS.
    :type object_name: list or string
    :param write_path: Local path to write object to.
    :type write_path: string
    :returns: GcsResponse object.
    :raises: HttpError if non-retryable errors are encountered.
    """
    resp_obj = GcsResponse('downloaded')
    req = self._service.objects().get_media(
        bucket=bucket_name,
        object=self._parse_object_name(object_name)
    )
    # Fix: open() in a context manager replaces the Python-2-only file()
    # builtin and guarantees the handle is closed even when the download
    # raises (the original never closed it).
    with open(write_path, 'wb') as write_file:
        media = MediaIoBaseDownload(write_file, req, chunksize=self._chunksize)
        progressless_iters = 0
        done = False
        while not done:
            error = None
            try:
                progress, done = media.next_chunk()
            except HttpError as e:
                error = e
                # Client errors (4xx) are not retryable.
                if e.resp.status < 500:
                    raise
            except self._RETRYABLE_ERRORS as e:
                error = e
            if error:
                progressless_iters += 1
                # Raises once too many consecutive iterations fail.
                self._handle_progressless_iter(error, progressless_iters)
            else:
                progressless_iters = 0
    resp_obj.load_resp(
        self.get_object(bucket_name, object_name),
        is_download=True
    )
    return resp_obj
def get_contents_to_file(self, obj_key, filepath_to_store_to):
    """Download object *obj_key* into the file at *filepath_to_store_to*.

    The partially-written file is removed if the download does not finish.
    """
    self.log.debug("Starting to fetch the contents of: %r to: %r", obj_key, filepath_to_store_to)
    fileobj = FileIO(filepath_to_store_to, mode="wb")
    finished = False
    try:
        request = self.gs_objects.get_media(bucket=self.bucket_name, object=obj_key)
        download = MediaIoBaseDownload(fileobj, request, chunksize=CHUNK_SIZE)
        while not finished:
            status, finished = download.next_chunk()
            if status:
                self.log.debug("Download of %r to %r: %d%%", obj_key,
                               filepath_to_store_to, status.progress() * 100)
    finally:
        fileobj.close()
        # Drop the partial file when the loop exited early via an exception.
        if not finished:
            os.unlink(filepath_to_store_to)
def download_report(youtube_reporting, report_url, local_file):
    """Download a YouTube Reporting API report to *local_file*.

    The media().download() request is created with a placeholder resource
    name and its URI is then pointed at the report's actual download URL.
    """
    request = youtube_reporting.media().download(
        resourceName=' '
    )
    request.uri = report_url
    fh = FileIO(local_file, mode='wb')
    # Stream/download the report in a single request (chunksize=-1).
    downloader = MediaIoBaseDownload(fh, request, chunksize=-1)
    done = False
    # Fix: parenthesized single-argument prints work under both Python 2
    # and 3 (the original used the Py2-only print statement).
    while not done:
        status, done = downloader.next_chunk()
        if status:
            print('Download %d%%.' % int(status.progress() * 100))
    print('Download Complete!')
def get_contents_to_fileobj(self, key, fileobj_to_store_to):
    """Stream object *key* into *fileobj_to_store_to*; return its metadata.

    Raises FileNotFoundFromStorageError when the backend reports a 404.
    """
    key = self.format_key_for_backend(key)
    media_request = self.gs_objects.get_media(bucket=self.bucket_name, object=key)
    download = MediaIoBaseDownload(fileobj_to_store_to, media_request,
                                   chunksize=CHUNK_SIZE)
    finished = False
    while not finished:
        try:
            status, finished = download.next_chunk()
        except HttpError as ex:
            # Translate a missing object into the backend-neutral error.
            if ex.resp["status"] == "404":
                raise FileNotFoundFromStorageError(key)
            raise
        if status:
            self.log.debug("Download of %r: %d%%", key, status.progress() * 100)
    return self._metadata_for_key(key)
def export_file_as_str(self, *, fileId: str) -> str:
    """Export the Drive file *fileId* as CSV and return it as a UTF-8 string."""
    buffer = io.BytesIO()
    export_request = self.service.files().export_media(
        fileId=fileId,
        mimeType='text/csv',
    )
    downloader = MediaIoBaseDownload(buffer, export_request)
    finished = False
    while not finished:
        status, finished = downloader.next_chunk()
        print('Downloaded {}%'.format(int(status.progress() * 100)))
    return buffer.getvalue().decode('utf-8')
def read_file_from_gcs(service, bucket_id, file_id):
    """Reads the bucket object and returns its contents.

    Returns the downloaded bytes, or None when the request fails (the
    HttpError — e.g. object not found — is reported on stderr).
    """
    req = service.objects().get_media(
        bucket=bucket_id,
        object=file_id)
    try:
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, req, chunksize=1024 * 1024)
        done = False
        while not done:
            status, done = downloader.next_chunk()
        # Fix: the original never returned the downloaded content even
        # though the docstring promised it.
        return fh.getvalue()
    # Fix: cross-version syntax — the original used Python-2-only
    # `except HttpError, e` and `print >> sys.stderr`.
    except HttpError as e:
        sys.stderr.write('%s\n' % e)
        return None
def media_download(request, chunksize, encoding=None):
    """Generator yielding the content of *request* chunk by chunk.

    Bytes are yielded raw when *encoding* is None. For utf-8, each chunk is
    split at a character boundary and the trailing partial character is
    carried over into the next yielded piece. Transient HTTP 5xx and
    connection errors are retried with a linear backoff up to RETRIES times.
    """
    data = BytesIO()
    leftovers = b''
    media = MediaIoBaseDownload(data, request, chunksize=chunksize)
    retries = 0
    done = False
    while not done:
        error = None
        try:
            progress, done = media.next_chunk()
            if progress:
                print('Download %d%%' % int(progress.progress() * 100))
            data.seek(0)
            if encoding is None:
                yield data.read()
            elif encoding.lower() == 'utf-8':
                position = find_utf8_split(data)
                yield (leftovers + data.read(position)).decode(encoding)
                # BUG FIX: this was misspelled `leftftovers`, so the
                # carried-over partial UTF-8 character was silently dropped.
                leftovers = data.read()
            else:
                yield data.read().decode(encoding)
            data.seek(0)
            data.truncate(0)
        except HttpError as err:
            error = err
            # Client errors (4xx) are not retryable.
            if err.resp.status < 500:
                raise
        except (httplib2.HttpLib2Error, IOError) as err:
            error = err
        if error:
            retries += 1
            if retries > RETRIES:
                raise error
            else:
                sleep(5 * retries)
        else:
            retries = 0
    print('Download 100%')
def updateData(self) -> None:
    """Refresh local data files from Google Drive when the Drive copy is newer.

    For each known data file (hymn database plus the slide-property INIs),
    compare the Drive file's modifiedTime with the local file's mtime and
    download a replacement into Data/ when the Drive copy is more recent.
    """
    # Lookup latest HymnDatabase file and property files on Google Drive, replace local copy if local copy is older
    dataTypeList = [
        "HymnDatabase", "ProjectedSlideProperties", "RegularSlideProperties",
        "StreamSlideProperties"
    ]
    fileNameList = [
        "HymnDatabase.db", "ProjectedSlideProperties.ini",
        "RegularSlideProperties.ini", "StreamSlideProperties.ini"
    ]
    for i, dataType in enumerate(dataTypeList):
        try:
            fileID = self.globalConfig["GOOGLE_DRIVE_DATA"][dataType + "FileID"]
            filedDetails = self.driveService.files().get(
                fileId=fileID, fields="modifiedTime").execute()
            # Get modified date of DataBase.db file and compare.
            # A missing local file gets datetime.min so it always updates.
            localModifiedDate = pytz.utc.localize(datetime.datetime.min)
            driveModifiedDate = pytz.utc.localize(
                datetime.datetime.strptime(filedDetails["modifiedTime"],
                                           "%Y-%m-%dT%H:%M:%S.%fZ"))
            if os.path.exists("Data/" + fileNameList[i]):
                localModifiedDate = datetime.datetime.fromtimestamp(
                    os.path.getmtime("Data/" + fileNameList[i]),
                    datetime.timezone.utc)
            # Overwrite local file
            if localModifiedDate < driveModifiedDate:
                Logging.writeLog(
                    Logging.LogType.Info,
                    f"GoogleAPITools - Updating {dataType} from [{localModifiedDate}] to [{driveModifiedDate}]"
                )
                request = self.driveService.files().get_media(
                    fileId=fileID)
                with open("Data/" + fileNameList[i], "wb") as f:
                    downloader = MediaIoBaseDownload(f, request)
                    done = False
                    while done is False:
                        status, done = downloader.next_chunk()
                        print(f"UPDATING {dataType.upper()} : %d%%" %
                              int(status.progress() * 100))
        except errors.HttpError as error:
            print(
                f"ERROR : An error occurred on updating {dataType.upper()}; {error}"
            )
def download_file(service, filename, folder_path, mimeType='', successType='success', exportedFormat=None):
    """
    Download the given file id to the given folder_path with the given mimetype.
    mimeType for binary files is None. On binary-download failure, a zero-size
    leftover file is removed and an export via export_assistant is attempted.
    """
    if folder_path is None:
        folder_path = os.getcwd() + '/' + 'My Drive'
    # TODO: Implement file existence check in the calling function; check in the file with ID
    # TODO: If the file is different with same name, create new name and then call this function
    fh = None  # Fix: ensure the name is bound even if FileIO() below fails.
    try:
        logging.info("Attempting binary download")
        data_bytes = service.files().get_media(fileId=filename['id'])
        filename['name'] = sanitize_name(filename)
        logging.info("Sanitized filename is {0}".format(filename['name']))
        fh = io.FileIO(folder_path + '/' + filename['name'], mode='wb')
        downloader = MediaIoBaseDownload(fh, data_bytes)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            if status:
                pass
        fh.close()
        logging.info("Binary download successful for filename {0}, file id: {1}".format(filename['name'], filename['id']))
    # Fix: was a bare `except:` which also swallowed SystemExit and
    # KeyboardInterrupt; `if fh:` could also raise NameError before the fix
    # above when the open itself failed.
    except Exception:
        if fh:
            fh.close()
        try:
            logging.info('Attempting to remove zero size file {0}'.format(filename['name']))
            if os.stat(folder_path + '/' + filename['name']).st_size == 0:
                os.remove(folder_path + '/' + filename['name'])
                logging.info("File {0} successfully removed".format(filename['name']))
        except Exception as e:
            logging.info("Error in removing zero size file {0}".format(filename['name']))
            logging.error(str(e))
        try:
            logging.info("Attempting to export the file")
            export_assistant(service, filename, mimeType, folder_path, exportedFormat, successType)
        except Exception as e:
            logging.error("Failed to download or export the file {0}, id = {1}".format(filename['name'], filename['id']))
            logging.error(e)
def html_to_pdf(
    file_name,
    html_string,
):
    """Convert an HTML string to PDF via Google Drive's Docs conversion.

    The HTML is uploaded as a Docs document, exported as PDF, and the
    intermediate document is deleted afterwards. Returns a BytesIO holding
    the PDF. With dummy credentials a placeholder buffer is returned
    (used for testing).
    """
    LOGGER.debug('Using Google Drive to convert PDF')
    # If no credentials, return a dummy string for testing.
    if GCP_CREDENTIALS_DICT['private_key'] == 'dummy':
        return io.BytesIO(b"Hello World, This is not a real PDF")
    credentials = ServiceAccountCredentials.from_json_keyfile_dict(GCP_CREDENTIALS_DICT)
    drive_service = build('drive', 'v3', credentials=credentials)
    # Upload the HTML payload...
    html_media = MediaIoBaseUpload(
        io.BytesIO(bytes(html_string, encoding='utf8')),
        mimetype="text/html",
        resumable=True)
    # ...asking Drive to convert it into a Docs document, which is what
    # enables the subsequent PDF export.
    created = drive_service.files().create(body={
        "name": file_name,
        "mimeType": "application/vnd.google-apps.document",
    }, media_body=html_media, fields='id').execute()
    # Export the converted document as PDF.
    export_request = drive_service.files().export_media(
        fileId=created.get('id'), mimeType='application/pdf')
    pdf_buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(pdf_buffer, export_request)
    finished = False
    while not finished:
        _, finished = downloader.next_chunk()
    # Delete the intermediate Docs document.
    drive_service.files().delete(fileId=created.get('id')).execute()
    return pdf_buffer
def download_file(file_id, fileList, drive_service):
    """Download a Drive file, exporting Google-native types to Office/PDF.

    Docs/Sheets/Slides are exported to docx/xlsx/pptx; other Google-native
    types fall back to PDF; regular files are downloaded as-is. The result
    is written under the (possibly extension-suffixed) file name.
    """
    mimeType = get_file(file_id, fileList)['mimeType']
    fileName = get_file_name(file_id, fileList)
    if ('application/vnd.google-apps.' in mimeType):
        if ('document' in mimeType):
            conversion = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
            fileName = fileName + '.docx'
        elif ('spreadsheet' in mimeType):
            conversion = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
            # Fix: the exported format is OOXML, so use .xlsx (was .xls).
            fileName = fileName + '.xlsx'
        elif ('presentation' in mimeType):
            conversion = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
            # Fix: the exported format is OOXML, so use .pptx (was .ppt).
            fileName = fileName + '.pptx'
        else:
            conversion = 'application/pdf'
            fileName = fileName + '.pdf'
        request = drive_service.files().export_media(fileId=file_id, mimeType=conversion)
    else:
        request = drive_service.files().get_media(fileId=file_id)
    fh = io.FileIO(fileName, 'wb')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
def download_file(self, file_id, name, dest_path):
    """
    Download a file from Google Docs.

    Note: this checks to see if the file already exists. If it does not this
    makes a temporary file, saves the contents into the temporary file, and
    then moves the temporary file to the new location when successful. This
    allows the overall download job to fail a given file, but restart and
    pick up where it left off.

    :param file_id: The google file_id to be downloaded
    :param name: The name of the file to be downloaded
    :param dest_path: the path to save the file to
    :return:
    """
    success = False
    count = 0
    output_path = os.path.join(dest_path, name)
    if os.path.exists(output_path):
        print(f"{name} already exists. Skipping")
        return
    temp_path = None
    while not success:
        # Fix: close the OS-level descriptor returned by mkstemp(); the
        # original discarded it, leaking one fd per download attempt.
        temp_fd, temp_path = tempfile.mkstemp()
        os.close(temp_fd)
        with open(temp_path, "wb") as temp_handle:
            try:
                request = self.service.files().get_media(fileId=file_id)
                downloader = MediaIoBaseDownload(temp_handle, request, chunksize=1024*1024)
                done = False
                while done is False:
                    status, done = downloader.next_chunk(num_retries=3)
                success = True
            except HttpError:
                # for some reason, google apis sometimes return a 500 error in the middle of
                # downloading a file. Retry the file when that happens.
                # but don't keep doing the same thing over and over if it keeps failing.
                os.remove(temp_path)
                if count > 3:
                    print(f"downloading {name} failed. ")
                    raise
                count += 1
                print(f"error in downloading {name}. retrying.")
                time.sleep(10)
                continue
    if temp_path is not None:  # should be impossible to still be None here, but doesn't hurt to check
        shutil.copy(temp_path, output_path)
        os.remove(temp_path)
def download_file_from_google_drive(id, destination):
    """Download Drive file *id* into RAM, then write it to *destination*."""
    service = _get_service()
    print("\tDownload {}".format(destination))
    media_request = service.files().get_media(fileId=id)
    buffer = BytesIO()
    downloader = MediaIoBaseDownload(buffer, media_request)
    finished = False
    while not finished:
        _, finished = downloader.next_chunk()
    # The file has been downloaded into RAM; persist it in 128 KiB copies.
    buffer.seek(0)
    with open(destination, "wb") as f:
        copyfileobj(buffer, f, length=131072)
def _download_file(self, write_to_path):
    """Save this source's Drive file to *write_to_path*.

    Google Docs links are exported to PDF; anything else is fetched raw.
    Any failure is wrapped in UnscrapableSourceException.
    """
    try:
        service = self.get_service()
        if 'docs.google.com' in self.url:
            media_request = service.files().export(fileId=self.file_id, mimeType='application/pdf')
        else:
            media_request = service.files().get_media(fileId=self.file_id)
        sink = io.FileIO(write_to_path, mode='wb')
        downloader = MediaIoBaseDownload(sink, media_request)
        finished = False
        while not finished:
            _, finished = downloader.next_chunk()
    except Exception as e:
        raise UnscrapableSourceException(str(e))
def downloadData():
    """Download the backing database file from Drive and save it as ./test.db."""
    API_NAME = 'drive'
    API_VERSION = 'v3'
    SCOPES = ['https://www.googleapis.com/auth/drive']
    id = getID()
    service = Create_Service('credentials.json', API_NAME, API_VERSION, SCOPES)
    req = service.files().get_media(fileId=id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fd=fh, request=req)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print('Download progress {0}'.format(status.progress()*100))
    fh.seek(0)
    # Fix: removed the redundant f.close() that sat inside the with-block;
    # the context manager already closes the file on exit.
    with open(os.path.join('./', 'test.db'), 'wb') as f:
        f.write(fh.read())
def download_text_file(self, file_id):
    """Export a Drive file as plain text and return it as a str.

    Returns None (and prints the error) when anything goes wrong.
    """
    try:
        export_request = self.service.files().export_media(fileId=file_id, mimeType='text/plain')
        buffer = io.BytesIO()
        downloader = MediaIoBaseDownload(fd=buffer, request=export_request)
        finished = False
        while not finished:
            _, finished = downloader.next_chunk()
        buffer.seek(0)
        return io.TextIOWrapper(buffer, encoding='utf-8').read()
    except Exception as e:
        print(e)
        return None
def open(self, name, name2, mode='rb'):
    """Download Drive file *name* into local file *name2*; return a File.

    Returns the literal strings "Nie ma!" / "To folder!" when the file is
    missing or is a folder (original sentinel behavior preserved).
    """
    file_data = self.check_file_exists(name)
    if file_data is None:
        return "Nie ma!"
    if file_data['mimeType'] == self.GOOGLE_DRIVE_FOLDER_MIMETYPE:
        return "To folder!"
    media_request = self.drive_service.files().get_media(fileId=file_data['id'])
    sink = FileIO(name2, 'wb')
    downloader = MediaIoBaseDownload(sink, media_request)
    finished = False
    while not finished:
        _, finished = downloader.next_chunk()
    sink.seek(0)
    return File(sink, name)
def load_document(*, url=None, doc_id=None, course):
    """Export a Google Doc (given by URL or id) as plain text and return it."""
    doc_id = doc_id or get_doc_id(url)
    service = googleapiclient.discovery.build(
        "drive", "v3", credentials=get_credentials(course)
    )
    export_request = service.files().export_media(fileId=doc_id, mimeType="text/plain")
    buffer = BytesIO()
    downloader = MediaIoBaseDownload(buffer, export_request)
    finished = False
    while not finished:
        status, finished = downloader.next_chunk()
        print(status, "Download {:d}%.".format(int(status.progress() * 100)))
    return buffer.getvalue().decode("utf-8")
def gdrive_download_file(id):
    """Download an Excel workbook from Drive and return the VAM ids.

    The file is saved locally as test_v1_excel.xlsx, every sheet is parsed,
    and the first column of the 'manual extract' sheet is returned as a list.
    """
    file_id = id
    creds = cred_function()
    dservice = build('drive', 'v3', credentials=creds)
    # get_media is usable here because the target has an .xlsx mime type.
    request = dservice.files().get_media(fileId=file_id)
    fh = io.FileIO('test_v1_excel.xlsx', 'wb')
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
    # Fix: close the output file before re-reading it with pandas.
    fh.close()
    df = pd.read_excel('test_v1_excel.xlsx', usecols=None, sheet_name=None)
    vam_id = df['manual extract'].iloc[:, 0]  # first column: vam_id
    # Fix: removed the unused notice_id local (second column was read but
    # never returned or used).
    return vam_id.tolist()
def weights_exists_or_download(path, file_id):
    """Ensure the model weights exist at *path*, downloading them if absent.

    Args:
        path: local path where the weights should live.
        file_id: Drive file ID of the weights.

    Returns:
        The (now existing) *path*.
    """
    if not Path(path).exists():
        creds_file = os.environ.get('CREDENTIAL_FILE')
        creds = service_account.Credentials.from_service_account_file(
            creds_file, scopes=SCOPES)
        service = build('drive', 'v3', credentials=creds)
        request = service.files().get_media(fileId=file_id)
        # Fix: write to the requested *path* instead of the hard-coded
        # 'unet_v4.pth' — otherwise the checked path stayed missing and the
        # file was re-downloaded on every call.
        fh = io.FileIO(path, mode='wb')
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print(f'Download {int(status.progress() * 100)}')
    return path
def _open(self, name, mode):
    """Fetch a GCS object into an in-memory buffer and wrap it as a GCSFile.

    Only read-binary mode is supported; a missing object raises IOError.
    """
    if mode != "rb":
        raise ValueError("rb is the only acceptable mode for this backend")
    req = self.client.objects().get_media(
        bucket=self.bucket, object=self._prefixed_name(name))
    buf = self._open_io()
    media = MediaIoBaseDownload(buf, req)
    finished = False
    try:
        while not finished:
            _, finished = media.next_chunk()
    except HttpError as exc:
        if exc.resp["status"] == "404":
            raise IOError('object "{}/{}" does not exist'.format(
                self.bucket, self._prefixed_name(name)))
        else:
            raise IOError("unknown HTTP error: {}".format(exc))
    buf.seek(0)
    return GCSFile(buf, name, self)
def __download_items(self, items):
    """Download the item matching ``self.__file_name`` into the output folder.

    Prints progress per chunk; items not matching the target name are skipped.
    """
    if not items:
        print('No files found.')
        return
    print('Files:')
    for item in items:
        file_name = item['name']
        if not file_name == self.__file_name:
            continue
        file_id = item['id']
        request = self.__service.files().get_media(fileId=file_id)
        # 'with' closes the file even if next_chunk() raises
        # (the original leaked the FileIO handle).
        with io.FileIO(self.__output_folder + file_name, 'w') as fh:
            downloader = MediaIoBaseDownload(fh, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
                print("Download %d%%." % int(status.progress() * 100))
def GDriveDownloader__download_File(self):
    """Download every queued Drive file into the downloads directory.

    Side effects: changes the process working directory twice (download dir,
    then back to the project dir) and writes one file per queued entry.
    """
    os.chdir("/Users/noahfarris/Desktop/downloads")
    for file_id in self.GDriveDownloader_files_to_download:
        request = self.GDriveDownloader_service.files().get_media(
            fileId=file_id["id"])  # requests for the wanted file
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(
            fh, request)  # makes the downloader for the file
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print("Download {}%".format(int(status.progress() * 100)))
        # 'with' guarantees the output file is closed (the original opened
        # it with open() and never called close()).
        with open(file_id["name"], 'wb') as f:
            f.write(fh.getvalue())
    os.chdir(
        '/Users/noahfarris/Desktop/CAPSTONE_FINAL/git/cloud-backup/cloud_backup'
    )
def download_image(drive_id):
    '''
    Downloads entire Google Drive image, at original size.

    Args:
        drive_id: Google Drive image file ID (assumes it has been verified)

    Returns:
        io.BytesIO buffer containing the image bytes (stream position is at
        the end; callers should seek(0) before reading).
    '''
    service = build('drive', 'v3',
                    developerKey=flask.current_app.config['GOOGLE_DRIVE_API_KEY'])
    request = service.files().get_media(fileId=drive_id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
    # Fix: the original discarded the downloaded bytes (implicit None return);
    # returning the buffer is backward compatible and makes the data usable.
    return fh
def post(self, request, format=None, *args, **kwargs):
    """Copy a list of Drive files to the wiki and log how many were uploaded.

    Validates the incoming payload, downloads each Drive file into memory,
    uploads it via WikiUploader, and returns the collected upload results.
    """
    serializer = GooglePhotosUploadInputSerializer(data=request.data)
    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
    validated_data = serializer.validated_data
    file_list = validated_data.get("fileList", None)
    drive_service = self.get_google_drive_service(
        access_token=validated_data.get("token", None))
    social_auth = self.request.user.social_auth.get(
        provider="mediawiki").extra_data["access_token"]
    wiki_uploader = WikiUploader(
        host=settings.WIKI_URL,
        consumer_secret=settings.SOCIAL_AUTH_MEDIAWIKI_SECRET,
        consumer_token=settings.SOCIAL_AUTH_MEDIAWIKI_KEY,
        access_token=social_auth.get("oauth_token", None),
        access_secret=social_auth.get("oauth_token_secret", None),
    )
    uploaded_results = []
    file_upload_log = FileUpload(username=request.user.username)
    count = 0
    for file in file_list:
        # Renamed from ``request`` so the loop no longer shadows the view's
        # DRF request parameter (a latent bug if code below used it).
        media_request = drive_service.files().get_media(fileId=file["id"])
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, media_request)
        done = False
        while not done:
            download_status, done = downloader.next_chunk()
        uploaded, image_info = wiki_uploader.upload_file(
            file_name=file["name"],
            file_stream=fh,
            description=file["description"])
        if uploaded:
            uploaded_results.append(image_info)
            count += 1
    file_upload_log.number_of_files = count
    file_upload_log.save()
    return Response(data=uploaded_results, status=status.HTTP_200_OK)
def download_from_Gdrive(gservice, this_file):
    """Download ``this_file`` (a Drive file dict with 'id' and 'name') into the
    module-level ``readbyte`` stream and return that stream.

    NOTE(review): ``readbyte`` is not defined in this function — presumably a
    module-level in-memory buffer shared across calls; confirm it is reset
    between downloads, otherwise contents accumulate.
    Returns None (implicitly) when any exception is caught; the error is only
    logged, never re-raised.
    """
    try:
        # download current file into memory
        download_request = gservice.files().get_media(fileId=this_file['id'])
        readbyte.flush()  # Using an in memory stream location
        downloader = MediaIoBaseDownload(readbyte, download_request)
        done = False
        # Progress bar labelled with the file's display name.
        pbar = InitBar('Downloading: ' + this_file['name'])
        while done is False:
            status, done = downloader.next_chunk()
            pbar(int(status.progress() * 100))
        # Drop the bar so its destructor can finalize terminal output.
        del pbar
        # tempbyte.close()
        return readbyte
    except (Exception) as e:
        # Broad catch: any failure is logged and swallowed (best-effort).
        log.error('Error in downloading file from gdrive' + this_file['name'])
        log.error('\n' + str(e))
def file_download(google_shareable_link, output_filename, output_directory):
    """ This function downloads a file from Google Drive and stores it in a local directory. Inputs for the function are: (1) "Shareable link" from Google Drive provided as a string. Example: google_shareable_link = 'https://drive.google.com/open?id=1cFi0rOqN8bcJ7H5fpfPAGS5Rem7TtiII' ***To get the link: go to the file in your Google Drive, right click, select "Get Shareable link". (2) Output file name including file extension provided as a string. Example: output_filename = 'Hexagonal_18.bmp' (3) Output Directory path provided as a string. Example: output_directory = '/Users/elenashoushpanova/Desktop/' Output for the function is a file path of saved file. Example: dir_file = '/Users/elenashoushpanova/Desktop/Hexagonal_18.bmp' Note: this function calls for a "google_authorization" function. """
    # Call for a google authorization function to get Google Credentials:
    creds = google_authorization.google_authorization()
    # Define Google Drive as a source of file:
    DRIVE = discovery.build('drive', 'v3', http=creds.authorize(Http()))
    # Converts the Google Shareable link that function got as Input into a
    # "file id":
    loc = google_shareable_link.find('id=') + 3
    file_id = google_shareable_link[loc:]
    # Access a file:
    request = DRIVE.files().get_media(fileId=file_id)
    # Merging output directory and file name to get a local file path:
    directory = os.path.dirname(output_directory)
    image_path = os.path.join(directory, output_filename)
    # Saving a file — 'with' closes the handle even on error (the original
    # never closed the FileIO):
    with io.FileIO(image_path, mode='w') as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print("Download %d%%." % int(status.progress() * 100))
    return image_path
def downloadFile(id, name):
    """Download a Drive file by id into ./<name>, printing verbose progress."""
    service = get_gdrive_service()
    request = service.files().get_media(fileId=id)
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print(
            "AFTER : In Download function : {0} and status {1} and status progress {2}"
            .format(done, status, int(status.progress())))
        print("AFTER : In Download function DONE : {0}".format(done))
        print("Download %d%%." % int(status.progress() * 100))
    with io.open("." + "/" + name, 'wb') as f:
        buffer.seek(0)
        f.write(buffer.read())
def downloadImage(drive, img_name, img_url):
    """Download a Drive-hosted image to '<img_name>.jpg' and return the filename.

    Args:
        drive: object exposing get_media(fileId=...) (Drive files resource).
        img_name: output basename (without extension).
        img_url: URL containing a '?id=<file_id>' query component.
    """
    image_id = img_url.split("?id=")[1]
    name = img_name + '.jpg'
    request = drive.get_media(fileId=image_id)
    # 'with' ensures the file handle is closed (the original leaked it).
    with io.FileIO(name, 'wb') as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
    # (dead commented-out thumbnail/resize code removed)
    return name
def _open(self, name, mode):
    """Read a GCS object into a buffer (retrying chunks) and wrap it as GCSFile.

    Only read-binary mode is supported; a missing object raises IOError.
    """
    if mode != "rb":
        raise ValueError("rb is the only acceptable mode for this backend")
    req = self.client.objects().get_media(
        bucket=self.bucket, object=self._prefixed_name(name))
    buf = self._open_io()
    media = MediaIoBaseDownload(buf, req)
    complete = False
    try:
        while not complete:
            _, complete = media.next_chunk(num_retries=self.num_retries)
    except HttpError as exc:
        if exc.resp["status"] == "404":
            raise IOError('object "{}/{}" does not exist'.format(
                self.bucket, self._prefixed_name(name)))
        else:
            raise IOError("unknown HTTP error: {}".format(exc))
    buf.seek(0)
    return GCSFile(buf, name, self)
def _download_file(file_id: str) -> Path:
    """Fetch a Drive file by id and persist it to data/restore.json."""
    logger.debug("Downloading file id='%s'", file_id)
    target = Path('data') / 'restore.json'
    with drive_client() as client:
        request = client.files().get_media(fileId=file_id)
        stream = io.BytesIO()
        downloader = MediaIoBaseDownload(stream, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            logger.debug("Download %d%%.", int(status.progress() * 100))
    stream.seek(0)
    with target.open('wb') as f:
        f.write(stream.read())
    return target
def downloadNotes(service):
    """Pull the notes file from Drive, backing up the previous local copy first.

    The Drive file id is read from the first line of ``noteFileCloudId``.
    """
    with open(noteFileCloudId, "r", encoding="utf-8") as fileObject:
        file_id = fileObject.readline().rstrip()
    request = service.files().get_media(fileId=file_id)
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        downloadPercentage = int(status.progress() * 100)
        print("Downloaded {0}%".format(downloadPercentage))
    if downloadPercentage == 100:
        # Backup an earlier version
        copyfile(noteFile, "{0}_backup".format(noteFile))
        # Write a new file
        with open(noteFile, "wb") as f:
            f.write(buffer.getbuffer())
def download_drive_file(file_id, output_file, name=None):
    """Download a Drive file to ``output_file``, printing progress per chunk.

    Args:
        file_id: Drive file id to fetch via get_media.
        output_file: local path the bytes are written to.
        name: accepted for backward compatibility but unused here.
    """
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('drive', 'v3', http=http)
    request = service.files().get_media(fileId=file_id)
    # 'with' replaces the manual fh.close() so the handle is released even
    # when a chunk request raises (the original only closed on success).
    with open(output_file, 'wb') as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print("Download %d%%." % int(status.progress() * 100))
def download_google_doc(file_id, download_destination):
    """Export a Google Doc as plain text and write it to ``download_destination``."""
    # authenticate
    drive_service = authenticate_to_drive()
    # download
    request = drive_service.files().export_media(fileId=file_id,
                                                 mimeType='text/plain')
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, request)
    finished = False
    while not finished:
        status, finished = downloader.next_chunk()
    # write download to a destination file
    buffer.seek(0)
    with io.open(download_destination, 'wb') as f:
        f.write(buffer.read())
def get_file_from_drive(file_id, name):
    """Download a (shared-drive aware) Drive file into temp_vids/<name>.

    Returns:
        The local path on success, or False if the file did not materialize.
    """
    _, g_drive = login()
    temp_path = os.path.join('temp_vids', name)
    request = g_drive.files().get_media(fileId=file_id, supportsAllDrives=True)
    # Close the handle deterministically (the original leaked the FileIO,
    # including on exceptions mid-download).
    with io.FileIO(temp_path, mode='wb') as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print("Download %d%%." % int(status.progress() * 100))
    if os.path.isfile(temp_path):
        return temp_path
    else:
        return False
def download_file(drive, file, destFolder):
    """Download a Drive file dict into destFolder with a collision-free name.

    Prints progress in-place (carriage return); uses 100 MiB chunks.
    """
    copiedFileMedia = drive.auth.service.files().get_media(fileId=file['id'])
    newFileName = file['title']
    defaultPath = destFolder + "\\" + newFileName
    fullPath = generate_path_with_unique_filename(destFolder, newFileName)
    if defaultPath != fullPath:
        print("file already exist in the disk, new path : " + fullPath)
    print("download in progress. File size : " + sizeof_file(int(file['fileSize'])))
    # Fixes two issues: the original rebound the ``file`` parameter to the
    # output handle (shadowing), and only closed it on the success path.
    with open(fullPath, "wb+") as out_fh:
        downloader = MediaIoBaseDownload(out_fh, copiedFileMedia,
                                         chunksize=104857600)  # 100 MiB chunks
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print("\rDownload %d%%" % int(status.progress() * 100), end="")
    print("\ndownload completed : " + newFileName)
def download_content_to_file(file_dict: dict, gdrive_service: Resource):
    """
    Download the file content from Google Drive (the service passed in is a
    Drive resource — the previous docstring incorrectly said S3).

    This modifies the file dict in-place: the downloaded bytes are attached
    under file_dict['content'] as a rewound io.BytesIO.
    """
    file_id = file_dict['id']
    request = gdrive_service.files().get_media(fileId=file_id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        # Per-chunk status is discarded; loop until the download completes.
        _, done = downloader.next_chunk()
    fh.seek(0)  # rewind so consumers can read from the start
    file_dict['content'] = fh
    logging.info("Downloaded %r (%s).", file_id, file_dict['mimeType'])