Example #1
    def Download_the_file(Id: str, Mod_time: str, name: str, mimetype: str):
        # Normalize the name and modification time so they are safe to use in a file name; slugify is great at this
        name_no_spaces = slugify(name)
        Mod_time_no_wierd_symbols = sub(r'[:.]', '_', Mod_time)

        # Here we establish which files should be saved on disc using which format, based on their mimetype
        mimetype_to_export_file_as = None
        if mimetype == 'application/vnd.google-apps.spreadsheet':
            mimetype_to_export_file_as = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        elif mimetype == 'application/vnd.google-apps.document':
            mimetype_to_export_file_as = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        elif mimetype == 'application/vnd.google-apps.presentation':
            mimetype_to_export_file_as = 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
        else:
            mimetype_to_export_file_as = 'text/plain'

        # I am not sure why this workflow looks the way it looks. Future me - you're on your own and you know as much as I do
        request = DRIVE.files().export(fileId=Id, mimeType=mimetype_to_export_file_as)
        print(f'Trying to download: {name_no_spaces}_{Mod_time_no_wierd_symbols}.goo')
        fh = FileIO(f'{name_no_spaces}_{Mod_time_no_wierd_symbols}.goo', 'wb')
        downloader = http.MediaIoBaseDownload(fh, request)
        done = False
        while done is False:
            status, done = downloader.next_chunk()
            print(f'Downloading: {name_no_spaces}_{Mod_time_no_wierd_symbols}.goo')
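A minimal usage sketch for the snippet above, assuming DRIVE is an authorized Drive v3 service (for example, built with googleapiclient.discovery.build('drive', 'v3', ...)) and that the listed files are Google-native documents; the field names come from the Drive v3 files resource, not from the original project:

    # Hypothetical caller: list files and hand each one to Download_the_file.
    listing = DRIVE.files().list(
        fields='files(id, modifiedTime, name, mimeType)').execute()
    for f in listing.get('files', []):
        Download_the_file(f['id'], f['modifiedTime'], f['name'], f['mimeType'])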
Example #2
    def download_file_from_cloud(self, file_id, path):
        """Download a Drive file's content to the local filesystem.
        :param file_id: ID of the Drive file that will be downloaded.
        :type file_id: str
        :param path: directory prefix under which the file is written
        :type path: str
        :return: True if the download succeeded, False otherwise
        """
        self.connect()

        if self.internet_on():
            local_fd = open(path + "commands.csv", "wb")
            request = self.drive.auth.service.files().get_media(fileId=file_id)
            media_request = http.MediaIoBaseDownload(local_fd, request)
            while True:
                try:
                    download_progress, done = media_request.next_chunk()
                except errors.HttpError as error:
                    print('An error occurred: %s' % error)
                    return False
                if download_progress:
                    print('Download Progress: %d%%' %
                          int(download_progress.progress() * 100))
                if done:
                    print('Download Complete')
                    return True
        else:
            return False
Example #3
    def __getreportdataraw(self, report_id):
        request = self.__api.reports().get(reportId=report_id)
        result = APIRequest(request).execute()

        et = 0
        retry_attempts = 0
        max_wait_time = 500
        while True:
            if result['isReportReady']:
                request = self.__api.reports().getFile(reportId=report_id,
                                                       reportFragment=0)
                data = StringIO()
                downloader = httpMediaHandler.MediaIoBaseDownload(
                    data, request, chunksize=2**20 * 20)  # 20Mb chunks
                done = False
                while done is False:
                    unused_status, done = downloader.next_chunk()
                data.seek(0)
                return data
            wait_time = min(max_wait_time, 2**retry_attempts)
            retry_attempts += 1
            time.sleep(wait_time)
            et += wait_time
            if et >= DSConnector._DS_TIMEOUT:
                raise DSAPITimeOut('DS API Request Timeout (files.get())')

            request = self.__api.reports().get(reportId=report_id)
            result = APIRequest(request).execute()
Example #4
def direct_download_file(service, report_id, file_id):
  """Downloads a report file to disk."""
  # Retrieve the file metadata.
  report_file = service.files().get(
      reportId=report_id, fileId=file_id).execute()

  if report_file['status'] == 'REPORT_AVAILABLE':
    # Prepare a local file to download the report contents to.
    out_file = io.FileIO(generate_file_name(report_file), mode='wb')

    # Create a get request.
    request = service.files().get_media(reportId=report_id, fileId=file_id)

    # Create a media downloader instance.
    # Optional: adjust the chunk size used when downloading the file.
    downloader = http.MediaIoBaseDownload(
        out_file, request, chunksize=CHUNK_SIZE)

    # Execute the get request and download the file.
    download_finished = False
    while download_finished is False:
      _, download_finished = downloader.next_chunk()

    print('File %s downloaded to %s' % (report_file['id'],
                                        os.path.realpath(out_file.name)))
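The snippet above relies on a CHUNK_SIZE constant and a generate_file_name helper defined elsewhere in the sample. A plausible minimal version, reconstructed from the report_file fields the surrounding examples already read (an assumption, not the original helper):

    CHUNK_SIZE = 32 * 1024 * 1024  # 32 MB per chunk; any reasonable size works.

    def generate_file_name(report_file):
        # Hypothetical helper: derive a local file name from the report file metadata.
        file_name = report_file['fileName'] or report_file['id']
        extension = '.csv' if report_file['format'] == 'CSV' else '.xml'
        return file_name + extension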
Example #5
    def download_file(self):
        CHUNK_SIZE = 32 * 1024 * 1024
        request = self.service.files().get(reportId=self.report_id,
                                           fileId=self.file['id'])
        report_file = request.execute()

        file_name = report_file['fileName'] or report_file['id']

        if report_file['format'] == 'CSV':
            extension = '.csv'
        else:
            extension = '.xml'

        file_name = file_name + extension

        if report_file['status'] == 'REPORT_AVAILABLE':
            out_file = FileIO(file_name, mode='wb')

            request = self.service.files().get_media(reportId=self.report_id,
                                                     fileId=self.file['id'])

            downloader = http.MediaIoBaseDownload(out_file,
                                                  request,
                                                  chunksize=CHUNK_SIZE)

            download_finished = False

            while download_finished is False:
                _, download_finished = downloader.next_chunk()
        self.file_name = file_name
        return file_name
Example #6
    def __getreportdataraw(self, report_id, file_id, chunk_size):
        request = self.__api.files().get(reportId=report_id, fileId=file_id)
        result = APIRequest(request).execute()

        et = 0
        retry_attempts = 0
        max_wait_time = 500
        while True:
            if result['status'] == 'REPORT_AVAILABLE':
                request = self.__api.files().get_media(reportId=report_id,
                                                       fileId=file_id)
                data = StringIO()
                downloader = httpMediaHandler.MediaIoBaseDownload(
                    data, request, chunksize=chunk_size)
                done = False
                while done is False:
                    unused_status, done = downloader.next_chunk(num_retries=4)
                data.seek(0)
                return data
            wait_time = min(max_wait_time, 2**retry_attempts)
            retry_attempts += 1
            time.sleep(wait_time)
            et += wait_time
            if et >= DCMConnector._DCM_TIMEOUT:
                raise DCMAPITimeOut('DCM API Request Timeout (files.get())')

            request = self.__api.files().get(reportId=report_id,
                                             fileId=file_id)
            result = APIRequest(request).execute()
Example #7
def format_doc(file_id):
    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)

    # build service clients for the Drive and Docs APIs
    drive_service = build('drive', 'v3', credentials=creds)
    docs_service = build('docs', 'v1', credentials=creds)

    request = drive_service.files().export_media(fileId=file_id,
                                                 mimeType='text/plain')
    fh = io.BytesIO()
    downloader = http.MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))

    # decode the downloaded bytes into a string
    return fh.getvalue().decode("UTF-8")
Example #8
def main(argv):
    if len(argv) > 1:
        raise app.UsageError(f'Unused command line arguments: {argv[1:]}')
    if FLAGS.file_id is None:
        raise app.UsageError('Please specify --file_id.')
    if FLAGS.output_file is None:
        raise app.UsageError('Please specify --output_file.')
    SCOPES = 'https://www.googleapis.com/auth/drive.readonly'
    store = file.Storage(FLAGS.auth_cache_json_file)
    creds = store.get()
    if not creds or creds.invalid:
        if not FLAGS.client_id_json_file:
            raise app.UsageError('Please set --client_id_json_file.')
        flow = client.flow_from_clientsecrets(FLAGS.client_id_json_file,
                                              SCOPES,
                                              cache=None)
        creds = tools.run_flow(flow,
                               store,
                               flags=tools.argparser.parse_args(args=[]))
    DRIVE = discovery.build('drive', 'v3', http=creds.authorize(Http()))

    request = DRIVE.files().get_media(fileId=FLAGS.file_id)
    fh = io.BytesIO()
    downloader = http.MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        logging.info("Downloaded %d%%." % int(status.progress() * 100))
    with open(FLAGS.output_file, 'wb') as f:
        f.write(fh.getvalue())
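main() above reads several FLAGS values that are defined outside the snippet. Assuming the script uses absl-py (which the app.UsageError calls suggest), the missing flag definitions would look roughly like this; the defaults and help strings are illustrative:

    from absl import flags

    FLAGS = flags.FLAGS
    flags.DEFINE_string('file_id', None, 'ID of the Drive file to download.')
    flags.DEFINE_string('output_file', None, 'Local path the file is written to.')
    flags.DEFINE_string('client_id_json_file', None, 'OAuth client secrets JSON file.')
    flags.DEFINE_string('auth_cache_json_file', None, 'Where cached OAuth credentials are stored.')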
Example #9
File: gcs.py  Project: wanglt311/mrjob
    def _download_io(self, src_uri, io_obj):
        bucket_name, object_name = parse_gcs_uri(src_uri)

        # Chunked file download
        req = self.api_client.objects().get_media(bucket=bucket_name,
                                                  object=object_name)
        downloader = google_http.MediaIoBaseDownload(io_obj, req)

        done = False
        while not done:
            try:
                status, done = downloader.next_chunk()
            except google_errors.HttpError as e:
                # Error code 416 (request range not satisfiable)
                # implies we're trying to download a file of size 0
                if e.resp.status == 416:
                    break

                raise

            if status:
                log.debug("Download %d%%." % int(status.progress() * 100))

        log.debug("Download Complete for %s", src_uri)
        return io_obj
Example #10
    def download(self, bucket, object_name):
        """Download an object from a bucket.

        Args:
            bucket (str): The name of the bucket to read from.
            object_name (str): The name of the object to read.

        Returns:
            str: The contents of the object.
        """
        verb_arguments = {
            'bucket': bucket,
            'object': object_name}

        media_request = self._build_request('get_media', verb_arguments)
        media_request.http = self.http

        file_content = ''
        out_stream = StringIO.StringIO()
        try:
            downloader = http.MediaIoBaseDownload(out_stream, media_request)
            done = False
            while not done:
                _, done = downloader.next_chunk(num_retries=self._num_retries)
            file_content = out_stream.getvalue()
        finally:
            out_stream.close()
        return file_content
Example #11
    def download_file(self, file_id):
        """Download a Drive file's content to the local filesystem.
        Args:
            file_id: ID of the Drive file that will be downloaded. The file is
                written to the current working directory under its Drive name.
        """

        file = self.service.files().get(fileId=file_id).execute()
        file_name = file['name']
        print('Name:', file_name)
        # print ('MIME type:', file['mimeType'])
        local_fd = open(file_name, "wb")
        request = self.service.files().get_media(fileId=file_id)
        media_request = http.MediaIoBaseDownload(local_fd, request)

        while True:
            try:
                download_progress, done = media_request.next_chunk()
            except errors.HttpError as error:
                print('An error occurred:', error)
                return
            if download_progress:
                print('Download Progress:',
                      int(download_progress.progress() * 100))
            if done:
                print('Download Complete')
                local_fd.close()
                return
Example #12
    def get_text_file(self, full_bucket_path):
        """Gets a text file object as a string.

        Args:
            full_bucket_path (str): The full path of the bucket object.

        Returns:
            str: The object's content as a string.

        Raises:
            HttpError: HttpError is raised if the call to the
                GCP storage API fails
        """
        file_content = ''
        storage_service = self.service
        bucket, object_path = get_bucket_and_path_from(full_bucket_path)
        media_request = (storage_service.objects().get_media(
            bucket=bucket, object=object_path))
        out_stream = StringIO.StringIO()
        try:
            downloader = http.MediaIoBaseDownload(out_stream, media_request)
            done = False
            while not done:
                _, done = downloader.next_chunk()
            file_content = out_stream.getvalue()
            out_stream.close()
        except errors.HttpError as http_error:
            LOGGER.error('Unable to download file: %s', http_error)
            raise http_error
        return file_content
Example #13
 def export(self, spreadsheet_id, fformat):
     request = self.driveService.files().export(fileId=spreadsheet_id, mimeType=fformat.value.split(':')[0])
     import io
     fh = io.FileIO(spreadsheet_id+fformat.value.split(':')[1], 'wb')
     downloader = ghttp.MediaIoBaseDownload(fh, request)
     done = False
     while done is False:
         status, done = downloader.next_chunk()
         print("Download %d%%." % int(status.progress() * 100))
Example #14
File: gcs.py  Project: ipattarapong/dbnd
    def _open_read(
        self,
        remote_path,
        local_path=None,
        delete_file_on_close=True,
        chunksize=None,
        chunk_callback=lambda _: False,
    ):
        """Downloads the object contents to local file system.

        Optionally stops after the first chunk for which chunk_callback returns True.
        """
        chunksize = chunksize or self.chunksize
        bucket, obj = self._path_to_bucket_and_key(remote_path)

        tmp_file_path = local_path or get_local_tempfile(os.path.basename(remote_path))
        with open(tmp_file_path, "wb") as fp:
            # We can't return the tempfile reference because of a bug in python: http://bugs.python.org/issue18879
            if delete_file_on_close:
                return_fp = _DeleteOnCloseFile(tmp_file_path, "r")
            else:
                return_fp = fp

            # Special case empty files because chunk-based downloading doesn't work.
            result = self.client.objects().get(bucket=bucket, object=obj).execute()
            if int(result["size"]) == 0:
                return return_fp

            request = self.client.objects().get_media(bucket=bucket, object=obj)
            downloader = http.MediaIoBaseDownload(fp, request, chunksize=chunksize)

            attempts = 0
            done = False
            while not done:
                error = None
                try:
                    _, done = downloader.next_chunk()
                    if chunk_callback(fp):
                        done = True
                except errors.HttpError as err:
                    error = err
                    if err.resp.status < 500:
                        raise
                    logger.warning("Error downloading file, retrying", exc_info=True)
                except RETRYABLE_ERRORS as err:
                    logger.warning("Error downloading file, retrying", exc_info=True)
                    error = err

                if error:
                    attempts += 1
                    if attempts >= NUM_RETRIES:
                        raise error
                else:
                    attempts = 0

        return return_fp
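RETRYABLE_ERRORS, NUM_RETRIES and _DeleteOnCloseFile are module-level helpers the snippet does not show. A minimal sketch of what they could look like (the real definitions in the dbnd/luigi sources may differ):

    import io
    import os

    NUM_RETRIES = 5
    # Transient network failures that are worth retrying.
    RETRYABLE_ERRORS = (IOError, ConnectionError)

    class _DeleteOnCloseFile(io.FileIO):
        # Read handle that removes the backing temp file once it is closed.
        def close(self):
            super().close()
            try:
                os.remove(self.name)
            except OSError:
                pass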
Example #15
 async def download(self, buf):
     util = getUtility(IGCloudBlobStore)
     if not hasattr(self, '_uri'):
         url = self._upload_file_id
     else:
         url = self._uri
     req = util._service.objects().get_media(
         bucket=util.bucket, object=url)
     downloader = http.MediaIoBaseDownload(buf, req, chunksize=CHUNK_SIZE)
     return downloader
Example #16
    def _do_download(self, file_id, file_obj):
        request = self._service.files().get_media(fileId=file_id,
                                                  supportsTeamDrives=True)
        downloader = gah.MediaIoBaseDownload(file_obj,
                                             request,
                                             chunksize=self.chunk_size)

        done = False
        while not done:
            status, done = downloader.next_chunk()
            logger.info("Progress = %d%%" % int(status.progress() * 100))
Example #17
 def get_object(self, bucket, filename, out_file):
     from googleapiclient import http
     service = self.create_service()
     req = service.objects().get_media(bucket=bucket, object=filename)
     downloader = http.MediaIoBaseDownload(out_file, req)
     done = False
     while done is False:
         status, done = downloader.next_chunk()
         logging.info("file {} Download {}%.".format(
             filename, int(status.progress() * 100)))
     return out_file
Example #18
def downloadById(fileID, outputName):
    chunkable_bytes_python_object = io.BytesIO() #handles all chunking and downloads
    unexecuted_request = DRIVE.files().get_media(fileId=fileID) 
    downloader = http.MediaIoBaseDownload(chunkable_bytes_python_object, unexecuted_request)
    print("Downloading: "+outputName)
    while True:
        state, completion_flag = downloader.next_chunk()
        print(f"{outputName}:{state.progress() * 100}% Completed")
        if completion_flag: break

    with open(outputName, "wb") as f:
        f.write(chunkable_bytes_python_object.getbuffer())
Example #19
def download_file(file_id: str, destination: Path, service):
    # From the Google Drive docs, lightly edited.
    # Only for images--docs and sheets require a different treatment.
    print(f"Downloading {file_id} to {destination}")
    destination.parent.mkdir(exist_ok=True, parents=True)
    request = service.files().get_media(fileId=file_id)
    fh = destination.open(mode="wb")
    downloader = http.MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
Example #20
 def get_object(self, filename, out_file):
     service = self.__create_service()
     # Use get_media instead of get to get the actual contents of the object.
     # http://g.co/dev/resources/api-libraries/documentation/storage/v1/python/latest/storage_v1.objects.html#get_media
     req = service.objects().get_media(bucket=self.BUCKET_NAME, object=filename)
     downloader = http.MediaIoBaseDownload(out_file, req)
     done = False
     while done is False:
         status, done = downloader.next_chunk()
         print("Download {}%.".format(int(status.progress() * 100)))
     return out_file
Example #21
 def export(self, spreadsheet_id, fformat, filename=None):
     fformat = getattr(fformat, 'value', fformat)
     request = self.driveService.files().export(fileId=spreadsheet_id, mimeType=fformat.split(':')[0])
     import io
     ifilename = spreadsheet_id+fformat.split(':')[1] if filename is None else filename
     fh = io.FileIO(ifilename, 'wb')
     downloader = ghttp.MediaIoBaseDownload(fh, request)
     done = False
     while done is False:
         status, done = downloader.next_chunk()
         print("Download %d%%." % int(status.progress() * 100))
Example #22
def download_large_object(bucket, gckey, localfile):
    service = create_service()
    req = service.objects().get_media(bucket=bucket, object=gckey)
    with open(localfile, 'wb') as f:
        downloader = http.MediaIoBaseDownload(f,
                                              req,
                                              chunksize=1024 * 1024)
        done = False
        while done is False:
            status, done = downloader.next_chunk()
            if status:
                print "Download %d%%." % int(status.progress() * 100)
    print "Download Complete!"
Example #23
def _download_file(service: object, file_id: str, file_name: str,
                   download_path: str):
    """Download files to the specified path."""
    request = service.files().get_media(fileId=file_id)
    fh = io.BytesIO()
    downloader = http.MediaIoBaseDownload(fh, request)
    done = False

    while not done:
        status, done = downloader.next_chunk()

    with open(f'{download_path}/{file_name}', 'wb') as f:
        f.write(fh.getvalue())
Example #24
    def open(self, path, delay=False):
        request = self.service.files().get_media(
            fileId=self.items[path[1:]][0])
        fh = io.BytesIO()
        try:
            downloader = http.MediaIoBaseDownload(fh, request)
            done = False
            while done is False:
                status, done = downloader.next_chunk()
        except Exception:
            pass

        return fh
Example #25
def process_file(service, fieldmap, report_config, file_id, report_time):
    report_id = report_config['report_id']
    stream_name = report_config['stream_name']
    stream_alias = report_config['stream_alias']

    request = service.files().get_media(reportId=report_id, fileId=file_id)

    line_state = {
        'headers_line': False,
        'past_headers': False,
        'count': 0
    }

    report_id_int = int(report_id)

    def line_transform(line):
        if not line_state['past_headers'] and not line_state['headers_line'] and line == 'Report Fields':
            line_state['headers_line'] = True
            return
        if line_state['headers_line']:
            line_state['headers_line'] = False
            line_state['past_headers'] = True
            return

        if line_state['past_headers']:
            row = parse_line(line)
            # skip the report's grand total line
            if row[0] == 'Grand Total:':
                return

            obj = {}
            for i in range(len(fieldmap)):
                field = fieldmap[i]
                obj[field['name']] = transform_field(field['type'], row[i])

            obj[SINGER_REPORT_FIELD] = report_time
            obj[REPORT_ID_FIELD] = report_id_int

            singer.write_record(stream_name, obj, stream_alias=stream_alias)
            line_state['count'] += 1
    
    stream = StreamFunc(line_transform)
    downloader = http.MediaIoBaseDownload(stream,
                                          request,
                                          chunksize=CHUNK_SIZE)
    download_finished = False
    while download_finished is False:
        _, download_finished = downloader.next_chunk()

    with singer.metrics.record_counter(stream_name) as counter:
        counter.increment(line_state['count'])
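StreamFunc is not part of googleapiclient; it is the tap's own file-like wrapper that feeds each decoded line to line_transform as the downloader writes chunks into it. A minimal sketch of such a wrapper (an assumption about its behaviour, not the original class):

    class StreamFunc:
        # File-like object: buffers the bytes MediaIoBaseDownload writes and
        # calls `func` once per complete text line.
        def __init__(self, func):
            self.func = func
            self.buffer = b''

        def write(self, chunk):
            self.buffer += chunk
            while b'\n' in self.buffer:
                line, self.buffer = self.buffer.split(b'\n', 1)
                self.func(line.decode('utf-8').rstrip('\r'))
            return len(chunk)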
Example #26
File: gcs.py  Project: maxhollmann/luigi
    def download(self, path, chunksize=None, chunk_callback=lambda _: False):
        """Downloads the object contents to local file system.

        Optionally stops after the first chunk for which chunk_callback returns True.
        """
        chunksize = chunksize or self.chunksize
        bucket, obj = self._path_to_bucket_and_key(path)

        with tempfile.NamedTemporaryFile(delete=False) as fp:
            # We can't return the tempfile reference because of a bug in python: http://bugs.python.org/issue18879
            return_fp = _DeleteOnCloseFile(fp.name, 'r')

            # Special case empty files because chunk-based downloading doesn't work.
            result = self.client.objects().get(bucket=bucket,
                                               object=obj).execute()
            if int(result['size']) == 0:
                return return_fp

            request = self.client.objects().get_media(bucket=bucket,
                                                      object=obj)
            downloader = http.MediaIoBaseDownload(fp,
                                                  request,
                                                  chunksize=chunksize)

            attempts = 0
            done = False
            while not done:
                error = None
                try:
                    _, done = downloader.next_chunk()
                    if chunk_callback(fp):
                        done = True
                except errors.HttpError as err:
                    error = err
                    if err.resp.status < 500:
                        raise
                    logger.warning('Error downloading file, retrying',
                                   exc_info=True)
                except RETRYABLE_ERRORS as err:
                    logger.warning('Error downloading file, retrying',
                                   exc_info=True)
                    error = err

                if error:
                    attempts += 1
                    if attempts >= NUM_RETRIES:
                        raise error
                else:
                    attempts = 0

        return return_fp
Example #27
    def get_object(self):
        print "FUNCTION: get_object"
        Logger.log_writer("FUNCTION: get_object")
        req = self.gcs_client.objects().get_media(bucket=self.bucket, object=self.filename)
        out_file = e.BytesIO()
        downloader = e.MediaIoBaseDownload(out_file, req)

        done = False
        while done is False:
            status, done = downloader.next_chunk()
            print("Download {}%.".format(int(status.progress() * 100)))
        coordinates = out_file.getvalue().split(",")[:2]
        out_file.close()
        return coordinates
Example #28
    def read_data_chunk(self, report_data: dict, chunk: int = 16384) -> bytes:
        report_id = report_data['id']
        file_id = report_data['report_file']['id']
        request = self.service().files().get_media(reportId=report_id,
                                                   fileId=file_id)

        # Create a media downloader instance.
        out_file = io.BytesIO()
        downloader = http.MediaIoBaseDownload(out_file,
                                              request,
                                              chunksize=chunk)
        downloader.next_chunk()

        return out_file.getvalue()
Example #29
    def load_all(self):
        latest_file = self._get_latest_file()
        service = self.get_service()

        req = service.objects().get_media(
            bucket=SupportedExchange.GS_DBM_BUCKET, object=latest_file['name'])

        out_file = StringIO()
        downloader = http.MediaIoBaseDownload(out_file, req)

        done = False
        while done is False:
            status, done = downloader.next_chunk()

        return json.loads(out_file.getvalue())
Example #30
def get_object(service, bucket, filename, out_file):
    try:
        req = service.objects().get_media(bucket=bucket, object=filename)

        downloader = http.MediaIoBaseDownload(out_file, req)

        done = False
        while done is False:
            status, done = downloader.next_chunk()
            print "Status: " + str(status) + ", Download {}%.".format(
                int(status.progress() * 100))

        return out_file
    except Exception as exc:
        print str(exc)