示例#1
0
 def test_create_and_move_blob(self):
     """Create a file at a gs:// URI, move it, and check both locations."""
     source_uri = "gs://aries_test/new_file.txt"
     target_uri = "gs://aries_test/moved_file.txt"

     # The blob must be absent before create() and present afterwards.
     source = StorageFile(source_uri)
     self.assertFalse(source.blob.exists())
     source.create()
     self.assertTrue(source.blob.exists())

     # Once moved, the source is gone and the target is there.
     source.move(target_uri)
     self.assertFalse(source.exists())
     moved = StorageFile(target_uri)
     self.assertTrue(moved.exists())

     # Remove the moved file so the test leaves nothing behind.
     moved.delete()
示例#2
0
    def get_file_size(self, path, job_name=None, **kwargs):
        """Return the size of ``path`` expressed in GB (raw size / 2**30).

        How the size is obtained:
          * a path ending in ``*`` is measured as a ``StoragePrefix`` built
            from the path with its trailing ``*`` characters removed;
          * any other path is tried as a ``StorageFile`` first and then as a
            ``StorageFolder``; when neither exists the size is 0.

        Local paths are not inspected at all: a warning is logged and
        ``True`` is returned instead of a number.

        ``job_name`` and ``**kwargs`` are accepted but not used here.

        Any exception raised while measuring is logged and then re-raised
        unchanged.
        """
        # Paths on the local disk image are assumed to be present.
        is_local = self.__get_file_protocol(path) == "Local"
        if is_local:
            logging.warning(f"Ignoring path '{path}' as it is local on the disk image. Assuming the path is present!")
            return True

        try:
            if path.endswith("*"):
                # Wildcard path: measure everything under the prefix.
                raw_size = StoragePrefix(path.rstrip("*")).size
            else:
                # Build both views of the path up front, then prefer the file.
                as_file = StorageFile(path)
                as_folder = StorageFolder(path)

                if as_file.exists():
                    raw_size = as_file.size
                else:
                    raw_size = as_folder.size if as_folder.exists() else 0

            # Convert to GB (raw size is presumably in bytes).
            return float(raw_size) / 2**30

        except BaseException as err:
            logging.error(f"Unable to get file size: {path}")
            if str(err):
                logging.error(f"Received the following msg:\n{err}")
            raise
示例#3
0
文件: utils.py 项目: qiuosier/Cancer
def transfer_file(to_folder_uri, file_id=None, file_info_href=None):
    """Copies a BaseSpace file into a destination folder.

    The file is identified either by ``file_id`` or by ``file_info_href``.
    When both are given, ``file_id`` takes precedence and the href is rebuilt
    from it.

    The copy is skipped when a file with the same name already exists at the
    destination and its size equals the size reported by the file info.

    Args:
        to_folder_uri (str): URI of the destination folder.
        file_id (str, optional): BaseSpace file ID.
        file_info_href (str, optional): API href of the file info,
            whose last path segment is used as the file ID.

    Returns:
        str: The URI of the file at the destination.

    Raises:
        ValueError: If neither file_id nor file_info_href is provided.
    """
    # Determine the file_id, file_info and file_content_href
    if file_id is not None:
        file_info_href = "v1pre3/files/%s" % file_id
        file_content_href = "v1pre3/files/%s/content" % file_id
    elif file_info_href is not None:
        file_id = file_info_href.strip("/").split("/")[-1]
        # Drop any trailing slash so the content href never contains "//".
        file_content_href = "%s/content" % file_info_href.rstrip("/")
    else:
        raise ValueError("Either BaseSpace file_id or file_info_href is needed for file transfer.")

    file_info = api_response(file_info_href)
    logger.debug("Transferring file from BaseSpace: %s" % file_content_href)

    # For FASTQ files, add basespace file ID to filename
    # Each MiSeq run may have multiple FASTQ files with the same name.
    fastq_suffix = ".fastq.gz"
    filename = file_info.get("Name")
    if filename.endswith(fastq_suffix):
        # Rewrite only the trailing suffix; the stem is left untouched even
        # if it happens to contain ".fastq.gz" as well.
        stem = filename[:-len(fastq_suffix)]
        filename = "%s_%s%s" % (stem, file_id, fastq_suffix)

    # Skip if a file exists and have the same size.
    to_uri = os.path.join(to_folder_uri, filename)
    dest_file = StorageFile(to_uri)
    file_size = file_info.get("Size")
    # A missing/zero reported size never counts as a match, so such files
    # are always copied again.
    if file_size and dest_file.exists() and dest_file.size == file_size:
        logger.debug("File %s exists at destination: %s" % (filename, to_uri))
        return to_uri
    from_uri = build_api_url(file_content_href)
    StorageFile(from_uri).copy(to_uri)
    return to_uri
示例#4
0
文件: source.py 项目: qiuosier/Virgo
    def get_intraday_series(self, symbol, date=None):
        """Gets a pandas data frame of intraday series data.

        Args:
            symbol (str): The name of the equity/stock.
            date (str, optional): Date formatted as self.date_fmt,
                e.g. 2017-02-12. Defaults to None.

        Returns: A pandas data frame of intraday series data for one day,
            indexed by 'timestamp' and with a ``symbol`` attribute set.
            If date is specified, the frame holds the data of that day and is
            empty when there is no data for it.
            If date is None, the days are tried one by one going back from
            today, and the first non-empty frame is returned. After 100 days
            without data the search stops and the (empty) frame of the last
            day tried is returned. This function never returns None.

        """
        series_type = self.intraday_series_type
        # requested_date stores the original requested date
        requested_date = date
        day_delta = 0
        df = None
        # When date is specified, the loop runs exactly once.
        # When date is not specified, try to get data of the previous day if there is no data today
        while df is None or (requested_date is None and df.empty
                             and day_delta < 100):
            if requested_date is None:
                date = (datetime.datetime.now() -
                        datetime.timedelta(days=day_delta)).strftime(
                            self.date_fmt)
            logger.debug("Getting data for %s" % date)
            # Get the next date as string for filtering purpose
            # next_date is a string of date, which will be used to compare with data frame index.
            dt_date = datetime.datetime.strptime(date, self.date_fmt)
            dt_next = dt_date.date() + datetime.timedelta(days=1)
            next_date = dt_next.strftime(self.date_fmt)

            # Only look for a cache file when caching is enabled.
            storage_file = None
            if self.cache:
                file_path = self.__cache_file_path(symbol, series_type, date)
                storage_file = StorageFile(file_path)

            if storage_file is not None and storage_file.exists():
                # Cached data is used as-is, without the date filter below.
                # NOTE(review): presumably the cache file already holds only
                # the rows of this date - confirm against the cache writer.
                logger.debug("Reading existing data... %s" % file_path)
                with storage_file('r') as f:
                    df = pd.read_csv(f,
                                     index_col=0,
                                     parse_dates=['timestamp'])
            else:
                if self.cache:
                    # Cache is enabled but there is no file for this date.
                    df = self.__intraday_get_full_data(symbol)
                else:
                    # Request new data
                    df = self.__request_data(symbol, series_type, 'full')
                # Keep only the rows inside [date, next_date).
                df = df[(df['timestamp'] >= date)
                        & (df['timestamp'] < next_date)]

            day_delta += 1

        # The loop cannot exit while df is None, so no None check is needed.
        df.set_index('timestamp', inplace=True)
        df.symbol = symbol
        return df
示例#5
0
 def test_binary_read_write(self):
     """Write bytes to a new file, read them back, then delete the file."""
     # The file is opened for binary writing before any existence check.
     target_uri = os.path.join(self.TEST_ROOT, "test.txt")
     handle = StorageFile(target_uri).open("wb")

     # A write-mode handle reports the scheme, is seekable, not readable.
     self.assertEqual(handle.scheme, self.SCHEME)
     self.assertTrue(handle.seekable())
     self.assertFalse(handle.readable())

     # Each write reports 3 bytes written and advances the position.
     for chunk, position in ((b"abc", 3), (b"def", 6)):
         self.assertEqual(handle.write(chunk), 3)
         self.assertEqual(handle.tell(), position)
     handle.close()
     self.assertTrue(handle.exists())

     # Reopen the same object for reading and check the full content.
     handle.open('rb')
     self.assertEqual(handle.read(), b"abcdef")
     handle.close()

     # Clean up and confirm the file is gone.
     handle.delete()
     self.assertFalse(handle.exists())
示例#6
0
    def get_file_size(self, path, job_name=None, **kwargs):
        """Return the size of ``path`` in GB (raw size / 2**30), with retries.

        How the size is obtained:
          * a path ending in ``*`` is measured as a ``StoragePrefix`` built
            from the path with its trailing ``*`` characters removed;
          * any other path is polled up to 10 times, as a ``StorageFile``
            first and then as a ``StorageFolder``, sleeping 0, 1, 2, ...
            seconds before each successive trial. If it never shows up an
            error is logged and the size is 0.

        Local paths are not inspected at all: a warning is logged and
        ``True`` is returned instead of a number.

        Args:
            path (str): Path to measure.
            job_name: Not used here; only forwarded on retry.
            **kwargs: ``retry_count`` (int, default 0) is the number of
                attempts already made; it is managed by this method when it
                retries itself.

        Returns:
            float: Size in GB; 0 once 5 attempts have already been made.
            ``True`` for local paths.

        Raises:
            BaseException: Any error raised while measuring is logged and
                re-raised, except errors whose message contains
                "dictionary changed size", which trigger a retry.
        """
        max_retries = 5     # whole-call attempts before giving up
        max_trials = 10     # existence polls within one attempt

        retry_count = kwargs.get("retry_count", 0)

        # Ignore local paths
        if self.__get_file_protocol(path) == "Local":
            logging.warning(f"Ignoring path '{path}' as it is local on the disk image. Assuming the path is present!")
            return True

        # retry_count attempts have been made by the time we get here.
        if retry_count >= max_retries:
            logging.warning(f"Failed to get size of '{path}'! Attempted to retrieve size {retry_count} times.")
            return 0

        try:
            # Check if path is prefix, and create StoragePrefix object and get its size
            if path.endswith("*"):
                _size = StoragePrefix(path.rstrip("*")).size

            # Check if it path exists as a file or folder, by creating StorageFile and StorageFolder object
            else:
                _file = StorageFile(path)
                _folder = StorageFolder(path)
                _size = 0

                for trial in range(max_trials):
                    # Back off a little longer before each new trial
                    # (no wait before the first one).
                    time.sleep(trial)

                    if _file.exists():
                        _size = _file.size
                        break
                    if _folder.exists():
                        _size = _folder.size
                        break

                    logging.warning(f"Cannot get size of '{path}' as it does not exist! Trial {trial + 1}/{max_trials}")
                else:
                    # Every trial was used up without finding the path.
                    logging.error(f"Cannot get size of '{path}' as it doesn't exist after multiple trials!")

            # Convert to GB
            return float(_size)/2**30

        except BaseException as e:
            logging.error(f"Unable to get file size: {path}")
            if str(e) != "":
                logging.error(f"Received the following msg:\n{e}")
            # Only this specific error is retried; everything else
            # propagates to the caller.
            if "dictionary changed size" in str(e):
                kwargs['retry_count'] = retry_count + 1
                return self.get_file_size(path, job_name, **kwargs)
            raise
示例#7
0
    def test_http(self):
        """Check exists() and copy() of StorageFile objects built from
        http/https URLs.
        """
        # exists() is expected to be False for this URL.
        missing_page = StorageFile("http://example.com/abc/")
        self.assertFalse(missing_page.exists())

        # exists() is expected to be True for this URL.
        live_page = StorageFile("https://www.google.com")
        self.assertTrue(live_page.exists())

        # Download: copy a remote PDF to a local file.
        remote_pdf = StorageFile("https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf")
        download_path = os.path.join(self.test_folder_path, "test.pdf")
        # Start from a clean slate so the existence check below is meaningful.
        if os.path.exists(download_path):
            os.remove(download_path)
        remote_pdf.copy(download_path)

        # The downloaded file must exist and must not be empty.
        self.assertTrue(os.path.exists(download_path))
        self.assertGreater(StorageFile(download_path).size, 0)

        # Remove the downloaded file.
        StorageFile(download_path).delete()
示例#8
0
    def test_create_copy_and_delete_file(self):
        """Create a sub folder and copy files into a temporary folder, then
        empty the folder and check that only its content is removed.
        """
        temp_uri = os.path.join(self.TEST_ROOT, "new_folder")
        fixture_uri = os.path.join(self.TEST_ROOT, "test_folder_0")
        # Both copies below go to the same destination.
        copy_uri = os.path.join(temp_uri, "copied_file")

        with TempFolder(temp_uri) as folder:
            self.assertTrue(folder.is_empty())

            # Create a sub folder inside the new folder
            child_uri = os.path.join(temp_uri, "sub_folder")
            logger.debug(child_uri)
            child_folder = StorageFolder(child_uri).create()
            self.assertTrue(child_folder.exists())

            # Copy an empty file
            empty_source = StorageFile(os.path.join(fixture_uri, "empty_file"))
            logger.debug(empty_source.exists())
            time.sleep(2)
            empty_source.copy(copy_uri)
            self.assertTrue(StorageFile(copy_uri).exists())

            # Copy a file with content and replace the empty file
            text_source = StorageFile(os.path.join(fixture_uri, "abc.txt"))
            text_source.copy(copy_uri)
            copied = StorageFile(copy_uri)
            self.assertTrue(copied.exists())
            # Use the shortcut to read file, the content will be binary.
            self.assertEqual(copied.read(), b"abc\ncba\n")

            # Empty the folder. This should delete file and sub folder only
            folder.empty()
            self.assertTrue(folder.exists())
            self.assertTrue(folder.is_empty())
            self.assertFalse(child_folder.exists())
            self.assertFalse(copied.exists())
示例#9
0
文件: source.py 项目: qiuosier/Virgo
    def __get_valid_daily_cache(self, symbol):
        """Gets the latest un-expired cache file for daily data.

        The dates are checked from today backwards, one day at a time, for
        ``self.daily_cache_expiration`` days; the first cache file found to
        exist is returned.

        Args:
            symbol (str): The symbol of the equity/stock.

        Returns:
            StorageFile: The cache file object (not a path string) if an
                un-expired cache file exists. Otherwise None.
        """
        # Take "now" once so every candidate date is measured from the same
        # instant, even if the loop happens to run across midnight.
        now = datetime.datetime.now()
        for days_back in range(self.daily_cache_expiration):
            cache_date = now - datetime.timedelta(days=days_back)
            file_path = self.__cache_file_path(symbol, self.daily_series_type,
                                               cache_date.strftime(self.date_fmt))
            storage_file = StorageFile(file_path)
            if storage_file.exists():
                return storage_file
        return None