Example #1
    def test_generate_statistics_some_codes(self):
        needed_feature_codes = random.sample(self.feature_codes, k=2)

        file_processor = FileProcessor(self.input_file_path)
        actual_stats = file_processor.generate_statistics(needed_feature_codes)

        _, feature_codes, features = read_test_data_as_arrays(
            self.input_file_path)

        selected_indexes = []

        for code in needed_feature_codes:
            indexes = np.where(feature_codes == code)[0].tolist()
            selected_indexes += indexes

        selected_features = features[selected_indexes]

        expected_stats = {
            "count": np.uint32(selected_features.shape[0]),
            "mean": selected_features.mean(axis=0),
            "std": selected_features.std(axis=0, ddof=1),
            "max": np.amax(selected_features, axis=0),
            "min": np.amin(selected_features, axis=0)
        }

        for metric in expected_stats:
            np.testing.assert_allclose(actual_stats[metric],
                                       expected_stats[metric])
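These expected values pin down the statistics contract: a np.uint32 row count, sample standard deviation (ddof=1), and column-wise mean/min/max over the rows whose feature code was requested. A minimal sketch of generate_statistics consistent with that contract, assuming numpy is imported as np and a hypothetical _load_arrays helper returns the feature-code and feature arrays:

    def generate_statistics(self, needed_feature_codes=None):
        # Hypothetical helper: returns (feature_codes, features) as numpy arrays.
        feature_codes, features = self._load_arrays()
        if needed_feature_codes is not None:
            # Keep only the rows whose feature code was requested.
            features = features[np.isin(feature_codes, needed_feature_codes)]
        return {
            "count": np.uint32(features.shape[0]),
            "mean": features.mean(axis=0),
            "std": features.std(axis=0, ddof=1),
            "max": np.amax(features, axis=0),
            "min": np.amin(features, axis=0)
        }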
Example #2
def main():

    fileConfig('logging_config.ini')

    db_util = DatabaseUtility()

    primary_ui = PrimaryUI()

    primary_ui.set_doc_names(db_util.get_saved_doc_names())

    file_processor = FileProcessor()

    while True:

        event, file_names = primary_ui.Read()

        if event is not None:

            # Process the data

            if file_names[0] != '':

                if event == primary_ui.IMPORT:
                    db_util.save_docs(
                        file_processor.convert_file_names_to_name_data_dict(
                            file_names[0]))

                # elif event == primary_ui.EXPORT:

        else:
            break
Example #3
    def __init__(self,
                 input_file,
                 input_dir,
                 file_regex,
                 latest,
                 debug,
                 recursive=False):
        """
        Return an instance of JsonFileProcessor.

        Parameters:
            input_file (string): file (full path) to check for data
            input_dir (string): directory (full path) to check for data files
            file_regex (string): regex to match data file names
            latest (Boolean): check for latest files only
            debug (Boolean): enable debug logging
            recursive (Boolean): check the search directory recursively

        """
        self.debug = debug
        root_logger.info("Debug: %s", debug)
        if input_file:
            self.file_names = FileProcessor.match_file(input_file, file_regex)
            root_logger.info("Found %d json files for %s in %s",
                             self.file_count(), file_regex, input_file)
        if input_dir:
            self.file_names = FileProcessor.dir_to_files(
                input_dir, file_regex, latest, recursive)
            root_logger.info("Found %d json files for %s in %s",
                             self.file_count(), file_regex, input_dir)
Example #4
 def test_run(self):
     mock_repo = mock.Mock(spec=FileRepoABC)
     mock_repo.get_files.return_value = [BytesIO(b"foo"), BytesIO(b"bar")]
     fp = FileProcessor(mock_repo, "some/path")
     results = fp.run()
     self.assertEqual(results, ["acbd18db4cc2f85cedef654fccc4a4d8",
                                "37b51d194a7513e45b56f6524f2d51f2"])
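The asserted values are the MD5 hex digests of b"foo" and b"bar", so run() is expected to hash the contents of each file object the repository yields. A minimal sketch consistent with this test; the constructor fields and the get_files call signature are assumptions:

import hashlib

class FileProcessor:
    def __init__(self, repo, path):
        self.repo = repo
        self.path = path

    def run(self):
        # Return the MD5 hex digest of each file's contents.
        return [hashlib.md5(file_obj.read()).hexdigest()
                for file_obj in self.repo.get_files(self.path)]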
Example #5
    def test_transform_features_all_codes(self):
        file_processor = FileProcessor(self.input_file_path)
        file_processor.transform_features(self.output_file_path)

        job_ids, feature_codes, features = read_test_data_as_arrays(
            self.input_file_path)

        means = features.mean(axis=0)

        expected_z_scores = stats.zscore(
            features, axis=0, ddof=1)  # using sample standard deviation
        expected_argmaxs = np.argmax(features, axis=1)
        expected_maxs = np.amax(features, axis=1)
        expected_diffs = np.abs(expected_maxs - means[expected_argmaxs])

        line_index = 0

        with open(self.output_file_path) as f:
            next(f)

            for line in f:
                job_id, z_scores, argmax, diff = _parse_transformed_line(line)

                np.testing.assert_equal(job_id, job_ids[line_index])
                np.testing.assert_allclose(z_scores,
                                           expected_z_scores[line_index])
                np.testing.assert_equal(argmax, expected_argmaxs[line_index])
                np.testing.assert_almost_equal(diff,
                                               expected_diffs[line_index])

                line_index += 1
Example #6
def main():
    """Use the custom file processor to pull bucket hashes."""
    gcs_repo = GCSFileRepo(APP_CREDENTIALS_JSON, COVID_BUCKET)
    fp = FileProcessor(gcs_repo, None)
    gcs_bucket_hashes = fp.run()
    print(gcs_bucket_hashes)
Example #7
 def __init__(self, input_file, input_dir, db_params_dict, metric, debug):
     """Return a new instance of FitBitData given the location of the data files, paramters for accessing the database, and if the data should be stored in metric units."""
     self.metric = metric
     self.fitbitdb = FitBitDB.FitBitDB(db_params_dict, debug)
     if input_file:
         self.file_names = FileProcessor.match_file(input_file, r'.*\.csv')
     if input_dir:
         self.file_names = FileProcessor.dir_to_files(input_dir, r'.*\.csv')
Example #8
@attr.s
class SecondPassCrawler:
    session = attr.ib()
    navigator = attr.ib(init=False)
    file_processor = attr.ib(init=False)

    def __attrs_post_init__(self):
        self.navigator = SeleniumNavigator(loading_strategy='none')
        self.file_processor = FileProcessor()

    def exit(self):
        self.navigator.close_browser()
        self.session.close()

    def get_urls(self):
        # results = self.session.query(Report).all()
        # return (report.url for report in results)
        return ['http://media.ethics.ga.gov/search/Campaign/Campaign_ReportOptions.aspx?NameID=16067&FilerID=C2012000744&CDRID=59991']

    def add_scrapelog_to_db(self, url, content, dtime):
        slog = ScrapeLog(scrape_date=dtime,
                         raw_data=content,
                         page_url=url)
        try:
            self.session.add(slog)
            self.session.commit()
        except Exception as e:
            self.session.rollback()
            logging.info(e)

    def crawl_download_link(self):
        parser = CSVLinkParser(self.navigator.page_source())
        parsed_link = parser.parse()
        if parsed_link is not None:
            logging.info(f'Parsed link: {parsed_link}')
            url = self.navigator.get_current_url()
            self.navigator.click_link(parsed_link)
            logging.info('Clicking download link for csv file.')
            content, dtime = self.file_processor.process()
            self.add_scrapelog_to_db(url, content, dtime)
            self.file_processor.delete_csv()

    def crawl_view_contributions_ids(self):
        logging.info(f'Current page: {self.navigator.get_current_url()}')
        parser = ContributionsViewParser(self.navigator.page_source())
        parsed_link = parser.parse()
        if parsed_link is not None:
            logging.info(f'Parsed link: {parsed_link}')
            self.navigator.click_link(parsed_link)
            self.navigator.wait_for_csv_link()
            self.crawl_download_link()

    def crawl(self):
        urls = self.get_urls()
        for url in urls:
            logging.info(f'Current url: {url}')
            self.navigator.navigate(url)
            self.navigator.wait_for_contributions_id() 
            self.crawl_view_contributions_ids()
Example #9
 def __init__(self, input_file, input_dir, db_params_dict, metric, debug):
     self.metric = metric
     self.mshealth_db = MSHealthDB.MSHealthDB(db_params_dict, debug)
     if input_file:
         self.file_names = FileProcessor.match_file(
             input_file, r'Daily_Summary_.*\.csv')
     if input_dir:
         self.file_names = FileProcessor.dir_to_files(
             input_dir, r'Daily_Summary_.*\.csv')
Example #10
def main():
    """Fetch stock data from Yahoo for the configured stock symbols and store it in the database."""

    file_processor = FileProcessor("")

    db_stock_data = StockData(config['postgresql']['host'],
                              config['postgresql']['database'])

    # get stock symbols from data file
    # pharma_stocks = file_processor.read_file("lookup_data/pharma_stocks")

    fortune_500_0_stocks = file_processor.read_file("lookup_data/fortune_500_0")

    s_p_500_data = pd.read_csv("lookup_data/s_p_500.csv", delimiter=',')
    s_p_500_data_stocks = s_p_500_data['stock'].tolist()

    stock_data_list = []
    dt_utc_now = datetime.utcnow()

    stocks = s_p_500_data_stocks + fortune_500_0_stocks

    invalid_data = []

    print('getting data from yahoo . . .')
    for stock in stocks:
        stock_data = get_stock_info(stock, dt_utc_now)
        if stock_data:
            stock_data_list.append(stock_data)
        else:
            invalid_data.append(stock)

    file_processor.save_file('data/fortune_500_0.json', str(stock_data_list))

    print('adding to database . . .')
    for stock_data in stock_data_list:
        try:
            db_stock_data.insert_stock_data(stock_data)
        except Exception as ex:
            # todo log exception
            invalid_data.append(stock_data.name)
            continue

    print('invalid_data:')
    print(invalid_data)
    # ['AET', 'APC', 'ANDV', 'BHGE', 'BBT', 'BF.B', 'CA', 'CSRA', 'DWDP', 'DPS', 'EVHC',
    # 'ESRX', 'GGP', 'HCP', 'LLL', 'LUK', 'KORS', 'MON', 'NFX', 'PX', 'RHT', 'COL',
    # 'SCG', 'SYMC', 'TWX', 'TMK', 'TSS', 'WYN', 'XL']

    # ['AET', 'APC', 'ANDV', 'BHGE', 'BBT', 'BF.B', 'CA', 'CSRA', 'DWDP', 'DPS', 'EVHC',
    # 'ESRX', 'GGP', 'HRS', 'HCP', 'LLL', 'KORS', 'MON', 'NFX', 'PX', 'RHT', 'COL', 'SCG',
    # 'TWX', 'TMK', 'TSS', 'WYN', 'XL', 'BRK.B', 'GD', 'JEC', 'LUK', 'MCK', 'CRM',
    # 'STI', 'SYMC', 'VIAB', 'MCK', 'CRM']

    print('total records added: {0}'.format(len(stock_data_list)))
Example #11
 def __init__(self, input_file, input_dir, db_params_dict, metric, debug):
     """Return an instance of MSHealthData given an input file or files and information on the databse to put it in."""
     self.metric = metric
     self.mshealth_db = MSHealthDB.MSHealthDB(db_params_dict, debug)
     if input_file:
         self.file_names = FileProcessor.match_file(
             input_file, r'Daily_Summary_.*\.csv')
     if input_dir:
         self.file_names = FileProcessor.dir_to_files(
             input_dir, r'Daily_Summary_.*\.csv')
Example #12
def test_zip_bytes_io(s3):
    file_processor = FileProcessor({}, {}, s3)

    actual = file_processor._zip_bytes_io(pytest.bucket_origin, pytest.key)

    assert type(actual) is BytesIO
    with ZipFile(actual) as zip_obj:
        assert zip_obj.namelist() == [pytest.file_zipped]
        with zip_obj.open(pytest.file_zipped) as file_obj:
            assert file_obj.read() == pytest.body_zipped.encode()
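A minimal sketch of _zip_bytes_io consistent with this test, assuming s3 is a boto3 client stored on the instance and that the zip entry is named after the key's basename (both assumptions, not taken from the real implementation):

import os
from io import BytesIO
from zipfile import ZIP_DEFLATED, ZipFile

def _zip_bytes_io(self, bucket, key):
    # Download the object and wrap its bytes in a single-entry in-memory zip.
    body = self.s3.get_object(Bucket=bucket, Key=key)['Body'].read()
    zip_bytes_io = BytesIO()
    with ZipFile(zip_bytes_io, 'w', ZIP_DEFLATED) as zip_obj:
        zip_obj.writestr(os.path.basename(key), body)
    zip_bytes_io.seek(0)
    return zip_bytes_io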
Example #13
def test_upload_file_obj(s3, s3_prefix):
    bytes_io = BytesIO()
    key = f'{s3_prefix}/{pytest.file_zipped}'
    os.environ['BUCKET_PROCESSED'] = pytest.bucket_processed

    file_processor = FileProcessor({}, {}, s3)
    zip_bytes_io = file_processor._zip_bytes_io(pytest.bucket_origin,
                                                pytest.key)
    file_processor._upload_file_obj(zip_bytes_io)
    s3.download_fileobj(pytest.bucket_processed, key, bytes_io)

    assert type(bytes_io) is BytesIO
    assert bytes_io.getvalue() == pytest.body_zipped.encode()
Example #14
def test_valid_bucket_key_list(event):
    file_processor = FileProcessor(event, {}, {})
    actual = file_processor._bucket_key_list()

    assert type(actual) is list

    for i in range(len(event)):
        assert 'bucket' in actual[i]
        assert actual[i]['bucket'] == event['Records'][i]['s3']['bucket'][
            'name']
        assert 'key' in actual[i]
        assert actual[i]['key'].startswith('test')
        assert actual[i]['key'].endswith('.zip')
Example #15
 def __init__(self, input_file, input_dir, db_params_dict, metric, debug):
     self.metric = metric
     self.mshealth_db = MSHealthDB.MSHealthDB(db_params_dict, debug)
     self.cols_map = {
         'Date': ('timestamp', CsvImporter.map_mdy_date),
         'Weight': ('weight', MSVaultData.__map_weight),
     }
     if input_file:
         self.file_names = FileProcessor.match_file(
             input_file, r'HealthVault_Weight_.*\.csv')
     if input_dir:
         self.file_names = FileProcessor.dir_to_files(
             input_dir, r'HealthVault_Weight_.*\.csv')
Example #16
 def __init__(self, input_file, input_dir, db_params_dict, metric, debug):
     """Return an instance of MSVaultData given an input file or files and information on the databse to put it in."""
     self.metric = metric
     self.mshealth_db = MSHealthDB.MSHealthDB(db_params_dict, debug)
     self.cols_map = {
         'Date': ('timestamp', CsvImporter.map_mdy_date),
         'Weight': ('weight', MSVaultData.__map_weight),
     }
     if input_file:
         self.file_names = FileProcessor.match_file(
             input_file, r'HealthVault_Weight_.*\.csv')
     if input_dir:
         self.file_names = FileProcessor.dir_to_files(
             input_dir, r'HealthVault_Weight_.*\.csv')
Example #17
    def test_run_returns_md5_of_contents(self):
        with mock.patch('file_processor.storage'):
            fp = FileProcessor('some/path')
            with mock.patch.object(fp, 'bucket') as mock_bucket:
                mock_blob = mock.Mock(name='mock_blob')

                def mock_download_to_file(bytez):
                    bytez.write(b'foobarbaz')

                mock_blob.download_to_file = mock_download_to_file
                mock_bucket.list_blobs.return_value = [mock_blob]
                # WHEN
                results = fp.run()
                # THEN
                self.assertEqual(results, ['6df23dc03f9b64cc38a0fc1483df6e21'])
Example #18
    def test_transform_features_some_codes(self):
        selected_codes = random.sample(self.feature_codes, k=2)

        file_processor = FileProcessor(self.input_file_path)
        file_processor.transform_features(self.output_file_path,
                                          selected_codes)

        job_ids, feature_codes, features = read_test_data_as_arrays(
            self.input_file_path)

        selected_indexes = []

        for code in selected_codes:
            indexes = np.where(feature_codes == code)[0].tolist()
            selected_indexes += indexes

        selected_indexes = sorted(selected_indexes)

        selected_job_ids = job_ids[selected_indexes]
        selected_features = features[selected_indexes]

        means = selected_features.mean(axis=0)

        expected_z_scores = stats.zscore(
            selected_features, axis=0,
            ddof=1)  # using sample standard deviation
        expected_argmaxs = np.argmax(selected_features, axis=1)
        expected_maxs = np.amax(selected_features, axis=1)
        expected_diffs = np.abs(expected_maxs - means[expected_argmaxs])

        line_index = 0

        with open(self.output_file_path) as f:
            next(f)

            for line in f:
                job_id, z_scores, argmax, diff = _parse_transformed_line(line)

                np.testing.assert_equal(job_id, selected_job_ids[line_index])
                np.testing.assert_allclose(z_scores,
                                           expected_z_scores[line_index])
                np.testing.assert_equal(argmax, expected_argmaxs[line_index])
                np.testing.assert_almost_equal(diff,
                                               expected_diffs[line_index])

                line_index += 1

            self.assertEqual(line_index, len(selected_indexes))
Example #19
def backup(config, database):
    """
    Process directories to be backed up.
    """
    logger = logging.getLogger('mylog')

    clean_removed(config, database)

    if not consistency_check(config, database):
        logger.warning('Consistency check detected problems!')
        if not query_yes_no('Continue?'):
            sys.exit()

    dirs = config.get('Backup', 'to_backup').split()
    logger.info('Directories to backup: ' + ','.join(dirs))

    exclude_dirs = config.get('Backup', 'to_exclude').split()
    logger.info('Directories to exclude: ' + ','.join(exclude_dirs))


    file_proc = FileProcessor(config, database, encrypt_file)

    # Count files to show progress later
    total = 0

    for directory in dirs:
        for subdir, _, files in os.walk(directory):
            if subdir in exclude_dirs:
                continue

            total += len(files)

    count = 0
    for directory in dirs:
        logger.debug('Processing directory ' + directory)
        for subdir, _, files in os.walk(directory):
            if subdir in exclude_dirs:
                logger.debug('Skipping directory ' + subdir)
                continue

            for single_file in files:
                fpath = os.path.join(subdir, single_file)
                logger.debug('Processing file ' + fpath)
                logger.info(str((count * 100) / total) + ' %')
                file_proc.process(fpath)
                count = count + 1
Example #20
 def __init__(self, input_dir, latest, measurement_system, debug):
     logger.info("Processing daily FIT data")
     self.measurement_system = measurement_system
     self.debug = debug
     if input_dir:
         self.file_names = FileProcessor.dir_to_files(
             input_dir, Fit.file.name_regex, latest, True)
Example #21
def main():
    print('IN Code Generator started....\nAnalysing input file...')

    input_file_path, package_name = parse_args()

    input_file_contents = FileProcessor.get_file_contents(input_file_path)
    if input_file_contents:
        file_contents: List[FileContent] = CodeProcessor.process_file_contents(
            input_file_contents)
        FileProcessor.remove_output_dir()
        for file_content in file_contents:
            file_content.package_name = package_name
            if file_content:
                file_exports: FileProcessor = FileProcessor(file_content)
                file_exports.process_and_export_templates()
Example #22
 def copy_monitoring(self, monitoring_dir, latest):
     """Copy daily monitoring data FIT files from a USB mounted Garmin device to the given directory."""
     device_monitoring_dir = GarminDBConfigManager.device_monitoring_dir(self.device_mount_dir)
     logger.info("Copying monitoring files from %s to %s", device_monitoring_dir, monitoring_dir)
     file_names = FileProcessor.dir_to_files(device_monitoring_dir, Fit.file.name_regex, latest)
     for file in progressbar.progressbar(file_names):
         shutil.copy(file, monitoring_dir)
Example #23
    def process_input(self, f_name, query):
        """
        @summary: This method processes the input file using the FileProcessor class and returns the output
        @param f_name: file name to be processed
        @param query: query to be searched
        @return: Dictionary with the query as the key and a list of the top found elements as the value
        """
        fp_obj = FileProcessor()
        data_leftover = ""

        with open(f_name) as fp:
            for chunk in fp_obj.read_in_chunks(fp, config.CHUNK_SIZE):
                curr_list, data_leftover = self.convert_to_list(
                    (data_leftover + chunk).strip('[]'))
                self.prefix_match(curr_list, query)

        return self.convert_to_dict(self.result_list, query)
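read_in_chunks is consumed above as a generator yielding fixed-size pieces of the open file; a minimal sketch of such a reader (an assumption about FileProcessor, not its confirmed implementation):

    def read_in_chunks(self, file_obj, chunk_size):
        # Yield successive chunks until the file is exhausted.
        while True:
            chunk = file_obj.read(chunk_size)
            if not chunk:
                break
            yield chunk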
Example #24
    def test_generate_statistics_all_codes(self):
        file_processor = FileProcessor(self.input_file_path)
        actual_stats = file_processor.generate_statistics()

        _, _, features = read_test_data_as_arrays(self.input_file_path)

        expected_stats = {
            "count": np.uint32(features.shape[0]),
            "mean": features.mean(axis=0),
            "std": features.std(axis=0, ddof=1),
            "max": np.amax(features, axis=0),
            "min": np.amin(features, axis=0)
        }

        for metric in expected_stats:
            np.testing.assert_allclose(actual_stats[metric],
                                       expected_stats[metric])
Example #25
class TestFileProcessor(TestCase):
    def setUp(self):
        self.processor = FileProcessor()

    def getFakeFile(self, string):
        fake_file = FakeFile()
        fake_file.write(string)
        fake_file.seek(0)
        return fake_file

    def test_readFile_returns_list_of_words_from_file(self):
        test_str = "Test String"
        self.assertTrue(
            isinstance(self.processor.readFile(self.getFakeFile(test_str)),
                       list))

    def test_readFile_returns_list_of_words_from_file_splitted_by_space_by_default(
            self):
        test_str = "Test String"
        self.assertEqual(self.processor.readFile(self.getFakeFile(test_str)),
                         [["Test", "String"]])

    def test_readFile_returns_list_of_words_from_file_splitted_by_tab(self):
        test_str = "Test\tString"
        self.assertEqual(
            self.processor.readFile(self.getFakeFile(test_str), "\t"),
            [["Test", "String"]])

    def test_readFile_returns_comma_separated_values_as_list(self):
        test_str = "test,str"
        self.assertEqual(
            self.processor.readFile(self.getFakeFile(test_str), ","),
            [["test", "str"]])

    def test_readFile_returns_each_line_as_new_list(self):
        test_str = "test,this\nline,str"
        self.assertEqual(
            self.processor.readFile(self.getFakeFile(test_str), ","),
            [["test", "this"], ["line", "str"]])

    def test_retrieveData_returns_dict_of_variables_and_last_element_as_desired_class(
            self):
        test_list = ["test_var1", "test_var2", "test_var3", "90", "test_class"]
        (data, des_class) = self.processor.retrieveData(test_list)
        self.assertTrue(isinstance(data, dict))
        self.assertEqual(des_class, "test_class")

    def test_retrieveData_returns_string_and_number_variables_in_separate_list(
            self):
        test_list = [
            "test_var1", "123.5samp", "sadkf123", "90", "test_var2",
            "3434.134", "test_class"
        ]
        (data, des_class) = self.processor.retrieveData(test_list)
        self.assertEqual(
            data, {
                "string": ["test_var1", "123.5samp", "sadkf123", "test_var2"],
                "num": [90, 3434.134]
            })
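The tests above fully specify readFile (one list of tokens per line, split on a separator that defaults to a space) and retrieveData (the last element is the class label, the rest are bucketed into string and numeric values). A minimal sketch that satisfies them; the real FileProcessor may differ:

class FileProcessor:
    def readFile(self, file_obj, separator=" "):
        # One inner list of tokens per line, split on the given separator.
        return [line.rstrip("\n").split(separator) for line in file_obj]

    def retrieveData(self, values):
        # The last element is the desired class; the rest are bucketed by type.
        *variables, desired_class = values
        data = {"string": [], "num": []}
        for value in variables:
            try:
                number = float(value)
                data["num"].append(int(number) if number.is_integer() else number)
            except ValueError:
                data["string"].append(value)
        return data, desired_class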
Example #26
def test_processor(feature_size, chunksize):
    """Tests FileProcessor.

    :param feature_size: int, size of feature vector.
    :param chunksize: int, FileProcessor chunk size.
    """
    sep = '\t'
    n_rows = 50
    feature = 3

    with TemporaryDirectory() as tmp_dir:
        input_path = os.path.join(tmp_dir, 'data.tsv')
        output_path = os.path.join(tmp_dir, 'data_proc.tsv')

        data, feature_values = generate_data(n_rows=n_rows,
                                             feature=feature,
                                             feature_size=feature_size,
                                             seed=42)

        data.to_csv(input_path, sep=sep, index=False)

        reader_params = {
            'chunksize': chunksize,
            'sep': sep,
        }

        transformers = (
            Standardizer,
            MaxIndex,
            MaxFeatureAbsMeanDiff,
        )
        processor = FileProcessor(transformers, reader_params=reader_params)
        processor.train(input_path)
        processor.process(input_path, output_path)

        processed = pd.read_csv(output_path, sep=sep)

    # check feature_{i}_stand_{index}
    expected_stand = (feature_values -
                      feature_values.mean(axis=0)) / feature_values.std(axis=0,
                                                                        ddof=1)
    stand = processed.filter(regex=f'feature_{feature}_stand_[0-9]+')
    assert np.allclose(expected_stand, stand)
    assert np.allclose(stand.mean(axis=0), 0)
    assert np.allclose(stand.std(axis=0, ddof=1), 1)

    # check max_feature_{i}_index
    expected_max = feature_values.max(axis=1)
    max_index = processed[f'max_feature_{feature}_index'].values
    max_mask = (max_index.reshape(
        (-1, 1)) == np.arange(feature_values.shape[1]).reshape((1, -1)))
    fact_max = feature_values[max_mask]
    assert np.allclose(expected_max, fact_max)

    # check max_feature_{i}_abs_mean_diff
    expected_max_mean = np.broadcast_to(feature_values.mean(axis=0),
                                        shape=max_mask.shape)[max_mask]
    expected_abs_mean_diff = np.abs(expected_max - expected_max_mean)
    abs_mean_diff = processed[f'max_feature_{feature}_abs_mean_diff']
    assert np.allclose(expected_abs_mean_diff, abs_mean_diff)
Example #27
class FileHandler(PatternMatchingEventHandler):

    def __init__(self):
        PatternMatchingEventHandler.__init__(self, patterns=["*.txt"])
        self.file_processor = FileProcessor()

    def on_created(self, event):
        logging.info("New file: {file}".format(file=event.src_path))
        self.process(event)

    def process(self, event):
        filename = event.src_path
        data = self.file_processor.parse_data(filename)
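A short usage sketch showing how a handler like this is typically attached to a watchdog Observer; the watched directory is a placeholder:

import time

from watchdog.observers import Observer

observer = Observer()
observer.schedule(FileHandler(), path="incoming", recursive=False)  # placeholder path
observer.start()
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    observer.stop()
observer.join()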
Example #28
    def test_check_lookup(self):

        expected_result = FileProcessor.get_rate_code_lookup(TEST_ZIP_FILE)

        # Check that inputs were processed correctly and zips return the right region
        self.assertEqual(expected_result['36749'], 'AL-11')
        self.assertEqual(expected_result['36703'], 'AL-11')
        self.assertEqual(expected_result['84310'], 'UT-2')

        # Check that leading zeros were maintained
        self.assertEqual(expected_result['05770'], 'VT-1')

        # Check the conflicted rows were dropped
        self.assertIsNone(expected_result.get('84409'))
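A minimal sketch of get_rate_code_lookup consistent with these assertions; the CSV column names are assumptions. Zip codes stay strings so leading zeros survive, and any zip mapping to more than one rate area is dropped:

import csv
from collections import defaultdict

class FileProcessor:
    @staticmethod
    def get_rate_code_lookup(zip_file):
        rate_areas = defaultdict(set)
        with open(zip_file, newline='') as f:
            for row in csv.DictReader(f):
                # Keep zip codes as strings to preserve leading zeros.
                rate_areas[row['zipcode']].add(f"{row['state']}-{row['rate_area']}")
        # Zips mapping to more than one rate area are ambiguous and dropped.
        return {zipcode: areas.pop()
                for zipcode, areas in rate_areas.items() if len(areas) == 1}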
Example #29
    def __init__(self, input_dir, latest, measurement_system, debug):
        """
        Return an instance of GarminMonitoringFitData.

        Parameters:
        input_dir (string): directory (full path) to check for monitoring data files
        latest (Boolean): check for latest files only
        measurement_system (enum): which measurement system to use when importing the files
        debug (Boolean): enable debug logging

        """
        logger.info("Processing daily FIT data")
        self.measurement_system = measurement_system
        self.debug = debug
        if input_dir:
            self.file_names = FileProcessor.dir_to_files(
                input_dir, Fit.file.name_regex, latest, True)
Example #30
    def __init__(self, input_dir, latest, measurement_system, debug):
        """
        Return an instance of GarminTcxData.

        Parameters:
        db_params_dict (dict): configuration data for accessing the database
        input_dir (string): directory (full path) to check for data files
        latest (Boolean): check for latest files only
        measurement_system (enum): which measurement system to use when importing the files
        debug (Boolean): enable debug logging

        """
        logger.debug("Processing activities tcx data")
        self.measurement_system = measurement_system
        self.debug = debug
        if input_dir:
            self.file_names = FileProcessor.dir_to_files(
                input_dir, self.tcx_filename_regex, latest)
Example #31
    def test_find_second_lowest(self):
        """
        Test whether the find second lowest function works on a small set of test data
        """
        expected_result = FileProcessor.get_second_lowest_cost_lookup(
            TEST_PLAN_FILE)

        # Check that the two regions that should have second-lowest rates are included and correct
        self.assertEqual(expected_result['GA-7'], 300.62)
        self.assertEqual(expected_result['MI-4'], 150.6767)

        # Check that when two plans are tied for the lowest rate, the third item is returned as
        # the second lowest (see assumption in comments)
        self.assertEqual(expected_result['TX-15'], 260.05)

        # Check that a region with only one silver plan isn't added to the final dictionary
        self.assertNotIn('FL-60', expected_result)

        # Check that a region with no silver plans is not added to the final dictionary
        self.assertNotIn('IL-5', expected_result)
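A minimal sketch of get_second_lowest_cost_lookup consistent with these assertions; the CSV column names are assumptions. Keeping a set of distinct silver rates per region means a tie for the lowest falls through to the next distinct value, and regions with fewer than two distinct silver rates are omitted:

import csv
from collections import defaultdict

class FileProcessor:
    @staticmethod
    def get_second_lowest_cost_lookup(plan_file):
        silver_rates = defaultdict(set)
        with open(plan_file, newline='') as f:
            for row in csv.DictReader(f):
                if row['metal_level'].lower() == 'silver':
                    region = f"{row['state']}-{row['rate_area']}"
                    silver_rates[region].add(float(row['rate']))
        # A region needs at least two distinct silver rates to have a second lowest.
        return {region: sorted(rates)[1]
                for region, rates in silver_rates.items() if len(rates) > 1}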
Example #32
def file_read(filename):
    fp = FileProcessor(filename, ' ')
    return fp.get_lines_as_array()
Example #33
 def __init__(self):
     PatternMatchingEventHandler.__init__(self, patterns=["*.txt"])
     self.file_processor = FileProcessor()