def ReadCSVToPandas(
    p: beam.Pipeline,
    *args,
    **kwargs,
) -> PCollection[pd.DataFrame]:
    """Read CSV file(s) into a PCollection of pandas DataFrames.

    All positional and keyword arguments are forwarded verbatim to
    ``df_io.read_csv`` (e.g. the file pattern, parse options).

    Args:
        p: The pipeline to attach the read transform to.

    Returns:
        A PCollection whose elements are pandas DataFrame chunks.
    """
    deferred_frames = p | "Read CSV" >> df_io.read_csv(*args, **kwargs)
    # yield_elements='pandas' emits whole DataFrame batches rather than rows.
    return df_convert.to_pcollection(deferred_frames, yield_elements='pandas')
def run_aggregation_pipeline(pipeline_args, input_path, output_path):
    """Sum passengers dropped off per DOLocationID and write the result as CSV.

    Args:
        pipeline_args: Command-line options used to build PipelineOptions.
        input_path: Path/glob of the taxi ride CSV input.
        output_path: Path prefix for the aggregated CSV output.
    """
    # Leaving the with block triggers pipeline execution.
    with beam.Pipeline(options=PipelineOptions(pipeline_args)) as p:
        taxi_rides = p | read_csv(input_path)
        # Total passengers dropped off at each LocationID.
        per_location = taxi_rides.groupby('DOLocationID').passenger_count.sum()
        per_location.to_csv(output_path)
def run_aggregation_pipeline(pipeline, input_path, output_path):
    """Sum passenger_count per drop-off LocationID and write the result as CSV.

    Args:
        pipeline: A beam.Pipeline (usable as a context manager) to run in.
        input_path: Path/glob of the taxi ride CSV input.
        output_path: Path prefix for the aggregated CSV output.
    """
    # The pipeline will be run on exiting the with block.
    # NOTE: the [START]/[END] tags mark a documentation snippet region;
    # keep the code between them unchanged so published samples stay accurate.
    # [START DataFrame_taxiride_aggregation]
    with pipeline as p:
        rides = p | read_csv(input_path)
        # Count the number of passengers dropped off per LocationID
        agg = rides.groupby('DOLocationID').passenger_count.sum()
        agg.to_csv(output_path)
def test_read_write_csv(self):
    """Round-trip: read two CSV shards, derive a column, write, check lines."""
    # Renamed from `input` to avoid shadowing the builtin.
    src_dir = self.temp_dir({'1.csv': 'a,b\n1,2\n', '2.csv': 'a,b\n3,4\n'})
    dst_dir = self.temp_dir()
    with beam.Pipeline() as p:
        df = p | io.read_csv(src_dir + '*.csv')
        # New column c is the elementwise sum of a and b.
        df['c'] = df.a + df.b
        df.to_csv(dst_dir + 'out.csv', index=False)
    self.assertCountEqual(
        ['a,b,c', '1,2,3', '3,4,7'],
        set(self.read_all_lines(dst_dir + 'out.csv*')))
def run_enrich_pipeline(
        pipeline_args, input_path, output_path, zone_lookup_path):
    """Join taxi rides with the zone lookup table and sum passengers per Borough.

    Args:
        pipeline_args: Command-line options used to build PipelineOptions.
        input_path: Path/glob of the taxi ride CSV input.
        output_path: Path prefix for the aggregated CSV output.
        zone_lookup_path: Path of the zone lookup CSV (LocationID -> Borough).
    """
    # Leaving the with block triggers pipeline execution.
    with beam.Pipeline(options=PipelineOptions(pipeline_args)) as p:
        taxi_rides = p | "Read taxi rides" >> read_csv(input_path)
        zone_lookup = p | "Read zone lookup" >> read_csv(zone_lookup_path)

        # Attach each ride's drop-off Borough: make LocationID the index of
        # the lookup table, then left-join on the ride's DOLocationID.
        enriched = taxi_rides.merge(
            zone_lookup.set_index('LocationID').Borough,
            right_index=True,
            left_on='DOLocationID',
            how='left')

        # Total passengers dropped off in each Borough.
        per_borough = enriched.groupby('Borough').passenger_count.sum()
        per_borough.to_csv(output_path)
def test_file_not_found(self):
    """Reading a nonexistent glob surfaces FileNotFoundError with the pattern."""
    with self.assertRaisesRegex(FileNotFoundError, r'/tmp/fake_dir/\*\*'):
        with beam.Pipeline() as pipeline:
            # The read itself is what raises; the result is intentionally unused.
            _ = pipeline | io.read_csv('/tmp/fake_dir/**')
def read_csv_as_pcoll(pipeline, path):
    """Read the CSV at ``path`` and return it as a PCollection of DataFrames.

    Args:
        pipeline: The pipeline to attach the transforms to.
        path: Path of the CSV file to read.

    Returns:
        A PCollection produced by converting the deferred DataFrame.
    """
    # The file name keys the step labels so several reads can coexist
    # in one pipeline without label collisions.
    label = os.path.basename(path)
    deferred_df = pipeline | f"ReadCSV{label}" >> df_io.read_csv(path)
    return df_convert.to_pcollection(
        deferred_df, pipeline=pipeline, label=f"ToPColl{label}")