def execute(self, context):
    hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
    pg = PostgresHook(postgres_conn_id=self.postgres_conn_id)

    logging.info("Dumping postgres query results to local file")
    conn = pg.get_conn()
    cursor = conn.cursor()
    cursor.execute(self.sql, self.parameters)
    with NamedTemporaryFile("wb") as f:
        # unicodecsv-style writer: takes an encoding kwarg, unlike stdlib csv
        csv_writer = csv.writer(f, delimiter=self.delimiter, encoding="utf-8")
        # Map each result column to a Hive type, preserving column order
        field_dict = OrderedDict()
        for field in cursor.description:
            field_dict[field[0]] = self.type_map(field[1])
        csv_writer.writerows(cursor)
        f.flush()
        cursor.close()
        conn.close()
        logging.info("Loading file into Hive")
        hive.load_file(
            f.name,
            self.hive_table,
            field_dict=field_dict,
            create=self.create,
            partition=self.partition,
            delimiter=self.delimiter,
            recreate=self.recreate)
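
# type_map is referenced above but not defined in this excerpt. A minimal
# sketch, assuming field[1] is a psycopg2 type OID and that unmapped types
# fall back to STRING; the OID constants are standard PostgreSQL OIDs, but
# the exact mapping in the real class may differ:
def type_map(self, postgres_type):
    # Common PostgreSQL type OIDs -> Hive column types
    d = {
        16: 'BOOLEAN',      # bool
        20: 'BIGINT',       # int8
        21: 'INT',          # int2
        23: 'INT',          # int4
        700: 'FLOAT',       # float4
        701: 'DOUBLE',      # float8
        1114: 'TIMESTAMP',  # timestamp without time zone
        1700: 'DOUBLE',     # numeric
    }
    return d.get(postgres_type, 'STRING')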

def execute(self, context):
    hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
    hive.load_avro(
        self.hdfs_dir,
        self.hive_table,
        schemafile=self.schemafile,
        create=self.create,
        recreate=self.recreate,
        partition=self.partition)
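
# load_avro is a custom extension of HiveCliHook (stock Airflow only ships
# load_file). A minimal sketch of what such a method might run, assuming it
# creates an external Avro-backed table over hdfs_dir using the supplied
# reader schema file; partition handling is omitted and the real
# implementation may differ:
def load_avro(self, hdfs_dir, table, schemafile=None,
              create=True, recreate=False, partition=None):
    hql = ''
    if recreate:
        hql += "DROP TABLE IF EXISTS {table};\n".format(table=table)
    if create or recreate:
        hql += (
            "CREATE EXTERNAL TABLE IF NOT EXISTS {table}\n"
            "STORED AS AVRO\n"
            "LOCATION '{hdfs_dir}'\n"
            "TBLPROPERTIES ('avro.schema.url'='{schemafile}');\n"
        ).format(table=table, hdfs_dir=hdfs_dir, schemafile=schemafile)
    self.run_cli(hql)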

def execute(self, context):
    hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
    pg = PostgresHook(postgres_conn_id=self.postgres_conn_id)

    logging.info("Dumping postgres query results to local file")
    conn = pg.get_conn()
    cursor = conn.cursor()
    cursor.execute(self.sql, self.parameters)
    with NamedTemporaryFile("wb") as f:
        csv_writer = csv.writer(f, delimiter=self.delimiter, encoding="utf-8")
        # Build the Hive schema and remember which column positions hold
        # hash keys (columns named hkey_*) so their values get SHA-1 hashed
        field_dict = OrderedDict()
        fields_to_hash = set()
        for ctr, field in enumerate(cursor.description):
            field_dict[field[0]] = self.type_map(field[0], field[1])
            if field[0].startswith('hkey_'):
                fields_to_hash.add(ctr)
        # Audit columns appended to every row
        field_dict['record_source'] = 'STRING'
        field_dict['load_dtm'] = 'TIMESTAMP'
        field_dict['seq_num'] = 'BIGINT'
        seq = long(1)  # Python 2: explicit long for the sequence counter
        for row in cursor:
            new_row = []
            for idx, val in enumerate(row):
                if idx in fields_to_hash:
                    m = hashlib.sha1()
                    m.update(val)
                    new_row.append(m.hexdigest().upper())
                else:
                    new_row.append(val)
            csv_writer.writerow(
                new_row + [self.record_source, self.load_dtm, seq])
            seq += 1
        f.flush()
        cursor.close()
        conn.close()
        logging.info("Loading file into Hive")
        hive.load_file(
            f.name,
            self.hive_table,
            field_dict=field_dict,
            create=self.create,
            delimiter=self.delimiter,
            recreate=self.recreate,
            partition=self.partition)
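
# For reference, the hkey_ handling above turns each business-key value into
# an uppercase SHA-1 hex digest, a common data-vault hash-key convention. A
# standalone example of the same transformation (hash_business_key is a
# hypothetical name, not part of the operator):
import hashlib

def hash_business_key(val):
    # Same steps as the row loop above: SHA-1, hex digest, uppercased
    m = hashlib.sha1()
    m.update(val)  # Python 2 str; on Python 3 this would need bytes
    return m.hexdigest().upper()

# hash_business_key('customer|42') -> a 40-character uppercase hex string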

def execute(self, context):
    hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
    file_hook = FileHook(file_conn_id=self.file_conn_id)
    # Resolve the relative path against the connection's base location;
    # the file is loaded in place, so no temporary file is needed here
    abs_path = file_hook.complete_file_path(self.relative_file_path)
    logging.info("Loading file into Hive")
    hive.load_file(
        abs_path,
        self.hive_table,
        schemafile=self.schemafile,
        create=self.create,
        recreate=self.recreate,
        partition=self.partition)
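
# FileHook is not a stock Airflow hook. A minimal sketch of the one method
# used above, assuming the hook resolves relative paths against a configured
# base directory; everything here is hypothetical, and the real hook would
# likely read its base path from the connection named by file_conn_id:
import os

class FileHook(object):
    def __init__(self, file_conn_id, base_path='/data'):
        # Hard-coded default stands in for connection lookup in this sketch
        self.file_conn_id = file_conn_id
        self.base_path = base_path

    def complete_file_path(self, relative_file_path):
        return os.path.join(self.base_path, relative_file_path)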