def process_all(self):
    """Run the full extract / transform / load pass over all products.

    Steps, in order:
      1. Connect an ``Extractor`` for ``self.language`` and read the total
         number of products.
      2. Fetch rows in batches of 10; for every product compute its term
         data via ``Transformer.get_tf`` and queue it with
         ``self.loader.insert_tf`` on a loader pipeline.
      3. Close the extractor, then call ``self.loader.count_df()`` and
         ``self.loader.count_tf_idf(num_of_products)``.

    Any exception is caught and reported on stdout; nothing is re-raised
    and nothing is returned.
    """
    try:
        print('process_all ... begin')
        extractor = Extractor(self.language)
        transformer = Transformer()
        extractor.connect()
        # Once connected, always release the extractor -- previously an
        # error anywhere in the batch loop skipped close() entirely.
        try:
            num_of_products = extractor.get_num_of_products()
            extractor.execute()
            num_of_rows = 10
            rows = extractor.get_next_batch(num_of_rows)
            pipeline = self.loader.create_pipeline()
            # An empty batch signals that the extractor is exhausted.
            while len(rows) > 0:
                for product in rows:
                    print('\n {}'.format(product))
                    tf = transformer.get_tf(product)
                    print('len tf: {}'.format(len(tf)))
                    self.loader.insert_tf(product['id'], tf, pipeline)
                # NOTE(review): the original indentation was lost; the
                # pipeline is assumed to be executed once per batch rather
                # than once per product -- confirm.
                pipeline.execute()
                rows = extractor.get_next_batch(num_of_rows)
        finally:
            extractor.close()
        # Aggregate statistics are computed only after a fully successful
        # load, exactly as before (close happens first on the happy path).
        self.loader.count_df()
        self.loader.count_tf_idf(num_of_products)
    except Exception as e:
        print('ETL.process_all(), error: {}'.format(e))
def c_07():
    """Print the number of products reported by the extractor.

    Builds an ``Extractor`` with ``language='es'``, connects it, and prints
    the value returned by ``get_num_of_products()``. Any exception is
    caught and reported on stdout; nothing is returned.
    """
    try:
        extractor = Extractor(language='es')
        extractor.connect()
        # Release the connection even if the count query fails; the
        # original never closed the extractor at all.
        try:
            n = extractor.get_num_of_products()
            print(n)
        finally:
            extractor.close()
    except Exception as e:
        # The label previously read 'c_06()', which misattributed failures
        # to a different function.
        print('c_07(), error: {}'.format(e))