def main():
    """Entry point: read the jar path from argv[1] and run the extractor.

    Usage: script.py <jar_path>

    Prints an error and returns (without starting extraction) when the
    argument count is wrong.
    """
    if len(sys.argv) == 2:
        jar_path = sys.argv[1]
    else:
        # Error messages belong on stderr so stdout stays clean for pipelines.
        print('[ERROR] jar path need input!', file=sys.stderr)
        return
    extractor = Extractor(jar_path)
    extractor.start()
def main():
    """Build an Extractor for the jar named on the command line and start it."""
    # Guard clause: exactly one positional argument (the jar path) is required.
    if len(sys.argv) != 2:
        print('[ERROR] jar path need input!')
        return
    Extractor(sys.argv[1]).start()
def collect(user: str, date_range: DateRange) -> Extractor:
    """Scrape the GitHub contribution calendar for *user*.

    Fetches one profile page per year in *date_range*, pulls every SVG
    ``<rect>`` calendar cell, and keeps the (date, count) pairs that fall
    inside the range.

    Args:
        user: GitHub username whose profile page is fetched.
        date_range: Range of dates to keep; must provide ``iter_year()``
            and membership testing via ``in``.

    Returns:
        An Extractor wrapping a DataFrame with ``date`` and ``count`` columns.
    """
    data = []
    for year in date_range.iter_year():
        url = f"https://github.com/{user}"
        params = {
            "from": f"{year}-01-01",
        }
        text = Request.fetch_text(url, params)
        # find_all() is the bs4 4.x API; findAll is a deprecated camelCase alias.
        rects = BeautifulSoup(text, "html.parser").find_all("rect")
        for rect in rects:
            data_date = rect["data-date"]
            data_count = rect["data-count"]
            if date.fromisoformat(data_date) in date_range:
                data.append(
                    [
                        pd.Timestamp(data_date),
                        pd.to_numeric(data_count),
                    ]
                )
    return Extractor(
        user=user,
        df=pd.DataFrame(
            data=data,
            columns=["date", "count"],
        ),
    )
def extract_features():
    """Extract CNN features for every train/test video and cache them as .npy.

    Each video's frames are run through the saved CNN extractor and the
    resulting feature sequence is saved under PROCESSED_SEQUENCES_DATA_DIR.
    Videos whose feature file already exists are skipped, so the function is
    safe to re-run after an interruption.
    """
    data = DataSet()
    model = Extractor(SAVED_CNN_EXTRACTOR_MODEL)

    if not os.path.exists(PROCESSED_SEQUENCES_DATA_DIR):
        os.makedirs(PROCESSED_SEQUENCES_DATA_DIR)

    for folder in ['train', 'test']:
        print(f'Extracting features from {folder} videos...')
        video_filenames = list(data.data[folder].keys())
        # 'with' guarantees the progress bar is closed even if extraction fails.
        with tqdm(total=len(video_filenames)) as pbar:
            for video_filename in video_filenames:
                # numpy auto-appends .npy on save, so keep the bare path here.
                path = os.path.join(PROCESSED_SEQUENCES_DATA_DIR,
                                    video_filename + '-features')
                # Skip videos whose features were already extracted.
                if os.path.isfile(path + '.npy'):
                    pbar.update(1)
                    continue
                frames = data.get_frames_paths(folder, video_filename)
                # Run every frame through the CNN to build the feature sequence.
                sequence = [model.extract(image) for image in frames]
                np.save(path, sequence)
                pbar.update(1)
def run(self): # TODO: Unncomment for uncompress and move events json file compressed # self._uncompress_events() # TODO: Unncomment for deleting events json file compressed # Delete events json file # os.remove(src_gz_file) extractor = Extractor(self.src_db_config) df_plans = extractor.export_table_to_df(self.table_plans) transformer = Transformer() df_events = transformer.create_events_info_df_from_file( self.src_events) df_purchase = transformer.create_purchase_detail_df_from_df( df_events, df_plans) loader = Loader(db_name='test_fever') events_json = df_events.to_dict(orient="records") # loader.delete_many(collection_name=self.mongo_db_events, json_query={}) # Test try: loader.insert_many(collection_name=self.mongo_db_events, json_list=events_json) except pymongo.errors.BulkWriteError: loader.upsert_many_one_by_one(collection_name=self.mongo_db_events, json_list=events_json) purchase_json = df_purchase.to_dict(orient="records") # loader.delete_many(collection_name=self.mongo_db_purchase, json_query={}) # Test try: loader.insert_many(collection_name=self.mongo_db_purchase, json_list=purchase_json) except pymongo.errors.BulkWriteError: loader.upsert_many_one_by_one( collection_name=self.mongo_db_purchase, json_list=purchase_json)
class TestExtractor(unittest.TestCase):
    """Integration tests for Extractor against the local test MySQL instance.

    Requires the database described by db_config.json to be reachable and
    seeded with the fevertest schema (552 rows in fever_plans).
    """

    def setUp(self):
        src_db_config = "/opt/repos/plan-test/config/db_config.json"
        self.extractor = Extractor(src_db_config)

    def test_list_data_bases(self):
        res = self.extractor.list_data_bases()
        # assertIn gives a clearer failure message than assertTrue(x in y).
        self.assertIn("information_schema", res)
        self.assertIn("fevertest", res)

    def test_list_tables(self):
        res = self.extractor.list_tables()
        self.assertIn("fever_plans", res)

    def test_execute(self):
        query = "SELECT * FROM fever_plans"
        result = self.extractor.execute(query)
        self.assertEqual(552, result.rowcount)

    def test_export_table_to_csv(self):
        # NOTE(review): no assertion here — this only checks the export does
        # not raise. Consider asserting on the produced CSV file's contents.
        table = "fever_plans"
        csv_dst = "/opt/repos/plan-test/test/out/fever_plans.csv"
        self.extractor.export_table_to_csv(table, csv_dst)
def __writer() -> Writer:
    """Build a Writer over a one-day, zero-count extractor with stub skeletons."""
    # Single-day frame: 2010-12-25 with a contribution count of zero.
    single_day = pd.DataFrame(
        {
            "count": [0],
            "date": pd.date_range(start="2010-12-25", end="2010-12-25"),
        }
    )
    # Placeholder templates keyed by skeleton name; values echo the key plus
    # the format fields the writer interpolates.
    string_map = {
        "header-section": "header-section {user}",
        "repository": "repository {link}",
        "repository-title": "repository-title",
        "issue": "issue {link}",
        "issue-title": "issue-title",
        "summary-section": "summary-section",
        "today": "today {date} {count} {length}",
        "today-peak": "today-peak {start} {length}",
        "max": "max {date} {count}",
        "max-peak": "max-peak {start} {end} {length}",
        "total": "total {sum} {avg}",
        "graph-section": "graph-section",
        "count-sum-recent": "count-sum-recent",
        "count-sum-full": "count-sum-full",
        "dayofweek-sum-recent": "dayofweek-sum-recent",
        "dayofweek-mean-full": "dayofweek-mean-full",
        "month-sum-recent": "month-sum-recent",
        "year-sum-full": "year-sum-full",
        "contribution-count": "contribution-count",
        "day": "day",
        "dayofweek": "dayofweek",
        "month": "month",
        "year": "year",
    }
    # Spelled-out labels for weekday (7) and month (12) positions.
    list_map = {
        "dayofweek": ["zero", "one", "two", "three", "four", "five", "six"],
        "month": ["zero", "one", "two", "three", "four", "five", "six",
                  "seven", "eight", "nine", "ten", "eleven"],
    }
    return Writer(
        extractor=Extractor(user="******", df=single_day),
        skeleton_string_map=string_map,
        skeleton_list_map=list_map,
    )
SAVED_CNN_EXTRACTOR_MODEL, SAVED_RNN_MODEL
from src.data import DataSet
from src.extractor import Extractor
from src.utils import VideoHelper


def prepare_sequence_for_rnn(sequence):
    """Stack sliding windows of RNN_WINDOW_SIZE frames over *sequence*.

    Returns a numpy array of one window per position, each window covering
    the RNN_WINDOW_SIZE elements ending just before that position.
    """
    windows = [
        np.asarray(sequence[right - RNN_WINDOW_SIZE:right])
        for right in range(RNN_WINDOW_SIZE, len(sequence))
    ]
    return np.asarray(windows)


# Load the trained models once at module import time.
cnn_extractor_model = Extractor(SAVED_CNN_EXTRACTOR_MODEL)
rnn_model = load_model(SAVED_RNN_MODEL, custom_objects={
    'ccc_loss': metrics.ccc_loss,
    'rmse': metrics.rmse,
    'rmse_v': metrics.rmse_v,
    'rmse_a': metrics.rmse_a,
    'cc_v': metrics.cc_v,
    'cc_a': metrics.cc_a,
    'ccc_v': metrics.ccc_v,
    'ccc_a': metrics.ccc_a
})
video_helper = VideoHelper()
private_test_video_filenames = video_helper.get_private_test_video_filenames()
#input: English sentence
if __name__ == '__main__':
    # Expect exactly two arguments: <experiment_dir> <sentence>
    if len(sys.argv) == 3:
        experiment_dir = sys.argv[1]
        sentence = sys.argv[2]
    else:
        # A usage message beats 'assert False': asserts are stripped under -O
        # and give the user no hint about what went wrong.
        sys.stderr.write('usage: %s <experiment_dir> <sentence>\n' % sys.argv[0])
        sys.exit(1)

    # load config
    _dir = os.path.dirname(os.path.abspath(__file__))
    config = SMTSemparseConfig(_dir + '/settings.yaml', _dir + '/dependencies.yaml')

    # stem the input sentence
    sentence = Extractor(config).preprocess_nl(sentence)

    # we need a temp dir!
    temp_dir = tempfile.mkdtemp()
    try:
        # decode
        moses = Moses(config)
        moses.decode_sentence(experiment_dir, sentence, temp_dir)

        # convert to bracket structure (print(...) works on both py2 and py3)
        print(Functionalizer(config).run_sentence(experiment_dir, temp_dir))
    finally:
        # delete tmp files even when decoding raises
        shutil.rmtree(temp_dir)
def setUp(self): src_db_config = "/opt/repos/plan-test/config/db_config.json" self.extractor = Extractor(src_db_config)