def test_existence_checking(self):
    """Tests whether non-existing datasets are marked as not existing
    and whether a properly formed month parameter is not considered improper
    """
    params = [f'2010-{num:0>2}' for num in range(1, 13)]  # all months of 2010
    for month in params:
        with self.assertRaises(ValueError) as cm:  # ValueError should be raised with a proper message
            download_data(month)
        self.assertEqual(f'Dataset from {month} cannot be found on lichess.org',
                         cm.exception.args[0], msg=month)
def test_parameter_checking(self):
    """Tests whether wrong parameters are recognised as wrong
    """
    params = ['12-2015', '12-15', '2015_12', '2015.12', '12.2015',
              '12/2015', '12/15', '2015-00', '2015-13']  # examples of malformed month parameters
    for month in params:
        with self.assertRaises(ValueError) as cm:  # ValueError should be raised with a proper message
            download_data(month)
        self.assertEqual('Month parameter should be in form `yyyy-mm`',
                         cm.exception.args[0], msg=month)
def test_downloading(self):
    """Tests whether the file was saved to the returned location
    """
    month = '2013-01'  # smallest of the available datasets
    path = download_data(month)
    self.assertTrue(os.path.isfile(path), msg='File at the returned location does not exist')
    os.remove(path)
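# The tests above assume download_data() validates its month argument before
# attempting a download. Below is a minimal sketch consistent with the asserted
# error messages; it is illustrative only -- the lichess.org URL pattern and the
# use of `requests` are assumptions, not taken from this test module.
import re

import requests


def download_data_sketch(month):
    """Hypothetical month validation and existence check (illustration only)."""
    if not re.fullmatch(r'\d{4}-(0[1-9]|1[0-2])', month):
        raise ValueError('Month parameter should be in form `yyyy-mm`')
    # Assumed URL pattern for the monthly lichess database dumps
    url = f'https://database.lichess.org/standard/lichess_db_standard_rated_{month}.pgn.bz2'
    if requests.head(url).status_code != 200:
        raise ValueError(f'Dataset from {month} cannot be found on lichess.org')
    # ... the real download_data() would then fetch the file and return its path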
def main():
    if not os.path.isdir(DATASETS):
        raise FileNotFoundError('Folder given to save datasets does not exist.')
    datasets = sys.argv[1:]  # use datasets given by the user
    for month in set(datasets):
        filename = download_data(month)
        out_path = os.path.join(DATASETS, filename)
        preprocess_data(filename, out_path)
    print(f'Successfully downloaded and preprocessed {len(set(datasets))} file(s).')
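# A plausible module-level setup for the script above, shown only to make the
# entry point and usage explicit; the DATASETS value is an assumption, and the
# imports would normally sit at the top of the module.
import os
import sys

DATASETS = os.path.join(os.path.dirname(__file__), 'datasets')  # assumed location

if __name__ == '__main__':
    main()  # e.g. invoked as: python download.py 2013-01 2013-02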
    list_of_candidates = save_data.load_saved_data(working_directory, "list_of_candidates.data")
    edge_cases = save_data.load_saved_data(working_directory, "edge_cases.data")
    candidate_to_committee_map = save_data.load_saved_data(
        working_directory, "candidate_to_committee_map.data")
    committee_to_candidate_map = save_data.load_saved_data(
        working_directory, "committee_to_candidate_map.data")
    list_of_committees = save_data.load_saved_data(working_directory, "list_of_committees.data")
elif not saved_data_exists:
    print("Did not detect saved data, reloading data")
    # Check for necessary files and download them
    print("Checking for Federal Election Commission files")
    downloader.download_data(working_directory)
    print("Reading election files and creating data structures")
    # Create list of election campaign objects
    list_of_campaigns = campaign.read_election_data(working_directory)
    list_of_candidates = candidate.read_candidate_data(working_directory)
    list_of_committees = committee.read_committee_data(working_directory)
    # Cross-link the data
    print("Crosslinking all the data files together")
    list_of_campaigns, list_of_candidates, edge_cases, candidate_to_committee_map, committee_to_candidate_map = \
        data_crossmap.link_campaigns_candidates_and_committees_together(list_of_campaigns,
                                                                        list_of_candidates,
                                                                        list_of_committees)
    print("Saving the data structures")
    all_data = [
        list_of_campaigns,
        list_of_candidates,
        edge_cases,
        candidate_to_committee_map,
        committee_to_candidate_map,
        list_of_committees
def main():
    # reading the command line arguments
    parser = argparse.ArgumentParser(
        description='Read in file paths and other parameters.')
    parser.add_argument('--technique',
                        choices=['nmf', 'unet'],
                        help='technique to use to segment neurons',
                        default='nmf',
                        type=str)
    parser.add_argument('--k',
                        help='number of components to estimate per block',
                        default="full",
                        type=str)
    parser.add_argument('--max_size',
                        help='maximum size of each region',
                        default="full",
                        type=str)
    parser.add_argument('--min_size',
                        help='minimum size of each region',
                        default=20,
                        type=int)
    parser.add_argument('--max_iter',
                        help='maximum number of algorithm iterations',
                        default=20,
                        type=int)
    parser.add_argument('--percentile',
                        help='percentile value for thresholding (higher means more thresholding)',
                        default=95,
                        type=int)
    parser.add_argument('--overlap',
                        help='overlap value for determining whether to merge (higher means fewer merges)',
                        default=0.1,
                        type=float)
    args = parser.parse_args()

    # downloading the data as zip files
    dld.download_data()
    # calling extractor to extract the downloaded files
    zip.extract_zips()

    technique = args.technique
    k_value = args.k
    max_size_value = args.max_size
    min_size_value = args.min_size
    max_iter_value = args.max_iter
    percentile_value = args.percentile
    overlap_value = args.overlap

    if technique == 'nmf':
        nmf.NMF_experiments(k=k_value,
                            max_size=max_size_value,
                            min_size=min_size_value,
                            percentile=percentile_value,
                            max_iter=max_iter_value,
                            overlap=overlap_value)
    elif technique == 'unet':
        print('Warning: This code is a work in progress')
        train_image_path, test_image_path, train_region_path = un.get_train_test_region_paths()
        train_images_list = un.get_image_list(train_image_path)
        un.create_nparray(train_images_list, "train.npy")
        test_images_list = un.get_image_list(test_image_path)
        un.create_nparray(test_images_list, "test.npy")
        mask_list = un.region_to_mask(train_region_path)
        un.train_model()
        result = un.predict()
        masks = un.prepare_masks(result)
        un.masks_to_json(masks)
        un.remove_npy()
import optical_flow as opt
import prepare_unet_data as unetdata

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', type=str, help='train or test')
    parser.add_argument('--preproc_type', type=str, help='full or mean or normalize')
    parser.add_argument('--optical_flow', type=str, help='full or step_wise or first_two')
    parser.add_argument('--image_processing', type=str, help='none or sobel or roberts or prewitt or kirsch')
    parser.add_argument('--unet', type=str, help='T or F')
    args = parser.parse_args()

    # downloading the data as tar files
    dld.download_data()
    # calling extractor to extract the downloaded files
    ext.extract_tars()

    if args.mode == 'train':
        # reading the names of the files into a list
        file_list = read.read_filename('../dataset/train.txt')
        # reading the whole dataset as a list of lists containing all frames for each file
        whole_data = read.load_whole_data('../dataset/data', file_list)
        # reading the masks into a list
        mask_list = read.load_mask('../dataset/masks', file_list)
    if args.mode == 'test':
# except TimeoutException:
#     continue
# item_data = MongoRepo.create(item_data)
# download_data(item_data)

start_time = datetime.datetime.now()
logger.info('Script started at {}'.format(start_time))

with Hydralians() as hydralians:
    category_hrefs = hydralians.get_category_hrefs()
    item_hrefs = hydralians.get_item_hrefs(category_hrefs)
    logger.info('Items to parse: {}'.format(len(item_hrefs)))

    # for item_href in item_hrefs:
    #     MongoRepo.create_product_url({'url': item_href})

    items_bar = tqdm(total=len(item_hrefs))
    items_bar.set_description(desc='Items')
    for item_href in item_hrefs:
        items_bar.update()
        try:
            item_data = hydralians.get_item_data(item_href)
        except Exception as ex:
            logger.info('Problem with {}'.format(item_href))
            logger.error(str(ex))
            continue
        item_data = MongoRepo.create(item_data)
        download_data(item_data)
    items_bar.close()

end_time = datetime.datetime.now()
logger.info('Script ended at {}'.format(end_time))