import time
import timeit

import psutil


def threaded_function():
    # Background sampler: records virtual-memory stats until the shared `done` flag is set.
    global memories
    global done
    start = timeit.default_timer()
    while not done:
        # cpus.append(psutil.cpu_percent())
        memories.append(psutil.virtual_memory()._asdict())
        end = timeit.default_timer()
        # After 5 seconds of tight sampling, pause for 5 seconds and restart the interval.
        if end - start > 5:
            time.sleep(5)
            start = timeit.default_timer()
    print("done")
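# Usage sketch (assumption, not from the source): run threaded_function as a daemon
# thread while the workload executes, then flip `done` to stop sampling.
import threading

memories = []   # shared state appended to by the sampler
done = False    # shared stop flag checked by the sampler

sampler = threading.Thread(target=threaded_function, daemon=True)
sampler.start()
# ... run the workload being profiled ...
done = True
sampler.join()
print("collected %d memory samples" % len(memories))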
def ap():
    # Fragment: stores a start timestamp in a module-level `_time_st` dict under the key `s`.
    # Assumes `_time_st`, `s`, and a `timeit` alias (e.g. timeit.default_timer) exist elsewhere.
    _time_st[s] = timeit()
    return ()
from timeit import default_timer as timeit  # assumed alias: timeit() returns a wall-clock timestamp
from numpy import array, linspace, pi, sin
from scipy.special import jn


def Esol_series_bessel(M, e, N=2):
    # Kapteyn (Bessel) series solution of Kepler's equation:
    # E = M + sum_{n=1..N} (2/n) * J_n(n*e) * sin(n*M)
    E = M
    for n in range(1, N + 1):
        E += (2. / n) * jn(n, n * e) * sin(n * M)
    return E


# # Comparison of Solutions

# In[256]:

e = 0.6
Ms = linspace(0, 2 * pi, 100)

t1 = timeit()

# Numeric solution
Ens = array([Esol_numeric(M, e) for M in Ms])
t2 = timeit(); print("Numeric:", (t2 - t1) * 1E6); t1 = t2

# Analytic solution
Eas = array([Esol_analytic(M, e) for M in Ms])
t2 = timeit(); print("Analytic:", (t2 - t1) * 1E6); t1 = t2

# Fourier series solution
Efs = array([Esol_series_fourier(M, e) for M in Ms])
t2 = timeit(); print("Fourier:", (t2 - t1) * 1E6); t1 = t2

# Bessel series solution
Ebs = array([Esol_series_bessel(M, e, N=7) for M in Ms])
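# Esol_numeric is referenced above but not shown here. A minimal sketch, assuming it is a
# Newton-Raphson solver of Kepler's equation M = E - e*sin(E); the name, tolerance and
# starting guess below are illustrative, not the original implementation.
from numpy import cos, pi, sin


def Esol_numeric(M, e, tol=1e-12, maxiter=50):
    E = M if e < 0.8 else pi  # common starting guess
    for _ in range(maxiter):
        dE = (E - e * sin(E) - M) / (1. - e * cos(E))
        E -= dE
        if abs(dE) < tol:
            break
    return E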
def wrapper(*args, **kwargs):
    # Times a single call to `func`, a closure from the enclosing decorator.
    start = timeit()
    rv = func(*args, **kwargs)
    end = timeit()
    print(end - start)
    return rv
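# A minimal sketch of the enclosing decorator this wrapper belongs to, assuming `timeit`
# is an alias for timeit.default_timer; the decorator name `timed` is illustrative.
import functools
from timeit import default_timer as timeit


def timed(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = timeit()
        rv = func(*args, **kwargs)
        end = timeit()
        print("%s took %.6f s" % (func.__name__, end - start))
        return rv
    return wrapper


@timed
def slow_add(a, b):
    return a + b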
def convert(data_dir, output_filepath, append_to_existing=False):
    """
    Converts TransXchange formatted schedule data into a GTFS feed.

    data_dir : str
        Data directory containing one or multiple TransXchange .xml files.
    output_filepath : str
        Full filepath to the output GTFS zip-file, e.g. '/home/myuser/data/my_gtfs.zip'
    append_to_existing : bool (default is False)
        Flag for appending to an existing gtfs-database. This might be useful if you have
        TransXchange .xml files distributed into multiple directories (e.g. separate files
        for train data, tube data and bus data) and you want to merge all those datasets
        into a single GTFS feed.
    """
    # Total start
    tot_start_t = timeit()

    # Filepath for temporary gtfs db
    target_dir = os.path.dirname(output_filepath)
    gtfs_db = os.path.join(target_dir, "gtfs.db")

    # If append to database is false, remove the previous gtfs-database if it exists
    if not append_to_existing:
        if os.path.exists(gtfs_db):
            os.remove(gtfs_db)

    # NAPTAN stops
    naptan_stops_fp = get_path("naptan_stops")

    # Retrieve all TransXChange files
    files = glob.glob(os.path.join(data_dir, "*.xml"))

    # Iterate over files
    print("Populating database ..")

    # Limit the processed files by file size (in MB):
    # files with a lower filesize than this will be processed.
    file_size_limit = 1000

    # Create workers
    workers = create_workers(input_files=files, file_size_limit=file_size_limit,
                             stops_fp=naptan_stops_fp, gtfs_db=gtfs_db)

    # Create Pool
    pool = multiprocessing.Pool()

    # Generate GTFS info to the database in parallel
    pool.map(process_files, workers)

    # Print information about the total time
    tot_end_t = timeit()
    tot_duration = (tot_end_t - tot_start_t) / 60
    print("===========================================================")
    print("It took %s minutes in total." % round(tot_duration, 1))

    # Generate output dictionary
    gtfs_data = generate_gtfs_export(gtfs_db)

    # Export to disk
    save_to_gtfs_zip(output_zip_fp=output_filepath, gtfs_data=gtfs_data)
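# Usage sketch (paths are illustrative): convert a directory of TransXchange .xml files
# into a single GTFS zip; the temporary gtfs.db is created next to the output file.
convert(data_dir="/home/myuser/data/transxchange",
        output_filepath="/home/myuser/data/my_gtfs.zip",
        append_to_existing=False)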
def process_files(parallel):
    # Get files from input instance
    files = parallel.input_files
    file_size_limit = parallel.file_size_limit
    naptan_stops_fp = parallel.stops_fp
    gtfs_db = parallel.gtfs_db

    for idx, fp in enumerate(files):
        # Filesize
        size = round((os.path.getsize(fp) / 1000000), 1)
        if file_size_limit < size:
            continue

        print("=================================================================")
        print("[%s / %s] Processing TransXChange file: %s" % (idx, len(files), os.path.basename(fp)))
        print("Size: %s MB" % size)

        # Log start time
        start_t = timeit()

        data = untangle.parse(fp)

        # Parse stops
        stop_data = get_stops(data, naptan_stops_fp=naptan_stops_fp)

        # Parse agency
        agency = get_agency(data)

        # Parse GTFS info containing data about trips, calendar, stop_times and calendar_dates
        gtfs_info = get_gtfs_info(data)

        # Parse stop_times
        stop_times = get_stop_times(gtfs_info)

        # Parse trips
        trips = get_trips(gtfs_info)

        # Parse calendar
        calendar = get_calendar(gtfs_info)

        # Parse calendar_dates
        calendar_dates = get_calendar_dates(gtfs_info)

        # Parse routes
        routes = get_routes(gtfs_info=gtfs_info, data=data)

        # Initialize database connection
        conn = sqlite3.connect(gtfs_db)

        # Only export data into db if there exists valid stop_times data
        if len(stop_times) > 0:
            stop_times.to_sql(name='stop_times', con=conn, index=False, if_exists='append')
            stop_data.to_sql(name='stops', con=conn, index=False, if_exists='append')
            routes.to_sql(name='routes', con=conn, index=False, if_exists='append')
            agency.to_sql(name='agency', con=conn, index=False, if_exists='append')
            trips.to_sql(name='trips', con=conn, index=False, if_exists='append')
            calendar.to_sql(name='calendar', con=conn, index=False, if_exists='append')
            if calendar_dates is not None:
                calendar_dates.to_sql(name='calendar_dates', con=conn, index=False, if_exists='append')
        else:
            print("UserWarning: File %s did not contain valid stop_sequence data, skipping."
                  % os.path.basename(fp))

        # Close connection
        conn.close()

        # Log end time and parse duration
        end_t = timeit()
        duration = (end_t - start_t) / 60
        print("It took %s minutes." % round(duration, 1))
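# process_files only reads four attributes from `parallel`, so a simple namedtuple is
# enough as the worker container. A minimal sketch with illustrative paths; the real
# create_workers helper may build its workers differently.
from collections import namedtuple

Worker = namedtuple("Worker", ["input_files", "file_size_limit", "stops_fp", "gtfs_db"])
worker = Worker(input_files=["/data/transxchange/route_1.xml"],
                file_size_limit=1000,
                stops_fp="/data/naptan_stops.csv",
                gtfs_db="/data/gtfs.db")
process_files(worker)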
def process_files(parallel):
    # Get files from input instance
    files = parallel.input_files
    file_size_limit = parallel.file_size_limit
    gtfs_db = parallel.gtfs_db

    for idx, path in enumerate(files):
        # If type is string, it is a direct filepath to XML
        if isinstance(path, str):
            data, file_size, xml_name = read_unpacked_xml(path)

        # If the type is dictionary, the contents are in a zip
        elif isinstance(path, dict):
            # If the type of value is a string, the file can be read directly
            # from the given Zipfile path, with the following structure:
            # {"transxchange_name.xml": "/home/data/myzipfile.zip"}
            if isinstance(list(path.values())[0], str):
                data, file_size, xml_name = read_xml_inside_zip(path)

            # If the type of value is a dictionary, the xml-file is in a ZipFile
            # which is inside another ZipFile. In such cases, the path structure is:
            # {"outermost_zipfile_path.zip": {"inner_zipfile.zip": "transxchange.xml"}}
            elif isinstance(list(path.values())[0], dict):
                data, file_size, xml_name = read_xml_inside_nested_zip(path)
            else:
                raise ValueError("Something is wrong with the input xml-file paths.")
        else:
            raise ValueError("Something is wrong with the input xml-file paths.")

        # Filesize
        size = round((file_size / 1000000), 1)
        if file_size_limit < size:
            continue

        print("=================================================================")
        print("[%s / %s] Processing TransXChange file: %s" % (idx, len(files), xml_name))
        print("Size: %s MB" % size)

        # Log start time
        start_t = timeit()

        # Parse stops
        stop_data = get_stops(data)
        if stop_data is None:
            print("Did not find any valid stops. Skipping..")
            continue

        # Parse agency
        agency = get_agency(data)

        # Parse GTFS info containing data about trips, calendar, stop_times and calendar_dates
        gtfs_info = get_gtfs_info(data)

        # Parse stop_times
        stop_times = get_stop_times(gtfs_info)

        # Parse trips
        trips = get_trips(gtfs_info)

        # Parse calendar
        calendar = get_calendar(gtfs_info)

        # Parse calendar_dates
        calendar_dates = get_calendar_dates(gtfs_info)

        # Parse routes
        routes = get_routes(gtfs_info=gtfs_info, data=data)

        # Initialize database connection
        conn = sqlite3.connect(gtfs_db)

        # Only export data into db if there exists valid stop_times data
        if len(stop_times) > 0:
            stop_times.to_sql(name='stop_times', con=conn, index=False, if_exists='append')
            stop_data.to_sql(name='stops', con=conn, index=False, if_exists='append')
            routes.to_sql(name='routes', con=conn, index=False, if_exists='append')
            agency.to_sql(name='agency', con=conn, index=False, if_exists='append')
            trips.to_sql(name='trips', con=conn, index=False, if_exists='append')
            calendar.to_sql(name='calendar', con=conn, index=False, if_exists='append')
            if calendar_dates is not None:
                calendar_dates.to_sql(name='calendar_dates', con=conn, index=False, if_exists='append')
        else:
            print("UserWarning: File %s did not contain valid stop_sequence data, skipping." % xml_name)

        # Close connection
        conn.close()

        # Log end time and parse duration
        end_t = timeit()
        duration = (end_t - start_t) / 60
        print("It took %s minutes." % round(duration, 1))
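# The three input formats this version of process_files dispatches on, as data literals
# (paths are illustrative):
files = [
    # 1) plain .xml filepath
    "/home/data/transxchange_name.xml",
    # 2) .xml inside a ZipFile: {xml name: zip path}
    {"transxchange_name.xml": "/home/data/myzipfile.zip"},
    # 3) .xml inside a ZipFile that is itself inside another ZipFile
    {"outermost_zipfile_path.zip": {"inner_zipfile.zip": "transxchange.xml"}},
]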
def convert(input_filepath, output_filepath, append_to_existing=False,
            worker_cnt=None, file_size_limit=2000):
    """
    Converts TransXchange formatted schedule data into a GTFS feed.

    input_filepath : str
        File path to a data directory or a ZipFile containing one or multiple TransXchange
        .xml files. Nested ZipFiles are also supported (i.e. a ZipFile with ZipFile(s)
        containing .xml files).
    output_filepath : str
        Full filepath to the output GTFS zip-file, e.g. '/home/myuser/data/my_gtfs.zip'
    append_to_existing : bool (default is False)
        Flag for appending to an existing gtfs-database. This might be useful if you have
        TransXchange .xml files distributed into multiple directories (e.g. separate files
        for train data, tube data and bus data) and you want to merge all those datasets
        into a single GTFS feed.
    worker_cnt : int
        Number of workers to distribute the conversion process. By default the number of
        CPUs is used.
    file_size_limit : int
        File size limit (in megabytes) that can be used to skip larger-than-memory
        XML-files (should not happen).
    """
    # Total start
    tot_start_t = timeit()

    # Filepath for temporary gtfs db
    target_dir = os.path.dirname(output_filepath)
    gtfs_db = os.path.join(target_dir, "gtfs.db")

    # If append to database is false, remove the previous gtfs-database if it exists
    if not append_to_existing:
        if os.path.exists(gtfs_db):
            os.remove(gtfs_db)

    # Retrieve all TransXChange files
    files = get_xml_paths(input_filepath)

    # Iterate over files
    print("Populating database ..")

    # Create workers
    workers = create_workers(input_files=files, worker_cnt=worker_cnt,
                             file_size_limit=file_size_limit, gtfs_db=gtfs_db)

    # Create Pool
    pool = multiprocessing.Pool()

    # Generate GTFS info to the database in parallel
    pool.map(process_files, workers)

    # Print information about the total time
    tot_end_t = timeit()
    tot_duration = (tot_end_t - tot_start_t) / 60
    print("===========================================================")
    print("It took %s minutes in total." % round(tot_duration, 1))

    # Generate output dictionary
    gtfs_data = generate_gtfs_export(gtfs_db)

    # Export to disk
    save_to_gtfs_zip(output_zip_fp=output_filepath, gtfs_data=gtfs_data)
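# Usage sketch (paths are illustrative): a ZipFile of TransXchange data converted with
# four parallel workers; nested ZipFiles inside the archive are handled transparently.
convert(input_filepath="/home/myuser/data/transxchange_all.zip",
        output_filepath="/home/myuser/data/my_gtfs.zip",
        worker_cnt=4)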
def calibrate(args, jones, alphas):
    # Simple calibration to test if the simulation went as expected.
    # Note: do not run on a large data set.

    # load data
    ms = table(args.ms)
    time = ms.getcol('TIME')
    _, tbin_idx, tbin_counts = chunkify_rows(time, args.utimes_per_chunk)
    n_time = tbin_idx.size
    ant1 = ms.getcol('ANTENNA1')
    ant2 = ms.getcol('ANTENNA2')
    n_ant = np.maximum(ant1.max(), ant2.max()) + 1
    uvw = ms.getcol('UVW').astype(np.float64)
    data = ms.getcol(args.out_col)  # this is where we put the data

    # we know it is pure Stokes I so we can solve using diagonals only
    data = data[:, :, (0, 3)].astype(np.complex128)
    n_row, n_freq, n_corr = data.shape
    flag = ms.getcol('FLAG')
    flag = flag[:, :, (0, 3)]

    # get phase dir
    radec0 = table(args.ms + '::FIELD').getcol('PHASE_DIR').squeeze().astype(np.float64)

    # get freqs
    freq = table(args.ms + '::SPECTRAL_WINDOW').getcol('CHAN_FREQ')[0].astype(np.float64)
    assert freq.size == n_freq

    # now get the model
    # get source coordinates from lsm
    lsm = Tigger.load(args.sky_model)
    radec = []
    stokes = []
    spi = []
    ref_freqs = []
    for source in lsm.sources:
        radec.append([source.pos.ra, source.pos.dec])
        stokes.append([source.flux.I])
        tmp_spec = source.spectrum
        spi.append([tmp_spec.spi if tmp_spec is not None else 0.0])
        ref_freqs.append([tmp_spec.freq0 if tmp_spec is not None else 1.0])

    n_dir = len(stokes)
    radec = np.asarray(radec)
    lm = radec_to_lm(radec, radec0)

    # get model visibilities
    model = np.zeros((n_row, n_freq, n_dir, 2), dtype=np.complex128)
    stokes = np.asarray(stokes)
    ref_freqs = np.asarray(ref_freqs)
    spi = np.asarray(spi)
    for d in range(n_dir):
        Stokes_I = stokes[d] * (freq / ref_freqs[d])**spi[d]
        model[:, :, d, 0:1] = im_to_vis(Stokes_I[None, :, None], uvw, lm[d:d + 1], freq)
        model[:, :, d, 1] = model[:, :, d, 0]

    # set weights to unity
    weight = np.ones_like(data, dtype=np.float64)

    # initialise gains
    jones0 = np.ones((n_time, n_ant, n_freq, n_dir, n_corr), dtype=np.complex128)

    # calibrate
    ti = timeit()
    jones_hat, jhj, jhr, k = gauss_newton(tbin_idx, tbin_counts, ant1, ant2, jones0,
                                          data, flag, model, weight,
                                          tol=1e-5, maxiter=100)
    print("%i iterations took %fs" % (k, timeit() - ti))

    # verify result
    for p in range(2):
        for q in range(p):
            diff_true = np.angle(jones[:, p] * jones[:, q].conj())
            diff_hat = np.angle(jones_hat[:, p] * jones_hat[:, q].conj())
            try:
                assert_array_almost_equal(diff_true, diff_hat, decimal=2)
            except Exception as e:
                print(e)
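# Usage sketch (assumption): `args` only needs the attributes read above, so a plain
# argparse.Namespace works; `jones` holds the true gains that the recovered solution is
# compared against, and `jones`/`alphas` come from the preceding simulation step.
# All values shown here are illustrative.
from argparse import Namespace

args = Namespace(ms="point_source_sim.ms",
                 utimes_per_chunk=32,
                 out_col="DATA",
                 sky_model="point_source.lsm.html")
calibrate(args, jones=true_jones, alphas=true_alphas)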