def scrape_last_n_years_of_k_players(self, n, k, mode, singOrDubs="singles"):
    """Scrape ranking data for the last n year-end ranking dates, handing k
    and the output file to the scraping function registered for this mode."""
    last_ten_years = get_dates()
    dates_by_year = get_dates_by_year()
    if mode not in self.modes:
        print("Error: Invalid Mode. Options are " + str(self.modes))
        return
    filename = self.generate_filename(singOrDubs, mode)
    with open(filename, "w") as output_file:
        output_file.write(self.file_header[mode])
        searchedPlayers = []
        for year in last_ten_years[:n]:
            str_year = str(year)
            next_url = ("https://www.atptour.com/en/rankings/" + singOrDubs
                        + "?rankDate=" + str_year + "-12-" + dates_by_year[year]
                        + "&rankRange=0-5000")
            if self.debug:
                print(year)
                print(next_url)
            self.browser.get(next_url)
            self.scrape_func[mode](k, output_file, next_url, str_year, searchedPlayers)
    self.browser.quit()
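# Standalone illustration of the URL built inside the loop above, with made-up
# inputs: dates_by_year is assumed to map a year to the day-of-month of its
# final December ranking date (the mapping below is illustrative only).
dates_by_year_example = {2019: "30"}
year = 2019
example_url = ("https://www.atptour.com/en/rankings/singles?rankDate="
               + str(year) + "-12-" + dates_by_year_example[year] + "&rankRange=0-5000")
# -> https://www.atptour.com/en/rankings/singles?rankDate=2019-12-30&rankRange=0-5000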
def transfer_files(start_date=None, end_date=None):
    """! Transfer daily ACCESS-G files from NCI to the network location.

    - Needs an NCI login and a private ssh key registered with NCI - or, password input.
    - If using password input, this has to be run from a terminal, not with an IDE's "run".
    Run without arguments to update - only transfers files newer than the newest existing file.

    @param start_date: starting date for files to download
    @param end_date: end date for files to download (not inclusive); defaults to today
    """
    my_hostname = 'raijin.nci.org.au'
    my_username = '******'
    #my_password = getpass()
    private_key = '~/.ssh/id_rsa'

    if not end_date:
        # evaluate the default at call time, not at function definition time
        end_date = datetime.date.today()
    if not start_date:
        start_date = get_start_date(settings.ACCESS_G_PATH)
    today = datetime.date.today()
    yesterday = today - datetime.timedelta(days=1)
    if start_date >= today or (start_date == yesterday and datetime.datetime.now().hour < 8):
        # The previous day's 1200 file is uploaded to NCI at ~7.30am each day
        print('ACCESS-G downloaded files are already up to date')
        return

    dates = get_dates(start_date, end_date)
    with pysftp.Connection(host=my_hostname, username=my_username, private_key=private_key) as sftp:
        print("Connection successfully established ... ")
        # Switch to the remote ACCESS-G directory
        sftp.cwd('/g/data3/lb4/ops_aps2/access-g/0001/')
        nc_filename = 'accum_prcp.nc'
        hour = settings.ACCESS_HOUR
        localPath = 'temp/'
        for date in dates:
            new_file_name = settings.access_g_filename(date)
            remoteFilePath = date + '/' + hour + '/fc/sfc/' + nc_filename
            localFilePath = localPath + new_file_name
            sftp.get(remoteFilePath, localFilePath)
            # crop to Australian coordinates, then write to the network location
            # (networkPath is assumed to be defined at module level)
            australiaFile = limit_coordinates(localFilePath)
            australiaFile.to_netcdf(networkPath + new_file_name)
            print('File: ' + new_file_name + ' written')
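# A minimal sketch of the get_dates helper used above. Judging from how
# remoteFilePath is built (date + '/' + hour + ...), it presumably yields one
# 'YYYYMMDD' string per day in [start_date, end_date); both the format and the
# half-open range are assumptions, not the confirmed implementation.
import datetime

def get_dates_sketch(start_date, end_date):
    current = start_date
    out = []
    while current < end_date:
        out.append(current.strftime('%Y%m%d'))
        current += datetime.timedelta(days=1)
    return out

# get_dates_sketch(datetime.date(2021, 3, 1), datetime.date(2021, 3, 3))
# -> ['20210301', '20210302']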
def update_reports():
    global last_report_update
    global report_dates
    global reports_json
    if (time() - last_report_update) < MAX_REPORT_STALENESS:
        return
    report_dates = dateutil.get_dates()
    with open('config/reports.json') as reports_file:
        reports_json = json.load(reports_file)
    last_report_update = time()
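# Illustrative module-level state that update_reports() relies on; the real
# values live elsewhere in the module. The 300-second window is a made-up
# example, and last_report_update = 0 simply forces a refresh on the first call.
from time import time
import json
import dateutil  # assumed to be a local helper module exposing get_dates()

MAX_REPORT_STALENESS = 300  # seconds (hypothetical value)
last_report_update = 0
report_dates = None
reports_json = None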
def plot_dist_xtime(identifier="164952497", window_size=seconds_in_month):
    """
    :param identifier: id of user
    :param window_size: in seconds or number of messages
    :return: activity of the first user and the second
    """
    dates.create_single_file(identifier)
    plt.figure(figsize=(10, 7.), dpi=200)

    diffs, timing = dates.making_difference_sorted(
        dates.get_directed_dates(identifier, from_me=True))
    # ans holds the activity of user1: x axis is time, y is activity,
    # computed with a sliding window approach
    ans = build_with_xtime(timing, diffs, window_size)  # window_size is seconds only for xtime
    x_user1 = np.array(ans[0])
    y_user1 = np.array(ans[1])
    plt.plot(x_user1, y_user1, label="User1 activity", lw=2.)

    diffs, timing = dates.making_difference_sorted(
        dates.get_directed_dates(identifier, from_me=False))
    # same sliding-window activity, now for user2
    ans = build_with_xtime(timing, diffs, window_size)
    x_user2 = np.array(ans[0])
    y_user2 = np.array(ans[1])
    plt.plot(x_user2, y_user2, label="User2 activity", lw=2.)

    diffs, timing = dates.making_difference_sorted(dates.get_dates(identifier))
    ans = build_with_xtime(timing, diffs, window_size)
    plt.plot(ans[0], ans[1], label="Summary activity", lw=2.)
    plt.legend()
    plt.xlabel("months", fontsize=27)
    plt.ylabel("frequency", fontsize=27)
    plt.show()

    plt.figure(figsize=(10, 7.), dpi=200)
    # interpolate user1's activity onto user2's time axis before subtracting
    y = np.interp(x_user2, x_user1, y_user1) - y_user2
    plt.plot(x_user2, y, label="difference in activity", lw=2.)
    plt.xlabel("months", fontsize=27)
    plt.ylabel("difference", fontsize=27)
    #print("vk.com/id{id} ignores {count} messages per month\n".format(id=identifier, count=np.mean(y)*window_size))
    plt.show()
    return [x_user1, y_user1], [x_user2, y_user2]
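# Standalone illustration of the alignment step above: user1's activity is
# interpolated onto user2's time axis with np.interp so the two series can be
# subtracted pointwise despite different sampling times. Data is made up.
import numpy as np

x1, y1 = np.array([0.0, 1.0, 2.0]), np.array([1.0, 3.0, 5.0])  # user1 grid
x2, y2 = np.array([0.5, 1.5]), np.array([2.0, 2.0])            # user2 grid
difference = np.interp(x2, x1, y1) - y2  # -> array([0., 2.])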
def pull_data():
    dates_list = get_dates(LAST_DAY)
    for date in dates_list:
        csv_url = DATA_URL + date + '.csv'
        json_file = OUTPUT_PATH + date + '.json'
        with closing(urlopen(csv_url)) as infile:
            with open(json_file, 'w') as outfile:
                data = infile.read().decode('ascii', 'ignore')
                datafile = StringIO(data)
                dict_reader = csv.DictReader(datafile)
                # only keep US states, and dump the list in a single call so
                # the output is a valid JSON array with no trailing comma
                rows = [row for row in dict_reader
                        if row['Province/State'] in LIST_OF_STATES]
                json.dump(rows, outfile, indent=4)
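# A hedged sketch of the get_dates helper assumed above. Based on how csv_url
# and json_file are built, it returns date strings matching the remote CSV
# names; the 'MM-DD-YYYY' format and the fixed 7-day range here are guesses,
# not the confirmed behavior.
import datetime

def get_dates_sketch(last_day, n_days=7):
    end = datetime.datetime.strptime(last_day, '%m-%d-%Y').date()
    return [(end - datetime.timedelta(days=offset)).strftime('%m-%d-%Y')
            for offset in range(n_days - 1, -1, -1)]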
## Adds your timetable from `data.txt` to Google Calendar.
from __future__ import print_function

import os
import json
import datetime
import sys
import re
from icalendar import Calendar, Event

import dates
WORKING_DAYS = dates.get_dates()

import build_event
import argparse
import getpass

parser = argparse.ArgumentParser()
parser.add_argument("-i", "--input")
parser.add_argument("-o", "--output")
args = parser.parse_args()

DEBUG = False
GENERATE_ICS = True
TIMETABLE_DICT_RE = '([0-9]{1,2}):([0-9]{1,2}):([AP])M-([0-9]{1,2}):([0-9]{1,2}):([AP])M'
timetable_dict_parser = re.compile(TIMETABLE_DICT_RE)

INPUT_FILENAME = args.input if args.input else "data.txt"
if not os.path.exists(INPUT_FILENAME):
    print("Input file", INPUT_FILENAME, "does not exist.")
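# Quick demo of the time-range pattern compiled above: it matches strings like
# "9:00:AM-10:15:AM", capturing hours, minutes, and the A/P of each endpoint.
demo_match = timetable_dict_parser.match("9:00:AM-10:15:AM")
assert demo_match is not None
assert demo_match.groups() == ('9', '00', 'A', '10', '15', 'A')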
def test_get_dates(self):
    result = dates.get_dates(self.start_date, self.end_date)
    assert result == self.dates
import os
import json
import datetime
import sys

# this script works only with Python 3
if sys.version_info[0] != 3:
    print("This script works only with Python 3")
    sys.exit(1)

import re
from icalendar import Calendar, Event

import dates
WORKING_DAYS = dates.get_dates()

import build_event
from update_subjects_json import update_sub_list
import argparse
import getpass

parser = argparse.ArgumentParser()
parser.add_argument("-i", "--input")
parser.add_argument("-o", "--output")
args = parser.parse_args()

DEBUG = False
GENERATE_ICS = True
TIMETABLE_DICT_RE = '([0-9]{1,2}):([0-9]{1,2}):([AP])M-([0-9]{1,2}):([0-9]{1,2}):([AP])M'
from collections import namedtuple

import yaml

from functions import get_airports_raw_data, get_connections_from_stations_data, execute_request
from dates import get_dates

print('Starting main.py to run Ryanpy!')

RequestOneFlight = namedtuple('RequestOneFlight', ['orig', 'dest', 'date'])

with open("Config.yml", 'r') as ymlfile:
    config = yaml.load(ymlfile, Loader=yaml.SafeLoader)

date_from, date_to, duration = config['dates']['fromdate'], config['dates']['todate'], config['dates']['duration']
departure_airports = config['airports']['departureairports']
date_list = get_dates(date_from, date_to)
flight_list = []
data_connections = get_connections_from_stations_data(get_airports_raw_data())
#print(data_connections['FKB'])

for departure_airport in departure_airports:
    print("Looking for connections starting from: {}.".format(departure_airport))
    print("All destinations: {}.".format(data_connections[departure_airport]))
    #TODO: create request - which can be given as a parameter to a function
    for item in data_connections[departure_airport]:
        print("Looking for connections starting from: {} to {}.".format(departure_airport, item))
        list_two_way = []
        requestOneFlight = RequestOneFlight(
            orig=departure_airport,
            dest=item,
            date=date_list[0])
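# After yaml.load, config is a plain dict. An illustrative shape matching the
# keys read above; every value is a made-up example, only the structure is
# inferred from the code ('FKB' appears in the commented-out debug print).
config_example = {
    'dates': {'fromdate': '2021-06-01', 'todate': '2021-06-15', 'duration': 7},
    'airports': {'departureairports': ['FKB']},
}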
def test_get_dates():
    data = dates.get_dates()
    assert len(data) == 4
    for row in data:
        assert len(row) == 3
        assert isinstance(row[1], datetime.date)
def aggregate_netcdf(update_only=True, start_date=None, end_date=None, smips=False, accessg=False):
    if smips or accessg:
        if smips:
            aggregate_file = aggregated_smips
            path = settings.SMIPS_DEST_PATH
            if not end_date:
                end_date = settings.yesterday
            files = settings.smips_filename
        elif accessg:
            aggregate_file = aggregated_access_g
            path = settings.ACCESS_G_PATH
            if not end_date:
                end_date = datetime.date.today()
            files = settings.access_g_filename
    else:
        print('Run with smips=True or accessg=True')
        return

    if update_only:
        if not start_date:
            if accessg:
                nc = xr.open_dataset(path + aggregate_file, decode_times=False)
                latest = nc.time.values[-1]
                start = datetime.date(1900, 1, 1)
                start_date = start + datetime.timedelta(int(latest)) + datetime.timedelta(days=1)
                nc.close()
                if start_date >= datetime.date.today():
                    print('ACCESS-G aggregation is already up to date')
                    return
            elif smips:
                nc = xr.open_dataset(path + aggregate_file)
                latest = nc.time.values[-1]
                start_date = convert_date(latest) + datetime.timedelta(days=1)
                nc.close()
                if start_date >= settings.yesterday:
                    print('SMIPS aggregation is already up to date')
                    return
        dates = get_dates(start_date=start_date, end_date=end_date)
        files = [path + files(date) for date in dates]
    else:
        if smips:
            files = [file for file in glob.glob(path + '*/*.nc')]
        elif accessg:
            # there's one file in the access-g directories that's called cdo.nc,
            # which the *12.nc glob skips
            files = [file for file in glob.glob(path + '*/*12.nc')]

    if len(files) <= 0:
        print('File aggregation is up to date')
        return
    add_to_netcdf_cube_from_files(end_date=end_date, cubename=aggregate_file, files=files)
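# Usage sketch: the __main__ guard is an addition for illustration. Each call
# extends one aggregate cube incrementally and returns early with a message
# if it is already up to date.
if __name__ == '__main__':
    aggregate_netcdf(smips=True)
    aggregate_netcdf(accessg=True)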