def get_json_files_data(path, min=1):
    """
    Load every JSON file found in a directory into Python objects.

    The files are located with find_files(path, "json", min).

    :param path: directory where the JSON files are
    :param min: minimum number of files that needs to be found; forwarded
                unchanged to find_files (the parameter name shadows the
                builtin but is kept for backward compatibility)
    :return: dict mapping each file name without its extension to the parsed
             content of that file
    """
    json_files = find_files(path, "json", min)
    json_data = dict()
    print("===========================================")
    print("= Converting JSON data into Python object =")
    print("===========================================")
    # Progress is displayed as a zero-based index over the highest index.
    last_index = len(json_files) - 1
    for i, file in enumerate(json_files):
        base = os.path.basename(file)  # name with extension (ex. 'file.json')
        # name without extension (ex. 'file');
        # in this case, the names are the trip ids
        trip_id = os.path.splitext(base)[0]
        # Use a context manager so the file handle is closed right away
        # instead of being left open until garbage collection.
        with open(file) as json_file:
            json_data[trip_id] = json.load(json_file)
        if found_CmdPrinter:
            printrp('( ' + str(i) + ' / ' + str(last_index) + ' )')
        else:
            print(i)
    print('( ' + str(last_index) + ' / ' + str(last_index) + ' )')
    return json_data
# NOTE(review): this source contains a second top-level definition named
# write_csv_file (taking csv_output_file, full_data). If both live in the same
# module, the later definition replaces this one at import time - confirm
# which one callers are expected to reach.
def write_csv_file(json_datas, attributes):
    """
    Write information from the json data corresponding to the attributes in a csv file.

    The output is always written to 'results.csv' in the current working
    directory. The header row holds the last component of every attribute
    path. Each entry without an 'error' key produces one row with the values
    returned by find_attribute; an entry with an 'error' key produces a row
    with the id, the word 'error', the error id and the error message.

    :param json_datas: data from which to retrieve the information (expected to be a dict
                       with tripIDs as keys and json data (as python dict) as values)
    :param attributes: array of attributes to retrieve from the json files
                       (must be of format ["path/to/attribute", "path/to/other/attribute"])
    :return: None
    """
    csv_output_file = 'results.csv'

    # Example of the split below:
    # Input  : ['plan/itineraries/0/duration', 'requestParameters/date']
    # Output : [['plan', 'itineraries', '0', 'duration'], ['requestParameters', 'date']]
    splited_attributes = [attribute.split('/') for attribute in attributes]

    with open(csv_output_file, 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',')

        # write the header; attribute[-1] is the name of the attribute
        # TODO : Make the header more significant
        csvwriter.writerow([attribute[-1] for attribute in splited_attributes])

        # j is a 1-based counter used only to keep track of the progress
        for j, (trip_id, data) in enumerate(json_datas.items(), start=1):
            if found_CmdPrinter:  # if we have a CmdPrinter, use this method
                printrp(str(j) + '...')
            else:
                print(j)

            if 'error' in data:
                # if no itineraries were found (ie. there was an error),
                # write the error id and the error message
                csvwriter.writerow([
                    trip_id,
                    'error',
                    str(data['error']['id']),
                    str(data['error']['message']),
                ])
            else:
                # NOTE(review): the adjusted paths replace the previous ones and
                # are therefore carried over to the following trips, exactly as
                # in the original code - confirm this is intended.
                splited_attributes = adjust_path(splited_attributes, data)
                csvwriter.writerow([
                    find_attribute(attribute_path, data)
                    for attribute_path in splited_attributes
                ])
def download_json_files(ori, des, date, hredep, args):
    """
    Wrapper method to clean the code. Calls all the methods needed to download the JSON files from OTP API.

    Nothing is downloaded when the module-level flag download_json is falsy;
    in that case only "Already downloaded" is printed.

    :param ori: dict keyed by trip id; each value provides 'orilon' and 'orilat'
    :param des: dict keyed by trip id; each value provides 'deslon' and 'deslat'
    :param date: dict keyed by trip id; each value provides 'year', 'month' and 'day'
    :param hredep: dict keyed by trip id; each value provides 'hour' and 'minute'
    :param args: dict keyed by trip id; the value is passed to build_url as its last argument
    :return: None
    """
    print("======================================")
    print("= Extracting JSON files from OTP API =")
    print("======================================")

    # The flag does not change while looping, so test it once up front:
    # don't retrieve the data from OTP API if the user specifies it.
    if not download_json:
        print("Already downloaded")
        return

    total = len(ori)
    i = 0
    # ori is iterated just so we can get all the ids
    # (could have been des or hredep)
    for trip_id in ori:
        url = build_url(ori[trip_id]['orilon'], ori[trip_id]['orilat'],
                        des[trip_id]['deslon'], des[trip_id]['deslat'],
                        date[trip_id]['year'], date[trip_id]['month'],
                        date[trip_id]['day'],
                        hredep[trip_id]['hour'], hredep[trip_id]['minute'],
                        args[trip_id])
        try:
            extract_json(url, trip_id, make_dir(json_output))
        except OSError:
            print("ERROR : OTP is not currently running on the given port")
            exit()
        if found_CmdPrinter:
            printrp('( ' + str(i) + ' / ' + str(total) + ' )')
        else:
            print(i)
        i += 1
    print('( ' + str(i) + ' / ' + str(total) + ' )')
def _column_or_default(row, column, default, line_number):
    """Return row[column], or default (with a warning in verbose mode) when the column is absent."""
    try:
        return row[column]
    except KeyError:
        if verbose:
            print("WARN - Line ", line_number,
                  " : No column '" + column + "' found. Used default ", default)
        return default


def create_od_single_file(od_survey):
    """
    Read the Origin/Destination survey csv file and split it into per-trip dicts.

    Trips are numbered from 0 in the order of the rows. Missing 'year',
    'month', 'day', 'hour' or 'minute' columns fall back to the matching
    DEFAULT_* value. Every other column is kept as an extra argument.

    :param od_survey: path of the csv file; its header is expected to contain
                      'orilon', 'orilat', 'deslon' and 'deslat'
    :return: tuple (ori, des, date, hredep, args) of dicts keyed by row number
    """
    ori = dict()
    des = dict()
    hredep = dict()
    date = dict()
    args = dict()  # to store non-classic parameters (such as age, sex, salary, etc.)

    # Columns that are consumed explicitly and must not end up in args.
    reserved_columns = {
        'orilon', 'orilat', 'deslon', 'deslat',
        'year', 'month', 'day', 'hour', 'minute',
    }

    print("======================================================")
    print("= Extracting Origin/Destionation data from csv files =")
    print("======================================================")
    with open(od_survey, 'r', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        # Count the data lines (everything but the header) for the progress
        # display, then rewind so the reader starts from the header again.
        # Clamped at 0 so an empty file does not display a negative total.
        numline = max(sum(1 for _ in csvfile) - 1, 0)
        csvfile.seek(0)

        i = 0
        for row in reader:
            try:
                ori[i] = {'orilon': row['orilon'], 'orilat': row['orilat']}
                des[i] = {'deslon': row['deslon'], 'deslat': row['deslat']}
            except KeyError:
                # NOTE(review): processing continues after this message, so
                # ori/des may lack this row while date/hredep/args have it -
                # confirm whether the run should stop here instead.
                print(
                    "ERROR - Missing Origin/Destination coordinates. "
                    "Make sure the headers for OD coordinates are 'orilon', 'orilat', 'deslon', 'deslat'"
                )

            date[i] = {
                'year': _column_or_default(row, 'year', DEFAULT_YEAR, i),
                'month': _column_or_default(row, 'month', DEFAULT_MONTH, i),
                'day': _column_or_default(row, 'day', DEFAULT_DAY, i),
            }
            hredep[i] = {
                'hour': _column_or_default(row, 'hour', DEFAULT_HOUR, i),
                'minute': _column_or_default(row, 'minute', DEFAULT_MINUTE, i),
            }
            args[i] = {
                column: value
                for column, value in row.items()
                if column not in reserved_columns
            }

            i += 1
            if found_CmdPrinter:
                printrp('( ' + str(i) + ' / ' + str(numline) + ' )')
            else:
                print(i)
    print('( ' + str(i) + ' / ' + str(numline) + ' )')
    return ori, des, date, hredep, args
def _itinerary_row(trip_id, itinerary_nb, itinerary):
    """Build the csv row describing one itinerary (zero-based itinerary_nb) of one trip."""
    modes = []
    bus_lines = []
    boarding_stop_ids = []
    debarquer_stop_ids = []
    agency_trip_ids = []
    for leg in itinerary['legs']:
        modes.append(leg['mode'])
        if leg['mode'] == 'BUS':
            # every time a BUS step is included in the itinerary :
            #   record the bus line number
            #   record the stop ids where the rider boards and gets off
            bus_lines.append(leg['route'])
            boarding_stop_ids.append(str(leg['from']['stopCode']))
            debarquer_stop_ids.append(str(leg['to']['stopCode']))
            # we need to .split because tripId is given as agencyId:tripId
            agency_trip_ids.append(str(leg['tripId'].split(':')[1]))

    # those are /1000 because OTP gives Epoch time in milliseconds
    start_time = time.strftime('%Y-%m-%d %H:%M:%S',
                               time.localtime(itinerary['startTime'] / 1000))
    end_time = time.strftime('%Y-%m-%d %H:%M:%S',
                             time.localtime(itinerary['endTime'] / 1000))
    walk_time = itinerary['walkTime']
    transit_time = itinerary['transitTime']
    waiting_time = itinerary['waitingTime']

    return [
        trip_id,
        ';'.join(agency_trip_ids),
        str(itinerary_nb + 1),
        ';'.join(modes),
        str(itinerary['duration']),
        str(get_perceived_time(walk_time, transit_time, waiting_time)),
        str(start_time),
        str(end_time),
        str(walk_time),
        str(itinerary['walkDistance']),
        str(transit_time),
        str(waiting_time),
        str(len(bus_lines)),  # one boarding per BUS leg
        ';'.join(bus_lines),
        ';'.join(boarding_stop_ids),
        ';'.join(debarquer_stop_ids),
    ]


# NOTE(review): this source contains another top-level definition named
# write_csv_file (taking json_datas, attributes). If both live in the same
# module, only the later definition stays reachable - confirm this is intended.
def write_csv_file(csv_output_file, full_data):
    """
    Write information in a csv file.

    One row is written per itinerary of every trip. A trip whose data has an
    'error' key produces a single row with the trip id, the word 'error' and
    the error's 'id', 'message' and 'msg' fields.

    :param csv_output_file: name of the write-to file (will be created if it doesn't already exist
                            and overwritten if it does)(name must end with .csv); it is passed
                            through make_dir to obtain the path that is opened
    :param full_data: data of the json files as python object
                      data[id] = dict() containing one json file information
    :return: None
    """
    j = 0
    total = len(full_data)
    csv_file_path = make_dir(csv_output_file)
    try:
        with open(csv_file_path, 'w', newline='') as csvfile:
            csvwriter = csv.writer(csvfile, delimiter=',')
            csvwriter.writerow([
                'tripId', 'agency_tripId', 'itinerary_nb', 'modes',
                'actual_time', 'perceived_time', 'start_time', 'end_time',
                'walk_time', 'walk_distance', 'transit_time', 'waiting_time',
                'boardings', 'bus_lines_numbers', 'boarding_stop_ids',
                'debarquer_stop_ids'
            ])
            print("======================================")
            print("= Creating CSV file from JSON files  =")
            print("======================================")
            for trip_id, data in full_data.items():
                j += 1
                # j counts the trips handled so far, so it is shown against
                # the total number of trips (not total - 1).
                if found_CmdPrinter:
                    printrp('( ' + str(j) + ' / ' + str(total) + ' )')
                else:
                    print(j)

                if 'error' in data:
                    # if no itineraries were found (ie. there was an error),
                    # write the error id and error message
                    # note : msg is the short message (eg. PATH_NOT_FOUND),
                    # message is the long description
                    error = data['error']
                    csvwriter.writerow([
                        trip_id,
                        'error',
                        str(error['id']),
                        str(error['message']),
                        str(error['msg']),
                    ])
                else:
                    itineraries = data['plan']['itineraries']
                    for itinerary_nb, itinerary in enumerate(itineraries):
                        # Write all the information inside the csv file
                        csvwriter.writerow(
                            _itinerary_row(trip_id, itinerary_nb, itinerary))
    except PermissionError:
        print(
            'ERROR - Cannot write to CSV file. The file might be used by another app.'
        )
        exit()
    except OSError:
        print("ERROR - Couldn't open file " + csv_file_path +
              ". Please verify the file's permissions.")
    print('( ' + str(j) + ' / ' + str(total) + ' )')