def GetAnalytics(vehDf, traDf, Startdate, Enddate, metric):
    vehDf = gen.filter_df_by_date(vehDf, Startdate, Enddate)
    traDf = gen.filter_df_by_date(traDf, Startdate, Enddate)
    journey_descriptives = JourneyDescriptives(vehDf, metric).descriptives_df
    metric_descriptives = MetricDescriptives(vehDf[metric]).descriptives_df
    all_descriptives = pd.concat([metric_descriptives, journey_descriptives])
    return all_descriptives
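# A minimal sketch of the gen.filter_df_by_date helper used above, assuming the
# frame is indexed by timestamp; the project's real implementation is not shown.
import pandas as pd

def filter_df_by_date(df, startdate, enddate):
    # Keep only rows whose index falls inside the inclusive [startdate, enddate] window.
    mask = (df.index >= pd.Timestamp(startdate)) & (df.index <= pd.Timestamp(enddate))
    return df.loc[mask]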
def doaj_search(date):
    total_pages = 1
    page_number = 1
    print('Calling DOAJ Api for Entries Created on ' + str(date))
    first_response = Gf.api_call(doaj_url(doaj_date(date)))
    try:
        url = first_response.json()["next"]
        # Parse the page count from the "last page" URL; slicing a single
        # character (as before) breaks once there are 10+ pages.
        last_page_url = str(first_response.json()["last"])
        page_param = last_page_url[last_page_url.find("page=") + 5:]
        total_pages = int(page_param.split('&')[0])
    except KeyError:
        print(' Only one page to collect')
    total_docs = first_response.json()["total"]
    if total_pages != 1:
        print(' Total of ' + str(total_pages) + ' pages to collect, for '
              + str(total_docs) + ' total docs')
    print(' Storing page ' + str(page_number) + '/' + str(total_pages))
    result = pd.json_normalize(first_response.json()["results"])
    # Strip punctuation from every string cell; iterating the frame directly,
    # as the original did, only visits column names.
    # (DataFrame.map needs pandas >= 2.1; older versions spell it applymap.)
    result = result.map(lambda v: ''.join(c for c in v if c not in string.punctuation)
                        if isinstance(v, str) else v)
    page_number += 1
    while 1 < page_number <= total_pages:
        print(' Calling for page ' + str(page_number))
        try:
            loop_response = Gf.api_call(url)
            url = loop_response.json()["next"]
            print(' Storing page ' + str(page_number) + '/' + str(total_pages))
            new_data = pd.json_normalize(loop_response.json()["results"])
            new_data = new_data.map(lambda v: ''.join(c for c in v if c not in string.punctuation)
                                    if isinstance(v, str) else v)
            result = pd.concat([result, new_data])
        except Exception:
            print('exception in try loop')
        page_number += 1
    print('All pages collected')
    result = doaj_clean(result)
    return result
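# A sturdier alternative for pulling the page count out of a "last page" URL,
# sketched with the standard library only; the query parameter name "page"
# matches the URL format doaj_search expects.
from urllib.parse import urlparse, parse_qs

def page_count_from_url(last_page_url):
    # e.g. '...?sort=created_date&page=12' -> 12; default to a single page.
    query = parse_qs(urlparse(last_page_url).query)
    return int(query.get('page', ['1'])[0])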
def datacite_search(date):
    total_pages = 1
    page_number = 1
    date_range = date + '%20TO%20' + date
    print('Calling Datacite Api for Entries Created on ' + str(date))
    first_response = Gf.api_call(datacite_date_1st_call(datacite_date(date)))
    url = first_response.json()["links"]["next"]
    total_pages = first_response.json()["meta"]["totalPages"]
    total_docs = first_response.json()["meta"]["total"]
    print('Total of ' + str(total_pages) + ' pages to collect, for '
          + str(total_docs) + ' total docs')
    print('Storing page ' + str(page_number) + '/' + str(total_pages))
    result = pd.json_normalize(first_response.json()["data"])
    # Strip punctuation from string cells (iterating the frame directly would
    # only visit column names).
    result = result.map(lambda v: ''.join(c for c in v if c not in string.punctuation)
                        if isinstance(v, str) else v)
    page_number += 1
    while 1 < page_number <= total_pages:
        print('Calling for page ' + str(page_number))
        try:
            loop_response = Gf.api_call(url)
            url = loop_response.json()["links"]["next"]
            print('Storing page ' + str(page_number) + '/' + str(total_pages))
            new_data = pd.json_normalize(loop_response.json()["data"])
            new_data = new_data.map(lambda v: ''.join(c for c in v if c not in string.punctuation)
                                    if isinstance(v, str) else v)
            result = pd.concat([result, new_data])
        except KeyError:
            # A missing "next" link means the final page has been fetched,
            # so stop rather than re-calling a stale URL.
            print('All pages collected')
            break
        page_number += 1
    result = datacite_clean(result)
    return result
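# The punctuation cleaning inlined in both search functions, factored out as a
# sketch: strips punctuation from every string cell of a DataFrame and leaves
# other values untouched. DataFrame.map needs pandas >= 2.1; older versions
# spell it applymap.
import string
import pandas as pd

def strip_punctuation(df):
    table = str.maketrans('', '', string.punctuation)
    return df.map(lambda v: v.translate(table) if isinstance(v, str) else v)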
def build_df_descriptives(df, Startdate, Enddate, metric):
    df = gen.filter_df_by_date(df, Startdate, Enddate)
    journey_descriptives = JourneyDescriptives(df, metric).descriptives_df
    metric_descriptives = MetricDescriptives(df[metric]).descriptives_df
    combined_descriptives = pd.concat([metric_descriptives, journey_descriptives])
    return combined_descriptives
def create_avgDf(trainDf, vehDf, non_zero=False, predicted_only=False):
    avg_frames = {}
    for Df, name in zip([trainDf, vehDf], ['Train', 'Vehicle']):
        if name == 'Train':
            grouping = [
                'RouteSignature',
                pd.Grouper(level='tiplocIndex'),
                'MatchedDepartureTime'
            ]
        else:
            grouping = [
                'RouteSignature',
                pd.Grouper(level='tiplocIndex'),
                'MatchedDepartureTime',
                pd.Grouper(level='sequence')
            ]
        if non_zero:
            Df = gen.remove_zeros(Df, 'loadweigh.kg')
            Df = gen.remove_nan(Df, 'loadweigh.kg')
        if predicted_only and 'prediction' in Df.columns:
            Df = gen.remove_nan(Df, 'prediction')
        groups = Df.groupby(grouping, sort=False)
        averages = groups['loadweigh.kg'].transform('mean')
        counts = groups['loadweigh.kg'].transform('count')
        error = groups['loadweigh.kg'].transform('sem')
        avg_frames[name] = pd.DataFrame()
        avg_frames[name]['loadweigh.kg'] = Df['loadweigh.kg']
        # avg_frames[name]['Group Key'] = groups
        avg_frames[name]['AVG Group Loadweigh'] = averages
        avg_frames[name]['Group Counts'] = counts
        avg_frames[name]['from_avg'] = abs(
            avg_frames[name]['loadweigh.kg']
            - avg_frames[name]['AVG Group Loadweigh'])
        avg_frames[name]['Dataset RMSE'] = rmse(
            avg_frames[name]['AVG Group Loadweigh'],
            avg_frames[name]['loadweigh.kg'])
        avg_frames[name]['Grouped Error'] = error
        if 'prediction' in Df.columns:
            avg_frames[name]['prediction'] = Df['prediction']
    return avg_frames
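# A minimal sketch of the rmse helper create_avgDf relies on, assuming the usual
# root-mean-square-error definition; the project's own version is not shown.
import numpy as np

def rmse(predicted, actual):
    # Square the residuals, average them, and take the root.
    return np.sqrt(np.mean((np.asarray(predicted) - np.asarray(actual)) ** 2))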
def compute_held(self):
    # Sum the incoming signals, then squash the total through the sigmoid.
    total = sum(self.incoming)
    self.held = gf.sigmoid(total)
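# A hedged sketch of the gf.sigmoid helper used above, assuming it is the
# standard logistic function; the project's real implementation is not shown.
import math

def sigmoid(x):
    # Logistic activation: maps any real input into the open interval (0, 1).
    return 1.0 / (1.0 + math.exp(-x))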
def cr_search(date):
    import math

    print('Calling CR Api for Entries Created on ' + str(date))
    loop = 0
    repeat = 0
    result = pd.DataFrame()
    while loop <= repeat:
        response = Gf.api_call(cr_url(date, loop))
        if loop == 0:
            total_docs = response.json()["message"]["total-results"]
            print('Total docs to collect: ' + str(total_docs))
            if total_docs > 1000:
                repeat = math.ceil(total_docs / 1000)
                if repeat > 10:
                    print('Implement cursor')
                print('Collecting in batches of 1000, therefore '
                      + str(repeat - 1) + ' cycles remaining')
        print('Processing Loop: ' + str(loop))
        # Append this batch once; the original normalised the first page twice.
        new_data = pd.json_normalize(response.json()["message"]["items"])
        result = pd.concat([result, new_data])
        loop += 1
    print('Main Done')
    print(result.abstract)

    # Find the rows whose abstract is missing, then backfill the first ten of
    # them from their DOIs. Write through iloc so the assignment hits the
    # frame itself rather than a possible copy.
    list_empty_values = np.where(result.abstract.fillna('').to_numpy() == '')[0]
    print(list_empty_values)
    abstract_col = result.columns.get_loc('abstract')
    for loop in range(min(10, len(list_empty_values))):
        print('Populating abstract of document number ' + str(loop))
        row = list_empty_values[loop]
        result.iloc[row, abstract_col] = Gf.doi_to_abstract(result.DOI.iat[row])
    print('10 Cleaning Complete')
    result.to_csv('pidgon.csv')
    return result
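# A hedged sketch of what Gf.doi_to_abstract might look like: fetch one work
# from the public Crossref REST API and return its abstract when one has been
# deposited. The project's real helper is not shown, so treat the endpoint and
# fallback behaviour here as assumptions.
import requests

def doi_to_abstract(doi):
    response = requests.get('https://api.crossref.org/works/' + doi, timeout=30)
    response.raise_for_status()
    # "abstract" is optional in Crossref metadata; fall back to an empty string.
    return response.json()["message"].get("abstract", '')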
OR = []  # assumed initialisation; the snippet starts mid-script and appends to OR below
rawOR = csv.reader(orfile)
for row in rawOR:
    OR.append(m.Movie(row[0], float(row[1]), float(row[3]), "",
                      float(row[2]), float(row[3])))

input_layer = n.NeuronCol(3)
output_layer = n.NeuronCol(1)
input_layer.connect(output_layer.neurons)

errors = []
avg = []
for i in range(100):
    for movie in OR:
        # Forward pass: load the movie's features, fire the input layer, and
        # let the output neuron accumulate and squash its incoming signal.
        gf.initialize(input_layer, movie)
        for neuron in input_layer.neurons:
            neuron.fire()
        for neuron in output_layer.neurons:
            neuron.compute_held()
        output_layer.neurons[0].compute_j(movie)
        # calculate output error
        output_layer.output_error(movie)
        # save the error
        errors.append(output_layer.neurons[0].j)
        for neuron in input_layer.neurons:
            neuron.adjust_weights()
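# A hedged sketch (not in the source): the unused `avg` list suggests a mean
# error per training pass was intended. This assumes `errors` gains one entry
# per movie per pass, as in the loop above, e.g. avg = mean_error_per_pass(errors).
def mean_error_per_pass(errors, passes=100):
    per_pass = len(errors) // passes
    return [sum(errors[p * per_pass:(p + 1) * per_pass]) / per_pass
            for p in range(passes)]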
import General_Functions as Gf

Gf.collect_data()
    if enddate is None:
        enddate = datetime.date.today()
    if descriptives_df is None:
        descriptives_df = build_df_descriptives(df, startdate, enddate, metric)
        descriptives_df.rename(columns={'value': period}, inplace=True)
    else:
        descriptives_df[period] = build_df_descriptives(df, startdate, enddate, metric)['value']
    descriptives[metric] = descriptives_df
    return descriptives

#----------------------------------------------------------------------------------------------------------------------

diagnostic_log = DiagnosticLog.buildDiagnosticLog(config)
# data_set = DataSetProcessing.DataSet(diagnostic_log)
# data_set.loadDataFramesFromFile(datafile)
trainjournDf, vehjournDf = gen.build_frames_from_file(datafile)
diagnostic_log.writeEntry(7, 'Vehicle and Journey Dataframes created from data set',
                          'Created Dataframes')
vehicle_descriptives = get_all_descriptives(config, vehjournDf)
diagnostic_log.writeEntry(7, 'Vehicle Descriptive Dataframe created',
                          'Created Dataframes')
train_descriptives = get_all_descriptives(config, trainjournDf)
diagnostic_log.writeEntry(7, 'Train Descriptive Dataframe created',
                          'Created Dataframes')
    plt.legend()
    if save:
        # Build the path once, with consistently escaped backslashes; the
        # original mixed single and double separators in the Windows path.
        save_path = ('C:\\Users\\lwb1u18\\Internship\\Analytics Results\\Plots\\WeekdayPlots\\'
                     + station + '\\' + station + '.png')
        plt.savefig(save_path)
        logging.writeEntry(5, 'Multiday plot Saved', save_path)
    plt.show()

#---------------------------------------------------------------------------------------------------------------------------

logging = DiagnosticLog.buildDiagnosticLog(config)
trainjournDf, vehjournDf = gen.build_frames_from_file(filepath)
logging.writeEntry(7, 'Created Dataframes', 'Vehicle and Journey Dataframes Created')

plots = {}
for station in config['stations']:
    plots[station] = trainjournDf.loc[trainjournDf['tiploc'] == station]
    if station == 'GTWK':
        # Gatwick is split into directional frames, then the combined frame is dropped.
        plots['northbound' + station] = plots[station].loc[
            plots[station]['northbound'] == True]
        plots['southbound' + station] = plots[station].loc[
            plots[station]['northbound'] == False]
        del plots[station]

# plot_loadweigh(plots['VICTRIC'], 'London Victoria line', )
# plot_loadweigh(plots['BRGHTN'], 'Brighton line', )
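# A hedged sketch: building the save path with pathlib sidesteps the backslash
# escaping fixed above; the directory names are the same ones the script uses.
from pathlib import Path

def weekday_plot_path(station):
    base = Path(r'C:\Users\lwb1u18\Internship\Analytics Results\Plots\WeekdayPlots')
    return base / station / (station + '.png')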