Example #1
def GetAnalytics(vehDf, traDf, Startdate, Enddate, metric):
    # restrict both frames to the requested date window
    vehDf = gen.filter_df_by_date(vehDf, Startdate, Enddate)
    traDf = gen.filter_df_by_date(traDf, Startdate, Enddate)

    # journey-level and metric-level descriptive statistics for the vehicle frame
    journey_descriptives = JourneyDescriptives(vehDf, metric).descriptives_df
    metric_descriptives = MetricDescriptives(vehDf[metric]).descriptives_df
    all_descriptives = pd.concat([metric_descriptives, journey_descriptives])

    return all_descriptives
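# gen.filter_df_by_date is project-specific and not shown here. A minimal
# pandas-only sketch of the same idea, assuming the frame has a DatetimeIndex
# (the column and dates below are illustrative):
import pandas as pd

def filter_df_by_date(df, start, end):
    # .loc slicing on a sorted DatetimeIndex is inclusive of both ends
    return df.loc[pd.Timestamp(start):pd.Timestamp(end)]

# usage: keep only January 2020 rows
example = pd.DataFrame(
    {'loadweigh.kg': [120.0, 80.0, 95.0]},
    index=pd.to_datetime(['2019-12-31', '2020-01-15', '2020-02-01']))
january = filter_df_by_date(example, '2020-01-01', '2020-01-31')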
Example #2
def doaj_search(date):
    total_pages = 1
    page_number = 1

    if page_number == 1:
        print('Calling DOAJ Api for Entries Created on ' + str(date))

        first_response = Gf.api_call(doaj_url(doaj_date(date)))

        # pagination keys are absent when everything fits on a single page
        try:
            url = first_response.json()["next"]
            last_page_url = str(first_response.json()["last"])
            # take the whole page number after "page=", not just its first digit
            total_pages = int(last_page_url.split("page=")[-1].split("&")[0])
        except KeyError:
            url = None
            print(' Only one page to collect')
        total_docs = first_response.json()["total"]

        if total_pages != 1:
            print(' Total of ' + str(total_pages) + ' pages to collect, for ' +
                  str(total_docs) + ' total docs')

        print('   Storing page ' + str(page_number) + '/' + str(total_pages))

        # print(pd.json_normalize(first_response.json()["data"]))

        result = pd.json_normalize(first_response.json()["results"])
        # strip punctuation from every string cell; iterating over the DataFrame
        # directly would only yield its column names
        result = result.applymap(
            lambda v: ''.join(c for c in v if c not in string.punctuation)
            if isinstance(v, str) else v)
        page_number += 1

    while 1 < page_number <= total_pages:
        print('   Calling for page ' + str(page_number))
        try:
            loop_response = Gf.api_call(url)
            url = loop_response.json().get("next")
            print('   Storing page ' + str(page_number) + '/' +
                  str(total_pages))
            new_data = pd.json_normalize(loop_response.json()["results"])
            new_data = new_data.applymap(
                lambda v: ''.join(c for c in v if c not in string.punctuation)
                if isinstance(v, str) else v)
            result = pd.concat([result, new_data])

        except Exception as error:
            print('   Exception while collecting page ' + str(page_number) +
                  ': ' + str(error))

        page_number += 1

    print('All pages collected')

    result = doaj_clean(result)

    return result
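# doaj_search follows the API's "next" link until every page is stored. A small
# self-contained sketch of that pattern with requests; the endpoint URL is
# illustrative, and the "next"/"results" keys mirror the response shape used above:
import requests
import pandas as pd

def collect_all_pages(start_url):
    frames = []
    url = start_url
    while url:
        payload = requests.get(url, timeout=30).json()
        frames.append(pd.json_normalize(payload["results"]))
        url = payload.get("next")   # absent on the last page, which ends the loop
    return pd.concat(frames, ignore_index=True)

# usage (hypothetical endpoint):
# articles = collect_all_pages('https://doaj.org/api/search/articles/example?page=1')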
Example #3
def datacite_search(date):
    total_pages = 1
    page_number = 1

    date_range = date + '%20TO%20' + date

    if page_number == 1:
        print('Calling Datacite Api for Entries Created on ' + str(date))

        first_response = Gf.api_call(
            datacite_date_1st_call(datacite_date(date)))

        url = first_response.json()["links"].get("next")

        total_pages = first_response.json()["meta"]["totalPages"]
        total_docs = first_response.json()["meta"]["total"]
        print('Total of ' + str(total_pages) + ' pages to collect, for ' +
              str(total_docs) + ' total docs')

        print('Storing page ' + str(page_number) + '/' + str(total_pages))

        # print(pd.json_normalize(first_response.json()["data"]))

        result = pd.json_normalize(first_response.json()["data"])
        # strip punctuation from every string cell, keeping the DataFrame structure
        result = result.applymap(
            lambda v: ''.join(c for c in v if c not in string.punctuation)
            if isinstance(v, str) else v)
        page_number += 1

    while 1 < page_number <= total_pages:
        print('Calling for page ' + str(page_number))
        loop_response = Gf.api_call(url)
        print('Storing page ' + str(page_number) + '/' + str(total_pages))
        new_data = pd.json_normalize(loop_response.json()["data"])
        new_data = new_data.applymap(
            lambda v: ''.join(c for c in v if c not in string.punctuation)
            if isinstance(v, str) else v)
        result = pd.concat([result, new_data])

        # the last page has no "next" link; stop cleanly instead of losing its data
        url = loop_response.json()["links"].get("next")
        if url is None:
            print('All pages collected')

        page_number += 1

    result = datacite_clean(result)

    return result
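# Both search functions above strip punctuation from the text fields they collect.
# A self-contained sketch of that cleaning step on its own (column names are illustrative):
import string
import pandas as pd

def strip_punctuation(df):
    # apply the character filter only to string cells; leave numbers, lists and NaN untouched
    return df.applymap(
        lambda v: ''.join(c for c in v if c not in string.punctuation)
        if isinstance(v, str) else v)

cleaned = strip_punctuation(pd.DataFrame({'title': ['Open-access, FTW!'], 'year': [2020]}))
# cleaned.title -> 'Openaccess FTW'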
Example #4
def build_df_descriptives(df, Startdate, Enddate, metric):
    df = gen.filter_df_by_date(df, Startdate, Enddate)

    journey_descriptives = JourneyDescriptives(df, metric).descriptives_df
    metric_descriptives = MetricDescriptives(df[metric]).descriptives_df
    combined_descriptives = pd.concat([metric_descriptives, journey_descriptives])

    return combined_descriptives
Example #5
def create_avgDf(trainDf, vehDf, non_zero=False, predicted_only=False):
    avg_frames = {}
    for Df, name in zip([trainDf, vehDf], ['Train', 'Vehicle']):
        if name == 'Train':
            grouping = [
                'RouteSignature',
                pd.Grouper(level='tiplocIndex'), 'MatchedDepartureTime'
            ]
        else:
            grouping = [
                'RouteSignature',
                pd.Grouper(level='tiplocIndex'), 'MatchedDepartureTime',
                pd.Grouper(level='sequence')
            ]

        if non_zero:
            Df = gen.remove_zeros(Df, 'loadweigh.kg')
            Df = gen.remove_nan(Df, 'loadweigh.kg')

        if predicted_only and 'prediction' in Df.columns:
            Df = gen.remove_nan(Df, 'prediction')

        groups = Df.groupby(grouping, sort=False)
        averages = groups['loadweigh.kg'].transform('mean')
        counts = groups['loadweigh.kg'].transform('count')
        error = groups['loadweigh.kg'].transform('sem')

        avg_frames[name] = pd.DataFrame()
        avg_frames[name]['loadweigh.kg'] = Df['loadweigh.kg']
        #        avg_frames[name]['Group Key'] = groups
        avg_frames[name]['AVG Group Loadweigh'] = averages
        avg_frames[name]['Group Counts'] = counts
        avg_frames[name]['from_avg'] = abs(
            avg_frames[name]['loadweigh.kg'] -
            avg_frames[name]['AVG Group Loadweigh'])
        avg_frames[name]['Dataset RMSE'] = rmse(
            avg_frames[name]['AVG Group Loadweigh'],
            avg_frames[name]['loadweigh.kg'])
        avg_frames[name]['Grouped Error'] = error
        if 'prediction' in Df.columns:
            avg_frames[name]['prediction'] = Df['prediction']

    return avg_frames
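# create_avgDf relies on groupby(...).transform to give every row its group's
# mean, count and standard error. A self-contained sketch of that pattern
# (group and value columns are illustrative):
import pandas as pd

df = pd.DataFrame({
    'RouteSignature': ['A', 'A', 'A', 'B', 'B'],
    'loadweigh.kg':   [100.0, 120.0, 110.0, 80.0, 90.0],
})
groups = df.groupby('RouteSignature', sort=False)
df['AVG Group Loadweigh'] = groups['loadweigh.kg'].transform('mean')   # group mean, broadcast per row
df['Group Counts'] = groups['loadweigh.kg'].transform('count')         # rows in each group
df['Grouped Error'] = groups['loadweigh.kg'].transform('sem')          # standard error of the mean
df['from_avg'] = (df['loadweigh.kg'] - df['AVG Group Loadweigh']).abs()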
Example #6
def compute_held(self):
    # sum the incoming inputs and squash the total through the sigmoid activation
    total = 0
    for item in self.incoming:
        total += item
    adjusted = gf.sigmoid(total)
    self.held = adjusted
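# gf.sigmoid is project-specific; a minimal stand-in showing the usual logistic form:
import math

def sigmoid(x):
    # maps any real-valued total into the (0, 1) range
    return 1.0 / (1.0 + math.exp(-x))

# e.g. sigmoid(0) == 0.5; large positive totals approach 1, large negative totals approach 0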
Example #7
def cr_search(date):
    print('Calling CR Api for Entries Created on '+str(date))

    loop = 0
    repeat = 0
    while loop <= repeat:

        response = Gf.api_call(cr_url(date, loop))

        if loop == 0:
            total_docs = response.json()["message"]["total-results"]
            print('Total docs to collect: ' + str(total_docs))
            if total_docs > 1000:
                import math
                # batches of 1000, so the extra cycles are the page count minus the first call
                repeat = math.ceil(total_docs / 1000) - 1
                if repeat > 10:
                    print('Implement cursor')
            print('Collecting in batches of 1000, therefore ' + str(repeat) +
                  ' cycles remaining')

            result = pd.json_normalize(response.json()["message"]["items"])
        else:
            print('Processing Loop: ' + str(loop))
            new_data = pd.json_normalize(response.json()["message"]["items"])
            result = pd.concat([result, new_data])

        loop += 1

    print('Main Done')

    print(result.abstract)

    # locate rows whose abstract is missing so they can be back-filled from the DOI
    fillna = result.abstract.fillna('')
    cleaning = fillna.to_numpy()

    np_where = np.where(cleaning == '')

    list_empty_values = np_where[0]

    print(list_empty_values)

    # back-fill abstracts for at most the first ten documents that lack one
    loop = 0
    while loop < min(10, len(list_empty_values)):
        print('Populating abstract of document number ' + str(loop))
        row = list_empty_values[loop]
        result.iloc[row, result.columns.get_loc('abstract')] = Gf.doi_to_abstract(
            result['DOI'].iloc[row])

        loop += 1

    print('Cleaning of first 10 abstracts complete')

    result.to_csv('pidgon.csv')

    return result
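# The abstract back-fill above first locates rows whose abstract is missing. A
# self-contained sketch of that lookup step (the frame below is illustrative):
import numpy as np
import pandas as pd

docs = pd.DataFrame({'DOI': ['10.1/a', '10.1/b', '10.1/c'],
                     'abstract': ['Some text', None, '']})
cleaning = docs['abstract'].fillna('').to_numpy()
empty_rows = np.where(cleaning == '')[0]   # positional indices of rows with no abstract
# empty_rows -> array([1, 2])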
Example #8
# `orfile` is an already-open CSV file handle and `OR` an existing list; both are
# set up earlier in the source file and are not shown in this snippet
rawOR = csv.reader(orfile)
for row in rawOR:
    OR.append(
        m.Movie(row[0], float(row[1]), float(row[3]), "", float(row[2]),
                float(row[3])))

input_layer = n.NeuronCol(3)
output_layer = n.NeuronCol(1)

input_layer.connect(output_layer.neurons)

errors = []
avg = []
# 100 passes over the training data: forward pass, cost, then weight adjustment
for i in range(100):
    for movie in OR:
        gf.initialize(input_layer, movie)
        for neuron in input_layer.neurons:
            neuron.fire()
        for neuron in output_layer.neurons:
            neuron.compute_held()

        output_layer.neurons[0].compute_j(movie)

        # calculate output error
        output_layer.output_error(movie)

        # save the error
        errors.append(output_layer.neurons[0].j)

        for neuron in input_layer.neurons:
            neuron.adjust_weights()
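# n.NeuronCol, gf.initialize and the per-neuron methods are project-specific. A
# self-contained sketch of the training step they appear to implement (a single
# sigmoid output neuron with a squared-error gradient step; all names are illustrative):
import math
import random

def sigmoid(x):
    return 1.0 / (1.0 + math.exp(-x))

weights = [random.uniform(-1, 1) for _ in range(3)]
learning_rate = 0.1

def train_step(inputs, target):
    # forward pass: weighted sum squashed through the sigmoid
    held = sigmoid(sum(w * x for w, x in zip(weights, inputs)))
    error = target - held
    # delta rule: adjust each weight along the gradient of the squared error
    for i, x in enumerate(inputs):
        weights[i] += learning_rate * error * held * (1 - held) * x
    return error ** 2

# e.g. squared_error = train_step([0.2, 0.5, 0.8], target=1.0)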
Example #9
import General_Functions as Gf

Gf.collect_data()
Example #10
# fragment: the tail of the get_all_descriptives helper used below, which builds
# one descriptives frame per metric with one column per reporting period
            if enddate is None:
                enddate = datetime.date.today()

            if descriptives_df is None:
                descriptives_df = build_df_descriptives(df, startdate, enddate, metric)
                descriptives_df.rename(columns={'value': period}, inplace=True)
            else:
                descriptives_df[period] = build_df_descriptives(df, startdate, enddate, metric)['value']

        descriptives[metric] = descriptives_df

    return descriptives
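# The loop above collects one 'value' column per reporting period into a single
# frame, then keys the result by metric. A self-contained sketch of that
# accumulation pattern (period names and values are illustrative):
import pandas as pd

periods = {'2019': pd.Series({'mean': 101.0, 'std': 12.0}),
           '2020': pd.Series({'mean': 87.0, 'std': 15.0})}

descriptives_df = None
for period, values in periods.items():
    if descriptives_df is None:
        descriptives_df = values.to_frame(name=period)   # first period starts the frame
    else:
        descriptives_df[period] = values                  # later periods each add a column
# descriptives_df has one row per statistic and one column per period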

#----------------------------------------------------------------------------------------------------------------------
diagnostic_log = DiagnosticLog.buildDiagnosticLog(config)

#data_set = DataSetProcessing.DataSet(diagnostic_log)
#data_set.loadDataFramesFromFile(datafile)

trainjournDf, vehjournDf = gen.build_frames_from_file(datafile)
diagnostic_log.writeEntry(7, 'Vehicle and Journey Dataframes created from data set', 'Created Dataframes')

vehicle_descriptives = get_all_descriptives(config, vehjournDf)
diagnostic_log.writeEntry(7, 'Vehicle Descriptive Dataframe created', 'Created Dataframes')
train_descriptives = get_all_descriptives(config, trainjournDf)
diagnostic_log.writeEntry(7, 'Train Descriptive Dataframe created', 'Created Dataframes')



Example #11
        plt.legend()
        if save:
            plt.savefig(
                'C:\\Users\\lwb1u18\\Internship\\Analytics Results\\Plots\\WeekdayPlots\\'
                + station + '\\' + station + '.png')
            logging.writeEntry(
                5, 'Multiday plot Saved',
                'C:\\Users\\lwb1u18\\Internship\\Analytics Results\\Plots\\WeekdayPlots\\'
                + station + '\\' + station + '.png')
        plt.show()


#---------------------------------------------------------------------------------------------------------------------------
logging = DiagnosticLog.buildDiagnosticLog(config)

trainjournDf, vehjournDf = gen.build_frames_from_file(filepath)
logging.writeEntry(7, 'Created Dataframes',
                   'Vehicle and Journey Dataframes Created')

plots = {}
for station in config['stations']:
    plots[station] = trainjournDf.loc[trainjournDf['tiploc'] == station]
    if station == 'GTWK':
        plots['northbound' + station] = plots[station].loc[
            plots[station]['northbound']]
        plots['southbound' + station] = plots[station].loc[
            ~plots[station]['northbound']]
        del plots[station]

#plot_loadweigh(plots['VICTRIC'], 'London Victoria line', )
#plot_loadweigh(plots['BRGHTN'], 'Brighton line', )
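# The hard-coded Windows path in the plotting code above is easy to get wrong
# because of backslash escaping. A small sketch of the same idea with pathlib
# (the directory names are the ones used above; the helper name is illustrative):
from pathlib import Path

def plot_path(station, base=Path(r'C:\Users\lwb1u18\Internship\Analytics Results\Plots\WeekdayPlots')):
    # one sub-folder per station, file named after the station
    return base / station / (station + '.png')

# e.g. plt.savefig(plot_path('GTWK')) writes to ...\WeekdayPlots\GTWK\GTWK.png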