def main():
    """Run every pipeline stage in order and print a completion banner.

    Calls ``mac.acquire``, ``san.analyze``, ``man.analyze`` and
    ``mre.reporting`` without arguments; their return values are ignored.
    """
    mac.acquire()
    san.analyze()
    man.analyze()
    mre.reporting()

    banner = '========================= Pipeline is complete. You may find the results in the folder ./data/results and my Tweeter account!========================='
    print(banner)
def main(country):
    """Acquire, wrangle and analyse the data for ``country``.

    A progress message is printed before each stage. The analysis result
    is computed but neither stored nor returned by this function.
    """
    print('Starting pipe line')
    raw_frame = m_acquisition.data_df()

    print('Cleaning retrieved data!')
    tidy_frame = m_wrangling.wrangle(raw_frame)

    print('Analysing data!')
    m_analysis.analyze(tidy_frame, country)
def main(args):
    """Load the project data, enrich it and analyse it.

    Reads ``args.path``, ``args.url`` and ``args.country``. Nothing is
    returned; the final filtered result is discarded by this function.
    """
    source = mac.proyect_data(args.path)

    job_ids = mwr.job_id(source)
    jobs_info = mwr.get_jobs(job_ids)
    countries = mwr.get_info(args.url)

    # Return value is not used here; the call is kept in its original
    # position in case it has side effects on ``source`` (TODO confirm).
    mwr.rural_column(source)

    combined = mwr.merged_info(source, countries, jobs_info)
    by_rural = man.analyze(combined)
    mwr.filter_country(by_rural, args.country)
def main(some_args):
    """Acquire, wrangle, analyse and plot the data, then save the figure.

    ``some_args`` is accepted but not read. ``year``, ``title`` and
    ``arguments`` are not bound inside this function, so they must be
    available from an enclosing scope when it runs.
    """
    raw = mac.acquire()
    subset = mwr.wrangle(raw, year)
    outcome = man.analyze(subset)

    figure = mre.plotting_function(outcome, title, arguments)
    mre.save_viz(figure, title)

    banner = '========================= Pipeline is complete. You may find the results in the folder ./data/results ========================='
    print(banner)
def main(path, country, unknown):
    """Run the pipeline for ``country`` and print the resulting table.

    ``path`` is handed to the acquisition step; ``country`` and
    ``unknown`` are forwarded to both the wrangling and the saving steps.
    """
    print('Starting Pipeline...')

    raw_dates = mac.acquire(path)
    clean_dates = mwr.wrangle(raw_dates, country, unknown)
    summary = man.analyze(clean_dates)
    mre.save_df(summary, country, unknown)

    print(f'The results of the country -{country}- are: ')
    print(summary)
    print('Finished Pipeline')
def main(some_args):
    """Clean and analyse the data, export it, and open the dashboard.

    Reads ``some_args.ruta`` as the export destination. A browser tab is
    opened on the local dashboard URL before ``mre.dash_report`` is
    called.
    """
    print(some_args)
    print("Starting data analysis process!")

    cleaned = mwr.clean_data()
    exported = man.analyze(cleaned)
    mre.export(exported, some_args.ruta)

    # The tab is opened first; presumably dash_report blocks while
    # serving the app (TODO confirm).
    webbrowser.open_new_tab('http://127.0.0.1:8050/')
    mre.dash_report(cleaned)

    print("Process finished!")
def main(args):
    """Gather the three data sources, merge them and print the analysis.

    Reads ``args.path``, ``args.api``, ``args.updt``, ``args.url`` and
    ``args.country``.
    """
    print('starting pipeline...\n---------------------')

    db_tables = m_acquisition.get_tables(args.path)
    api_jobs = m_acquisition.get_jobs(args.api, args.updt)
    country_codes = m_acquisition.get_countries(args.url)

    merged = m_wrangling.final_table(db_tables, api_jobs, country_codes)
    report = m_analysis.analyze(merged, args.country)

    print('\n', report)
    banner = '\n========================= Pipeline is complete. You may find the results in the folder ./data/results ========================='
    print(banner)
def main(country, job):
    """Filter the data by ``job`` and ``country``, then plot and report.

    The analysis result is indexed with ``'Age'`` for the histogram and
    passed whole to the reporting step.
    """
    raw = mac.acquire()
    relevant = mwr.transform_data(raw)
    cleaned, codes = mwr.country_name_import(relevant)

    # Narrow down by job first, then by country.
    chosen_job, uuid_key = mwr.job_data(cleaned, job)
    by_job = mwr.job_filtering(cleaned, chosen_job, uuid_key)
    by_country = man.country_filtering(by_job, country, codes)

    outcome = man.analyze(by_country, job, cleaned)
    mre.visualizing_histogram(outcome['Age'], country, job)
    mre.reporting(outcome)

    banner = '======= Pipeline is complete. You may find the results in the folder ./data/results ======='
    print(banner)
def main(arguments):
    """Run the pipeline and write the report to ``Results.csv``.

    Reads ``arguments.path``, ``arguments.unemployed`` and
    ``arguments.country``. The report is written with its ``to_csv``
    method and also printed.
    """
    raw = mac.acquire(arguments.path)
    subset = mwr.wrangle(raw, arguments.unemployed)
    outcome = man.analyze(subset)

    report = mre.reporting(outcome, arguments.country)
    report.to_csv('./data/results/Results.csv')
    print(report)

    banner = '\n\n======================| Pipeline is complete. You may find the results in the folder ./data/results |==============================\n\n'
    print(banner)
def main(country):
    """Run acquisition, wrangling, analysis and reporting for ``country``.

    The acquisition step's return value is ignored and the wrangling
    step is called without arguments.
    """
    print('Starting pipeline...')

    # Stage 1: acquisition (result not captured).
    m_acquisition.acquisition()

    # Stage 2: wrangling.
    wrangled = m_wrangling.wrangling()

    # Stage 3: analysis for the requested country.
    analysed = m_analysis.analyze(wrangled, country)

    # Stage 4: reporting.
    m_reporting.reporting(analysed, country)

    print('pipeline finished...')
def main(arg1, arg2):
    """Build the main dataframe, analyse it and save the filtered table.

    Args:
        arg1: Forwarded to the three ``mac.acquire_*`` functions.
        arg2: Forwarded to ``man.filter_country`` as its first argument.

    The filtered table is written to ``data/results/final_table.csv``.
    The confirmation message is printed only after that write has
    returned, so it is never shown when filtering or writing fails.
    """
    data_personal_info = mac.acquire_personal_info(arg1)
    print('A dataframe with personal info was created')
    clean_gender = mwr.wrangle(data_personal_info)

    country_names = mac.fetch_country()
    print('A dataframe with country info was created')

    career_info = mac.acquire_career_info(arg1)
    print('A dataframe with career info was created')

    job_titles = mac.fetch_job_titles(career_info)
    print('Job titles successfully retrieved')

    country_info = mac.acquire_country_info(arg1)
    main_df = mwr.merge_dfs(country_info, career_info, clean_gender,
                            job_titles, country_names)
    print('Main dataframe retreived')

    final_table = man.analyze(main_df)
    table_country = man.filter_country(arg2, final_table)
    table_country.to_csv(r'data/results/final_table.csv',
                         index=False,
                         header=True)

    # Report success only once the CSV has actually been written.
    print(
        'Final table with the results has been created in /data/results folder'
    )
def main(args):
    """Produce the three reports for ``args.country`` and distribute them.

    Each ``mre.save_csv`` return value is collected in a list that is
    then handed to ``mre.send_email`` and ``mre.upload_to_website``.
    """
    print('==== Starting Pipeline ====')

    # --- Challenge 1: main country report ---
    sql_rows = mac.get_data_from_sql()
    with_country = mac.get_country_name(sql_rows)
    with_jobs = mac.get_normalized_job_title(with_country)
    country_summary = man.analyze(with_jobs, args.country)
    saved_reports = [mre.save_csv(country_summary, args.country)]

    # --- Bonus 1: poll information ---
    poll_rows = mac.get_poll_info()
    poll_summary = man.get_poll_resume(poll_rows, args.country)
    saved_reports.append(mre.save_csv(poll_summary, f'Poll{args.country}'))

    # --- Bonus 2: skills by education ---
    skills = mac.get_skills(args.country)
    skills_by_education = man.get_skills_by_education(skills)
    saved_reports.append(
        mre.save_csv(skills_by_education, f'Skills{args.country}'))

    # Distribute everything that was saved: e-mail first, then website.
    mre.send_email(saved_reports)
    mre.upload_to_website(saved_reports)

    banner = '==== Pipeline is complete. You may find the results in the folder ./data/results ===='
    print(banner)
def dash_report(df):
    """Serve a Dash page showing the analysed data as a filterable table.

    ``df`` is first passed through ``man.analyze``. The result is read
    through the columns ``Job_title``, ``Country``, ``Age_group``,
    ``Quantity`` and ``Percentage``. Three dropdowns select the job
    title, country and age group; the table shows the matching rows.

    Returns the value of ``app.run_server(debug=True, use_reloader=False)``.
    """
    analysed = man.analyze(df)
    app = dash.Dash(__name__)

    def _choices(column):
        """Build dropdown options from the distinct values of ``column``."""
        return [{
            'label': item,
            'value': item
        } for item in analysed[column].unique()]

    app.layout = html.Div([
        html.H1("Challenge 1", style={'text-align': 'center'}),
        html.Label('Job Title'),
        dcc.Dropdown(
            id='select_job',
            options=_choices('Job_title'),
            value=
            'geographic information systems data administrator gis data administrator'
        ),
        html.Br(),
        html.Label('Country'),
        dcc.Dropdown(id='select_country',
                     options=_choices('Country'),
                     value='Spain'),
        html.Br(),
        html.Label('Age Group'),
        dcc.Dropdown(
            id='select_age_group',
            options=_choices('Age_group'),
            value='14_25',
        ),
        dcc.Graph(id='my_table', figure={})
    ])

    table_output = Output(component_id='my_table',
                          component_property='figure')
    dropdown_inputs = [
        Input(component_id='select_job', component_property='value'),
        Input(component_id='select_country', component_property='value'),
        Input(component_id='select_age_group', component_property='value')
    ]

    @app.callback(table_output, dropdown_inputs)
    def update_graph(job_slctd, country_slctd, age_slctd):
        """Rebuild the table figure for the current dropdown selection."""
        snapshot = analysed.copy()
        matches = ((snapshot['Job_title'] == job_slctd)
                   & (snapshot['Country'] == country_slctd)
                   & (snapshot['Age_group'] == age_slctd))
        selected = snapshot[matches]

        header = dict(
            values=list(selected.columns),
            fill_color='paleturquoise',
            align=['left', 'center'],
            height=40,
        )
        cells = dict(
            values=[
                selected.Country, selected.Age_group, selected.Job_title,
                selected.Quantity, selected.Percentage
            ],
            fill_color='lavender',
            align=['left', 'center'],
            height=30,
        )

        fig = go.Figure(data=[go.Table(header=header, cells=cells)])
        fig.update_layout()
        return fig

    return app.run_server(debug=True, use_reloader=False)
def main(arguments):
    """Acquire and wrangle the data, then return its analysis.

    Reads ``arguments.country`` and forwards it to ``man.analyze``; the
    value returned by that call is returned unchanged.
    """
    raw_rural = mac.acquire()
    processed = mwr.wrangling(raw_rural)
    return man.analyze(processed, arguments.country)
def main(scrape, download, model):
    """Run the acquire, wrangle and analyse stages between two log lines.

    ``scrape`` goes to the acquisition and wrangling stages, ``download``
    to the wrangling stage and ``model`` to the analysis stage. All
    return values are ignored.
    """
    print('Starting Pipeline...')

    mac.acquire(scrape)
    mwr.wrangle(scrape, download)
    man.analyze(model)

    print('Finished Pipeline')