def maybe_download_and_extract(runs_dir):
    """Ensure pretrained MultiNet and VGG16 weights are available.

    If the default run directory is missing, create it and download/extract
    MultiNet_pretrained.zip into *runs_dir*. Then download the VGG16 weights
    into the data directory unless they are already present.

    Args:
        runs_dir: str, directory that holds (or will hold) the run folders.
    """
    logdir = os.path.join(runs_dir, default_run)
    if not os.path.exists(logdir):
        # BUG FIX: the old comment claimed "weights are downloaded. Nothing
        # to do" on this branch, but this branch is taken when the run dir is
        # MISSING — it creates the dir and fetches the pretrained weights.
        print('mkdir')
        print(logdir)
        os.makedirs(logdir)
        import zipfile
        download_name = tv_utils.download(weights_url, runs_dir)
        logging.info("Extracting MultiNet_pretrained.zip")
        zipfile.ZipFile(download_name, 'r').extractall(runs_dir)
    # from download_data.py
    import download_data as d
    data_dir, run_dir = d.get_pathes()
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    vgg_weights = os.path.join(data_dir, 'vgg16.npy')
    # Download VGG weights only when they are not already cached.
    if not os.path.exists(vgg_weights):
        # BUG FIX: removed unused `download_command` (a wget string that was
        # built but never executed); d.download() performs the download.
        logging.info("Downloading VGG weights.")
        d.download(d.vgg_url, data_dir)
    else:
        logging.warning("File: {} exists.".format(vgg_weights))
        logging.warning("Please delete to redownload VGG weights.")
    return
def download_from_meta(self, filename, subpart_data=None, n_jobs=3, chunk_size=10):
    """Download the audio files listed in a meta file (tsv).

    Args:
        filename: str, path of the meta file (tsv with a "filename" column)
            naming the audio files to download.
        subpart_data: int, number of files to use when only a subpart of the
            dataframe is wanted.
        n_jobs: int, (Default value = 3) number of parallel jobs.
        chunk_size: int, (Default value = 10) number of files to download in a chunk.
    """
    audio_dir = self.get_audio_dir_path_from_meta(filename)
    # Read the metadata and keep each audio filename only once.
    meta_df = DatasetDcase2019Task4.get_df_from_meta(filename, subpart_data)
    unique_files = meta_df.filename.drop_duplicates()
    download(unique_files, audio_dir, n_jobs=n_jobs, chunk_size=chunk_size)
def plot_gdd(stationid):
    """Plot accumulated GDD for 2013-2016 for one station as a Bokeh html file.

    Args:
        stationid: station identifier passed to the DW/GD download helpers.
    """
    Citi_name, temp = DW.download(stationid, 2014)
    gdd1 = GD.gdd_cal_accum(stationid, 2015)
    gdd2 = GD.gdd_cal_accum(stationid, 2014)
    gdd3 = GD.gdd_cal_accum(stationid, 2013)
    gdd4 = GD.gdd_cal_accum(stationid, 2016)
    figure_name = "Bokeh_GDD-Accum_{}.html".format(stationid)
    bok_title = "GDD PLOT Accum {}".format(Citi_name)
    plot = plotting.figure(title=bok_title)
    plot.xaxis.axis_label = 'Year/Time'
    plot.yaxis.axis_label = 'Accumlated GDD'
    x_data = np.linspace(1, 13, 12)
    plot.line(x_data, gdd1, legend="2015", line_color="red")
    # BUG FIX: the 2014 and 2013 line glyphs were both labelled "2015",
    # producing a wrong legend; each series now carries its own year.
    plot.line(x_data, gdd2, legend="2014", line_color="blue")
    plot.circle(x_data, gdd2, legend="2014", line_color="blue")
    plot.line(x_data, gdd3, legend="2013", line_color="green")
    plot.triangle(x_data, gdd3, legend="2013", line_color="green")
    plot.line(x_data, gdd4, legend="2016", line_color="orange")
    plotting.output_file(os.path.dirname(os.path.realpath(__file__)) + "/../plots/" + figure_name, title=bok_title)
    plotting.show(plot)
def plot_min_max(stationid, year):
    """Render a Bokeh html plot of daily min/max temperatures for a station/year."""
    # The download helper returns the city name and the renamed data table.
    Citi_name, df_rename = DW.download(stationid, year)
    max_temp = df_rename['Max Temp (°C)']
    min_temp = df_rename['Min Temp (°C)']
    # Output file and plot title are derived from the station/city.
    figure_name = "Bokeh_Min-Max_{}.html".format(stationid)
    bok_title = "Bokeh_Min-Max plot {}".format(Citi_name)
    print(Citi_name)
    plot1 = plotting.figure(title=bok_title)
    plot1.xaxis.axis_label = 'Days'
    plot1.yaxis.axis_label = 'Temperature'
    x_data = np.linspace(0, 365, 365)
    plot1.line(x_data, max_temp, legend="Max Temperature", line_color="red")
    plot1.line(x_data, min_temp, legend="Min Temperature", line_color="blue")
    plot1.circle(x_data, min_temp, line_color="blue")
    plotting.output_file(os.path.dirname(os.path.realpath(__file__)) + "/../plots/" + figure_name, title=bok_title)
    plotting.show(plot1)
def plot_min_max(stationid, year):
    """Save and show a matplotlib plot of daily min/max temperatures."""
    # The download helper returns the city name and the renamed data table.
    Citi_name, df_rename = DW.download(stationid, year)
    fig = plt.figure(num=1, figsize=(15, 6))
    days = np.linspace(0, 365, 365)
    max_temp = df_rename['Max Temp (°C)']
    min_temp = df_rename['Min Temp (°C)']
    # Output file named after the station and year.
    figure_name = "Fig_{}_{}.png".format(stationid, year)
    plt.plot(days, max_temp, label="Max Temperature")
    plt.plot(days, min_temp, label="Min Temperature")
    plt.legend(bbox_to_anchor=(1, 1), loc=1)
    plt.xticks(np.arange(0, 365, 10))
    plt.title(Citi_name, color="red", size=15)
    plt.xlabel('Days', color="green")
    plt.ylabel('Temperature', color="green")
    fig.savefig("../plots/" + figure_name)  # persist the figure to disk
    plt.show()
def gdd_cal_accum(stationid, year):
    """Return accumulated GDD totals sampled in 31-day steps across the year."""
    # City name is not needed here; only the data table is used.
    _, df_rename = DW.download(stationid, year)
    mean_temp = df_rename['Mean Temp (°C)']
    # Cumulative total from day 0 up to day 1, 32, 63, ... (12 samples).
    return [gdd_tot(mean_temp[0:i]) for i in range(1, 365, 31)]
def main():
    """Refresh all configured datasets: clear, download, load into db, then post-process."""
    names = list(config.DATA_NAMES.keys())
    download_data.clearDir(names)
    download_data.download(names)
    for name, keys in config.DATA_NAMES.items():
        # Tushare-sourced datasets are handled elsewhere — skip them here.
        if name in config.TUSHARE_DATA_NAME:
            continue
        # These two fund tables are loaded without SecID filtering.
        flt = name not in ('MktFunddAdjGet', 'FundETFConsGet')
        print(name)
        dataio.updatedb(name, keys, filterSecID=flt)
    process_raw_data.main()
    utils_io.runExtract('extractors', None)
def main():
    """Entry point: parse args, optionally fetch data from Drive, then preprocess."""
    logging.basicConfig(format=LOGGING_FORMAT)
    logger = logging.getLogger("PED")
    args = parse_args()
    logger.info(f"input config: {args}")
    # Only download when a Google Drive folder id was supplied on the CLI.
    if args.drive_folder_id:
        download_data.download(args.drive_folder_id)
    # Integrate the raw files into one dataframe, then fill missing values.
    df = preprocess.integrate_data(FILENAMES_TO_INTEGRATE)
    df = preprocess.fill_missing_values(df)
    # TODO column transformation + additional features
    # TODO grouping by video_id ?
    # TODO predict
def plot_gdd(stationid, year):
    """Save a matplotlib plot of the accumulated GDD for one station and year."""
    # Download gives the city name; GD computes the accumulated GDD series.
    Citi_name, df_rename = DW.download(stationid, year)
    accum_gdd = GD.gdd_cal_accum(stationid, year)
    figure_name = "Fig_GDD_{}.png".format(stationid)
    month_axis = np.linspace(1, 13, 12)
    fig = plt.figure(num=1, figsize=(10, 6))
    plt.title("accumulated GDD " + Citi_name)
    plt.plot(month_axis, accum_gdd, label=year)
    plt.xlabel("Year/Time")
    plt.ylabel("accumulated GDD")
    plt.legend(bbox_to_anchor=(1, 1), loc=1)
    fig.savefig("../plots/" + figure_name)  # persist the figure to disk
def gdd_cal(stationid, year):
    """Return 12 GDD totals, one per consecutive 30-day window of the year."""
    # City name is unused; only the data table matters here.
    _, df_rename = DW.download(stationid, year)
    mean_temp = df_rename['Mean Temp (°C)']
    gdd = []
    for start in range(0, 360, 30):
        # The last window runs to day 364 (not 360) to cover the year's tail,
        # matching the original hand-written slice boundaries exactly.
        end = 364 if start == 330 else start + 30
        gdd.append(gdd_tot(mean_temp[start:end]))
    return gdd
# Module-level logger for the course bootstrap script.
# NOTE(review): no logging handler/level is configured in this view, so these
# info messages may not appear unless logging is configured elsewhere — verify.
logger = logging.getLogger("CourseHandler")

if __name__ == "__main__":
    logger.info("Welcome to '%s' by %s", "WorkShop BDA", "Pascal Fares")
    logger.info("Course Version: %s", "0.0")
    logger.info("Course Name: %s", "Mastering Big Data Analytics with PySpark [Machine Learning & Data Mining Workshop]")
    logger.info("1. Step 1 : Install Java ")
    # Fails (CalledProcessError) if java is missing — acts as a Java check.
    # NOTE(review): `java --version` (double dash) is only accepted by Java 9+;
    # older JREs expect `java -version` — confirm the targeted Java version.
    subprocess.check_call(['java','--version'])
    logger.info("Installing spark, needed packages and pyspark")
    # Install course requirements into the current interpreter's environment.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", '-r', 'requirement_me.txt'])
    logger.info("And Downloading the data")
    download_spark()
    download()
    # Set up the course launch pyspark
    logger.info("Set up environment of the course and launch pyspark with jupyter lab")
    HOME_PATH = Path(os.environ['HOME'])
    REPO_PATH = HOME_PATH / 'DataMiningSpark'
    BASE_DIR = REPO_PATH
    spark_home = 'sparkhome/spark-3.0.1-bin-hadoop2.7'
    data_examples = spark_home + "/data"
    data_sets = 'data-sets'
    # Environment variables consumed by Spark/PySpark at launch time.
    os.environ['SPARK_LOCAL_IP'] = '127.0.0.1'
    os.environ['SPARK_HOME'] = str(BASE_DIR / spark_home)
    os.environ['DATA_SETS'] = str(BASE_DIR / data_sets)
    os.environ['DATA_EXAMPLES'] = str(BASE_DIR / data_examples)
    print(os.environ['SPARK_HOME'])
    # Make pyspark start inside a jupyter driver.
    os.environ['PYSPARK_DRIVER_PYTHON'] = 'jupyter'
from datetime import datetime
import logist
import recommend
import download_data
import requests
import random
import json

# Flask application setup. (Flask, GoogleMaps, request and session are
# referenced below but imported outside this view — presumably at file top.)
app = Flask(__name__)
app.debug = True
app.config['SECRET_KEY'] = 'secret_key'
# NOTE(review): hard-coded Google Maps API key committed to source — move it
# to an environment variable / secret store and rotate the key.
app.config['GOOGLEMAPS_KEY'] = "AIzaSyB0_0YF6CqoeTD5EpRBYBLPoeMYRXvjfk8"
GoogleMaps(app)

# Shared service objects used by the route handlers.
recom = recommend.Recommender()
logist = logist.Logistc()
down = download_data.download()
# firebase = firebase.FirebaseApplication('https://foodie-yelp.firebaseio.com', None)
firebase_ = down.firebase1

@app.route('/login', methods=['GET', 'POST'])
def login():
    # Handle a submitted login form; GET requests fall through (the rest of
    # this handler appears to continue beyond this chunk).
    if request.method == 'POST':
        username = request.form['username']
        password = request.form['password']
        # Fetch all users from Firebase and compare the submitted password
        # with the stored password fragments joined into one string.
        # NOTE(review): this compares plaintext passwords — consider hashing.
        users = firebase_.get('/users', None)
        if (username in users and password == ''.join(users[username]['password'].values())):
            session['logged_in'] = True
            session['username'] = username
from os import listdir, remove
from os.path import isfile, join
from download_data import download
import glob
import sys

# Directory holding the per-ticker csv files.
dataDir = "data"
csvFiles = [f for f in listdir(dataDir) if isfile(join(dataDir, f))]
# Ticker symbol is the part of the file name before the first underscore.
tickers = [f.split("_")[0] for f in csvFiles]
tickers = list(set(tickers))  # unique
for ticker in tickers:
    # Remove every stale csv for this ticker, then re-download fresh data.
    fileList = glob.glob(join(dataDir, ticker + '*.csv'))
    for filePath in fileList:
        try:
            remove(filePath)
        except OSError:
            # BUG FIX: was a bare `except:`, which also swallows
            # KeyboardInterrupt/SystemExit; removal failures raise OSError.
            print("Error while deleting file: ", filePath, ". :", sys.exc_info())
    download(ticker)