def download_image(url, dest, timeout=30):
    """Download a single image from *url* into the *dest* directory.

    The local file name is taken from the last path segment of the URL.

    :param url: direct link to the image resource
    :param dest: destination directory (assumed to already exist)
    :param timeout: seconds to wait for the HTTP response (default 30);
        without it a stalled server would hang the calling worker forever
    """
    logging.info('Downloading {} into {}'.format(url, dest))
    dest = um.join_paths(dest, url.split('/')[-1])
    response = requests.get(url, timeout=timeout)
    # Only persist payloads that the helper recognizes as image content.
    if um.is_image_response(response):
        with open(dest, 'wb') as f:
            f.write(response.content)
def download_image(url, dest):
    """Download a single image from *url* into the *dest* directory.

    The local file name is taken from the last path segment of the URL.

    :param url: direct link to the image resource
    :param dest: destination directory (assumed to already exist)
    """
    logging.info('Downloading {} into {}'.format(url, dest))
    dest = um.join_paths(dest, url.split('/')[-1])
    response = requests.get(url)
    # Only persist payloads that the helper recognizes as image content.
    if um.is_image_response(response):
        with open(dest, 'wb') as f:
            f.write(response.content)


if __name__ == '__main__':
    parser = um.get_cli_parser()
    parser.add_option("-f", "--file", dest="filename",
                      default=um.join_paths(um.get_script_path(), 'links'),
                      help="data file with urls")
    parser.add_option("-d", "--dest", dest="dest",
                      default=um.join_paths(um.get_script_path(), 'images'),
                      help="destination folder")
    opts = um.parse_cli_options(parser)
    um.set_logging()

    # Create destination path if it doesn't exist.
    um.make_path(opts.dest)

    # Downloads are I/O- and decode-heavy, so fan out across one process per core.
    with cnc.ProcessPoolExecutor(max_workers=mp.cpu_count()) as executor:
        with open(opts.filename) as f:
            for line in f:
                # strip() removes the newline plus any stray '\r' or spaces;
                # blank lines would otherwise be submitted as empty-URL tasks.
                url = line.strip()
                if url:
                    executor.submit(download_image, url, opts.dest)
from datetime import datetime

# sklearn.grid_search was removed in scikit-learn 0.20; the class now lives
# in sklearn.model_selection (same public interface).
from sklearn.model_selection import GridSearchCV

__author__ = "Denys Sobchyshak"
__email__ = "*****@*****.**"

# setting up constants
outlier_share = 0.1
validation_share = 0.1

# operational settings
ut.set_logging()
parser = ut.get_cli_parser()
parser.add_option("-f", "--file", dest="filename",
                  default=ut.join_paths(ut.get_script_path(), 'data', 'day.csv'),
                  help="data file with urls")
opts = ut.parse_cli_options(parser)
data_path = opts.filename
# The hourly dataset carries an extra 'hr' column; detect it from the file name.
is_hour_data = 'hour' in data_path

# loading data
# NOTE: pd.datetime was removed in pandas 2.0 — use datetime.datetime directly.
data = pd.read_csv(data_path, parse_dates=[1], index_col=1,
                   date_parser=lambda x: datetime.strptime(x, "%Y-%m-%d"))

logging.info('Cleaning and normalizing the feature space')
# we have no need in an index feature
features = data.drop(['instant'], axis=1)
fields_to_encode = ['season', 'yr', 'mnth', 'weekday', 'weathersit']
:param dest: """ logging.info('Downloading {} into {}'.format(url, dest)) dest = um.join_paths(dest, url.split('/')[-1]) response = requests.get(url) if um.is_image_response(response): with open(dest, 'wb') as f: f.write(response.content) if __name__ == '__main__': parser = um.get_cli_parser() parser.add_option("-f", "--file", dest="filename", default=um.join_paths(um.get_script_path(), 'links'), help="data file with urls") parser.add_option("-d", "--dest", dest="dest", default=um.join_paths(um.get_script_path(), 'images'), help="destination folder") opts = um.parse_cli_options(parser) um.set_logging() # create destination path if it doesn't exist um.make_path(opts.dest) with cnc.ProcessPoolExecutor(max_workers=mp.cpu_count()) as executor: with open(opts.filename) as f: for line in f:
from datetime import datetime

from sklearn import linear_model
# sklearn.grid_search was removed in scikit-learn 0.20; the class now lives
# in sklearn.model_selection (same public interface).
from sklearn.model_selection import GridSearchCV

__author__ = "Denys Sobchyshak"
__email__ = "*****@*****.**"

# setting up constants
outlier_share = 0.1
validation_share = 0.1

# operational settings
ut.set_logging()
parser = ut.get_cli_parser()
parser.add_option("-f", "--file", dest="filename",
                  default=ut.join_paths(ut.get_script_path(), 'data', 'day.csv'),
                  help="data file with urls")
opts = ut.parse_cli_options(parser)
data_path = opts.filename
# The hourly dataset carries an extra 'hr' column; detect it from the file name.
is_hour_data = 'hour' in data_path

# loading data
# NOTE: pd.datetime was removed in pandas 2.0 — use datetime.datetime directly.
data = pd.read_csv(data_path, parse_dates=[1], index_col=1,
                   date_parser=lambda x: datetime.strptime(x, "%Y-%m-%d"))

logging.info('Cleaning and normalizing the feature space')
# we have no need in an index feature
features = data.drop(['instant'], axis=1)
fields_to_encode = ['season', 'yr', 'mnth', 'weekday', 'weathersit']
if is_hour_data:
    fields_to_encode.append('hr')
# One-hot encode the categorical fields in place (project helper).
mt.hot_encode(features, fields_to_encode)