def index(request):
    # Handle a CSV file upload.
    if request.method == 'POST' and 'csvFile' in request.FILES:
        csv = request.FILES['csvFile']
        csv_file_name = str(request.FILES['csvFile'])
        parsed_csv = parse_csv(csv)
        if parsed_csv == 'Not a CSV':
            return JsonResponse({"notCsv": "true"})
        header = parsed_csv.pop(0)
        response_data = create_response(parsed_csv, header, False)
        context = {"html": response_data['html'],
                   "data": response_data["unique_mpn"]}
        return JsonResponse(context)
    # Handle raw HTML posted from the client.
    elif len(request.POST.getlist('rawHTML')) != 0:
        html = request.POST.getlist("rawHTML")[0]
        parsed_html = parse_html(html)
        head = [value.get_text() for value in parsed_html['header']]
        table = get_html_text(parsed_html['rows'])
        if request.POST.getlist('sort')[0] == "false":
            file_name = request.POST.getlist("filename")[0]
            for row in table[2:-1]:
                Build_Data.objects.create(
                    file_name=file_name, designator=row[0], footprint=row[1],
                    mid_x=row[2], mid_y=row[3], ref_x=row[4], ref_y=row[5],
                    pad_y=row[6], pad_x=row[7], layer=row[8],
                    rotation=row[9], comment=row[10])
            return HttpResponse("")
        else:
            response_data = create_response(table, head, True)
            context = {"html": response_data['html'],
                       "data": response_data["unique_mpn"]}
            return JsonResponse(context)
    return render(request, 'home.html')
def main():
    args = get_args()
    utils.configure_logging(verbose=args.verbose, debug=args.debug,
                            error=args.error)
    session = http_session.FingertipsSession()

    # Get data
    if args.indicator_id:
        lines = objects.Data.by_indicator_id(
            session, indicator_ids={args.indicator_id},
            child_area_type_id=args.area_type_id,
            parent_area_type_id=args.parent_area_type_id)
    elif args.profile_id:
        lines = objects.Data.by_profile_id(
            session, child_area_type_id=args.area_type_id,
            parent_area_type_id=args.parent_area_type_id,
            profile_id=args.profile_id)
    else:
        raise argparse.ArgumentError(
            None, 'Either indicator_id or profile_id is required')

    rows = utils.parse_csv(lines)

    # Filter
    rows = (row for row in rows if row_filter(row, args=args))

    # Write to file (or to screen)
    buffer = args.output.open('w', newline='\n') if args.output else sys.stdout
    utils.write_csv(rows, buffer=buffer, write_header=args.write_header)
def show_report(report_id):
    custom_report = CustomReport.query.get(report_id)
    if request.method == 'POST':
        session['username'] = request.form['username']
        session['password'] = request.form['password']
    username = session.get('username', None)
    password = session.get('password', None)
    if username and password:
        csv_document = utils.get_csv_from_url(custom_report.url,
                                              username=username,
                                              password=password)
        # An HTML login page instead of CSV means the credentials failed.
        if '<!DOCTYPE html PUBLIC' in csv_document:
            session['username'] = None
            session['password'] = None
            return render_template('reports/report.html', require_auth=True,
                                   failure=True)
        reports = utils.parse_csv(csv_document)
        return render_template('reports/report.html', reports=reports)
    else:
        return render_template('reports/report.html', require_auth=True)
def main(file_path, logger_p):
    # read input file - returns list of lists
    data_records = parse_csv(file_path, smoothing_level=0,
                             should_shuffle=False)
    logger_p.log(
        '{num_records} records loaded'.format(num_records=len(data_records)))

    # # run offline predictors
    # offline_predictor_errors = \
    #     run_offline_predictors(
    #         logger_p,
    #         data_records,
    #         [
    #             (OfflineAutoRegressionHandler, 1),
    #             # (MovingAverageHandler, 1),
    #         ]
    #     )

    # run online predictor
    predictor = OnlineAutoRegressionHandler(logger_p, p=1, lag_size=13)
    online_predictor_errors = \
        run_online_predictor(logger_p, data_records, predictor=predictor)

    # log results
    # utils.log_metrics_dict(logger_p, offline_predictor_errors)
    utils.log_metrics_dict(logger_p, online_predictor_errors)
def main():
    movies = parse_csv()
    with open("ebert.csv", 'w') as out:
        writer = csv.writer(out)
        writer.writerow(EBERT_FIELDS)
        for title, year in movies:
            result = get_ebert_ratings(title, year)
            writer.writerow([result[f] for f in EBERT_FIELDS])
def calc_rate(logger):
    logger.log('load records..')
    data_records = utils.parse_csv(DATASET_FILE_PATH, smoothing_level=1,
                                   should_shuffle=False)
    logger.log('calculate rates..')
    logger.log(utils.calc_mid_end_rate(data_records))
    logger.log('done.')
def main():
    movies = parse_csv()
    with open("metacritic.csv", 'w') as out:
        writer = csv.writer(out)
        writer.writerow(METACRITIC_FIELDS)
        for title, year in movies:
            result = get_metacritic_ratings(title, year)
            writer.writerow(
                [result[f].encode("utf-8") for f in METACRITIC_FIELDS])
def __init__(self, examples=None, attrs=None, attr_names=None, target=-1,
             inputs=None, values=None, distance=mean_boolean_error,
             name='', source='', exclude=()):
    """Accepts any of DataSet's fields. Examples can also be a
    string or file from which to parse examples using parse_csv.
    Optional parameter: exclude, as documented in .set_problem().
    >>> DataSet(examples='1, 2, 3')
    <DataSet(): 1 examples, 3 attributes>
    """
    self.name = name
    self.source = source
    self.values = values
    self.distance = distance
    self.got_values_flag = bool(values)

    # initialize .examples from string, file, or list
    if isinstance(examples, str):
        self.examples = parse_csv(examples)
    else:
        try:
            self.examples = parse_csv(
                open('datasets/' + name + '.csv').read())
        except FileNotFoundError:
            self.examples = examples

    # attrs are the indices of examples, unless otherwise stated.
    if self.examples and not attrs:
        attrs = list(range(len(self.examples[0])))
    self.attrs = attrs

    # initialize .attr_names from string, file, or by default
    if isinstance(attr_names, str):
        self.attr_names = attr_names.split()
    else:
        self.attr_names = attr_names or attrs

    self.set_problem(target, inputs=inputs, exclude=exclude)
def main():
    movies = parse_csv()
    with open("rottentomatoes.csv", 'w') as out:
        writer = csv.writer(out)
        writer.writerow(RT_FIELDS)
        for title, year in movies:
            result = get_rottentomatoes_ratings(title, year)
            writer.writerow([result[f] for f in RT_FIELDS])
            time.sleep(1)  # pause between requests
def main():
    movies = parse_csv()
    with open('box_office_mojo.csv', 'w') as out:
        writer = csv.writer(out)
        writer.writerow(result_fields)
        for m in movies:
            title, year = m
            html = fetch_site(title, year)
            result = process_site(html, title, year)
            writer.writerow([result[f] for f in result_fields])
def main(args):
    if args.load_from is not None:
        parsed_articles = parse_csv(args.load_from)
        for parsed in parsed_articles:
            print("Storing parsed article %s" % parsed.article_id)
            store(parsed)
    if args.build_model is not None:
        data = database.session.query(
            Article.article_id, Article.body).filter(Article.body != '').all()
        sim_model = SimilarityModel(redis_db, data, args.build_model)
        sim_model.save()
def collect_data(input_file="script.csv", output_file="movies.csv"):
    movies = parse_csv(input_file)
    with open(output_file, "w") as out:
        writer = csv.writer(out)
        writer.writerow(FIELDS)
        for title, year in movies:
            # default every field to "N/A", then fill in from each source
            result = defaultdict(lambda: "N/A", {'title': title, 'year': year})
            result = get_box_office_mojo_results(title, year, result)
            result = get_rottentomatoes_ratings(title, year, result)
            result = get_ebert_ratings(title, year, result)
            result = get_metacritic_ratings(title, year, result)
            writer.writerow(
                [unicode(result[f]).encode("utf-8") for f in FIELDS])
def process_file(file_path):
    utils.file_exists(file_path)
    filename, mod_time = utils.get_file_metadata(file_path)
    file_hash = utils.get_file_hash(filename, mod_time)
    if data.logic.file_already_proccessed(file_hash):
        log.info('The file {} has already been processed'.format(filename))
    else:
        log.info('Start processing {}'.format(filename))
        data.logic.create_file_processed(file_hash, filename, mod_time)
        input_records = utils.parse_csv(file_path)
        records_with_candidates = data.logic.infer_candidates(input_records)
        process_rows(file_hash, records_with_candidates)
        log.info('Finished processing {}'.format(filename))
def algorithm_quality():
    A = []
    sample = parse_csv('samples/smaller_data.csv')

    # Step 0
    training_data_set, testing_data_set = data_set_division(sample)

    # # Step 1
    # similarity_vectors = calc_similarity_vectors(tmp_val, training_data_set)
    # # Step 2
    # category_similarity = calc_category_similarity(similarity_vectors)
    # # Step 3
    # decision = calc_decision(category_similarity)

    # Step 4
    # (count over all test passwords, not categories, so that both the
    # progress indicator and the quality ratio stay in [0, 1])
    m0 = 0
    loading_total = sum(len(passw_arr)
                        for passw_arr in testing_data_set.values())
    loading_current = 0

    # Step 4.1
    for category, passw_arr in testing_data_set.items():
        for x in passw_arr:
            # a)
            similarity_vectors = calc_similarity_vectors(x, training_data_set)
            # b)
            category_similarity = calc_category_similarity(similarity_vectors)
            # c)
            decision = calc_decision(category_similarity)
            # d)
            if decision == category:
                m0 += 1
            loading_current += 1
            print(loading_current / loading_total)

    # Step 4.2
    quality = m0 / loading_total
    return f'Quality: {quality}'
def main():
    # The slope defines how many iterations the program has to go through
    # before adding a new frame to the graph
    slope = 1
    # Does this need any explanation ?
    max_iterations = 1000
    # Learning ratio
    ratio = 0.1

    # Parsing CSV to build the data
    tmp_tab_x, tmp_tab_y = utils.parse_csv('data.csv', 'km', 'price')
    if tmp_tab_x is None or tmp_tab_y is None:
        return

    # Creating new dataset for cost function test
    data = train_model.Dataset(tmp_tab_x, tmp_tab_y)
    plot_cost_function(data, max_iterations, ratio)

    # Creating new dataset for linear regression test
    data = train_model.Dataset(tmp_tab_x, tmp_tab_y)
    plot_linear_regression(data, slope, max_iterations, ratio)
def analyse(directory, survey_file, config_file):
    """Analyses survey responses.

    Args:
        directory (str): path to the folder containing the excel file and
            config file. For example: "./static/uploads/4SikvVjjqlWV44AW/"
        survey_file (str): name of survey file (excel/csv).
            For example: "responses.xlsx" or "responses.csv"
        config_file (str): name of config file.
            For example: "config_file.txt"

    Returns:
        A dictionary mapping each survey question to the analysis of its
        responses.
    """
    if survey_file.endswith(".csv"):
        parsed_file = parse_csv(os.path.join(directory, survey_file))
    else:
        parsed_file = parse_excel(os.path.join(directory, survey_file))

    categorised_responses = categorise(
        parsed_file,
        parse_config(os.path.join(directory, config_file)),
    )

    analysis = {}
    analysed = None
    for qn, responses in categorised_responses.items():
        category = responses[0]
        list_of_responses = responses[1]
        if category == "numerical":
            analysed = ("numerical",
                        numerical(list(map(int, list_of_responses))))
        elif category == "multicategorical":
            analysed = ("categorical", multi_categorical(list_of_responses))
        elif category == "categorical":
            analysed = ("categorical", categorical(list_of_responses))
        elif category == "openended":
            analysed = ("openended", openended(list_of_responses, directory))
        analysis[qn] = analysed
        analysed = None
    return analysis
def preprocess_csv(csv_file_name, processed_file_name, service_port,
                   test_file=False):
    with open(processed_file_name, 'w') as save_to_file:
        csv_writer = csv.writer(save_to_file, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        for num_tweets, row in enumerate(parse_csv(csv_file_name,
                                                   service_port)):
            sentiment = int(row.field[0])
            tweet_id = row.field[1][1:-1]  # strip surrounding quotes
            text = row.field[2]
            processed_text = preprocess_tweet(text)
            if not test_file:
                csv_writer.writerow([tweet_id, sentiment, processed_text])
            else:
                csv_writer.writerow([tweet_id, processed_text])
    print(
        f'Saved [{1 + num_tweets}] processed tweets to [{processed_file_name}]')
    return processed_file_name
def classify(processed_csv, service_port, test_file=True, **params):
    positive_words = file_to_wordset(params.pop('positive_words'),
                                     service_port)
    negative_words = file_to_wordset(params.pop('negative_words'),
                                     service_port)
    predictions = []
    for row in parse_csv(processed_csv, service_port):
        tweet_id = row.field[1]
        tweet = row.field[2]
        if not test_file:
            label = row.field[0]
        # count positive and negative lexicon hits in the tweet
        pos_count, neg_count = 0, 0
        for word in tweet.split():
            if word in positive_words:
                pos_count += 1
            elif word in negative_words:
                neg_count += 1
        # print pos_count, neg_count
        prediction = 1 if pos_count >= neg_count else 0
        if test_file:
            predictions.append((tweet_id, prediction))
        else:
            predictions.append((tweet_id, int(label), prediction))
    return predictions
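# Sketch (not part of the original script): with test_file=False, classify()
# returns (tweet_id, label, prediction) triples, so accuracy can be computed
# directly from them. The helper name below is hypothetical.
def lexicon_accuracy(labeled_predictions):
    correct = sum(1 for _, label, prediction in labeled_predictions
                  if label == prediction)
    return correct / len(labeled_predictions)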
def main():
    # Get dataset - get min-max values - normalize values - sort tabs -
    # set both thetas to 0
    tmp_tab_x, tmp_tab_y = utils.parse_csv('data.csv', 'km', 'price')
    if tmp_tab_x is None or tmp_tab_y is None:
        return
    data = Dataset(tmp_tab_x, tmp_tab_y)

    # Get number of trainings and ratio
    loops = utils.parse_input_int(
        message=utils.bcolors.YELLOW +
        "How many training runs do you want our program to go through:\n" +
        utils.bcolors.ENDC)
    if loops == -1:
        return
    ratio = utils.parse_input_float(
        message=utils.bcolors.YELLOW + "At what rate:\n" + utils.bcolors.ENDC)
    if ratio == -1:
        return

    # train model
    for _ in range(loops):
        data.norm_theta0, data.norm_theta1 = train_model(
            ratio, data.norm_theta0, data.norm_theta1,
            data.norm_tab_x, data.norm_tab_y)

    # de-normalize thetas and save them
    data.theta0 = utils.de_normalize(data.norm_theta0, data.max_y, data.min_y)
    data.theta1 = utils.de_normalize(
        data.norm_theta1, data.max_y, data.min_y) / (data.max_x - data.min_x)
    utils.save_thetas(data.theta0, data.theta1)
def kmeans_run_all():
    pd.set_option('expand_frame_repr', True)
    pd.set_option('max_rows', 100)
    np.set_printoptions(precision=3, floatmode='fixed')

    for fn in c.ALL:
        k = c.ks[fn]
        t = 1
        df, class_id = parse_csv(fn)
        clusters, centroids = kmeans(df, k, t)
        results = evaluate_clusters(clusters, centroids, verbose=False)

        totals = results.sum()
        totals.name = c.TOTALS
        results = results.append(totals)

        sfn = strip_file_path(fn)
        print(f'\nSummary - {sfn}')
        print(results)
        for idx, (cluster, centroid) in enumerate(zip(clusters, centroids)):
            print(f'\nCluster {idx + 1}')
            print(f'Centroid: {centroid}')
            print(cluster)

        # only plot data that is 2- or 3-dimensional
        if 2 <= clusters[0].shape[1] <= 3:
            plot_clusters([df], np.array([df.mean().values]),
                          f'kmeans {sfn}')
            plot_clusters(clusters, centroids, f'kmeans clustered {sfn}')
def main():
    '''Driver program'''
    # Preprocess and prep data to be manipulated
    # preprocess()
    attr, table = utils.parse_csv("clean_data.csv")
    utils.convert_data_to_numeric(table)

    # Gather attribute indexes, attribute domains, and classifying
    # attribute index
    attr_indexes = list(range(len(attr)))
    class_index = attr_indexes.pop(len(attr) - 1)
    attr_domains = utils.get_attr_domains(table, attr, attr_indexes)

    # Naive Bayes
    # naive_bayes(table, attr, attr_indexes, class_index)

    # Decision Trees

    # k-Means Clustering
    attr_indexes = list(range(len(attr)))
    attr_domains = utils.get_attr_domains(table, attr, attr_indexes)
    utils.randomize_data(table)
    clustering(table, attr, attr_indexes, attr_domains)
def preprocess():
    '''
    KEEP ATTRIBUTES:
        'date_time_intake', 'intake_type', 'breed_intake', 'color_intake',
        'date_time_outcome', 'outcome_type', 'outcome_age', 'gender_intake',
        'fixed_outcome', 'age_bucket', 'retriever', 'shepherd', 'beagle',
        'terrier', 'boxer', 'poodle', 'rottweiler', 'dachshund', 'chihuahua',
        'pitbull', 'time_bucket'

    DELETE ATTRIBUTES:
        'animal_id', 'name_intake', 'date_time_intake', 'found_location',
        'animal_type_intake', 'intake_condition', 'month_year_intake',
        'intake_sex', 'age', 'breed_intake', 'color_intake', 'name_outcome',
        'date_time_outcome', 'month_year_outcome', 'outcome_subtype',
        'outcome_sex', 'outcome_age', 'gender_outcome', 'fixed_intake',
        'fixed_changed', 'date_time_length'
    '''
    attr, table = utils.parse_csv("adoption_data.csv")

    # Preserve animal entries for dogs and classifying attribute entry
    animal_index = attr.index('animal_type_intake')
    class_index = attr.index('time_bucket')
    table = [
        row for row in table
        if row[animal_index] == 'Dog' and row[class_index] != ''
    ]

    # Remove duplicate entries and entries without a gender.
    # (Building a new list avoids mutating the table while iterating
    # over it, which would silently skip rows.)
    animal_ids = set()
    animal_id_index = attr.index('animal_id')
    gender_index = attr.index('gender_intake')
    deduped = []
    for row in table:
        if row[animal_id_index] in animal_ids or row[gender_index] == '':
            continue
        print(row[animal_id_index])
        animal_ids.add(row[animal_id_index])
        deduped.append(row)
    table = deduped

    dogs_data = copy.deepcopy(table)
    utils.write_csv('dogs_data.csv', attr, dogs_data)

    # Remove attributes not to be trained on from instances in the dataset
    remove_attr = [
        'animal_id', 'name_intake', 'date_time_intake', 'found_location',
        'animal_type_intake', 'month_year_intake', 'intake_sex', 'age',
        'breed_intake', 'color_intake', 'name_outcome', 'date_time_outcome',
        'month_year_outcome', 'outcome_subtype', 'outcome_sex', 'outcome_age',
        'gender_outcome', 'fixed_intake', 'fixed_changed', 'date_time_length'
    ]

    # Remove each attribute from all rows
    for col in remove_attr:
        index = attr.index(col)
        attr.pop(index)
        for row in table:
            row.pop(index)

    utils.write_csv('clean_data.csv', attr, table)
def sigmoid_test(logger, is_online, should_plot, lag_size):
    logger.log('load records..')
    data_records = utils.parse_csv(DATASET_FILE_PATH, smoothing_level=1,
                                   should_shuffle=False)

    logger.log('get predictions')
    model_error_metrics = dict()
    for record_id in range(len(data_records)):
        record = data_records[record_id]
        if len(record) < INITIAL_HISTORY_SIZE + NUMBER_OF_PREDICTIONS_AHEAD:
            continue

        if (record_id % LOGGING_INTERVAL) == 0:
            logger.log('* record #{record_id}'.format(record_id=record_id))

        # split to train and test sets
        train_set = record[:INITIAL_HISTORY_SIZE]
        test_set = record[INITIAL_HISTORY_SIZE:
                          INITIAL_HISTORY_SIZE + NUMBER_OF_PREDICTIONS_AHEAD]

        # fit model and calculate predictions
        sigmoid_predictions = list()
        if is_online:
            mid_max_rate = SigmoidCurve.MID_MAX_RATE
            for i in range(NUMBER_OF_PREDICTIONS_AHEAD):
                tmp_history = train_set + test_set[:i]
                next_prediction = \
                    SigmoidCurve.fit_and_predict_recursive(
                        tmp_history[-lag_size:], 1,
                        mid_max_rate=mid_max_rate
                    )[0]
                sigmoid_predictions.append(next_prediction)
                # decay the rate, but never below 1.7
                mid_max_rate *= 0.85
                mid_max_rate = max(1.7, mid_max_rate)
        else:
            sigmoid_predictions = \
                SigmoidCurve.fit_and_predict_recursive(
                    train_set[-lag_size:], NUMBER_OF_PREDICTIONS_AHEAD
                )

        # plot predictions
        if should_plot:
            utils.plot_graph_and_prediction(
                train_set + test_set, sigmoid_predictions,
                INITIAL_HISTORY_SIZE + 1,
                'sigmoid__{record_id}'.format(record_id=record_id))

        error_metrics = utils.get_all_metrics(test_set, sigmoid_predictions)
        for metric_name in error_metrics.keys():
            if metric_name not in model_error_metrics.keys():
                model_error_metrics[metric_name] = list()
            model_error_metrics[metric_name].append(
                error_metrics[metric_name])

    # log metrics
    logger.log('-- avg. performance:')
    utils.log_metrics_dict(logger, model_error_metrics)
#!/usr/bin/python
# Script to retrain the network
from utils import parse_csv, simple_generator, plot_history
from cnn_models import simple_model, nvidia_net
from keras.models import load_model

# New dataset path
dataset_path = 'additional_data/'
training_samples, validation_samples = parse_csv(dataset_path, val_split=0.2,
                                                 correction=0.15)
train_generator = simple_generator(training_samples, batch_size=32)
validation_generator = simple_generator(validation_samples, batch_size=32)

# Load the pretrained model
pretrained_model = "model2.h5"
model = load_model(pretrained_model)

print("Re-training with: {} samples".format(len(training_samples[0])))

# Train the model with the new dataset generator
hist_obj = model.fit_generator(train_generator,
                               samples_per_epoch=len(training_samples[0]),
                               validation_data=validation_generator,
                               nb_val_samples=len(validation_samples),
                               nb_epoch=7)

retrained_model = "model2_re.h5"
plot_name = "retraining.png"
model_arch = "model_arch.png"

# Save retrained model
model.save(retrained_model)

# Plot history of training epochs
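# The script imports plot_history and defines plot_name but ends at the
# comment above without the call; a minimal sketch of the presumably
# intended final step, assuming plot_history(history, filename) is the
# helper's signature:
plot_history(hist_obj, plot_name)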
if __name__ == '__main__':
    if len(sys.argv) != 3:
        print('Usage: python stats.py <preprocessed-CSV> <service-port>')
        exit()

    num_tweets, num_pos_tweets, num_neg_tweets = 0, 0, 0
    num_mentions, max_mentions = 0, 0
    num_emojis, num_pos_emojis, num_neg_emojis, max_emojis = 0, 0, 0, 0
    num_urls, max_urls = 0, 0
    num_words, num_unique_words, min_words, max_words = 0, 0, 1e6, 0
    num_bigrams, num_unique_bigrams = 0, 0
    all_words = []
    all_bigrams = []

    csv_path = sys.argv[1]
    service_port = int(sys.argv[2])
    for num_tweets, row in enumerate(parse_csv(csv_path, service_port)):
        t_id = row.field[0]
        if_pos = int(row.field[1])
        tweet = row.field[2]
        if if_pos:
            num_pos_tweets += 1
        else:
            num_neg_tweets += 1

        result, words, bigrams = analyze_tweet(tweet)
        num_mentions += result['MENTIONS']
        max_mentions = max(max_mentions, result['MENTIONS'])
        num_pos_emojis += result['POS_EMOS']
        num_neg_emojis += result['NEG_EMOS']
        max_emojis = max(max_emojis, result['POS_EMOS'] + result['NEG_EMOS'])
        num_urls += result['URLS']
        max_urls = max(max_urls, result['URLS'])
                    metavar='Path', help='Path to output scores')
args = parser.parse_args()

file_list = glob.glob(args.data_path + '*.csv')
print(file_list)
assert len(file_list) > 1, \
    'Not enough files found in the specified folder. At least two files ' \
    'with scores should be available in the folder.'

score_files = []
for score_file in file_list:
    score_files.append(parse_csv(score_file))

out_data = [get_header(file_list[0])]
classes = out_data[0][2:]
idx_to_class = {}
for i, clss in enumerate(classes):
    idx_to_class[str(i)] = clss

with torch.no_grad():
    # iterate over the rows of the first score file; the tqdm total must
    # match the iterable's length, not the number of files
    iterator = tqdm(score_files[0], total=len(score_files[0]))
    for filename in iterator:
        out = 0.0
def predict_using_online_mode(logger, ar_order, ma_order, with_c=True,
                              initial_history_size=5,
                              number_of_predictions_ahead=10,
                              lag_size=0, start_params=None):
    # read series data from input file - returns list of lists
    data_records = utils.parse_csv(DATASET_FILE_PATH, smoothing_level=1,
                                   should_shuffle=False)
    logger.log(
        'records loaded: {num_records}'.format(num_records=len(data_records)))

    # define min error storage
    model_error_metrics = dict()
    valid_samples_counter = 0

    # make predictions for each record
    logger.log('** ARMA settings: p={p}, q={q}'.format(p=ar_order, q=ma_order))
    start_time = time.time()
    for record_index in range(len(data_records)):
        if (record_index % LOGGING_INTERVAL) == 0:
            logger.log(
                '-- record #{record_index}'.format(record_index=record_index))

        # test sample size and split to train and test sets
        current_sample = data_records[record_index]
        if len(current_sample) < (initial_history_size
                                  + number_of_predictions_ahead):
            # logger.log('Not enough info in record. record size={record_size}'
            #            .format(record_size=len(current_sample)))
            continue
        train_set, test_set = \
            current_sample[:initial_history_size], \
            current_sample[initial_history_size:
                           initial_history_size + number_of_predictions_ahead]

        # run ARMA model
        arma_model = ARMAModel(logger, p=ar_order, q=ma_order, with_c=with_c,
                               lag_size=lag_size)
        predictions = '<not initialized>'
        try:
            arma_model.learn_model_params(train_set,
                                          start_params=np.array(start_params))
            if not IS_ONLINE:
                predictions = arma_model.predict_using_learned_params(
                    train_set, number_of_predictions_ahead)
            else:
                predictions = list()
                for i in range(number_of_predictions_ahead):
                    # predict next value
                    predicted_value = arma_model.predict_using_learned_params(
                        train_set, 1)
                    # store prediction
                    predictions.append(predicted_value[0])
                    # update model with test value
                    arma_model.update_model([test_set[i]])
            error_metrics = utils.get_all_metrics(test_set, predictions)
        except Exception as ex:
            if 'Not enough info' not in str(ex):
                logger.log(ex)
                # logger.log('series: {ser}'.format(ser=current_sample))
                # logger.log('predictions: {preds}'.format(preds=predictions))
            continue

        for metric_name in error_metrics.keys():
            if metric_name not in model_error_metrics.keys():
                model_error_metrics[metric_name] = list()
            model_error_metrics[metric_name].append(error_metrics[metric_name])
        valid_samples_counter += 1

    logger.log('total valid predictions: {valid_predictions}'.format(
        valid_predictions=valid_samples_counter))
    logger.log('total time: {total_secs} secs'.format(
        total_secs=time.time() - start_time))
    return model_error_metrics
                    help='Flag to print results in a parser friendly format')
parser.add_argument('--normalize', action='store_true',
                    help='Flag to normalize input data')
parser.add_argument('--mode', type=str, default='matrix',
                    help='Following input modes: matrix, middle, aggregate')
args = parser.parse_args()

mode = args.mode
normalize = bool(args.normalize)
if args.train:
    trX, trY = parse_csv(args.train_file, num_hpc=12, normalize=normalize,
                         mode=mode)
if args.testing:
    teX, teY = parse_csv(args.test_file, num_hpc=12, normalize=normalize,
                         mode=mode)

# Network parameters
learning_rate = 0.001
reg_param = 0.01
dropout_prob = 0.5
training_epochs = 4
display_step = 1
std_pram = 1.0
num_input = len(trX[0][0]) if args.train else len(teX[0][0])
num_steps = len(trX[0]) if args.train else len(teX[0])
num_units = 15 if args.num_units is None else args.num_units
def analysis_page():
    # Methods
    survey_file = request.args.get("survey_file")
    config = request.args.get("config")
    if (not request.method == "POST") and (not survey_file and not config):
        return redirect(url_for("main"))

    # Checking for files
    do_analysis = False
    if request.method == "GET":
        if survey_file and config:
            do_analysis = True
    elif request.method == "POST" and (request.files["file"]
                                       and request.files["config"]):
        do_analysis = True

    # Do analysis
    if do_analysis:
        # Saving files
        if request.method == "POST":
            save = save_file(survey_file=request.files["file"],
                             config_file=request.files["config"])
            directory, filename, config_filename = (
                save["Directory"],
                save["File"],
                save["Config"],
            )
        else:
            directory, filename = os.path.split(survey_file)
            config_filename = os.path.basename(config)

        if filename.endswith(".xlsx"):
            questions = list(
                utils.parse_excel(os.path.join(directory, filename)).keys())
        else:
            questions = list(
                utils.parse_csv(os.path.join(directory, filename)).keys())
        types = utils.parse_config(os.path.join(directory, config_filename))

        # Excel but incomplete config: predict missing question types
        if len(questions) != len(types):
            session["TEMP_FOLDER"] = directory
            predictor = utils.Predictor()
            qn_dict = {}
            for i, qn in enumerate(questions):
                if i + 1 not in types.keys():
                    datatype = predictor.predict([qn])
                    qn_dict[i + 1] = (qn, datatype[0])
                else:
                    qn_dict[i + 1] = (qn, types[i + 1])
            questions_index = [(i[0], i[1][0], i[1][1])
                               for i in qn_dict.items()]
            return render_template("config.html", questions=questions_index,
                                   error=None)

        # Start analysis
        try:
            session["ANALYSIS"] = analyse.analyse(directory, filename,
                                                  config_filename)
        except ValueError:
            return render_template(
                "error.html",
                error="ValueError! Perhaps you chose a wrong category "
                      "for your data",
                error_no="500",
                error_message=error_messages[500],
            )
        except Exception as e:
            return render_template(
                "error.html",
                error=f"Unknown error: {str(e)}",
                error_no="500",
                error_message=error_messages[500],
            )

        graphs, clouds, numerical = [], [], []
        for question, analysis in session["ANALYSIS"].items():
            if analysis:
                if analysis[0] == "categorical":
                    graphs.append([
                        question,
                        utils.pie(
                            question,
                            [x for x in analysis[1]["Percentages"].keys()],
                            [y for y in analysis[1]["Percentages"].values()],
                        ),
                    ])
                elif analysis[0] == "openended":
                    clouds.append([question, analysis[1]])
                elif analysis[0] == "numerical":
                    numerical.append([question, analysis[1]])
        graphs = tuple(utils.chunk(graphs, 3))
        clouds = tuple(utils.chunk(clouds, 2))
        numerical = tuple(utils.chunk(numerical, 4))
        return render_template(
            "analysis.html",
            graphs=graphs,
            clouds=clouds,
            numerical=numerical,
            filename=filename,
            path=os.path.split(directory)[1],
        )
    elif not request.files["file"]:  # No excel
        return render_template("upload.html", error="Missing Excel/CSV file!")
    elif request.files["file"] and not request.files["config"]:
        # Excel but no config: predict a type for every question
        save = save_file(survey_file=request.files["file"])
        directory, filename = save["Directory"], save["File"]
        session["TEMP_FOLDER"] = directory
        if filename.endswith(".xlsx"):
            questions = list(
                utils.parse_excel(os.path.join(directory, filename)).keys())
        else:
            questions = list(
                utils.parse_csv(os.path.join(directory, filename)).keys())
        predictions = utils.Predictor().predict(questions)
        questions_index = [(i + 1, question, predictions[i])
                           for i, question in enumerate(questions)]
        return render_template("config.html", questions=questions_index,
                               error=None)
args = parser.parse_args()
mode = args.mode


# helper function
def debug(msg):
    if args.debug:
        print('DEBUG: {}'.format(msg))


normalize = bool(args.normalize)
if args.train:
    trX, trY = parse_csv(args.train_file, num_hpc=args.hpc,
                         normalize=normalize, mode=mode)
if args.testing:
    teX, teY = parse_csv(args.test_file, num_hpc=args.hpc,
                         normalize=normalize, mode=mode)

# Network parameters
learning_rate = 0.001
reg_param = 0.0
noise_param_value = 1.0
dropout_prob = 1.0
training_epochs = 4
display_step = 1