def tag_images():
    """
    Serve one image at a time for tagging, appending submitted tags to an
    output CSV when the whole list has been shown.

    Reads every row of ``csv_read_file`` into a list and shows the row at the
    global ``LIST_INDEX`` via 'show_image.html'.  Any values submitted in the
    request form are appended to a copy of the current row, which is
    accumulated in the global ``CSV_WRITE_DATA``.  When the counter walks off
    the end of the list it is reset to 0, ``CSV_WRITE_DATA`` is written to
    ``csv_write_file``, and 'done.html' is rendered.

    NOTE(review): the original docstring claimed the read CSV "will be
    deleted" at the end, but no deletion happens in this code — confirm
    whether that is handled elsewhere or was never implemented.

    :return: rendered 'show_image.html' with the next row as ``image_data``,
             or 'done.html' once every row has been tagged.
    """
    global LIST_INDEX
    # create a sample read_csv_file using the image_list_data at the top
    # list_to_csv(csv_read_file, sample_data_list)
    # read the read_csv_file into a list
    # NOTE(review): re-read on every request — fine for small files, but the
    # whole CSV is parsed once per submitted tag.
    IMAGE_DATA_LIST = csv_to_list(csv_read_file)
    image_data = IMAGE_DATA_LIST[LIST_INDEX]
    # Copy the row so the submitted tag values don't mutate the source row.
    new_image_data = image_data[:]
    for value in request.form.values():
        # NOTE(review): form values are strings and never None in practice;
        # this guard looks defensive rather than necessary — confirm.
        if value is not None:
            new_image_data.append(value)
    CSV_WRITE_DATA.append(new_image_data)
    LIST_INDEX += 1
    if LIST_INDEX == len(IMAGE_DATA_LIST):
        # Every row has been tagged: reset the cursor, flush the accumulated
        # tagged rows to disk, and show the completion page.
        LIST_INDEX = 0
        list_to_csv(csv_write_file, CSV_WRITE_DATA)
        return render_template('done.html')
    # Otherwise show the next untagged row.
    image_data = IMAGE_DATA_LIST[LIST_INDEX]
    return render_template('show_image.html', image_data=image_data)
def main():
    """
    Scrape whois.com and ipaddress.com for (up to) the first 100 links in
    ``csv_file`` and write the accumulated results to output.json/output.csv.

    Python 2 code (print statements).  Progress is printed per iteration and
    the loop sleeps 30 seconds every 400 iterations to throttle requests.
    On any exception the partial results are flushed to disk before the
    exception is re-raised.
    """
    # Only the first 100 rows are scraped in this run.
    raw_list = csv_to_list(csv_file)[:100]
    total_len = len(raw_list)
    counter = 0
    result_dict = dict()
    print "Commencing Web Scraping..."
    start_time = time.time()
    for raw_link in raw_list:
        try:
            # Each CSV row is a one-element list; unwrap the link string.
            raw_link = raw_link[0]
            whois_link = "http://www.whois.com/whois/" + raw_link
            ipaddress_link = "http://" + raw_link + ".ipaddress.com/"
            whois_soup = link_to_lxmlsoup(whois_link)
            ipaddress_soup = link_to_lxmlsoup(ipaddress_link)
            result_dict.setdefault('Raw Link', []).append(str(raw_link))
            # Parsers fold their findings into result_dict and return it.
            result_dict = whois_parser(whois_soup, result_dict)
            result_dict = ipaddress_parser(ipaddress_soup, result_dict)
            counter, total_len = print_counter(counter, total_len)
            if counter % 400 == 0:
                # Throttle so the remote hosts don't rate-limit/ban us.
                print "Commencing 30 Second Sleep after 400 iterations"
                time.sleep(30)
            time_elapsed = time.time() - start_time
            print_progress(time_elapsed, counter, total_len)
        except:
            # NOTE(review): bare except also catches KeyboardInterrupt /
            # SystemExit.  Intent here is best-effort: persist whatever was
            # scraped so far, report, then re-raise the original error.
            dict_to_json(result_dict, 'output.json')
            dict_to_csv(result_dict, 'output.csv')
            print "Unexpected Error", sys.exc_info()[0]
            raise
    # Normal completion: write the full result set.
    dict_to_json(result_dict, 'output.json')
    dict_to_csv(result_dict, 'output.csv')
def clean(self, value):
    """Parse *value* as a comma-separated list, then run the parent clean().

    Raises a ``forms.ValidationError`` with a translated message when the
    input cannot be parsed as CSV.
    """
    try:
        parsed = csv_to_list(value)
    except csv.csv.Error:
        message = _(u"Please provide a comma separated value list.")
        raise forms.ValidationError(message)
    return super(FieldArray, self).clean(parsed)
def to_python(self, value):
    """Coerce *value* (CSV string, list, or tuple) into a cleaned list.

    ``None`` passes through unchanged; strings are split via csv_to_list.
    Depending on the underlying field type, falsy items are dropped
    (``blank`` is False) and duplicates removed (``unique`` is True), and
    every item is converted with the field type's own ``to_python``.
    Anything that is not list-like after conversion raises ValidationError.
    """
    if value is None:
        return value
    if isinstance(value, basestring):
        value = csv_to_list(value)
    # Guard clause: reject anything that still isn't list-like.
    if not isinstance(value, (list, tuple)):
        raise ValidationError(default_error_messages['invalid'])
    if not self._fieldtype.blank:
        value = [item for item in value if item]
    if self._fieldtype.unique:
        # NOTE: set() does not preserve the original item order.
        value = list(set(value))
    convert = self._fieldtype.to_python
    return [convert(item) for item in value]
def to_python(self, value):
    """Convert *value* to a list of field-typed items.

    ``None`` is returned as-is.  A string is first split as CSV.  List or
    tuple input is filtered of falsy items unless the field type allows
    blanks, deduplicated when the field type requires uniqueness, and each
    item is passed through the field type's ``to_python``.  Any other input
    type raises ``ValidationError``.
    """
    if value is None:
        return value
    if isinstance(value, basestring):
        value = csv_to_list(value)
    if isinstance(value, (list, tuple)):
        field = self._fieldtype
        items = value
        if not field.blank:
            items = [v for v in items if v]
        if field.unique:
            # NOTE: deduplication via set() loses input ordering.
            items = list(set(items))
        return [field.to_python(v) for v in items]
    raise ValidationError(default_error_messages['invalid'])
def main(path_inputxt, path_outputyt, prefix_outdir, max_l):
    """
    Run CSSR, transCSSR, and transCSSR_bc on the given input files and
    generate output files and conditional measures in the given directory.

    Arguments:
    path_inputxt: str
        Path to the inputXt csv file. E.g.: 'csv/inputXt.csv'
    path_outputyt: str
        Path to the outputYt csv file. E.g.: 'csv/outputYt.csv'
    prefix_outdir: str
        Prefix naming of the output directory.  The actual output directory
        is the prefix with '_L<max_l>' appended.  E.g.: 'output_trans' with
        @max_l=1 will have outputs under './output_trans_L1'
    max_l: int
        The maximum L value for computing the transducer.

    Returns:
        None.  Writes '<prefix_outdir>_L<max_l>/results.csv' and moves all
        generated .dot / .dat_results files under its 'dot_results/' subdir.
    """
    # create the final output directory
    dir_out = prefix_outdir + '_L' + str(max_l)
    PATH_DOT_RESULTS = os.path.join(dir_out, 'dot_results')
    os.makedirs(PATH_DOT_RESULTS, exist_ok=True)

    # load in the .csv as column-major 2d-lists
    cols_x = csv_to_list(path_inputxt)
    cols_y = csv_to_list(path_outputyt)

    # Find a set of unique outcomes for each of x & y
    axs = get_uniques_from_2d_list(cols_x)
    ays = get_uniques_from_2d_list(cols_y)

    # List to save C_X & h_X for each pair so we can write to results.csv later
    results = []

    # Headers of CSV:
    header = [
        'machine_name', 'machine_H[X_{0}]', 'machine_E', 'machine_C_mu',
        'machine_h_mu', 'transducer_name', 'transducer_C_mu',
        'transducer_h_mu', 'transducer_bc_name', 'transducer_bc_C_mu',
        'transducer_bc_h_mu'
    ]

    # Loop through each pair of columns in the CSVs.  Row 0 of each column
    # is the series name; the remaining rows are the symbol sequence.
    for col_x, col_y in zip(cols_x, cols_y):
        xt_name, yt_name = col_x[0], col_y[0]
        name_machine = '+%s.dot' % xt_name
        name_transducer = '%s+%s.dot' % (xt_name, yt_name)
        stringX, stringY = ''.join(col_x[1:]), ''.join(col_y[1:])

        # Epsilon-machine for the input series alone, plus its measures.
        cssr(stringX, axs, xt_name, max_l)
        HLs, _, h_mu_mach, _, E, C_mu_mach, _ = tC_bc.compute_ict_measures(
            join('transCSSR_results', name_machine), axs, 'transCSSR',
            L_max=max_l
        )

        # generate output files for transducer without BC
        trans(stringX, stringY, axs, ays, xt_name, yt_name, max_l)
        C_mu, h_mu = tC.compute_conditional_measures(
            join('transCSSR_results', name_machine),
            join('transCSSR_results', name_transducer),
            axs, ays, inf_alg='transCSSR')

        # Move transducer output files out of the way so we can generate
        # trans_bc.
        # Transducer output files: 'X1+Y1.dot' & 'X1+Y1.dat_results'
        # name_transducer has 'X1+Y1.dot' so just need the latter
        name_transducer_dat = name_transducer[:-4] + '.dat_results'
        os.rename(
            join('transCSSR_results', name_transducer),
            join(PATH_DOT_RESULTS,
                 'trans_' + name_transducer.replace('+', '_'))
        )
        os.rename(
            join('transCSSR_results', name_transducer_dat),
            join(PATH_DOT_RESULTS,
                 'trans_' + name_transducer_dat.replace('+', '_'))
        )

        # Now that trans outputs are moved, let's generate trans_bc
        trans_bc(stringX, stringY, axs, ays, xt_name, yt_name, max_l)
        C_mu_bc, h_mu_bc = tC.compute_conditional_measures(
            join('transCSSR_results', name_machine),
            join('transCSSR_results', name_transducer),
            axs, ays, inf_alg='transCSSR')

        # Move them away too but append a '_bc'
        os.rename(
            join('transCSSR_results', name_transducer),
            join(PATH_DOT_RESULTS,
                 'trans_bc_' + name_transducer.replace('+', '_'))
        )
        os.rename(
            join('transCSSR_results', name_transducer_dat),
            join(PATH_DOT_RESULTS,
                 'trans_bc_' + name_transducer_dat.replace('+', '_'))
        )

        # Also move away the cssr outputs, e.g.: +X1.dot & +X1.dat_results
        name_machine_dat = name_machine[:-4] + '.dat_results'
        os.rename(
            join('transCSSR_results', name_machine),
            join(PATH_DOT_RESULTS,
                 'mach_' + name_machine.replace('+', '_'))
        )
        os.rename(
            join('transCSSR_results', name_machine_dat),
            join(PATH_DOT_RESULTS,
                 'mach_' + name_machine_dat.replace('+', '_'))
        )

        results.append([
            'mach_' + name_machine.replace('+', '_'),
            HLs[0], E, C_mu_mach, h_mu_mach,
            'trans_' + name_transducer.replace('+', '_'), C_mu, h_mu,
            'trans_bc_' + name_transducer.replace('+', '_'),
            C_mu_bc, h_mu_bc
        ])

    # Now that we have moved all of the .dot & .dat_results, let's create the
    # results.csv that has the C_mu & h_mu.
    # FIX: csv.writer requires the file opened with newline='' — without it
    # every row is followed by a spurious blank line on Windows.
    with open(join(dir_out, 'results.csv'), 'w', newline='') as f:
        # quoting=2 in the original is csv.QUOTE_NONNUMERIC; use the named
        # constant for readability (identical behavior).
        writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow(header)
        writer.writerows(results)
def main(path_outputyt, prefix_outdir, max_l):
    """
    Run CSSR on the given input files and generate output files and
    conditional measures in the given directory.

    Arguments:
    path_outputyt: str
        Path to the outputYt csv file. E.g.: 'csv/outputYt.csv'
    prefix_outdir: str
        Prefix naming of the output directory.  The actual output directory
        is the prefix with '_L<max_l>' appended.  E.g.: 'output_trans' with
        @max_l=1 will have outputs under './output_trans_L1'
    max_l: int
        The maximum L value for computing the transducer.

    Returns:
        None.  Writes '<prefix_outdir>_L<max_l>/results.csv' and moves all
        generated .dot / .dat_results files under its 'dot_results/' subdir.
    """
    # create the final output directory
    dir_out = prefix_outdir + '_L' + str(max_l)
    PATH_DOT_RESULTS = os.path.join(dir_out, 'dot_results')
    os.makedirs(PATH_DOT_RESULTS, exist_ok=True)

    # load in the .csv as column-major 2d-lists
    cols_y = csv_to_list(path_outputyt)

    # Find a set of unique outcomes for y
    ays = get_uniques_from_2d_list(cols_y)

    # List to save C_X & h_X for each pair so we can write to results.csv later
    results = []

    # Headers of CSV:
    header = [
        'machine_name', 'machine_H[X_{0}]', 'machine_E', 'machine_C_mu',
        'machine_h_mu',
    ]

    # Loop through each column in the CSV.  Row 0 of each column is the
    # series name; the remaining rows are the symbol sequence.
    for col_y in cols_y:
        yt_name = col_y[0]
        name_machine = '+%s.dot' % yt_name
        stringY = ''.join(col_y[1:])

        # Epsilon-machine for this series, plus its information measures.
        cssr(stringY, ays, yt_name, max_l)
        HLs, _, h_mu_mach, _, E, C_mu_mach, _ = compute_ict_measures(
            join('transCSSR_results', name_machine), ays, 'transCSSR',
            L_max=max_l)

        # Also move away the cssr outputs, e.g.: +Y1.dot & +Y1.dat_results
        name_machine_dat = name_machine[:-4] + '.dat_results'
        os.rename(
            join('transCSSR_results', name_machine),
            join(PATH_DOT_RESULTS, 'mach_' + name_machine.replace('+', '_')))
        os.rename(
            join('transCSSR_results', name_machine_dat),
            join(PATH_DOT_RESULTS,
                 'mach_' + name_machine_dat.replace('+', '_')))

        results.append([
            'mach_' + name_machine.replace('+', '_'),
            HLs[0], E, C_mu_mach, h_mu_mach,
        ])

    # Now that we have moved all of the .dot & .dat_results, let's create the
    # results.csv that has the C_mu & h_mu.
    # FIX: csv.writer requires the file opened with newline='' — without it
    # every row is followed by a spurious blank line on Windows.
    with open(join(dir_out, 'results.csv'), 'w', newline='') as f:
        # quoting=2 in the original is csv.QUOTE_NONNUMERIC; use the named
        # constant for readability (identical behavior).
        writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
        writer.writerow(header)
        writer.writerows(results)