def rescale_fnc_by_distribution(input_path):
    label_distribution = {"support": 22.46, "deny": 14.13, "comment": 18.34, "unrelated": 45.07}
    file_name = ntpath.basename(input_path)
    output_dir = os.path.dirname(input_path)
    fnc_data = utils.read_csv(input_path, load_header=False, delimiter="\t")
    # Bucket the examples by label (column 3 holds the stance label).
    fnc_label_data_map = {label: [] for label in label_distribution.keys()}
    for example in fnc_data:
        fnc_label_data_map[example[3]].append(example)
    # Try each label as the fixed anchor: keep all of its examples and scale the
    # other labels to match the target distribution. If any label lacks enough
    # examples, move on to the next anchor.
    rescaled_label_size = {}
    for fixed_label in label_distribution.keys():
        fixed_label_size = len(fnc_label_data_map[fixed_label])
        rescaled_label_size = {fixed_label: fixed_label_size}
        for label, freq in label_distribution.items():
            if label != fixed_label:
                rescaled_size = label_distribution[label] / label_distribution[fixed_label] * fixed_label_size
                if rescaled_size <= len(fnc_label_data_map[label]):
                    rescaled_label_size[label] = rescaled_size
                else:
                    break
        if len(rescaled_label_size) == len(label_distribution):
            break
    print("Rescaled label size {}".format(rescaled_label_size))
    rescaled_data = []
    for label, data in fnc_label_data_map.items():
        rescaled_data += data[:(int(rescaled_label_size[label]) + 1)]
    np.random.shuffle(rescaled_data)
    utils.write_csv(rescaled_data, None, os.path.join(output_dir, "{}.scaled".format(file_name)), delimiter="\t")
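
# Usage sketch for the function above (hypothetical path; assumes a headerless,
# tab-separated FNC file whose fourth column is the stance label):
#   rescale_fnc_by_distribution("data/fnc/train.tsv")
# writes a shuffled, distribution-matched copy to "data/fnc/train.tsv.scaled"
# next to the input file.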
def add_filled_reactions(WD, reacs, repair, draft, json=False):
    """Function to add the retrieved reactions to the model.

    ARGS:
        WD (str) -- the path to the working directory containing the repair model and the draft.
        reacs (list of str) -- the list of reaction names.
        repair -- the repair model file (SBML format).
        draft -- the draft model file (SBML format).
        json (boolean) -- True to save the model in JSON format instead of SBML (the default).
    """
    repair_model = cobra.io.read_sbml_model(WD + repair)
    draft_model = cobra.io.read_sbml_model(WD + draft)
    old_size = len(draft_model.reactions)
    reac_to_blast = []
    for reac in reacs:
        reac = tools.cobra_compatibility(reac.rstrip())
        try:
            new_reac = copy.deepcopy(repair_model.reactions.get_by_id(reac))
            reac_to_blast.append([new_reac.id, new_reac.gene_reaction_rule])
            new_reac.gene_reaction_rule = ""
            draft_model.add_reactions([new_reac])
        except KeyError:
            print("No match for this reaction: ", reac)
    print("Number of reactions in the unrepaired model: %i\nNumber of reactions in the repaired model: %i"
          % (old_size, len(draft_model.reactions)))
    if json:
        cobra.io.save_json_model(draft_model, WD + "filled_" + draft.split(".")[0] + ".json")
    else:
        cobra.io.write_sbml_model(draft_model, WD + "filled_" + draft.split(".")[0] + ".sbml")
    tools.write_csv(WD, reac_to_blast, "rxns_to_blast_" + draft_model.id, "\t")
def update_relevancies(results, user_relevancies=None):
    if user_relevancies is None:
        user_relevancies = {}
    # List of unique (query, url) pairs.
    relevancies = {(query, doc.url): relevance
                   for (query, algo), docs_with_relevance in results.items()
                   for doc, relevance in docs_with_relevance}
    user_relevancies = {(query, url): relevance
                        for (query, url), relevance in user_relevancies.items()
                        if relevance > -1}
    for query_url_pair, relevance in relevancies.items():
        if query_url_pair not in user_relevancies:
            user_relevancies[query_url_pair] = relevance
    final_relevancies = [(query, url, relevance)
                         for (query, url), relevance in user_relevancies.items()]
    write_csv(sorted(final_relevancies, key=lambda x: (x[0].lower(), x[1])),
              filename=relevancies_file, headers=headers)
def merge_loop(double_set, list_name, file=None):
    """
    Perform clique merging, looping until no further merge is possible.
    :param double_set: the initial set of pairs (2-cliques)
    :return: the maximum clique size and the final cliques
    """
    bestSet = set()
    oldSet = double_set
    num_list = []
    count_list = []
    group_list = []
    while len(oldSet) > 0:
        print('Clique size:', len(list(oldSet)[0]))
        print('Count:', len(oldSet))
        print(oldSet)
        num_list.append(len(list(oldSet)[0]))
        count_list.append(len(oldSet))
        group_list.append(oldSet)
        bestSet = oldSet
        oldSet = merge_group(oldSet, double_set)
    if file is not None:
        group_list = utils.num_2_word(list_name, group_list)
        utils.write_csv(['Clique size', 'Count', 'Cliques'], file, num_list, count_list, group_list)
        utils.save_pickle(file + '.pkl', group_list)
    return len(list(bestSet)[0]), bestSet
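
# Usage sketch (hypothetical data; assumes merge_group, utils.num_2_word and
# utils.write_csv from the same module). Starting from 2-cliques such as
#   pairs = {frozenset({'a', 'b'}), frozenset({'b', 'c'})}
#   max_size, cliques = merge_loop(pairs, names, file='data/cliques')
# each pass merges overlapping cliques until no larger clique can be formed;
# the loop records size, count and members of the cliques at every level.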
def present_token_sizes_table(configurations, algorithm):
    headers = ['networkSize', 'noFaults', 'fiveN', 'differenceFiveN', 'ninety', 'differenceNinety']
    network_sizes = 5
    rows = []
    for i in range(network_sizes):
        c_0 = configurations['0'][i]
        c_5n = configurations['5n'][i]
        c_90 = configurations['90'][i]
        mean_0 = round(statistics.mean(c_0.get_token_bytes()))
        mean_5n = round(statistics.mean(c_5n.get_token_bytes()))
        mean_90 = round(statistics.mean(c_90.get_token_bytes()))
        # The "difference" columns hold the overhead ratio relative to the fault-free run.
        overhead_5n = round(mean_5n / mean_0, 2)
        overhead_90 = round(mean_90 / mean_0, 2)
        rows.append([c_0.number_of_nodes, mean_0, mean_5n, overhead_5n, mean_90, overhead_90])
    write_csv('../report/figures/token-sizes-faulty-%s.csv' % algorithm, headers, rows)
def main():
    args = get_args()
    config = utils.get_config(args.config)
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
    session = http_session.StorageSession(**config['session'], access_key=utils.get_access_token())
    root_dir = config['data']['root_dir']
    raw_path = utils.build_path(root_dir=root_dir, sub_dir=args.raw, date=args.date, ext='json')
    data = download_data(session, path=raw_path)
    rows = parse_data(data)
    LOGGER.info("Retrieved %s rows", len(rows))
    headers = utils.get_headers(config['fields'])
    rows = transform.clean(rows, data_types=headers, date=args.date)
    output_path = utils.build_path(root_dir=root_dir, sub_dir=args.output, date=args.date, ext='csv')
    utils.write_csv(path=output_path, rows=rows, header=args.header)
def rename_image(label_dict={}, out_file='train_new.csv'):
    '''
    Rename every image in label_dict to a sequential, zero-padded name and
    write the updated mapping to out_file.
    :param label_dict: mapping of image path -> labels
    :param out_file: path of the CSV file to write
    :return: out_file
    '''
    new_label_dict = {}
    i = 1
    for key in label_dict.keys():
        # os.path.split() splits a path on the last '/':
        # os.path.split('/home/zhex/soft/python/test.jpg') returns
        # ('/home/zhex/soft/python', 'test.jpg').
        image_name = os.path.split(key)[-1]
        new_image_name = '%09d' % i + '.jpg'
        i = i + 1
        # Rename the file on disk.
        new_key = key.replace(image_name, new_image_name)
        os.renames(key, new_key)
        # Build the new mapping.
        new_label_dict.setdefault(new_key, label_dict.get(key, []))
    utils.write_csv(new_label_dict, out_path=out_file)
    return out_file
def main():
    args = get_args()
    utils.configure_logging(verbose=args.verbose, debug=args.debug, error=args.error)
    session = http_session.FingertipsSession()

    # Get data
    if args.indicator_id:
        lines = objects.Data.by_indicator_id(
            session, indicator_ids={args.indicator_id},
            child_area_type_id=args.area_type_id,
            parent_area_type_id=args.parent_area_type_id)
    elif args.profile_id:
        lines = objects.Data.by_profile_id(
            session, child_area_type_id=args.area_type_id,
            parent_area_type_id=args.parent_area_type_id,
            profile_id=args.profile_id)
    else:
        raise argparse.ArgumentError(None, 'Either indicator_id or profile_id is required')
    rows = utils.parse_csv(lines)

    # Filter
    rows = (row for row in rows if row_filter(row, args=args))

    # Write to file (or to screen)
    buffer = args.output.open('w', newline='\n') if args.output else sys.stdout
    utils.write_csv(rows, buffer=buffer, write_header=args.write_header)
def main():
    records = read_csv(input_fname)
    records = _subset_extractor(records, INFO_HEADER + VITAL_SIGNS_HEADER + LAB_HEADER)
    recordss = hashing(records, 'ICUSTAY_ID')
    _sorting(recordss, 'TIMESTAMP')
    print('#icu_stays: %d' % len(recordss))
    # Impute forward in time, then backward, then fill any remaining NAs with averages.
    for key, records in recordss.items():
        _imputation(records, VITAL_SIGNS_HEADER + LAB_HEADER)
    avgs = _get_normal_value(recordss, VITAL_SIGNS_HEADER + LAB_HEADER)
    _sorting(recordss, 'TIMESTAMP', reverse=True)
    for key, records in recordss.items():
        _imputation(records, VITAL_SIGNS_HEADER + LAB_HEADER)
    _impute_NAs(recordss, VITAL_SIGNS_HEADER + LAB_HEADER, avgs)
    _sorting(recordss, 'TIMESTAMP')
    recordss_csv = []
    for key in sorted(recordss.keys()):
        records = recordss[key]
        recordss_csv.extend(records)
    write_csv(output_fname, INFO_HEADER + VITAL_SIGNS_HEADER + LAB_HEADER, recordss_csv)
def add_points_clan(change, password):
    pass_location = 'password.txt'
    log_location = 'log.csv'
    db_location = 'db.csv'
    if is_production():
        pass_location = '/home/guerredesclans/mysite/password.txt'
        log_location = '/home/guerredesclans/mysite/log.csv'
        db_location = '/home/guerredesclans/mysite/db.csv'
    if check_pass(pass_location, password):
        points = get_clan_points()
        for p in points:
            if p[0] == change[0]:
                p[1] = int(p[1]) + int(change[1])
        # Log the update.
        log = read_csv(log_location)
        log.append([datetime.now(), change[0], change[1]])
        write_csv(log_location, log)
        write_csv(db_location, points)
        return change[0]
    else:
        return False
def sort_orders(path_dir="data/process/order_201611.csv"):
    content = load_csv(path_dir)
    content = list(map(map_str2int, content))
    # Replace column 1 with its offset from column 0, clamped to at least 1.
    for i in range(len(content)):
        content[i][1] = max(content[i][1] - content[i][0], 1)
    content.sort(key=lambda x: x[0])
    write_csv(content, path_dir)
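
# Worked example of the transformation above: a row [start, end] such as
# [3, 10] becomes [3, 7] (the second value replaced by the gap 10 - 3),
# and [5, 5] becomes [5, 1] because the difference is clamped to at least 1.
# Rows are then sorted by their first value and written back in place.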
def get_route():
    headers = {
        'accept': "application/json, text/javascript, */*; q=0.01",
        'origin': "https://cn.airbusan.com",
        'x-requested-with': "XMLHttpRequest",
        'user-agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36",
        'content-type': "application/x-www-form-urlencoded",
        'referer': "https://cn.airbusan.com/content/individual/?",
        'accept-encoding': "gzip, deflate, br",
        'accept-language': "zh-CN,zh;q=0.9",
        'cache-control': "no-cache",
    }
    url = 'https://cn.airbusan.com/web/bookingApi/bookingCity'
    res = requests.get(url, headers=headers, verify=False)
    data = json.loads(res.text)
    index = ['domBlockCity', 'intBlockCity', 'listRouteYearAvail']
    routes = set()
    for li in index:
        item_li = data.get(li)
        for item in item_li:
            dep = item.get('depCity')
            arrs = item.get('arrCity').split(',')
            routes.update(set(['%s_%s' % (dep, arr) for arr in arrs]))
    routes = [i.split('_') for i in routes]
    utils.write_csv('all_route', 'BX', routes)
def generate_k_fold(label_file, output_dir, fold):
    # Split label_file `fold` times for cross-validation; each split uses
    # 1/fold of the samples as the validation set. The resulting
    # train_*.csv and valid_*.csv files are saved in output_dir.
    image_name = classified_by_label(label_file)
    # Size of one fold.
    size = len(image_name[0]) // fold
    # Recreate the directory holding the k-fold .csv files.
    os.system('rm -rf %s' % output_dir)
    os.makedirs(output_dir)
    # Generate the k folds.
    for i in range(fold):
        train_list, valid_list = [], []
        for j in range(3):
            for k, file_id in enumerate(image_name[j]):
                if k in range(i * size, (i + 1) * size):
                    valid_list.append([file_id, str(j)])
                else:
                    train_list.append([file_id, str(j)])
        write_csv(os.path.join(output_dir, 'train_%d.csv' % i), train_list)
        write_csv(os.path.join(output_dir, 'valid_%d.csv' % i), valid_list)
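
# Usage sketch (hypothetical paths; assumes classified_by_label returns one
# list of file ids per class, three classes here):
#   generate_k_fold('labels.csv', 'folds/', 5)
# produces folds/train_0.csv ... folds/train_4.csv and the matching
# valid_*.csv files, each row being [file_id, class_index].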
def default():
    areas = {"belfast": "belfast"}
    area_uris = utils.get_area_uris(areas)
    utils.print_uris(area_uris)
    JsonHelper("area_uris").dump(area_uris)
    utils.write_csv(area_uris)
def main():
    records = read_csv(input_fname)
    recordss = hashing(records, 'ICUSTAY_ID')
    print('#icu_stays: %d' % len(recordss))
    recordss = _excluding(recordss)
    print('#icu_stays: %d (excluding stays having <= %d records)' % (len(recordss), 6))
    _sorting(recordss, 'TIMESTAMP')
    recordss_avg = {}
    for i, key in enumerate(sorted(recordss.keys())):
        records = recordss[key]
        records_avg = _averaging(records)
        recordss_avg[key] = records_avg
        print('%d/%d' % (i, len(recordss.keys())))
    recordss_csv = []
    for key in sorted(recordss_avg.keys()):
        records = recordss_avg[key]
        recordss_csv.extend(records)
    write_csv(output_fname, INFO_HEADER + VITAL_SIGNS_HEADER + LAB_HEADER, recordss_csv)
def export_sts_format(self, path):
    content = []
    for i in range(self.size):
        source, target = self.feature_set[i][0], self.feature_set[i][1]
        stance = self.label_set[i]
        idx = i if self.idxs is None else self.idxs[i]
        content.append([idx, "none", "none", "none", "none", "none", "none", source, target, stance])
    utils.write_csv(content, STS_HEADER, path, delimiter="\t")
def data_venn(WD, model, name):
    """Function to gather the reactions in a model and write them into a CSV file.
    Used to make a Venn diagram."""
    list_id = []
    for reac in model.reactions:
        list_id.append([reac.id])
    utils.write_csv(WD, list_id, name + "_id_reac")
def __init__(self):
    self.designers = [
        s.strip()
        for s in open(os.path.join(DATA_DIR, FILE_DESIGNERS))
        if s.strip()
    ]
    header = [[
        'Name', 'Username', 'Profile', 'Shots', 'Projects', 'Followers',
        'Likes', 'Tags', 'Email', 'Website', 'Instagram', 'Facebook',
        'Twitter', 'Linkedin', 'Vimeo'
    ]]
    write_csv(header, DATA_DIR, CSV_DESIGNERS)
def save_relatives(list_name, relatives_list):
    relative_list = [[] for x in list_name]
    for i, item in enumerate(relatives_list):
        name = list_name[i]
        for j in item:
            if name != j[0]:
                relative_list[i].append(j[0])
            if name != j[1]:
                relative_list[i].append(j[1])
    utils.write_csv(['Symptom', 'Relatives'], 'data/relatives.csv', list_name, relative_list)
def export_full(self, path, feature_set=None, label_set=None, idxs=None, delimiter=","):
    feature_set = feature_set if feature_set is not None else self.feature_set
    label_set = label_set if label_set is not None else self.label_set
    idxs = idxs if idxs is not None else self.idxs
    assert len(feature_set) == len(label_set)
    content = []
    for i in range(len(feature_set)):
        source, target = feature_set[i][0], feature_set[i][1]
        stance = label_set[i]
        idx = i if idxs is None else idxs[i]
        content.append([idx, source, target, stance])
    utils.write_csv(content, HEADER, path, delimiter)
def help_treshold(WD, ini):
    """Function to help choose the threshold value, for personal use only,
    to combine with the R script 'tresholdSearch.R'."""
    listValues = []
    for i in range(101):
        test = 10 * i
        # Run the pipeline with the test value in place of the default.
        draft = blasting.pipeline_blast(ini, 0, 100, 1, 0, test)
        listValues.append([test, len(draft.genes), len(draft.reactions)])
    listValues.insert(0, ["Bit_Score", "Nb genes", "Nb reactions"])
    utils.write_csv(WD, listValues, "Treshold")
def run(self):
    """
    Scrape data and report any changes.
    """
    if self.soup is not None:
        jobs, jobs_html, colnames = self.scrape()
        # Look for differences between URL and CSV and report them (print and/or email).
        if os.path.isfile(self.filepath):
            self.report_changes(jobs, jobs_html, colnames)
        # Update the CSV file.
        utils.write_csv(self.filepath, jobs, colnames)
def get_routes():
    url = 'https://www.vietjetair.com/AirportList.aspx?lang=zh-CN'
    res = requests.get(url, timeout=30)
    data = json.loads(res.text)
    pairs = data.get('Pair')
    routes = []
    for pair in pairs:
        dep = pair.get('DepartureAirport').get('Code')
        arrs = pair.get('ArrivalAirports').get('Airport')
        for port in arrs:
            arr = port.get('Code')
            routes.append([dep, arr])
    utils.write_csv('all_route', 'VJ', routes)
def main():
    refined = []
    records = read_csv(input_fname)
    for record in records:
        _refined_record = _refiner(record)
        _vital_record = _subset_extractor(_refined_record, VITAL_SIGNS_HEADER)
        _info_record = _subset_extractor(_refined_record, INFO_HEADER)
        _lab_record = _subset_extractor(_refined_record, LAB_HEADER)
        refined.append({**_vital_record, **_info_record, **_lab_record})
    write_csv(output_fname, INFO_HEADER + VITAL_SIGNS_HEADER + LAB_HEADER, refined)
def convert_to_sst_format(input_path):
    file_name = ntpath.basename(input_path)
    output_path = os.path.join(os.path.dirname(input_path), "{}.sst".format(file_name))
    sst_data = []
    sst_header = ["sentence", "label"]
    sentiment_map = {"negative": 0, "neutral": 1}
    sentiment_data = utils.read_csv(input_path, load_header=True, delimiter="\t")
    for example in sentiment_data:
        sst_data.append([example[1], sentiment_map[example[2]]])
    utils.write_csv(sst_data, sst_header, output_path, delimiter="\t")
def make_protein_corres(WD, name, dicoRegions):
    """Function to create a CSV file with the correspondence between a protein and the associated gene.

    ARGS:
        WD (str) -- the path of the working directory to save the file.
        name (str) -- the name of the model being treated.
        dicoRegions -- the dictionary that contains the information needed
            (see pipelinePT() for the structure).
    """
    corres = []
    for region in dicoRegions.keys():
        for gene in dicoRegions[region].keys():
            for protein in dicoRegions[region][gene]["Proteins"].keys():
                corres.append([gene.upper(), protein.upper()])
    utils.write_csv(WD, corres, "protein_corres_" + name, "\t")
def main():
    records = read_csv(input_fname)
    # First gather statistics over the control (negative) records.
    neg_values = {}
    for record in records:
        if record['case/control'] == 'case':
            continue
        for name in HEADER:
            val = record[name]
            if val != 'NA':
                neg_values[name] = neg_values.get(name, list())
                neg_values[name].append(float(val))
    print('negative mean/std')
    for name in HEADER:
        print('{:40s} | mean: {:.3f}, std: {:.3f}'.format(
            name, np.mean(neg_values[name]), np.std(neg_values[name])))
    # Then impute: carry values forward in the middle, and use the negative
    # mean where there is no value at all (multiproc needed).
    recordss = hashing(records, 'ICUSTAY_ID')
    cnt = 0
    for stay_id, records in recordss.items():
        print('processing {}... {} / {}'.format(stay_id, cnt, len(recordss)))
        cnt += 1
        records = sorted(records, key=itemgetter('TIMESTAMP'))
        # Set the initial value.
        prev_val = copy.copy(records[0])
        for name in HEADER:
            if prev_val[name] == 'NA':
                prev_val[name] = np.mean(neg_values[name])
        for record in records:
            for name in HEADER:
                val = record[name]
                if val == 'NA':
                    record[name] = prev_val[name]
                else:
                    prev_val[name] = val
    recordss_csv = []
    for key in sorted(recordss.keys()):
        records = recordss[key]
        recordss_csv.extend(records)
    write_csv(output_fname, INFO_HEADER + HEADER, recordss_csv)
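
# Worked example of the carry-forward rule above (toy values): for one stay
# with HEADER = ['hr'] and records sorted by TIMESTAMP
#   [{'hr': 'NA'}, {'hr': '80'}, {'hr': 'NA'}, {'hr': '85'}]
# the first 'NA' becomes the negative-control mean, the second 'NA' becomes
# '80' (the last observed value), and observed values pass through unchanged.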
def main():
    result = np.load(os.path.join(result_dir, 'result.npy'))
    pig_list = np.load(os.path.join(test_name_dir, 'test_A_name_resnet50.npy'))
    row_result = 0  # the row of result being handled now
    all_result = []  # holds all the handled data
    for pig in result:
        for predict in range(30):
            tmp_array = [pig_list[row_result][0], predict + 1, pig[predict]]
            tmp_array = u.str2num(tmp_array)
            all_result.append(tmp_array)
        row_result += 1
    output_dir = os.path.join(result_dir, 'tidy_out.csv')
    u.write_csv(all_result, output_dir)
    print(all_result)
def main():
    if shutil.which('textract') is None:
        sys.exit('Textract not found, halting execution...')
    dh_metadata = get_metadata(config.DH_FOLDER_PATH, Report.dh)
    ciop_metadata = get_metadata(config.CIOP_FOLDER_PATH, Report.ciop)
    utils.write_xml(dh_metadata)
    utils.write_xml(ciop_metadata)
    if config.SUMMARY_FOLDER_PATH:
        dh_summary_path = f'{config.SUMMARY_FOLDER_PATH}/dh_summary.csv'
        utils.write_csv(dh_summary_path, dh_metadata, dh_metadata[0].keys())
        ciop_summary_path = f'{config.SUMMARY_FOLDER_PATH}/ciop_summary.csv'
        utils.write_csv(ciop_summary_path, ciop_metadata, ciop_metadata[0].keys())
def generate_dataset(element_list, generate_target, rows, write):
    '''
    The intent of this function is to combine two elements, like symptoms &
    conditions, into a data set, with the goal of exploring all possible
    prediction relationships between variable sets.

    generate_target is the name of the element you're trying to predict
    ('conditions'). This requires that you already generated the rows for
    the other element names in element_list.

    Right now all metadata is still generated on every run; filtering by
    the metadata_keys parameter still needs to be added.
    '''
    dataset = []
    filename = ''.join([s[0] for s in element_list]) if len(element_list) > 3 else '_'.join(element_list)
    element_path = ''.join(['datasets/', filename, '.csv'])
    # Make sure the target variable isn't included in the index set except in the last position.
    for r in rows:
        row = {}
        for field in r:
            if field in element_list:
                row[field] = ','.join(r[field]) if type(r[field]) == set else r[field]
        dataset.append(row)
    if len(dataset) > 0:
        if write:
            wrote = write_csv(rows, element_list, element_path)
            if wrote:
                return element_path
    return False
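
# Usage sketch (hypothetical arguments): with two element names the filename
# is joined with underscores, so
#   generate_dataset(['symptoms', 'conditions'], 'conditions', rows, write=True)
# would write datasets/symptoms_conditions.csv and return that path on
# success; with more than three elements only the initials are used.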
def export_scores(self, heat_id=None, n_best_waves=CherrypyWebInterface.N_BEST_WAVES, mode=None):
    heat_id = int(heat_id)
    heat_info = self.collect_heat_info(heat_id)
    id2color = self._get_id2color(heat_info.get('participants', []))
    judges = set(heat_info.get('judges', []))
    scores_by_surfer_wave = self._get_scores(heat_id, judges)
    average_scores = score_processing.compute_average_scores(scores_by_surfer_wave, judges)
    best_scores_average = score_processing.compute_best_waves(average_scores, n_best_waves)
    sorted_total_scores = score_processing.compute_places_total_scores(average_scores, n_best_waves)
    all_scores, best_scores_by_judge = score_processing.compute_by_judge_scores(scores_by_surfer_wave, n_best_waves)
    export_data = self._collect_export_data(all_scores, average_scores, best_scores_by_judge,
                                            best_scores_average, sorted_total_scores,
                                            heat_info, id2color, n_best_waves)

    heat_name = u'{} {} {}'.format(heat_info['tournament_name'], heat_info['category_name'], heat_info['heat_name'])
    directory = 'exports'
    if not os.path.exists(directory):
        os.makedirs(directory)

    filename = None
    if mode == 'judge_sheet':
        filename = u'export_{}_heat_sheet.csv'.format(heat_name)
    elif mode == 'best_waves':
        filename = u'export_{}_best_judge_waves.csv'.format(heat_name)
    elif mode == 'averaged_scores':
        filename = u'export_{}_best_average_waves.csv'.format(heat_name)
    if filename is not None:
        utils.write_csv(os.path.join(directory, filename),
                        export_data[mode]['data'], export_data[mode]['header'])

    filename = os.path.abspath(os.path.join(directory, u'Auswertung_{}.xlsx'.format(heat_name)))
    utils.write_xlsx(filename, export_data)
    from cherrypy.lib.static import serve_file
    return serve_file(filename, "application/x-download", "attachment")
def start(self):
    self.status.set("Status: -")
    self.leng.set("Iteration: -/- and Number: -/-")
    self.totaltime.set("Total time: -")
    self.ones.set("Total ones: -")
    self.types.set("Progress: -")
    self.startButton.config(state='disabled')
    self.browseButton.config(state='disabled')
    self.cancelButton.config(state='normal')
    self.maxnumberSpinbox.config(state='disabled')
    self.complexSpinbox.config(state='disabled')
    if int(self.complexSpinbox.get()) in range(1, 6) and int(self.maxnumberSpinbox.get()) in range(1, 22) and self.completeName != "":
        start_time = time.time()
        # Load the input table from CSV or JSON, depending on the file extension.
        if self.name.get().split('.')[1] == 'csv':
            self.g = generator.Generator(self.maxnumberSpinbox.get(), self.complexSpinbox.get(),
                                         utils.read_csv(self.completeName), self.cancelButton, self.types)
        else:
            self.g = generator.Generator(self.maxnumberSpinbox.get(), self.complexSpinbox.get(),
                                         utils.read_json(self.completeName), self.cancelButton, self.types)
        self.g.count_one()
        self.ones.set("Total ones: {0}".format(len(self.g.table_uno)))
        i = 0
        while self.g.maxim > 1:
            i += 1
            self.leng.set("Iteration: {0}/{1} and Number: {2}".format(i, self.complexSpinbox.get(), self.g.maxim))
            self.status.set("Status: Generating puzzle...")
            self.g.step_one()
            tim = utils.sec_to(int(time.time() - start_time))
            self.totaltime.set("Total time: {0}h:{1}m:{2}s".format(tim[0], tim[1], tim[2]))
            self.status.set("Status: Applying condition one...")
            self.g.cond_dos(1)
            tim = utils.sec_to(int(time.time() - start_time))
            self.totaltime.set("Total time: {0}h:{1}m:{2}s".format(tim[0], tim[1], tim[2]))
            self.status.set("Status: Applying condition two...")
            self.g.cond_dos(2)
            tim = utils.sec_to(int(time.time() - start_time))
            self.totaltime.set("Total time: {0}h:{1}m:{2}s".format(tim[0], tim[1], tim[2]))
            if self.g.maxim >= 4:
                self.status.set("Status: Applying condition three...")
                self.g.cond_dos(3)
                tim = utils.sec_to(int(time.time() - start_time))
                self.totaltime.set("Total time: {0}h:{1}m:{2}s".format(tim[0], tim[1], tim[2]))
            self.g.count_one()
            self.ones.set("Total ones: {0}".format(len(self.g.table_uno)))
            if i == self.g.iters:
                self.g.maxim -= 1
                i = 0
        if self.name.get().split('.')[1] == 'csv':
            utils.write_csv(self.g.table_all)
        else:
            utils.write_json(self.g.table_all)
        if self.g.cancel:
            self.status.set("Status: Cancelled")
        else:
            self.status.set("Status: Completed")
        self.g = None
    self.startButton.config(state='normal')
    self.browseButton.config(state='normal')
    self.cancelButton.config(state='disabled')
    self.maxnumberSpinbox.config(state='normal')
    self.complexSpinbox.config(state='normal')
from django.conf import settings
settings.configure()
import django
django.setup()
from rest_framework import serializers as rf_serializers
from utils import write_csv
import marshmallow
import serpy


class SimpleRF(rf_serializers.Serializer):
    foo = rf_serializers.ReadOnlyField()


class SimpleM(marshmallow.Schema):
    foo = marshmallow.fields.Str()


class SimpleS(serpy.Serializer):
    foo = serpy.Field()


if __name__ == '__main__':
    data = {'foo': 'bar'}
    write_csv(__file__, data, SimpleRF, SimpleM().dump, SimpleS, 100)
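
# A benchmark harness, by the look of it: write_csv (imported from this repo's
# utils, whose exact behavior is an assumption here) presumably times each of
# the three serializers on `data` and records the results as CSV. Running the
# script would then compare Django REST Framework, marshmallow and serpy on a
# one-field object over 100 repetitions.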
import sys
import os
import docreader
import utils

statdir = sys.argv[1]
taggeddir = sys.argv[2]
outfile = sys.argv[3]

taggedfiles = os.listdir(taggeddir)

# Create bigdocs.
header = ['label', 'tweet_id', 'author_id', 'date', 'time', 'authorname', 'text', 'tagged']
bigdocs = [header]
for taggedfile in taggedfiles:
    label = taggedfile.split('_')[1][:-4]
    with open(statdir + 'sequence_' + label + '.txt', 'r', encoding='utf-8') as stat_in:
        lines = stat_in.read().split('\n')
    event_info = lines[0]
    date = event_info.split('\t')[1]
    reader = docreader.Docreader()
    lines = reader.parse_csv(taggeddir + taggedfile)
    tagged = '\n'.join([x[-1] for x in lines[1:]])
    text = ' '.join([x[-2] for x in lines[1:]])
    bigdoc = [label, '-', '-', date, '-', '-', text, tagged]
    bigdocs.append(bigdoc)

utils.write_csv(bigdocs, outfile)
date = catdict['date'] if 'date' in catdict.keys() else False
time = catdict['time'] if 'time' in catdict.keys() else False
sepdict = {'tab': '\t', 'space': ' '}
delimiter = sepdict[dp['separator']] if 'separator' in dp.keys() else False
header = dp.getboolean('header')
reader = docreader.Docreader()
reader.parse_doc(doc, delimiter, header, date, time)
new_lines, other_lines = reader.set_lines(fields, columndict)
csv_doc = doc[:-4] + '_standard.csv'
if new_lines[1][-1] != '-':
    new_lines_tags = []
    for line in new_lines:
        t = line[-1]
        line[-1] = '\n'.join(['\t'.join(x.split('|')) for x in t.split(' ')])
        new_lines_tags.append(line)
    utils.write_csv(new_lines_tags, csv_doc)
else:
    utils.write_csv(new_lines, csv_doc)
doc = csv_doc
if other_lines:
    if len(other_lines) > 0:
        meta_doc = doc[:-4] + '_meta.csv'
        utils.write_csv(other_lines, meta_doc)

##### Tagging data #####
if dp.getboolean('tag'):
    tagged_csv = doc[:-4] + '_tagged.csv'
    if dp['tagger'] == 'frog':
        os_string = 'python3 frog_data.py ' + doc + ' ' + tagged_csv + ' '
        if dp.getboolean('tweets'):
            os_string += '1'
        else:
    def get_x(self, obj):
        return obj.x + 10


class ComplexS(serpy.Serializer):
    foo = serpy.StrField()
    bar = serpy.IntField(call=True)
    sub = SubS()
    subs = SubS(many=True)


if __name__ == '__main__':
    data = {
        'foo': 'bar',
        'bar': lambda: 5,
        'sub': {'w': 1000, 'x': 20, 'y': 'hello', 'z': 10},
        'subs': [{'w': 1000 * i, 'x': 20 * i, 'y': 'hello' * i, 'z': 10 * i}
                 for i in range(10)]
    }
    write_csv(__file__, data, ComplexRF, ComplexM().dump, ComplexS, 1)
import sys
import os
import docreader
import utils

eventdir = sys.argv[1]

all_files = os.listdir(eventdir)
txt_files = [x for x in all_files if x[-4:] == '.txt']
csv_files = [x for x in all_files if x[-4:] == '.csv']
txt_stripped = [x[:-4] for x in txt_files]
csv_stripped = [x[:-4] for x in csv_files]
txt_unique = list(set(txt_stripped) - set(csv_stripped))

fields = ['label', 'doc_id', 'author_id', 'date', 'time', 'authorname', 'text', 'tagged']
columndict = {0: 'doc_id', 1: 'author_id', 2: 'authorname', 3: 'date', 4: 'time', 5: 'text'}

while len(txt_unique) > 0:
    eventfile = txt_unique[0]
    print(eventfile)
    reader = docreader.Docreader()
    reader.parse_doc(eventdir + eventfile + '.txt', '\t', False, False, False)
    new_lines, other_lines = reader.set_lines(fields, columndict)
    csv_doc = eventdir + eventfile + '.csv'
    utils.write_csv(new_lines, csv_doc)
    # Refresh the list of .txt files that still lack a .csv counterpart.
    all_files = os.listdir(eventdir)
    txt_files = [x for x in all_files if x[-4:] == '.txt']
    csv_files = [x for x in all_files if x[-4:] == '.csv']
    txt_stripped = [x[:-4] for x in txt_files]
    csv_stripped = [x[:-4] for x in csv_files]
    txt_unique = list(set(txt_stripped) - set(csv_stripped))