def fromCSV(TimeData, dataPath1, dataPath2, dataName):
    """Prepares data of survival over time for model definition.

    Initialize from a csv file using class method fromCSV.

    Input:
    - dataPath1, dataPath2 (str): paths to csv files corresponding to the
      survival over time of each group. The csv files should have the days of
      observation in each column, starting from day 0 (which indicates how many
      hosts were challenged at each dose) in the second column (the first
      column holds the doses). The last column holds the last day of
      observation and the number of hosts which survived each of the
      challenges. The results for each challenge dose go on a separate line,
      starting from the second line (the first line holds the days of
      observation). The doses and the final day of observation must be the
      same for both groups.
    - dataName (str): a descriptive text string with no spaces, used to name
      the folder and files of saved results (ex: 'survWolb2012').

    Returns a Data object.
    """
    (timesDeath1, survivors1, tmax1, times1,
     doses1, ndoses1, nhosts1) = ut.readcsv(dataPath1)
    (timesDeath2, survivors2, tmax2, times2,
     doses2, ndoses2, nhosts2) = ut.readcsv(dataPath2)
    # Use logical `not`/`and` rather than bitwise `~`/`&`: `~` applied to a
    # Python bool yields a nonzero int, so the original checks were always truthy.
    if not ((tmax1 == tmax2) and (sum(times1 == times2) == len(times1))):
        raise DataError("Times of observation not the same in two datasets, "
                        "please check the data in %s and %s" % (dataPath1, dataPath2))
    if not ((ndoses1 == ndoses2) and (sum(doses1 == doses2) == ndoses1)):
        raise DataError("Doses not the same in two datasets, "
                        "please check the data in %s and %s" % (dataPath1, dataPath2))
    return TimeData(timesDeath1, timesDeath2, survivors1, survivors2,
                    nhosts1, nhosts2, tmax1, times1, doses1, ndoses1,
                    dataName, dataPath1, dataPath2)
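# Illustrative csv layout implied by the docstring above (all numbers are made
# up; this sketch is not part of the original module). The first line holds the
# days of observation, the first column the doses, the day-0 column the number
# of hosts challenged, and both group files must share the same doses and days:
#
#   dose,  0,  1,  2, ..., 14
#   1e2,  30, 29, 27, ..., 21
#   1e4,  30, 25, 20, ..., 11
#
# A hedged call sketch with hypothetical paths:
#
#   surv = fromCSV(TimeData, 'data/group1.csv', 'data/group2.csv', 'survWolb2012')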
def generate_report(date):
    script_path = os.path.dirname(os.path.abspath(__file__))
    csv_file = f'watch-{date}.csv'
    csv_path = os.path.join(script_path, 'csv_data', csv_file)
    if not os.path.exists(csv_path):
        return False
    # Copy the source csv next to the generated report.
    shutil.copy2(csv_path, os.path.join(script_path, 'web'))
    # '%-d' (day without zero padding) is a glibc strftime extension.
    report_date = datetime.strptime(date, '%m-%d-%Y').strftime('%A %-d %B %Y')
    generation_date = datetime.now().strftime('%d %B %Y %H:%M')
    csv_data = readcsv(csv_path)
    colors_data = readjson(os.path.join(script_path, 'web', 'colors.json'))
    json_data = json.dumps({
        'report_date': report_date,
        'generation_date': generation_date,
        'csv_file': csv_file,
        'csv_data': csv_data,
        'colors_data': colors_data,
    })
    rendered = _render_template(json_data)
    report_filename = os.path.join(script_path, 'web', f'report-{date}.html')
    with open(report_filename, 'w') as f:
        f.write(rendered)
    return report_filename
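# Hedged usage sketch (assumption, not part of the original script): the date
# string follows the '%m-%d-%Y' pattern used by strptime above.
#
#   report = generate_report('01-31-2024')
#   if report:
#       print('report written to', report)
#   else:
#       print('no csv found for that date')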
def load_eval_data(data_path, w2v_path, w2v_dim, sequence_length, num_classes, labels):
    print("[LOADING] data: ", data_path)
    data = utils.readcsv(data_path)
    tagged_sents = []
    e1_list = []
    e2_list = []
    y_list = []
    for line in data:
        tagged_s, e1, e2 = utils.pos_tag(line[0])
        tagged_sents.append(tagged_s)
        e1_list.append(e1)
        e2_list.append(e2)
        one_hot_label = one_hot_encodding(labels.index(line[1]), num_classes)
        y_list.append(one_hot_label)
    # Build the vocabulary and embedding matrix from the tagged sentences.
    bow, embed = const_bow(tagged_sents, w2v_path, w2v_dim)
    sentences = []
    en1_pos = []
    en2_pos = []
    for i in range(len(tagged_sents)):
        pos_tagged_sent = tagged_sents[i]
        e1 = e1_list[i]
        e2 = e2_list[i]
        tmp_sent = []
        tmp_pos1 = []
        tmp_pos2 = []
        for idx, token in enumerate(pos_tagged_sent):
            if token in bow:
                tmp_sent.append(bow[token])
                # Relative position of each token to the two entities.
                tmp_pos1.append(pos_embed(e1 - idx))
                tmp_pos2.append(pos_embed(e2 - idx))
        sent_len = len(pos_tagged_sent)
        # Pad up to sequence_length; `<` avoids an infinite loop when a
        # sentence is already longer than sequence_length.
        while len(tmp_sent) < sequence_length:
            tmp_sent.append(bow["<zero>"])
            tmp_pos1.append(122)
            tmp_pos2.append(122)
            sent_len += 1
        sentences.append(tmp_sent)
        en1_pos.append(tmp_pos1)
        en2_pos.append(tmp_pos2)
    return sentences, en1_pos, en2_pos, y_list, bow, embed
def parse(path, label, output=None, recursive=None, clean=False):
    """
    Read csv to extract and parse html from single or multiple files and
    write results to .txt.
    @params:
        path      - Required : file or directory to parse (Str)
        label     - Required : csv column name in which the html is stored (Str)
        output    - Optional : output filename (use only when parsing a single file) (Str)
        recursive - Optional : recursive execution for directories (Bool)
        clean     - Optional : preprocess the input with utils.clean (Bool)
    """
    output = output if output else path + '.html'
    if recursive:
        files = [f for f in os.listdir(path) if not f.startswith('.')]
        files = [f for f in files if f.endswith('.csv')]
        for index, filename in enumerate(files):
            parse(os.path.join(path, filename), label, None, False, clean)
            print 'Parsed document ' + str(index + 1) + ' of ' + str(len(files))
    else:
        utils.progressbar_update(0, path)
        read_size = 0
        html_file = open(output, 'w+')
        for index, row in enumerate(utils.readcsv(path)):
            read_size += row['size']
            row = row[label]
            # Strip non-ASCII characters and newlines/tabs, then put each
            # <article> tag on its own line.
            regex = re.compile(ur'[^\x00-\x7F]+', re.UNICODE)
            row = re.sub(regex, '', row)
            row = re.sub('[\n\t\r]', '', row)
            row = re.sub('<article', '\n<article', row)
            html_file.write(row)
            utils.progressbar_update(read_size, path)
        html_file.close()
        html_parse.parse(path=output, output=None, recursive=False, clean=clean)
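# Hedged usage sketches (assumptions, not part of the original module): the
# directory, file and column names below are hypothetical.
#
#   parse('dumps/', 'html', recursive=True, clean=True)    # every .csv in dumps/
#   parse('dumps/page.csv', 'html', output='page.html')    # a single file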
def load_agent_test_data(data_path, sequence_length, num_classes, vocab_, labels, entity2_dict):
    print("[LOADING] data: ", data_path)
    data = utils.readcsv(data_path)
    sentences = []
    en1_pos = []
    en2_pos = []
    y_list = []
    entities = []
    for line in tqdm(data):
        sent = line[0]
        # Pull the raw entity strings out of the <e1>/<e2> markup.
        entity1 = re.findall("<e1>.*?</e1>", sent)[0]
        entity2 = re.findall("<e2>.*?</e2>", sent)[0]
        entity1 = entity1.replace("<e1>", "").replace("</e1>", "")
        entity2 = entity2.replace("<e2>", "").replace("</e2>", "")
        one_hot_label = one_hot_encodding(labels.index(line[1]), num_classes)
        pos_tagged_sent, e1, e2 = utils.pos_tag(sent)
        tmp_sent = []
        tmp_pos1 = []
        tmp_pos2 = []
        for idx, token in enumerate(pos_tagged_sent):
            if token in vocab_:
                tmp_sent.append(vocab_[token])
                tmp_pos1.append(pos_embed(e1 - idx))
                tmp_pos2.append(pos_embed(e2 - idx))
        sent_len = len(pos_tagged_sent)
        # Pad up to sequence_length; `<` avoids an infinite loop when a
        # sentence is already longer than sequence_length.
        while len(tmp_sent) < sequence_length:
            tmp_sent.append(vocab_["<zero>"])
            tmp_pos1.append(122)
            tmp_pos2.append(122)
            sent_len += 1
        sentences.append(tmp_sent)
        en1_pos.append(tmp_pos1)
        en2_pos.append(tmp_pos2)
        y_list.append(one_hot_label)
        entity_pair = (entity1, entity2)
        entities.append(entity_pair)
    Bags = constBag(entities, y_list, entity2_dict)
    return sentences, en1_pos, en2_pos, y_list, Bags
predicted_y = []
probs = []
for batch in tqdm(range(total_batch_size)):
    st = batch * batch_size
    en = min((batch + 1) * batch_size, total_data_size)
    batch_sents = sents_list[st:en]
    batch_y = y_list[st:en]
    batch_pos1 = en1_position_list[st:en]
    batch_pos2 = en2_position_list[st:en]
    feed_dict = {
        cnn.input_text: batch_sents,
        cnn.input_y: batch_y,
        cnn.pos1: batch_pos1,
        cnn.pos2: batch_pos2,
    }
    prediction, probabilities = sess.run(
        [cnn.prediction, cnn.probabilities], feed_dict=feed_dict)
    for i in range(len(batch_sents)):
        predicted_y.append(properties_list[prediction[i]])
        probs.append(probabilities[i][prediction[i]])

# Attach the predicted label and its probability to each evaluation row.
result = []
eval_data = utils.readcsv(test_data)
for i, line in enumerate(eval_data):
    w = [line[0], predicted_y[i], line[2], line[3], line[4], probs[i]]
    result.append(w)
utils.writecsv(output_path, result)
# NOTE: the argument-count guard below is reconstructed (assumption); the
# original check preceding this block is not shown. dtype, pretend, fromtime
# and totime are assumed to be set earlier in the script.
if len(sys.argv) == 2:
    plugload_name = sys.argv[0]
    state_fname = sys.argv[1]
    postgresops.check_evil(plugload_name)
    plugload_row = devicedb.get_devicemetas(
        where="key='PLUGLOAD' and value='%s'" % plugload_name, limit=1)
    if len(plugload_row) == 0:
        print "Cannot find plugload metadata for " + plugload_name
        sys.exit(1)
    plugload_id = plugload_row[0].ID
else:
    print "Not enough (or too many) input arguments"
    sys.exit(1)

if dtype == "GHMM":
    data = utils.readcsv(state_fname)
    if len(data) == 0:
        print "No data in file"
        sys.exit(1)
    elif len(data[0]) != 4:
        print "GHMM data file must be CSV with format \"state_no,counts,mean,variance\""
        sys.exit(1)
    print "Read %d states" % len(data)
    if not pretend:
        import learningdb
        learningdb.connect()
        learningdb.initdb()
        learningdb.insertHMMGaussianEmissions(
            plugload_id, fromtime, totime,
            [i[0] for i in data], [i[1] for i in data],
            [i[2] for i in data], [i[3] for i in data])
def rel_data(gold_data, pred_data):
    # Group gold and predicted tags by relation (column 1 of the gold file).
    dict_data = {}
    for g_line, p_line in zip(gold_data, pred_data):
        rel = g_line[1]
        if rel in dict_data:
            dict_data[rel]["gold_data"].append(g_line[-1])
            dict_data[rel]["pred_data"].append(p_line[-1])
        else:
            dict_data[rel] = {}
            dict_data[rel]["gold_data"] = [g_line[-1]]
            dict_data[rel]["pred_data"] = [p_line[-1]]
    return dict_data


goldfile = "../data/ko/agent_test.csv"
predfile = "../result/agent_target_addBOTH_44_result.csv"

gold_data = utils.readcsv(goldfile)
pred_data = utils.readcsv(predfile)

gold_tags = exTag(gold_data)
pred_tags = exTag(pred_data)

print(classification_report(gold_tags, pred_tags, digits=4))

macro_p, macro_r, macro_f1, _ = precision_recall_fscore_support(
    gold_tags, pred_tags, average="macro")
micro_p, micro_r, micro_f1, _ = precision_recall_fscore_support(
    gold_tags, pred_tags, average="micro")
acc = accuracy_score(gold_tags, pred_tags)

print("Macro: {:.4f}\t{:.4f}\t{:.4f}".format(macro_p, macro_r, macro_f1))
print("Micro: {:.4f}\t{:.4f}\t{:.4f}".format(micro_p, micro_r, micro_f1))
print("acc: {:.4f}".format(acc))

dict_data = rel_data(gold_data, pred_data)
import numpy as np
import os
import sys

cwd = os.getcwd()
utilspath = cwd + '/../utils/'
sys.path.append(utilspath)
import utils

basefolder = cwd + '/../../data/filter/'

# First get the LNA gain: one S21 measurement per amplifier serial number.
datafolder2 = '../../data/gain2/'
snarray = ['022966', '022967', '022968', '022969', '022970',
           '023230', '023231', '023232']
lnagain = []
for sn in snarray:
    fgainname = datafolder2 + 'wenteqS21' + sn + '.csv'
    data = utils.readcsv(fgainname)
    lnagain.append(data)

# Get the on-board filter data (CBP-B1230C+_Plus25DegC.s2p).
fref = 1400  # MHz; frequency to normalize to (resistor calibration was done at this frequency)
fname = '/CBP-B1230C+_Plus25DegC.s2p'
boardfilter = utils.readminicircuitdata(basefolder + fname)  # returns [freq, gain]
attenatfref = np.interp([fref], boardfilter[0], boardfilter[1])
boardfilter[1] = boardfilter[1] - attenatfref  # normalize the gain to the reference frequency

# Get the bias-T filter response from measured data.
measbiast = utils.getjacquesmeas(basefolder + '/scope_42.csv', basefolder + '/scope_43.csv')

# Get the cable loss from measured data.
meascable = utils.getjacquesmeas(basefolder + '/scope_43.csv', basefolder + '/scope_44.csv')