def test_Should_ReturnIteratorOverNamedTupleRows_When_Called(self):
    """read_csv should yield one namedtuple per data row, typed by the header."""
    patched_lines = mock.patch('read_csv.get_file_lines', mock.MagicMock())
    with patched_lines as fake_get_file_lines:
        fake_get_file_lines.return_value = ['id,name', '1,John']
        rows = list(read_csv('myfile'))
        RowType = namedtuple('Row', 'id,name')
        self.assertEqual([RowType('1', 'John')], rows)
def main():
    """Entry point: print the welcome banner, load the student roster from the
    CSV given as argv[1], then fetch workspace details for every student
    (argv[2] is forwarded to the workspace lookup as a string)."""
    print(constants.WELCOME_MESSAGE)
    students = read_csv.read_csv(sys.argv[1])
    print(students)
    # FIX: the original built a throwaway list comprehension purely for its
    # side effects; a plain loop states the intent and allocates nothing.
    for student in students:
        workspaces.get_workspaces_details(student, str(sys.argv[2]))
def load_email_data(build_data, extension, stemmer='PorterStemmer', vectorizer='TfidfVectorizer', num_features = None, labels=np.array([])): train_data = read_csv(data_directory + 'request_info_train.txt') test_data = read_csv(data_directory + 'request_info_test.txt') train_length = len(train_data) all_data = train_data + test_data if build_data in (3, 4): print "Loading Email Text Data" #Get Subjects for Text Analysis body_features = buildEmailText(np.array(all_data), stemmer=stemmer, vectorizer=vectorizer, num_features = num_features) f = open('pickle_data/body_features_' + extension + '_' + stemmer + '.pkl', 'w') pickle.dump(body_features, f) f.close() else: print "Pickling Email Text Features" f = open('pickle_data/body_features_' + extension + '_' + stemmer + '.pkl', 'r') body_features = pickle.load(f) f.close() if labels.any(): chi2 = fs.chi2(body_features, labels) strong = [] weak = [] for i, p in enumerate(chi2[1]): if p < 0.1: weak.append(i) if p < .05: strong.append(i) strongFeatures = body_features[:,strong] weakFeatures = body_features[:,weak] f = open('pickle_data/body_features_' + '_chi2_strong_' + extension + '_' + stemmer + '.pkl', 'w') pickle.dump(strongFeatures, f) f.close() f = open('pickle_data/body_features_' + '_chi2_weak_' + extension + '_' + stemmer + '.pkl', 'w') pickle.dump(weakFeatures, f) f.close() print "Text Data Returned" train = body_features[:train_length] test = body_features[train_length:] return train, test
def setUp(self):
    """Create three known Chessplayer fixtures plus the roster parsed from CSV."""
    self.playerA = Chessplayer('Corr', 'Fiona', 'Fiona Corr', 'USA', '14/07/1998', 'n/a')
    self.playerB = Chessplayer('Patterson', 'Peter', 'Petter Patterson', 'UK', '21/05/1957', 'n/a')
    self.playerC = Chessplayer('Ramsay', 'Gordon', 'Gordon Ramsey', 'UK', '03/02/1977', 'n/a')
    # List literal instead of three appends onto an empty list.
    self.chessPlayers = [self.playerA, self.playerB, self.playerC]
    self.players = read_csv('chess-players.csv')
def main():
    """Plot candlesticks with Bollinger bands (top) and historical volatility (bottom)."""
    window = 90
    price = read_csv("./data/7267_2018.csv")
    bands = algo.algo.bband(price['Close'], 9, 2)
    # Unpack figure and axis array directly instead of star-unpacking.
    fig, axarr = plt.subplots(nrows=2, figsize=(15, 8), sharex=True)
    top, bottom = axarr[0], axarr[1]
    candlestick(top, price, periods=window)
    bband_plot(top, bands, periods=window)
    show_hv(bottom, price, periods=window)
    plt.show()
def handle_foreign(self):
    """Read the CSV source and print a seed object (and its URL) per URL entry."""
    parser = read_csv()
    parser.read()
    # These getter calls are kept even though their results go unused here,
    # in case they mutate the parser's state.
    field = parser.getField()
    urls = parser.getUrl()
    rows = parser.getrows()
    seed_list = [seed(u) for u in urls]
    for s in seed_list:
        print(s)
        print(s.URL)
def open_file(*args):
    """Ask the user for a .csv file, load its header into the combobox, and
    enable/disable the option widgets depending on whether a header exists."""
    chosen_path = filedialog.askopenfilename()
    filename.set(chosen_path)
    parsed_header = checkheader(read_csv.read_csv(chosen_path))
    # Reset previous selection/result before repopulating.
    header.set("")
    result.set("")
    # NOTE: .grid() returns None, so `column` is always None; kept for parity.
    column = ttk.Combobox(mainframe, textvariable=header,
                          values=parsed_header,
                          state="readonly").grid(column=2, row=2, sticky=(W, E))
    # enable or disable options if header is empty
    create_options(bool(parsed_header))
def geojson(viewid, address_columns, description_columns):
    """Yield a GeoJSON Point Feature dict for each row of the view's CSV.

    viewid              -- basename of the CSV file under ROWS_DIR
    address_columns     -- (street_column, zipcode_column) pair of column names
    description_columns -- columns joined (comma+newline) into the popup text

    Coordinates come, in order of preference, from an explicit lat/lng
    column, from geocoding a zipcode-only / street-only / street+zipcode
    address, or — when nothing usable exists — from a random point.
    Rows whose address fails to geocode are skipped.
    """
    csv = read_csv(os.path.join(ROWS_DIR, viewid + '.csv'))
    for row in csv:
        latlng_column = find_latlng_column(row)
        street_column, zipcode_column = address_columns
        if latlng_column:
            # Row carries explicit coordinates; no geocoding needed.
            coords = get_lnglat(row[latlng_column])
        elif not street_column and annoying_get(row, zipcode_column):
            # Zipcode only.
            address = 'New York, NY, %s' % annoying_get(row, zipcode_column)
            coords = geocode(address)
        elif not zipcode_column and annoying_get(row, street_column):
            # Street only.
            address = '%s, New York, NY' % annoying_get(row, street_column)
            coords = geocode(address)
        elif annoying_get(row, street_column) and annoying_get(
                row, zipcode_column):
            # Both present; avoid repeating the zipcode when the street
            # value already contains it.
            street = annoying_get(row, street_column)
            zipcode = annoying_get(row, zipcode_column)
            if zipcode in street:
                address = '%s, New York, NY' % street
            else:
                address = '%s, New York, NY, %s' % (street, zipcode)
            coords = geocode(address)
        else:
            # No usable location information at all.
            coords = random_lnglat()
        description = ',\n'.join(
            filter(None, [row.get(a, '') for a in description_columns]))
        # Skip addresses that could not be geocoded.
        if coords:
            lng, lat = coords
            yield {
                "type": "Feature",
                "properties": {
                    "popupContent": description,
                },
                "geometry": {
                    "type": "Point",
                    "coordinates": [lng, lat],
                }
            }
def test():
    """Scatter-plot price against square footage and bedroom count from ex1data2.txt."""
    data = read_csv("ex1data2.txt")
    features, prices = data['X'], data['y']
    fig, (left, right) = plt.subplots(1, 2)
    left.set_xlabel('Square Footage')
    left.set_ylabel('Price')
    left.set_title('Price by Size')
    left.plot(features[:, 0], prices, 'bo')
    right.set_xlabel('Number of Bedrooms')
    right.set_ylabel('Price')
    right.set_title('Price by Bedrooms')
    right.plot(features[:, 1], prices, 'ro')
    fig.tight_layout()
    plt.savefig("fig1.ps")
def write_sale():
    """ Write sale object from csv """
    # Source rows come from the 'ventas' CSV; the model handles persistence.
    sale_reader = read_csv('ventas')
    sale_writer = model()
    # Clear previously loaded sales before re-inserting.
    sale_writer.unlink_sale()
    for s in sale_reader:
        # Parameterized insert: one '?' placeholder per CSV field, so values
        # are bound by the driver rather than string-concatenated.
        sql = """ INSERT INTO SCO$TCBFC_SGMA (FCSG_TPDC, FCSG_SERIEFC, FCSG_NROFAC, FCSG_MTIN, FCSG_CODUNI, FCSG_CTDDOC, FCSG_PRCUNI, FCSG_MTOREN, FCSG_MTODTO1, FCSG_PSPDCTO, FCSG_MTOOBJR, FCSG_MTOIMP, FCSG_FECFACT, FCSG_NRORUC, FCSG_CODCLIE, FCSG_PSPNRO,FCSG_COPA, FCSG_UOCI, FCSG_CTRL) VALUES (?,? ,? ,? ,? ,? ,? ,? ,? ,? ,? ,? ,? ,?,? ,? ,?,?,? ); """
        sale_writer.write_sale(sql, s)
    # Post-load verification pass.
    sale_writer.validate_sales()
    sale_writer.read_validation_log()
def main():
    """Run the selected path-finding algorithm on the selected map and report stats."""
    algorithm_chosen = choose_algorithm()
    map_chosen = choose_map()
    mapa, start, end = read_csv(map_chosen)
    path = []
    num_visited_nodes = 0
    # Dispatch table instead of an if/elif chain; unknown choices leave the
    # defaults in place, exactly like the original.
    solvers = {1: a_star_, 2: dijkstra, 3: best_first}
    start_time = time.time()
    solver = solvers.get(algorithm_chosen)
    if solver is not None:
        path, num_visited_nodes = solver(mapa, start, end)
    end_time = time.time()
    print('\nAlgorithm has checked: %s nodes' % (num_visited_nodes))
    print('\nPATH:')
    print(path)
    print('\nPATH has: %s nodes' % (len(path)))
    print("\n--- RUN TIME: ---")
    print("--- %s ms ---\n" % ((end_time - start_time) * 1000))
from create_output import create_output
from read_csv import read_csv

if __name__ == '__main__':
    # Parse the sample CSV (grouped by date) and hand it to the report writer.
    parsed_by_date = read_csv('./testfiles/example.csv')
    create_output(parsed_by_date)
import subprocess, os
import argparse
import config
from read_csv import read_csv
from theory import *
from plot import *


def script(cmd):
    # Echo the command, then run it through the shell.
    # NOTE(review): shell=True is only used here with a fixed, locally built
    # command string — no untrusted input reaches it in this file.
    print cmd
    subprocess.call(cmd, shell=True)

if __name__ == "__main__":
    read_csv("sample/601318.csv")
    # Candlestick window: December 2014 through April 2015.
    sub_day_list = get_sub_day_list(s_date(year=2014, month=12, day=1), s_date(year=2015, month=4, day=1))
    print sub_day_list
    #plot_main(sub_day_list)
    plot_candlestick(sub_day_list)
    # Recreate the output directory from scratch.
    if os.path.exists(config.output_path):
        script("rm -rf {0}".format(config.output_path))
    os.makedirs(config.output_path)
    print "Please refer '{0}' for result.".format(config.output_path)
    #theory_up_down_days_count()
    #theory_pre_next_count()
    parser = argparse.ArgumentParser()
from read_csv import read_csv
from operator import attrgetter
from binary_search import find

# Sort key: chess player's last name.
by_lname = attrgetter('lname')
arr = read_csv('chess-players.csv')
arr.sort(key=by_lname)
# Binary-search the sorted roster for the target surname.
result = find(arr, value='Zhao', key=by_lname)
if not result:
    print("Not found Player")
else:
    print("Player found in the list")
    print(
        f'First Name: {result.fname}, Last Name: {result.lname}, '
        f'Country: {result.country}, Born: {result.born}, Died: {result.died}'
    )
def test_normal(self):
    """All four well-formed fixture files must parse to the same header."""
    for path in (file1_normal, file2_normal, file3_normal, file4_normal):
        self.assertEqual(read_csv.read_csv(path), self.header1)
import numpy as np
from feature_normalization import feature_normalization
from read_csv import read_csv
from compute_cost import compute_cost
from gradient_descent import gradient_descent
from predict_regression import predict_regression

# Load the training set: X is the feature matrix, y the targets.
#data = read_csv("ex1data2.txt")
data = read_csv("ex1data1.txt")
X = data["X"]
y = data["y"]

#normalization flag
normalize = False

if normalize:
    #Normalize features and keep old X around for kicks
    X_old = X
    data_norm = feature_normalization(X)
    #Get our normalized features and stats
    X = data_norm["X"]
    X_mean = data_norm["X_mean"]
    X_std = data_norm["X_std"]

# Number of training examples.
m = X.shape[0]

#Pad the data with 1's for the intercept term
pad = np.matrix(np.ones(m)).T
X = np.hstack([pad, X])
def test_read_csv():
    """read_csv must return the header plus data rows, each as a list of strings."""
    expected = [
        ['name', 'age', 'eye colour'],
        ['Bob', '5', 'blue'],
        ['Mary', '27', 'brown'],
        ['Vij', '54', 'green'],
    ]
    assert read_csv('test1.csv') == expected
from a4 import header_map, select, row2dict, check_row, filter_table
from read_csv import read_csv

# Shared fixture: parsed college-scorecard sample used by every test below.
table = read_csv('test2.csv')


def test_header_map_1():
    # Column name -> column index mapping built from the header row.
    hmap = header_map(table[0])
    assert hmap == {
        'UNITID': 0,
        'INSTNM': 1,
        'CITY': 2,
        'STABBR': 3,
        'ZIP': 4,
        'SCH_DEG': 5
    }


def test_select_1():
    # Projection keeps only the requested columns, header row included.
    assert select(table[:6], {'INSTNM', 'CITY', 'STABBR'}) == [
        ['INSTNM', 'CITY', 'STABBR'],
        ['Alabama A & M University', 'Normal', 'AL'],
        ['University of Alabama at Birmingham', 'Birmingham', 'AL'],
        ['Amridge University', 'Montgomery', 'AL'],
        ['University of Alabama in Huntsville', 'Huntsville', 'AL'],
        ['Alabama State University', 'Montgomery', 'AL']
    ]


def test_row2dict():
    # NOTE(review): this test appears truncated in this excerpt — it only
    # builds hmap and asserts nothing.
    hmap = header_map(table[0])
import matplotlib.pyplot as plt import time import string import operator from read_csv import read_csv # from read_csv import read_csv_to_numpy_array data_directory = '/Users/rjohnson/Documents/DS/DataScience/FinalProject/data/' # Get TRAIN the data request_data = read_csv(data_directory + 'request_info_train.txt') # Get Labels labels = [request[11] for request in request_data] #Get Subjects for Text Analysis subjects = [request[9] for request in request_data] start = time.time() # def bigrams(words): # wprev = None # for w in words: # yield (wprev, w) # wprev = w # yield (wprev, None) bigrams = True huck = {} for subject in subjects: line = subject.split() if not bigrams:
import matplotlib
matplotlib.use('qt4agg')
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.axes3d import Axes3D
import numpy as np
from compute_cost import compute_cost
from read_csv import read_csv

# Load the training set and prepend an intercept column of ones.
file = "ex1data1.txt"
data = read_csv(file)
X_dat = data['X']
y_dat = data['y']
ones = np.matrix(np.ones(X_dat.shape[0])).T
X_dat = np.hstack([ones, X_dat])

# Grid of (theta0, theta1) candidates for the cost surface.
X, Y = np.meshgrid(np.linspace(-5, 1, 30), np.linspace(-1, 2, 30))

# Z holds the cost at every grid point; Z_flat is a constant reference plane.
Z = [[compute_cost(X_dat, y_dat, np.matrix([X[i][j], Y[i][j]]).T)
      for j in range(len(Y))]
     for i in range(len(X))]
Z_flat = [[4.48339 for _ in range(len(Y))] for _ in range(len(X))]
kp={}
tag1="ancient"
tag2="modern"
# Test-set CSV paths; the commented alternatives point at the full dataset.
data_all_csv_path="/home/masaya/research/test/all_test.csv"
#data_all_csv_path="/home/masaya/research/real/AllFont.csv"
tag1_csv_path="/home/masaya/research/test/ancient_test.csv"
#tag1_csv_path="/home/masaya/research/real/ancient.csv"
tag2_csv_path="/home/masaya/research/test/modern_test.csv"
#tag2_csv_path="/home/masaya/research/real/modern.csv"

# # # Read the CSV listing all fonts, extract SIFT features from the images, and cluster them # # #

#data_all = pd.read_csv(data_all_csv_path,header=None).values.tolist()
#allfont_list=data_all[0]
allfont_list=read_csv(data_all_csv_path)
print(allfont_list)
print(len(allfont_list))
# NOTE(review): `features` must be defined earlier in the file — this
# excerpt assigns into it without initialising it.  128 is the SIFT
# descriptor width.
features["all"]=np.empty((0,128))
for i in range(len(allfont_list)):
    Font_name=allfont_list[i] # file-name stem of each font image
    path=os.path.join('/home/masaya/ダウンロード/dataset/fontimage', Font_name)
    path=path+'*.png'
    files = glob.glob(path)
    # Compute SIFT descriptors for every matching image and pool them.
    features[Font_name]=detectAndCompute(files)
    features["all"]=np.concatenate([features["all"],features[Font_name]])
def plot_Energy_vs_cores_all_freq():
    """Plot incremental energy vs. active-core count, one series per CPU frequency.

    Relies on module-level globals: cpu_frequencies, num_physical_cores_total,
    ax2, colors, plus the read_csv / interpolate / np imports.
    """
    #Loop all freqs for each number of active cores
    for i, cpu_freq in enumerate(cpu_frequencies):
        energy_inc = []
        for j in range(1, num_physical_cores_total):
            var = 'line' + str(i)
            # One CSV per (frequency, active-core-count) measurement run.
            csv_path = 'freq_proc_csv/nodeperf_' + str(cpu_freq) + 'Ghz_' + str(num_physical_cores_total-j) + 'act_cores.csv'
            time, time_stamp, inst_per_cycle, instr_intensity, energy, energy_incremental, energy_SKT0, energy_SKT1 = read_csv.read_csv(csv_path)
            #Plot total_energy vs cores, for all frequencies
            # NOTE(review): plotted at x=i (the frequency index), and the
            # label repeats per j iteration — confirm this is intended.
            var, = ax2.plot(i, energy_incremental[-1], '*', color = colors[i] , markersize=10, label= str(cpu_freq) + ' Ghz')
            energy_inc.append(energy_incremental[-1])
        #Plot spline Energy vs cores
        x = range(1,num_physical_cores_total)
        tck = interpolate.splrep(x, energy_inc, s=0)
        xnew = np.arange(1,num_physical_cores_total,np.pi/150)
        ynew = interpolate.splev(xnew, tck, der=0)
        ax2.plot(xnew, ynew)
    #Add legends
    legends = []
    legends.append(ax2.legend(loc=2, shadow=True, numpoints=1, bbox_to_anchor=(1.01, 1.0)))
    for legend in legends:
        #The frame is matplotlib.patches.Rectangle instance surrounding the legend.
        frame = legend.get_frame()
        frame.set_facecolor('0.90')
        # Set the fontsize
        for label in legend.get_texts():
            label.set_fontsize('8')
        for label in legend.get_lines():
            label.set_linewidth(1) # the legend line width
# NOTE(review): this excerpt begins mid-scope — sess, ch1, ch2 and the
# accumulators (allCNNOutput, allExpectedOutput) come from code above it.
cnn_out_ch1 = sess.run('eval/rounded:0', feed_dict={'inputs/X:0': ch1})
cnn_out_ch2 = sess.run('eval/rounded:0', feed_dict={'inputs/X:0': ch2})
if len(allCNNOutput) == 0:
    # First batch: seed the accumulator, then append the second channel.
    allCNNOutput = cnn_out_ch1
    allCNNOutput = np.append(allCNNOutput, cnn_out_ch2, axis=0)
else:
    allCNNOutput = np.append(allCNNOutput, cnn_out_ch1, axis=0)
    allCNNOutput = np.append(allCNNOutput, cnn_out_ch2, axis=0)

# Collect expected outputs from every CSV in the directory, in sorted order.
csv_dir_enc = os.fsencode(csv_dir_path)
for file in sorted(os.listdir(csv_dir_enc)):
    filename = os.fsdecode(file)
    if not filename.endswith('.csv'):
        continue
    print(filename)
    expData = read_csv(os.path.join(csv_dir_path, filename))
    if len(allExpectedOutput) == 0:
        allExpectedOutput = expData
    else:
        allExpectedOutput = np.append(allExpectedOutput, expData, axis=0)

# Keep a flattened copy of raw predictions, then post-process in place.
# eliminateLessThanChunks presumably removes runs of the given class shorter
# than the threshold (9 for class 0, 5 for class 1) — TODO confirm.
outBeforeProc = allCNNOutput.copy().reshape([-1])
for i in range(len(allCNNOutput)):
    eliminateLessThanChunks(allCNNOutput[i], 0, 9)
    eliminateLessThanChunks(allCNNOutput[i], 1, 5)
outAfterProc = allCNNOutput.reshape([-1])
expOut = allExpectedOutput.reshape([-1])
print(outBeforeProc.shape)
print(outAfterProc.shape)
print(expOut.shape)
else: print("1. sorted distance matrix..") # memoize? dmat = parfor(dmat_row, idx) pickle.dump(dmat, open(pkl_f, 'wb')) print("2. density estimation..") rho() # density estimation print("rho", rho) print("3. recursive hillclimbing..") for i in idx: climb(i) from read_csv import read_csv, write_output data, class_label = read_csv(input_file) if scale_data: # scale data to [0, 1] min_x, max_x = copy.deepcopy(data[0]), copy.deepcopy(data[0]) for p in data: for k in range(0, len(data[0])): min_x[k] = p[k] if p[k] < min_x[k] else min_x[k] max_x[k] = p[k] if p[k] > max_x[k] else max_x[k] for i in range(0, len(data)): for k in range(0, len(data[0])): data[i][k] -= min_x[k] denom = (max_x[k] - min_x[k]) if denom != 0.: data[i][k] /= denom
# NOTE(review): the statements above the __main__ guard are the tail of a
# function (root, bbox, filefolder, im come from its enclosing scope).
obj = ET.SubElement(root, 'object')
name = ET.SubElement(obj, 'name')
name.text = 'car'
bndbox = ET.SubElement(obj, 'bndbox')
xmin = ET.SubElement(bndbox, 'xmin')
xmin.text = str(int(bbox[0]))
ymin = ET.SubElement(bndbox, 'ymin')
ymin.text = str(int(bbox[1]))
xmax = ET.SubElement(bndbox, 'xmax')
# bbox is stored as (x, y, w, h); convert to corner coordinates.
xmax.text = str(int(bbox[0] + bbox[2]))
ymax = ET.SubElement(bndbox, 'ymax')
ymax.text = str(int(bbox[1] + bbox[3]))
tree = minidom.parseString(ET.tostring(root))
xml_str = tree.toprettyxml()
# Pretty-print, dropping the blank lines minidom inserts.
dom_string = '\n'.join([s for s in xml_str.splitlines() if s.strip()])
with open(os.path.join(filefolder, im.replace('jpg', 'xml')), 'w') as f:
    f.write(dom_string)

if __name__ == '__main__':
    # Re-generate VOC-style XML for every image listed in wrong_dets.csv.
    annos, num = read_csv('train_1w.csv')
    wrong_im = []
    with open('wrong_dets.csv') as f:
        for line in f.readlines():
            items = line.strip().split(',')
            if items[0].endswith('.jpg'):
                wrong_im.append(items[0])
    for im in wrong_im:
        print(im)
        gen_xml(annos, im, 'train_1w')
def graficos(cant_dias_eval = 15, x = 14, y = 14, texto = "RSI(x)", archivo1 = "CLP900.txt", archivo2 = "CLP_Hoy.txt", porcentaje = 0.0, hist = 0, fecha = "01/01/2014", hora = "1000", figures = []): #dia es el dia que se desea r con datos anteriores en formato '01/01/1900' #es la hora final donde se desea r en formato string de '0901' hasta '1329' print "Comiezo pre-procesamiento" data = read_csv(archivo1) if hist == 0: data_hoy = read_csv(archivo2) else: data_hoy = read_csv("aa") indice_dia = 0 largo_data = len(data.data_close) for i in range(0, largo_data): if data.data_date[i][0] == fecha: indice_dia = i break indice = data.data_time[indice_dia].index(hora) data_hoy.data_open.append(data.data_open[indice_dia][0:indice]) data_hoy.data_close.append(data.data_close[indice_dia][0:indice]) data_hoy.data_high.append(data.data_high[indice_dia][0:indice]) data_hoy.data_low.append(data.data_low[indice_dia][0:indice]) data_hoy.data_date.append(data.data_date[indice_dia][0:indice]) data_hoy.data_time.append(data.data_time[indice_dia][0:indice]) data_hoy.max_hora = hora data_hoy.min_hora = data.min_hora largo = len(data.data_close) acciones_hoy = pre_procesamiento(data_hoy.data_close[0], data_hoy.data_time[0], data.min_hora, data_hoy.max_hora, data_hoy.data_date[0], x, y, texto) n_hora = len(acciones_hoy.acciones) acc = [] fitness = [] largo_dia = 0 for i in range(0, largo): acc_aux = pre_procesamiento(data.data_close[i], data.data_time[i], data.min_hora, data.max_hora, data.data_date[i], x, y, texto) if largo_dia < int(data.data_time[i][-1]): largo_dia = int(data.data_time[i][-1]) if (len(acc_aux.acciones) == 0) | (len(acciones_hoy.acciones) == 0) | (n_hora > len(acc_aux.acciones)-1): continue try: acc_aux.fitness = acc_aux.acciones[-1] - acc_aux.acciones[n_hora+1] except ValueError: print "error" continue fitness.append(acc_aux.fitness) acc.append(acc_aux) #Se buscan los N dias mas similares Similes, fit_sim, Ns = similitud(cant_dias_eval, acc, acciones_hoy, porcentaje) 
yFormatter = FormatStrFormatter('%.2f') ax0 = figures[0].add_subplot(111) mes, dia, ano = acciones_hoy.date.split('/') fecha = date(int(ano), int(mes), int(dia)) ax0.set_title('%s' % (fecha.strftime("%A %d, %B %Y") )) if cant_dias_eval > 1: ax1 = figures[1].add_subplot(111) if cant_dias_eval > 2: ax2 = figures[2].add_subplot(111) if cant_dias_eval > 3: ax3 = figures[3].add_subplot(111) if cant_dias_eval > 4: ax4 = figures[4].add_subplot(111) if cant_dias_eval > 5: ax5 = figures[5].add_subplot(111) if cant_dias_eval > 6: ax6 = figures[6].add_subplot(111) if cant_dias_eval > 7: ax7 = figures[7].add_subplot(111) if cant_dias_eval > 8: ax8 = figures[8].add_subplot(111) if cant_dias_eval > 9: ax9 = figures[9].add_subplot(111) if cant_dias_eval > 10: ax10 = figures[10].add_subplot(111) if cant_dias_eval > 11: ax11 = figures[11].add_subplot(111) if cant_dias_eval > 12: ax12 = figures[12].add_subplot(111) if cant_dias_eval > 13: ax13 = figures[13].add_subplot(111) if cant_dias_eval > 14: ax14 = figures[14].add_subplot(111) if cant_dias_eval > 15: ax15 = figures[15].add_subplot(111) if cant_dias_eval > 16: ax16 = figures[16].add_subplot(111) ax0.plot(acciones_hoy.acciones_mean) ax0.grid() ax0.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 1: ax1.plot(acc[Ns[0]].acciones) ax1.plot(acc[Ns[0]].acciones_mean) ax1.grid() ax1.plot(n_hora, acc[Ns[0]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[0]].acciones) ax1.plot(largo_dia, acc[Ns[0]].acciones[-1], b'o') ax1.text(n_hora, acc[Ns[0]].acciones[n_hora], acc[Ns[0]].acciones[n_hora], fontsize=11) ax1.text(largo_dia, acc[Ns[0]].acciones[-1], acc[Ns[0]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[0]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[0]].acciones[0] - acc[Ns[0]-1].acciones[-1] gap_p = 100*(acc[Ns[0]].acciones[0] - acc[Ns[0]-1].acciones[-1])/acc[Ns[0]-1].acciones[-1] ax1.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[0], 
acc[Ns[0]].fitness, gap, gap_p) ) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[0], acc[Ns[0]].fitness, gap, gap_p) ax1.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 2: ax2.plot(acc[Ns[1]].acciones) ax2.plot(acc[Ns[1]].acciones_mean) ax2.grid() ax2.plot(n_hora, acc[Ns[1]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[1]].acciones) ax2.plot(largo_dia, acc[Ns[1]].acciones[-1], b'o') ax2.text(n_hora, acc[Ns[1]].acciones[n_hora], acc[Ns[1]].acciones[n_hora], fontsize=11) ax2.text(largo_dia, acc[Ns[1]].acciones[-1], acc[Ns[1]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[1]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[1]].acciones[0] - acc[Ns[1]-1].acciones[-1] gap_p = 100*(acc[Ns[1]].acciones[0] - acc[Ns[1]-1].acciones[-1])/acc[Ns[1]-1].acciones[-1] ax2.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[1], acc[Ns[1]].fitness, gap, gap_p) ) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[1], acc[Ns[1]].fitness, gap, gap_p) ax2.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 3: ax3.plot(acc[Ns[2]].acciones) ax3.plot(acc[Ns[2]].acciones_mean) ax3.grid() ax3.plot(n_hora, acc[Ns[2]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[2]].acciones) ax3.plot(largo_dia, acc[Ns[2]].acciones[-1], b'o') ax3.text(n_hora, acc[Ns[2]].acciones[n_hora], acc[Ns[2]].acciones[n_hora], fontsize=11) ax3.text(largo_dia, acc[Ns[2]].acciones[-1], acc[Ns[2]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[2]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[2]].acciones[0] - acc[Ns[2]-1].acciones[-1] gap_p = 100*(acc[Ns[2]].acciones[0] - acc[Ns[2]-1].acciones[-1])/acc[Ns[2]-1].acciones[-1] ax3.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[2], acc[Ns[2]].fitness , gap, gap_p) ) print 
'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[2], acc[Ns[2]].fitness , gap, gap_p) ax3.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 4: ax4.plot(acc[Ns[3]].acciones) ax4.plot(acc[Ns[3]].acciones_mean) ax4.grid() ax4.plot(n_hora, acc[Ns[3]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[3]].acciones) ax4.plot(largo_dia, acc[Ns[3]].acciones[-1], b'o') ax4.text(n_hora, acc[Ns[3]].acciones[n_hora], acc[Ns[3]].acciones[n_hora], fontsize=11) ax4.text(largo_dia, acc[Ns[3]].acciones[-1], acc[Ns[3]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[3]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[3]].acciones[0] - acc[Ns[3]-1].acciones[-1] gap_p = 100*(acc[Ns[3]].acciones[0] - acc[Ns[3]-1].acciones[-1])/acc[Ns[3]-1].acciones[-1] ax4.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[3], acc[Ns[3]].fitness, gap, gap_p) ) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[3], acc[Ns[3]].fitness, gap, gap_p) ax4.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 5: ax5.plot(acc[Ns[4]].acciones) ax5.plot(acc[Ns[4]].acciones_mean) ax5.grid() ax5.plot(n_hora, acc[Ns[4]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[4]].acciones) ax5.plot(largo_dia, acc[Ns[4]].acciones[-1], b'o') ax5.text(n_hora, acc[Ns[4]].acciones[n_hora], acc[Ns[4]].acciones[n_hora], fontsize=11) ax5.text(largo_dia, acc[Ns[4]].acciones[-1], acc[Ns[4]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[4]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[4]].acciones[0] - acc[Ns[4]-1].acciones[-1] gap_p = 100*(acc[Ns[4]].acciones[0] - acc[Ns[4]-1].acciones[-1])/acc[Ns[4]-1].acciones[-1] ax5.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[4], acc[Ns[4]].fitness, gap, gap_p) ) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : 
%.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[4], acc[Ns[4]].fitness, gap, gap_p) ax5.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 6: ax6.plot(acc[Ns[5]].acciones) ax6.plot(acc[Ns[5]].acciones_mean) ax6.grid() ax6.plot(n_hora, acc[Ns[5]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[5]].acciones) ax6.plot(largo_dia, acc[Ns[5]].acciones[-1], b'o') ax6.text(n_hora, acc[Ns[5]].acciones[n_hora], acc[Ns[5]].acciones[n_hora], fontsize=11) ax6.text(largo_dia, acc[Ns[5]].acciones[-1], acc[Ns[5]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[5]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[5]].acciones[0] - acc[Ns[5]-1].acciones[-1] gap_p = 100*(acc[Ns[5]].acciones[0] - acc[Ns[5]-1].acciones[-1])/acc[Ns[5]-1].acciones[-1] ax6.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[5], acc[Ns[5]].fitness, gap, gap_p) ) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[5], acc[Ns[5]].fitness, gap, gap_p) ax6.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 7: ax7.plot(acc[Ns[6]].acciones) ax7.plot(acc[Ns[6]].acciones_mean) ax7.grid() ax7.plot(n_hora, acc[Ns[6]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[6]].acciones) ax7.plot(largo_dia, acc[Ns[6]].acciones[-1], b'o') ax7.text(n_hora, acc[Ns[6]].acciones[n_hora], acc[Ns[6]].acciones[n_hora], fontsize=11) ax7.text(largo_dia, acc[Ns[6]].acciones[-1], acc[Ns[6]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[6]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[6]].acciones[0] - acc[Ns[6]-1].acciones[-1] gap_p = 100*(acc[Ns[6]].acciones[0] - acc[Ns[6]-1].acciones[-1])/acc[Ns[6]-1].acciones[-1] ax7.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[6], acc[Ns[6]].fitness, gap, gap_p) ) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B 
%Y"), fit_sim[6], acc[Ns[6]].fitness, gap, gap_p) ax7.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 8: ax8.plot(acc[Ns[7]].acciones) ax8.plot(acc[Ns[7]].acciones_mean) ax8.grid() ax8.plot(n_hora, acc[Ns[7]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[7]].acciones) ax8.plot(largo_dia, acc[Ns[7]].acciones[-1], b'o') ax8.text(n_hora, acc[Ns[7]].acciones[n_hora], acc[Ns[7]].acciones[n_hora], fontsize=11) ax8.text(largo_dia, acc[Ns[7]].acciones[-1], acc[Ns[7]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[7]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[7]].acciones[0] - acc[Ns[7]-1].acciones[-1] gap_p = 100*(acc[Ns[7]].acciones[0] - acc[Ns[7]-1].acciones[-1])/acc[Ns[7]-1].acciones[-1] ax8.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[7], acc[Ns[7]].fitness, gap, gap_p) ) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[7], acc[Ns[7]].fitness, gap, gap_p) ax8.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 9: ax9.plot(acc[Ns[8]].acciones) ax9.plot(acc[Ns[8]].acciones_mean) ax9.grid() ax9.plot(n_hora, acc[Ns[8]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[8]].acciones) ax9.plot(largo_dia, acc[Ns[8]].acciones[-1], b'o') ax9.text(n_hora, acc[Ns[8]].acciones[n_hora], acc[Ns[8]].acciones[n_hora], fontsize=11) ax9.text(largo_dia, acc[Ns[8]].acciones[-1], acc[Ns[8]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[8]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[8]].acciones[0] - acc[Ns[8]-1].acciones[-1] gap_p = 100*(acc[Ns[8]].acciones[0] - acc[Ns[8]-1].acciones[-1])/acc[Ns[8]-1].acciones[-1] ax9.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[8], acc[Ns[8]].fitness, gap, gap_p) ) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[8], acc[Ns[8]].fitness, gap, 
gap_p) ax9.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 10: ax10.plot(acc[Ns[9]].acciones) ax10.plot(acc[Ns[9]].acciones_mean) ax10.grid() ax10.plot(n_hora, acc[Ns[9]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[9]].acciones) ax10.plot(largo_dia, acc[Ns[9]].acciones[-1], b'o') ax10.text(n_hora, acc[Ns[9]].acciones[n_hora], acc[Ns[9]].acciones[n_hora], fontsize=11) ax10.text(largo_dia, acc[Ns[9]].acciones[-1], acc[Ns[9]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[9]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[9]].acciones[0] - acc[Ns[9]-1].acciones[-1] gap_p = 100*(acc[Ns[9]].acciones[0] - acc[Ns[9]-1].acciones[-1])/acc[Ns[9]-1].acciones[-1] ax10.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[9], acc[Ns[9]].fitness, gap, gap_p) ) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[9], acc[Ns[9]].fitness, gap, gap_p) ax10.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 11: ax11.plot(acc[Ns[10]].acciones) ax11.plot(acc[Ns[10]].acciones_mean) ax11.grid() ax11.plot(n_hora, acc[Ns[10]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[10]].acciones) ax11.plot(largo_dia, acc[Ns[10]].acciones[-1], b'o') ax11.text(n_hora, acc[Ns[10]].acciones[n_hora], acc[Ns[10]].acciones[n_hora], fontsize=11) ax11.text(largo_dia, acc[Ns[10]].acciones[-1], acc[Ns[10]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[10]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[10]].acciones[0] - acc[Ns[10]-1].acciones[-1] gap_p = 100*(acc[Ns[10]].acciones[0] - acc[Ns[10]-1].acciones[-1])/acc[Ns[10]-1].acciones[-1] ax11.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[10], acc[Ns[10]].fitness, gap, gap_p) ) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[10], acc[Ns[10]].fitness, gap, gap_p) 
ax11.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 12: ax12.plot(acc[Ns[11]].acciones) ax12.plot(acc[Ns[11]].acciones_mean) ax12.grid() ax12.plot(n_hora, acc[Ns[11]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[11]].acciones) ax12.plot(largo_dia, acc[Ns[11]].acciones[-1], b'o') ax12.text(n_hora, acc[Ns[11]].acciones[n_hora], acc[Ns[11]].acciones[n_hora], fontsize=11) ax12.text(largo_dia, acc[Ns[11]].acciones[-1], acc[Ns[11]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[11]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[11]].acciones[0] - acc[Ns[11]-1].acciones[-1] gap_p = 100*(acc[Ns[11]].acciones[0] - acc[Ns[11]-1].acciones[-1])/acc[Ns[11]-1].acciones[-1] ax12.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[11], acc[Ns[11]].fitness, gap, gap_p) ) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[11], acc[Ns[11]].fitness, gap, gap_p) ax12.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 13: ax13.plot(acc[Ns[12]].acciones) ax13.plot(acc[Ns[12]].acciones_mean) ax13.grid() ax13.plot(n_hora, acc[Ns[12]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[12]].acciones) ax13.plot(largo_dia, acc[Ns[12]].acciones[-1], b'o') ax13.text(n_hora, acc[Ns[12]].acciones[n_hora], acc[Ns[12]].acciones[n_hora], fontsize=11) ax13.text(largo_dia, acc[Ns[12]].acciones[-1], acc[Ns[12]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[12]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[12]].acciones[0] - acc[Ns[12]-1].acciones[-1] gap_p = 100*(acc[Ns[12]].acciones[0] - acc[Ns[12]-1].acciones[-1])/acc[Ns[12]-1].acciones[-1] ax13.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[12], acc[Ns[12]].fitness, gap, gap_p) ) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[12], acc[Ns[12]].fitness, 
gap, gap_p) ax13.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 14: ax14.plot(acc[Ns[13]].acciones) ax14.plot(acc[Ns[13]].acciones_mean) ax14.grid() ax14.plot(n_hora, acc[Ns[13]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[13]].acciones) ax14.plot(largo_dia, acc[Ns[13]].acciones[-1], b'o') ax14.text(n_hora, acc[Ns[13]].acciones[n_hora], acc[Ns[13]].acciones[n_hora], fontsize=11) ax14.text(largo_dia, acc[Ns[13]].acciones[-1], acc[Ns[13]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[13]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[13]].acciones[0] - acc[Ns[13]-1].acciones[-1] gap_p = 100*(acc[Ns[13]].acciones[0] - acc[Ns[13]-1].acciones[-1])/acc[Ns[13]-1].acciones[-1] ax14.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[13], acc[Ns[13]].fitness, gap, gap_p) ) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[13], acc[Ns[13]].fitness, gap, gap_p) ax14.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 15: ax15.plot(acc[Ns[14]].acciones) ax15.plot(acc[Ns[14]].acciones_mean) ax15.grid() ax15.plot(n_hora, acc[Ns[14]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[14]].acciones) ax15.plot(largo_dia, acc[Ns[14]].acciones[-1], b'o') ax15.text(n_hora, acc[Ns[14]].acciones[n_hora], acc[Ns[14]].acciones[n_hora], fontsize=11) ax15.text(largo_dia, acc[Ns[14]].acciones[-1], acc[Ns[14]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[14]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[14]].acciones[0] - acc[Ns[14]-1].acciones[-1] gap_p = 100*(acc[Ns[14]].acciones[0] - acc[Ns[14]-1].acciones[-1])/acc[Ns[14]-1].acciones[-1] ax15.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[14], acc[Ns[14]].fitness, gap, gap_p) ) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[14], 
acc[Ns[14]].fitness, gap, gap_p) ax15.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 16: ax16.plot(acc[Ns[15]].acciones) ax16.plot(acc[Ns[15]].acciones_mean) ax16.grid() ax16.plot(n_hora, acc[Ns[15]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[15]].acciones) ax16.plot(largo_dia, acc[Ns[15]].acciones[-1], b'o') ax16.text(n_hora, acc[Ns[15]].acciones[n_hora], acc[Ns[15]].acciones[n_hora], fontsize=11) ax16.text(largo_dia, acc[Ns[15]].acciones[-1], acc[Ns[15]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[15]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[15]].acciones[0] - acc[Ns[15]-1].acciones[-1] gap_p = 100*(acc[Ns[15]].acciones[0] - acc[Ns[15]-1].acciones[-1])/acc[Ns[15]-1].acciones[-1] ax16.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[15], acc[Ns[15]].fitness, gap, gap_p) ) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[15], acc[Ns[15]].fitness, gap, gap_p) ax16.yaxis.set_major_formatter(yFormatter) plt.show() #plt.ioff() return
objs = tree.findall('object') boxes = [] for ix, obj in enumerate(objs): bbox = obj.find('bndbox') # print unicode(cls_name, encoding="utf-8") x1 = int(bbox.find('xmin').text) y1 = int(bbox.find('ymin').text) x2 = int(bbox.find('xmax').text) y2 = int(bbox.find('ymax').text) boxes.append([x1, y1, x2 -x1, y2 - y1]) return boxes def annos2csv(annos, csv_file): with open(csv_file, 'w') as f: f.write('name,coordinate') for im in annos.keys(): f.write('\n{},'.format(im)) for box in annos[im]: f.write('{}_{}_{}_{};'.format(box[0], box[1], box[2], box[3])) if __name__ == '__main__': annos, num = read_csv('gtai_res_101_fc_0705.csv') for file in os.listdir(ANNO_DIR): file = file.replace('xml', 'jpg') if file in annos.keys(): annos[file] = xml2anno(file) else: print('{} not in csv\n'.format(file)) annos2csv(annos, 'test_anno.csv')
from read_csv import read_csv

table = read_csv('test1.csv')


def test_read_csv():
    """read_csv('test1.csv') must return the header row followed by the data rows."""
    expected = [
        ['name', 'age', 'eye colour'],
        ['Bob', '5', 'blue'],
        ['Mary', '27', 'brown'],
        ['Vij', '54', 'green'],
    ]
    assert read_csv('test1.csv') == expected
from read_csv import read_csv
from simpletrend import *

# Feed each tag row into SimpleTrend, printing the per-timestep analysis.
# NOTE(review): indentation reconstructed from a collapsed source — this
# assumes one row == one timestep (index, analyse, advance, separator all
# inside the loop); confirm against the original layout.
pandas_tags = read_csv("pandas_tags.csv")
simpletrend = SimpleTrend()
for idx, row in pandas_tags.iterrows():
    simpletrend.index_tag(row)
    print(simpletrend.end_of_timestep_analysis())
    simpletrend.next_timestep()
    print('--------------------')
from read_csv import read_csv
import logging
import argparse
from pmi_process import get_frequency,get_all_frequency
from csv_writer import csv_writer
import csv
import synonyms

logging.basicConfig(level=logging.DEBUG)

# Input label file; presumably one label per row — TODO confirm against read_csv.
path = '/Users/charilie/Desktop/PMI/l1.csv'

if __name__ == '__main__':
    # --dict   : build the PMI dictionary per label and write it to pmi_dict_l1.csv
    # --enlarge: expand that dictionary with synonyms of every word
    parser=argparse.ArgumentParser()
    parser.add_argument("--dict",dest="dict",action="store_true",default=False)
    parser.add_argument("--enlarge", dest="enlarge", action="store_true", default=False)
    args=parser.parse_args()

    if args.dict:
        labelset = read_csv(path)
        logging.debug(labelset)
        dict_all = get_all_frequency(path)  # frequency dictionary over all words
        for label in labelset:
            res_list = get_frequency(path, label, dict_all)
            csv_writer('pmi_dict_l1.csv', res_list, label)

    if args.enlarge:
        dict_path="pmi_dict_l1.csv"
        output_path="l1_enlarged_dict.csv"
        # utf-8-sig strips the BOM (U+FEFF) that would otherwise pollute the
        # first column name.
        with open(dict_path,'r',encoding='utf-8-sig') as csvfile:
            reader=csv.DictReader(csvfile)
            for line in reader:
                word=line['word']
                word_label=line['label']
                word_list=synonyms.nearby(word)[0]
                # Write every synonym under the same label as the source word.
                csv_writer(output_path,word_list,word_label)
output = [] query_to_run = query % list_to_string(request_list) reqs = connection.query(query_to_run).getresult() for req in reqs: req_id = req[0] if req[1]: message = re.sub('(<[^>]*>)|(\n|\r|\|)', ' ', req[1]).strip() else: message = ' ' output.append([req_id, message]) return output if __name__ == "__main__": data_directory = '/Users/rjohnson/Documents/DS/DataScience/FinalProject/data/' test_requests = read_csv.read_csv(data_directory + 'request_info_test.txt') train_requests = read_csv.read_csv(data_directory + 'request_info_train.txt') requests = train_requests + test_requests num_requests = len(requests) print "Number of Request = %i " % len(requests) hsdb = pg.connect(helpspotDB['db_name'], helpspotDB['db_server'], helpspotDB['db_port'], None, None, helpspotDB['db_username'], helpspotDB['db_password']) for i in range(0, num_requests/1000): output = collect_text(requests[i*1000:min((i+1)*1000 -1, num_requests)] , hsdb) print "Saving to CSV %i" % i out = csv.writer(open("data/email_text/email_text_tmp_test_%i.txt" %i,"w"), delimiter='|') for row in output: out.writerow(row) hsdb.close()
import csv import get_names import read_csv #CONSTANTS FIRST_FILE_NAME = "mini_first_event.csv" SECOND_FILE_NAME = "mini_second_event.csv" THIRD_FILE_NAME = "mini_third_event.csv" #GETTING THE ENROLLMENTS FOR EACH EVENT FIRST_EVENT, SECOND_EVENT, THIRD_EVENT = read_csv.read_csv() #These three are responsible for writing to three different files. The process is roughly the same. The names of ea file are different def write_first(): with open(FIRST_FILE_NAME, 'w') as csvfile: filewriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL, lineterminator='\n') write_data(FIRST_EVENT, filewriter) def write_second(): with open(SECOND_FILE_NAME, 'w') as csvfile: filewriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL,
import json from urllib import urlencode from cache import get from read_csv import read_csv VIEWS_DIR = os.path.join('downloads', 'views') ROWS_DIR = os.path.join('downloads', 'rows') GEOJSON_DIR = 'geojson' ANNOYING = re.compile(r'[^a-zA-Z]') LATLNG = re.compile(r'\(([0-9][0-9].[0-9]+), (-[0-9][0-9].[0-9]+)\)') from random import sample sxx4_xhzg = [ tuple(map(float, [row['Longitude'], row['Latitude']])) for row in read_csv('downloads/rows/sxx4-xhzg.csv') if row['Latitude'] ] def random_lnglat(): print 'selecting a random coordinate' return sample(sxx4_xhzg, 1)[0] def main(): try: os.mkdir(GEOJSON_DIR) except OSError: pass views = filter(lambda view: '.json' == view[-5:], os.listdir(VIEWS_DIR))
def test_corrupt(self):
    """Each corrupt input file should still parse to its expected header check."""
    cases = [
        (file1_corrupt, self.header1_check),
        (file2_corrupt, self.header2_check),
        (file3_corrupt, self.header3_check),
        (file4_corrupt, self.header4_check),
    ]
    for corrupt_file, expected in cases:
        self.assertEqual(read_csv.read_csv(corrupt_file), expected)
from read_csv import read_csv, header_map, select, row2dict, check_row, filter_table

# Smoke-check the CSV helpers against the 2013-100 dataset.
table = read_csv('2013-100.csv')
hmap = header_map(table[0])  # header row -> column lookup (presumably name->index; confirm in read_csv)
# print(hmap)
print(table[1])  # first data row

# def test_row2dict():
#     assert table
def graficos(cant_dias_eval=15, x=14, y=14, texto="RSI(x)", archivo1="CLP900.txt", archivo2="CLP_Hoy.txt", porcentaje=0.0, hist=0, fecha="01/01/2014", hora="1000", figures=[]): #dia es el dia que se desea r con datos anteriores en formato '01/01/1900' #es la hora final donde se desea r en formato string de '0901' hasta '1329' print "Comiezo pre-procesamiento" data = read_csv(archivo1) if hist == 0: data_hoy = read_csv(archivo2) else: data_hoy = read_csv("aa") indice_dia = 0 largo_data = len(data.data_close) for i in range(0, largo_data): if data.data_date[i][0] == fecha: indice_dia = i break indice = data.data_time[indice_dia].index(hora) data_hoy.data_open.append(data.data_open[indice_dia][0:indice]) data_hoy.data_close.append(data.data_close[indice_dia][0:indice]) data_hoy.data_high.append(data.data_high[indice_dia][0:indice]) data_hoy.data_low.append(data.data_low[indice_dia][0:indice]) data_hoy.data_date.append(data.data_date[indice_dia][0:indice]) data_hoy.data_time.append(data.data_time[indice_dia][0:indice]) data_hoy.max_hora = hora data_hoy.min_hora = data.min_hora largo = len(data.data_close) acciones_hoy = pre_procesamiento(data_hoy.data_close[0], data_hoy.data_time[0], data.min_hora, data_hoy.max_hora, data_hoy.data_date[0], x, y, texto) n_hora = len(acciones_hoy.acciones) acc = [] fitness = [] largo_dia = 0 for i in range(0, largo): acc_aux = pre_procesamiento(data.data_close[i], data.data_time[i], data.min_hora, data.max_hora, data.data_date[i], x, y, texto) if largo_dia < int(data.data_time[i][-1]): largo_dia = int(data.data_time[i][-1]) if (len(acc_aux.acciones) == 0) | (len(acciones_hoy.acciones) == 0) | ( n_hora > len(acc_aux.acciones) - 1): continue try: acc_aux.fitness = acc_aux.acciones[-1] - acc_aux.acciones[n_hora + 1] except ValueError: print "error" continue fitness.append(acc_aux.fitness) acc.append(acc_aux) #Se buscan los N dias mas similares Similes, fit_sim, Ns = similitud(cant_dias_eval, acc, acciones_hoy, porcentaje) yFormatter = 
FormatStrFormatter('%.2f') ax0 = figures[0].add_subplot(111) mes, dia, ano = acciones_hoy.date.split('/') fecha = date(int(ano), int(mes), int(dia)) ax0.set_title('%s' % (fecha.strftime("%A %d, %B %Y"))) if cant_dias_eval > 1: ax1 = figures[1].add_subplot(111) if cant_dias_eval > 2: ax2 = figures[2].add_subplot(111) if cant_dias_eval > 3: ax3 = figures[3].add_subplot(111) if cant_dias_eval > 4: ax4 = figures[4].add_subplot(111) if cant_dias_eval > 5: ax5 = figures[5].add_subplot(111) if cant_dias_eval > 6: ax6 = figures[6].add_subplot(111) if cant_dias_eval > 7: ax7 = figures[7].add_subplot(111) if cant_dias_eval > 8: ax8 = figures[8].add_subplot(111) if cant_dias_eval > 9: ax9 = figures[9].add_subplot(111) if cant_dias_eval > 10: ax10 = figures[10].add_subplot(111) if cant_dias_eval > 11: ax11 = figures[11].add_subplot(111) if cant_dias_eval > 12: ax12 = figures[12].add_subplot(111) if cant_dias_eval > 13: ax13 = figures[13].add_subplot(111) if cant_dias_eval > 14: ax14 = figures[14].add_subplot(111) if cant_dias_eval > 15: ax15 = figures[15].add_subplot(111) if cant_dias_eval > 16: ax16 = figures[16].add_subplot(111) ax0.plot(acciones_hoy.acciones_mean) ax0.grid() ax0.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 1: ax1.plot(acc[Ns[0]].acciones) ax1.plot(acc[Ns[0]].acciones_mean) ax1.grid() ax1.plot(n_hora, acc[Ns[0]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[0]].acciones) ax1.plot(largo_dia, acc[Ns[0]].acciones[-1], b'o') ax1.text(n_hora, acc[Ns[0]].acciones[n_hora], acc[Ns[0]].acciones[n_hora], fontsize=11) ax1.text(largo_dia, acc[Ns[0]].acciones[-1], acc[Ns[0]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[0]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[0]].acciones[0] - acc[Ns[0] - 1].acciones[-1] gap_p = 100 * (acc[Ns[0]].acciones[0] - acc[Ns[0] - 1].acciones[-1] ) / acc[Ns[0] - 1].acciones[-1] ax1.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[0], 
acc[Ns[0]].fitness, gap, gap_p)) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % ( fecha.strftime("%A %d, %B %Y"), fit_sim[0], acc[Ns[0]].fitness, gap, gap_p) ax1.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 2: ax2.plot(acc[Ns[1]].acciones) ax2.plot(acc[Ns[1]].acciones_mean) ax2.grid() ax2.plot(n_hora, acc[Ns[1]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[1]].acciones) ax2.plot(largo_dia, acc[Ns[1]].acciones[-1], b'o') ax2.text(n_hora, acc[Ns[1]].acciones[n_hora], acc[Ns[1]].acciones[n_hora], fontsize=11) ax2.text(largo_dia, acc[Ns[1]].acciones[-1], acc[Ns[1]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[1]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[1]].acciones[0] - acc[Ns[1] - 1].acciones[-1] gap_p = 100 * (acc[Ns[1]].acciones[0] - acc[Ns[1] - 1].acciones[-1] ) / acc[Ns[1] - 1].acciones[-1] ax2.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[1], acc[Ns[1]].fitness, gap, gap_p)) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % ( fecha.strftime("%A %d, %B %Y"), fit_sim[1], acc[Ns[1]].fitness, gap, gap_p) ax2.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 3: ax3.plot(acc[Ns[2]].acciones) ax3.plot(acc[Ns[2]].acciones_mean) ax3.grid() ax3.plot(n_hora, acc[Ns[2]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[2]].acciones) ax3.plot(largo_dia, acc[Ns[2]].acciones[-1], b'o') ax3.text(n_hora, acc[Ns[2]].acciones[n_hora], acc[Ns[2]].acciones[n_hora], fontsize=11) ax3.text(largo_dia, acc[Ns[2]].acciones[-1], acc[Ns[2]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[2]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[2]].acciones[0] - acc[Ns[2] - 1].acciones[-1] gap_p = 100 * (acc[Ns[2]].acciones[0] - acc[Ns[2] - 1].acciones[-1] ) / acc[Ns[2] - 1].acciones[-1] ax3.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[2], acc[Ns[2]].fitness, 
gap, gap_p)) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % ( fecha.strftime("%A %d, %B %Y"), fit_sim[2], acc[Ns[2]].fitness, gap, gap_p) ax3.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 4: ax4.plot(acc[Ns[3]].acciones) ax4.plot(acc[Ns[3]].acciones_mean) ax4.grid() ax4.plot(n_hora, acc[Ns[3]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[3]].acciones) ax4.plot(largo_dia, acc[Ns[3]].acciones[-1], b'o') ax4.text(n_hora, acc[Ns[3]].acciones[n_hora], acc[Ns[3]].acciones[n_hora], fontsize=11) ax4.text(largo_dia, acc[Ns[3]].acciones[-1], acc[Ns[3]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[3]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[3]].acciones[0] - acc[Ns[3] - 1].acciones[-1] gap_p = 100 * (acc[Ns[3]].acciones[0] - acc[Ns[3] - 1].acciones[-1] ) / acc[Ns[3] - 1].acciones[-1] ax4.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[3], acc[Ns[3]].fitness, gap, gap_p)) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % ( fecha.strftime("%A %d, %B %Y"), fit_sim[3], acc[Ns[3]].fitness, gap, gap_p) ax4.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 5: ax5.plot(acc[Ns[4]].acciones) ax5.plot(acc[Ns[4]].acciones_mean) ax5.grid() ax5.plot(n_hora, acc[Ns[4]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[4]].acciones) ax5.plot(largo_dia, acc[Ns[4]].acciones[-1], b'o') ax5.text(n_hora, acc[Ns[4]].acciones[n_hora], acc[Ns[4]].acciones[n_hora], fontsize=11) ax5.text(largo_dia, acc[Ns[4]].acciones[-1], acc[Ns[4]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[4]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[4]].acciones[0] - acc[Ns[4] - 1].acciones[-1] gap_p = 100 * (acc[Ns[4]].acciones[0] - acc[Ns[4] - 1].acciones[-1] ) / acc[Ns[4] - 1].acciones[-1] ax5.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[4], acc[Ns[4]].fitness, gap, gap_p)) print 
'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % ( fecha.strftime("%A %d, %B %Y"), fit_sim[4], acc[Ns[4]].fitness, gap, gap_p) ax5.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 6: ax6.plot(acc[Ns[5]].acciones) ax6.plot(acc[Ns[5]].acciones_mean) ax6.grid() ax6.plot(n_hora, acc[Ns[5]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[5]].acciones) ax6.plot(largo_dia, acc[Ns[5]].acciones[-1], b'o') ax6.text(n_hora, acc[Ns[5]].acciones[n_hora], acc[Ns[5]].acciones[n_hora], fontsize=11) ax6.text(largo_dia, acc[Ns[5]].acciones[-1], acc[Ns[5]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[5]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[5]].acciones[0] - acc[Ns[5] - 1].acciones[-1] gap_p = 100 * (acc[Ns[5]].acciones[0] - acc[Ns[5] - 1].acciones[-1] ) / acc[Ns[5] - 1].acciones[-1] ax6.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[5], acc[Ns[5]].fitness, gap, gap_p)) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % ( fecha.strftime("%A %d, %B %Y"), fit_sim[5], acc[Ns[5]].fitness, gap, gap_p) ax6.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 7: ax7.plot(acc[Ns[6]].acciones) ax7.plot(acc[Ns[6]].acciones_mean) ax7.grid() ax7.plot(n_hora, acc[Ns[6]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[6]].acciones) ax7.plot(largo_dia, acc[Ns[6]].acciones[-1], b'o') ax7.text(n_hora, acc[Ns[6]].acciones[n_hora], acc[Ns[6]].acciones[n_hora], fontsize=11) ax7.text(largo_dia, acc[Ns[6]].acciones[-1], acc[Ns[6]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[6]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[6]].acciones[0] - acc[Ns[6] - 1].acciones[-1] gap_p = 100 * (acc[Ns[6]].acciones[0] - acc[Ns[6] - 1].acciones[-1] ) / acc[Ns[6] - 1].acciones[-1] ax7.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[6], acc[Ns[6]].fitness, gap, gap_p)) print 'Fecha : %s, Simil : 
%.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % ( fecha.strftime("%A %d, %B %Y"), fit_sim[6], acc[Ns[6]].fitness, gap, gap_p) ax7.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 8: ax8.plot(acc[Ns[7]].acciones) ax8.plot(acc[Ns[7]].acciones_mean) ax8.grid() ax8.plot(n_hora, acc[Ns[7]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[7]].acciones) ax8.plot(largo_dia, acc[Ns[7]].acciones[-1], b'o') ax8.text(n_hora, acc[Ns[7]].acciones[n_hora], acc[Ns[7]].acciones[n_hora], fontsize=11) ax8.text(largo_dia, acc[Ns[7]].acciones[-1], acc[Ns[7]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[7]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[7]].acciones[0] - acc[Ns[7] - 1].acciones[-1] gap_p = 100 * (acc[Ns[7]].acciones[0] - acc[Ns[7] - 1].acciones[-1] ) / acc[Ns[7] - 1].acciones[-1] ax8.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[7], acc[Ns[7]].fitness, gap, gap_p)) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % ( fecha.strftime("%A %d, %B %Y"), fit_sim[7], acc[Ns[7]].fitness, gap, gap_p) ax8.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 9: ax9.plot(acc[Ns[8]].acciones) ax9.plot(acc[Ns[8]].acciones_mean) ax9.grid() ax9.plot(n_hora, acc[Ns[8]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[8]].acciones) ax9.plot(largo_dia, acc[Ns[8]].acciones[-1], b'o') ax9.text(n_hora, acc[Ns[8]].acciones[n_hora], acc[Ns[8]].acciones[n_hora], fontsize=11) ax9.text(largo_dia, acc[Ns[8]].acciones[-1], acc[Ns[8]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[8]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[8]].acciones[0] - acc[Ns[8] - 1].acciones[-1] gap_p = 100 * (acc[Ns[8]].acciones[0] - acc[Ns[8] - 1].acciones[-1] ) / acc[Ns[8] - 1].acciones[-1] ax9.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[8], acc[Ns[8]].fitness, gap, gap_p)) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : 
%.2f (%.1f%%)' % ( fecha.strftime("%A %d, %B %Y"), fit_sim[8], acc[Ns[8]].fitness, gap, gap_p) ax9.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 10: ax10.plot(acc[Ns[9]].acciones) ax10.plot(acc[Ns[9]].acciones_mean) ax10.grid() ax10.plot(n_hora, acc[Ns[9]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[9]].acciones) ax10.plot(largo_dia, acc[Ns[9]].acciones[-1], b'o') ax10.text(n_hora, acc[Ns[9]].acciones[n_hora], acc[Ns[9]].acciones[n_hora], fontsize=11) ax10.text(largo_dia, acc[Ns[9]].acciones[-1], acc[Ns[9]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[9]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[9]].acciones[0] - acc[Ns[9] - 1].acciones[-1] gap_p = 100 * (acc[Ns[9]].acciones[0] - acc[Ns[9] - 1].acciones[-1] ) / acc[Ns[9] - 1].acciones[-1] ax10.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[9], acc[Ns[9]].fitness, gap, gap_p)) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % ( fecha.strftime("%A %d, %B %Y"), fit_sim[9], acc[Ns[9]].fitness, gap, gap_p) ax10.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 11: ax11.plot(acc[Ns[10]].acciones) ax11.plot(acc[Ns[10]].acciones_mean) ax11.grid() ax11.plot(n_hora, acc[Ns[10]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[10]].acciones) ax11.plot(largo_dia, acc[Ns[10]].acciones[-1], b'o') ax11.text(n_hora, acc[Ns[10]].acciones[n_hora], acc[Ns[10]].acciones[n_hora], fontsize=11) ax11.text(largo_dia, acc[Ns[10]].acciones[-1], acc[Ns[10]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[10]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[10]].acciones[0] - acc[Ns[10] - 1].acciones[-1] gap_p = 100 * (acc[Ns[10]].acciones[0] - acc[Ns[10] - 1].acciones[-1] ) / acc[Ns[10] - 1].acciones[-1] ax11.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[10], acc[Ns[10]].fitness, gap, gap_p)) print 'Fecha : %s, Simil : %.2f, Fit : 
%.2f, GAP : %.2f (%.1f%%)' % ( fecha.strftime("%A %d, %B %Y"), fit_sim[10], acc[Ns[10]].fitness, gap, gap_p) ax11.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 12: ax12.plot(acc[Ns[11]].acciones) ax12.plot(acc[Ns[11]].acciones_mean) ax12.grid() ax12.plot(n_hora, acc[Ns[11]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[11]].acciones) ax12.plot(largo_dia, acc[Ns[11]].acciones[-1], b'o') ax12.text(n_hora, acc[Ns[11]].acciones[n_hora], acc[Ns[11]].acciones[n_hora], fontsize=11) ax12.text(largo_dia, acc[Ns[11]].acciones[-1], acc[Ns[11]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[11]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[11]].acciones[0] - acc[Ns[11] - 1].acciones[-1] gap_p = 100 * (acc[Ns[11]].acciones[0] - acc[Ns[11] - 1].acciones[-1] ) / acc[Ns[11] - 1].acciones[-1] ax12.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[11], acc[Ns[11]].fitness, gap, gap_p)) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % ( fecha.strftime("%A %d, %B %Y"), fit_sim[11], acc[Ns[11]].fitness, gap, gap_p) ax12.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 13: ax13.plot(acc[Ns[12]].acciones) ax13.plot(acc[Ns[12]].acciones_mean) ax13.grid() ax13.plot(n_hora, acc[Ns[12]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[12]].acciones) ax13.plot(largo_dia, acc[Ns[12]].acciones[-1], b'o') ax13.text(n_hora, acc[Ns[12]].acciones[n_hora], acc[Ns[12]].acciones[n_hora], fontsize=11) ax13.text(largo_dia, acc[Ns[12]].acciones[-1], acc[Ns[12]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[12]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[12]].acciones[0] - acc[Ns[12] - 1].acciones[-1] gap_p = 100 * (acc[Ns[12]].acciones[0] - acc[Ns[12] - 1].acciones[-1] ) / acc[Ns[12] - 1].acciones[-1] ax13.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[12], acc[Ns[12]].fitness, gap, gap_p)) 
print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % ( fecha.strftime("%A %d, %B %Y"), fit_sim[12], acc[Ns[12]].fitness, gap, gap_p) ax13.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 14: ax14.plot(acc[Ns[13]].acciones) ax14.plot(acc[Ns[13]].acciones_mean) ax14.grid() ax14.plot(n_hora, acc[Ns[13]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[13]].acciones) ax14.plot(largo_dia, acc[Ns[13]].acciones[-1], b'o') ax14.text(n_hora, acc[Ns[13]].acciones[n_hora], acc[Ns[13]].acciones[n_hora], fontsize=11) ax14.text(largo_dia, acc[Ns[13]].acciones[-1], acc[Ns[13]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[13]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[13]].acciones[0] - acc[Ns[13] - 1].acciones[-1] gap_p = 100 * (acc[Ns[13]].acciones[0] - acc[Ns[13] - 1].acciones[-1] ) / acc[Ns[13] - 1].acciones[-1] ax14.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[13], acc[Ns[13]].fitness, gap, gap_p)) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % ( fecha.strftime("%A %d, %B %Y"), fit_sim[13], acc[Ns[13]].fitness, gap, gap_p) ax14.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 15: ax15.plot(acc[Ns[14]].acciones) ax15.plot(acc[Ns[14]].acciones_mean) ax15.grid() ax15.plot(n_hora, acc[Ns[14]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[14]].acciones) ax15.plot(largo_dia, acc[Ns[14]].acciones[-1], b'o') ax15.text(n_hora, acc[Ns[14]].acciones[n_hora], acc[Ns[14]].acciones[n_hora], fontsize=11) ax15.text(largo_dia, acc[Ns[14]].acciones[-1], acc[Ns[14]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[14]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[14]].acciones[0] - acc[Ns[14] - 1].acciones[-1] gap_p = 100 * (acc[Ns[14]].acciones[0] - acc[Ns[14] - 1].acciones[-1] ) / acc[Ns[14] - 1].acciones[-1] ax15.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[14], 
acc[Ns[14]].fitness, gap, gap_p)) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % ( fecha.strftime("%A %d, %B %Y"), fit_sim[14], acc[Ns[14]].fitness, gap, gap_p) ax15.yaxis.set_major_formatter(yFormatter) if cant_dias_eval > 16: ax16.plot(acc[Ns[15]].acciones) ax16.plot(acc[Ns[15]].acciones_mean) ax16.grid() ax16.plot(n_hora, acc[Ns[15]].acciones[n_hora], 'o') largo_dia = len(acc[Ns[15]].acciones) ax16.plot(largo_dia, acc[Ns[15]].acciones[-1], b'o') ax16.text(n_hora, acc[Ns[15]].acciones[n_hora], acc[Ns[15]].acciones[n_hora], fontsize=11) ax16.text(largo_dia, acc[Ns[15]].acciones[-1], acc[Ns[15]].acciones[-1], fontsize=11) mes, dia, ano = acc[Ns[15]].date.split('/') fecha = date(int(ano), int(mes), int(dia)) gap = acc[Ns[15]].acciones[0] - acc[Ns[15] - 1].acciones[-1] gap_p = 100 * (acc[Ns[15]].acciones[0] - acc[Ns[15] - 1].acciones[-1] ) / acc[Ns[15] - 1].acciones[-1] ax16.set_title('%s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % (fecha.strftime("%A %d, %B %Y"), fit_sim[15], acc[Ns[15]].fitness, gap, gap_p)) print 'Fecha : %s, Simil : %.2f, Fit : %.2f, GAP : %.2f (%.1f%%)' % ( fecha.strftime("%A %d, %B %Y"), fit_sim[15], acc[Ns[15]].fitness, gap, gap_p) ax16.yaxis.set_major_formatter(yFormatter) plt.show() #plt.ioff() return
if middle_element == value: return middle if middle_element < value: left = middle + 1 elif middle_element > value: right = middle - 1 def find(elements, value, key=identity): index = find_index(elements, value, key) return None if index is None else elements[index] arr = read_csv('chess-players.csv') sub_players = read_csv('players.csv') # sort players using bubble sort algarithm bubble_sort(arr) # find player with binary search for player in sub_players: print('****** Searching in progress.... *****') result = binary_search(arr, player) if (result): print("Player found in the list") print(f'First Name: {arr[result].fname}, Last Name: {arr[result].lname}, Country: {arr[result].country}, Born: {arr[result].born}, Died: {arr[result].died}') else:
raise Exception('# Unable to predict; weights and bias not set') return self.hypothesis(x_i, self._weights, self._bias) def predict_multiple(self, x_params): """ Predict the output value for a set of size n :param x_params: The matrix containing x-vectors :return: A numpy array of all predicted values """ predicted = np.array() for i in x_params: np.append(predicted, self.predict(x_params[i])) return predicted if __name__ == "__main__": training_data = read_csv(training=True) test_data = read_csv(training=False) lr = LinearRegression() theta, bias, loss_history = lr.gradient_descent(*training_data, test_data=test_data) print('# Weights and bias after training: W%s, B%s' % (theta, bias)) cost_training = lr.cost(*training_data, theta, bias) cost_test = lr.cost(*test_data, theta, bias) print('# Error: %s / %s (training/test)' % (cost_training, cost_test)) plot_loss_history(loss_history, save=True) plot_regression(*training_data, lr, save=True)
# Load speaker CSV segments and downsampled WAV audio from the dataset
# directory given on the command line, then flatten each into one array.
data_path = sys.argv[1]
csv_path = os.path.join(data_path, 'CSV_Files_Final')
# wav_path = os.path.join(data_path, 'Sound Files')
wav_path = os.path.join(data_path, 'Downsampled')

speaker_datas = []
sound_datas = []

# Read in the data from the csv files
# sorted() keeps CSV and WAV file order aligned — presumably the two
# directories share filenames; confirm upstream.
csv_dir = os.fsencode(csv_path)
for file in sorted(os.listdir(csv_dir)):
    filename = os.fsdecode(file)
    file_path = os.path.join(csv_path, filename)
    speaker_datas.append(read_csv(file_path))

# Flatten into an array of (time segments x samples)
speakers = np.concatenate(np.array(speaker_datas))
# And try to clean up some memory because this is a lot of data...
del speaker_datas

# Read in the data from the wav files
wav_dir = os.fsencode(wav_path)
for file in sorted(os.listdir(wav_dir)):
    filename = os.fsdecode(file)
    file_path = os.path.join(wav_path, filename)
    sound_datas.append(read_wav(file_path))
    print('Imported: {}'.format(filename))

# Flatten into an array of (time segments x samples)
def test_read_csv(some_parameters):
    """Parametrised check: read_csv must map the fixture input to its paired output."""
    test_input, expected_output = some_parameters[0], some_parameters[1]
    print(test_input)
    actual = read_csv.read_csv(test_input)
    assert actual == expected_output
# Plot a roofline model for the given number of active cores: one
# compute-bound line, one bandwidth line per balance point, and one
# scatter series per CPU frequency read from the per-frequency CSVs.
# NOTE(review): this chunk arrives with its line structure collapsed, so the
# nesting of the addPerfLine/addBWLine loops relative to the balance_points
# loop cannot be recovered safely — they read `balance_points[i]`, which is
# presumably per-frequency (inside the enumerate loop); verify against the
# original file before reformatting.
# NOTE(review): relies on module-level `ax`, `cpu_frequencies` and `colors`;
# `var = 'line' + str(i)` is immediately overwritten by the ax.plot() result
# and looks like dead code — confirm and remove. The `basey=`/`basex=`
# keywords were renamed to `base=` in matplotlib 3.3 — confirm the pinned
# matplotlib version before upgrading.
def plot_RL_all_freq(num_active_cores): #Set graph properties ax.set_title('Roofline for ' + str(num_active_cores) + ' active cores',fontsize=14,fontweight='bold') #Set the axes properties ax.set_yscale('log', basey=2) ax.set_xscale('log', basex=2) X_MIN=0.01 X_MAX=900 Y_MIN=0.1 Y_MAX=8 graph_lims = [X_MIN,X_MAX,Y_MIN,Y_MAX] ax.axis([X_MIN,X_MAX,Y_MIN,Y_MAX]) #Get balance points for each frequency and add compute-bound RL PEAK_PERF=[4.0] PEAK_PERF_LABELS=['Scalar Peak Performance'] PEAK_BW_LABELS = ['Bandwidth'] balance_points = [] for i, cpu_freq in enumerate(cpu_frequencies): balance_points.append(20/(cpu_freq*num_active_cores)) #70GBps = Mem_BW for p,l in zip(PEAK_PERF, PEAK_PERF_LABELS): addPerfLine(p,l, ax, float(balance_points[i]), graph_lims) for bw,l in zip([balance_points[i]], PEAK_BW_LABELS): addBWLine(bw,l, graph_lims) #Loop all freqs for each number of active cores for i, cpu_freq in enumerate(cpu_frequencies): var = 'line' + str(i) csv_path = 'freq_proc_csv/nodeperf_' + str(cpu_freq) + 'Ghz_' + str(num_active_cores) + 'act_cores.csv' time, time_stamp, inst_per_cycle, instr_intensity, energy, energy_incremental, energy_SKT0, energy_SKT1 = read_csv.read_csv(csv_path) var, = ax.plot(instr_intensity, inst_per_cycle, '.', color = colors[i], markersize=0.8, label= str(cpu_freq) + 'Ghz') #Add legends legend = ax.legend(loc=2, shadow=True, numpoints=10, bbox_to_anchor=(1.01, 1.0)) #The frame is matplotlib.patches.Rectangle instance surrounding the legend. frame = legend.get_frame() frame.set_facecolor('0.90') # Set the fontsize for label in legend.get_texts(): label.set_fontsize('8') for label in legend.get_lines(): label.set_linewidth(1) # the legend line width
#!/usr/bin/env python
import subprocess
import os
import argparse

import config
from read_csv import read_csv
from theory import *
from plot import *


def script(cmd):
    """Echo *cmd* and execute it through the shell."""
    # IDIOM: print(cmd) is byte-identical to `print cmd` for a single
    # argument on Python 2 and also valid on Python 3.
    print(cmd)
    # NOTE(review): shell=True with a formatted string is shell-injection
    # prone; safe only while cmd is built from trusted config values.
    subprocess.call(cmd, shell=True)


if __name__ == "__main__":
    read_csv("sample/601318.csv")
    sub_day_list = get_sub_day_list(s_date(year=2014, month=12, day=1),
                                    s_date(year=2015, month=4, day=1))
    print(sub_day_list)
    # plot_main(sub_day_list)
    plot_candlestick(sub_day_list)
    # Recreate the output directory from scratch for each run.
    if os.path.exists(config.output_path):
        script("rm -rf {0}".format(config.output_path))
    os.makedirs(config.output_path)
    print("Please refer '{0}' for result.".format(config.output_path))
    # theory_up_down_days_count()
    # theory_pre_next_count()
    parser = argparse.ArgumentParser()
    parser.add_argument("-u", "--Tup", help="theory: price up fast with setting percent")
def load_feature_data(build_data=0, test_train='train'):
    """
    Load (or rebuild) the basic request features and their labels.

    :param build_data: when in (1, 4), rebuild everything from the raw text
        files and re-pickle the result; otherwise load the pickled data.
    :param test_train: 'train' or 'test' -- selects which data files to use.
    :return: tuple (only_features, labels) of numpy arrays
    """
    if build_data in (1, 4):
        print("Building Data Fresh")
        # Main data set; this is the one that carries the label.
        request_data = read_csv(data_directory + 'request_info_' + test_train + '.txt')
        # Join in the auxiliary data sets: BTF sale, OD sale, opportunity
        # info and sale info (same four stanzas as before, deduplicated).
        for prefix in ('btf_info_', 'od_info_', 'opp_info_', 'sale_info_'):
            aux_info = read_csv_to_dict(data_directory + prefix + test_train + '.txt')
            append_data(request_data, aux_info)
        # Get Labels
        labels = get_labels(request_data, lambda x: x == 1)
        np_request_data = np.array([tuple(row) for row in request_data])
        convert_category_to_int(np_request_data, 'region')
        convert_category_to_int(np_request_data, 'day_name')
        only_features = remove_data(np_request_data, sorted(vars_to_remove))
        only_features = np.array(only_features).astype('i8')
        # Bring the dollar amounts down to a signed log scale.
        dollar_features = ['prev_sales_dollars_email', 'prev_sales_dollars_email_month',
                           'prev_sales_dollars_email_domain', 'prev_sales_dollars_email_domain_month']
        for feature in dollar_features:
            # `offset` compensates for the columns removed above.
            index = ind[feature] - offset
            for row in only_features:
                amount = row[index]
                if amount < 0:
                    row[index] = (-1) * math.log(float((-1) * amount))
                elif amount > 0:
                    row[index] = math.log(float(amount))
        print("Data Built Fresh. It looks like this:")
        print(only_features[0])
        # FIX: pickle streams must be written/read in BINARY mode ('wb'/'rb');
        # text mode corrupts them on Windows and fails outright on Python 3.
        # `with` also guarantees the handles are closed on error.
        with open('pickle_data/features_' + test_train + '.pkl', 'wb') as f:
            pickle.dump(only_features, f)
        with open('pickle_data/labels_' + test_train + '.pkl', 'wb') as f:
            pickle.dump(labels, f)
    else:
        print("Loading Basic Features Pickled Data")
        with open('pickle_data/features_' + test_train + '.pkl', 'rb') as f:
            only_features = np.array(pickle.load(f))
        with open('pickle_data/labels_' + test_train + '.pkl', 'rb') as f:
            labels = np.array(pickle.load(f))
        print("Data Loaded from pickle. It looks like this:")
        print(only_features[0])
    return only_features, labels