def calc_R(beta, save_data_for_one_cluster=False):
    """Run one simulation for ``beta`` and return its radius-of-rotation result.

    The lattice size ``L`` and the number of ``frames`` are read from
    module-level names.  The value returned is whatever ``Main`` stores in
    ``pre_func_res`` after being given ``calc_radius_of_rotation`` as its
    ``pre_function``.

    Args:
        beta: Value forwarded to ``Main`` as ``beta``.
        save_data_for_one_cluster: When true, the result of this single run
            is also written through ``save_data.save``.

    Returns:
        ``main.pre_func_res`` of the finished run.
    """
    seed_string = {'id': 1, 'x': L / 4, 'y': L / 2, 'vec': [0, 4]}
    main = Main(
        Lx=L,
        Ly=L,
        frames=frames,
        beta=beta,
        size=[3],
        plot=False,
        save_image=False,
        strings=[seed_string],
        pre_function=calc_radius_of_rotation,
    )
    radius_of_rotation = main.pre_func_res

    if not save_data_for_one_cluster:
        return radius_of_rotation

    # Save data (for one cluster).
    base = "./results/data/radius/frames=%d_beta=%2.2f_" % (frames, beta)
    save_data.save(base,
                   frames=frames,
                   beta=beta,
                   L=L,
                   radius_of_rotation=radius_of_rotation)
    return radius_of_rotation
def get_carlos_params(self):
    """Collect ``get_params`` output for every ``.wav`` file of every class.

    For each entry of ``self.classes`` the matching sub-directory of
    ``self.audio_path`` is scanned.  Per class, a JSON file
    ``<class>_features.json`` holding the result for *every* processed file
    (including empty results) is saved inside the class directory, while
    only non-empty results are appended to ``self.features[class index]``.
    The class/index lookup tables are filled along the way, and the
    complete ``self.features`` mapping is finally saved to
    ``features.json``.
    """
    for class_id, animal in enumerate(self.classes):
        class_dir = path.join(self.audio_path, animal)
        file_names = os.listdir(class_dir)

        self.features[class_id] = []
        self.class_to_number[animal] = class_id
        self.number_to_class[class_id] = animal

        per_file = {}
        for file_name in file_names:
            if file_name.endswith('.wav'):
                audio_file = path.join(class_dir, file_name)
                print("processing " + audio_file)
                extracted = get_params(audio_file)
                print(audio_file + " processed")
                # The per-class JSON keeps empty results too.
                per_file[file_name] = extracted
                if len(extracted) != 0:
                    self.features[class_id].append(extracted)

        save(per_file, path.join(class_dir, animal + "_features.json"))
        print(animal + " features has been json saved")

    # Shape of the first feature entry of class 0; this raises if class 0
    # ended up without any features.
    first_entry = self.features[0][0]
    self.feat_amount = np.array(first_entry).shape
    # joblib.dump(self.features, "features.pkl")
    save(self.features, "features.json")
def main(num_of_strings=30, beta=0., frames=1000, L=100,
         save_result=True, plot_result=True):
    """Accumulate relative positions over several ``CuttingProfile`` runs.

    Each run is started with per-run saving and plotting disabled.  For
    every key ``0..5`` present in a run's ``relative_positions`` the rows
    are stacked (``np.vstack``) onto the rows collected so far.

    Args:
        num_of_strings: Number of independent runs.
        beta: Forwarded to ``CuttingProfile``.
        frames: Forwarded to ``CuttingProfile``.
        L: Forwarded to ``CuttingProfile``.
        save_result: Save the accumulated positions through ``sd.save``.
        plot_result: Pass the accumulated positions to ``plot_hist``.
    """
    params = {
        'beta': beta,
        'L': L,
        'frames': frames,
        # Individual runs neither save nor plot; only the aggregate does.
        'save_result': False,
        'plot_raw_result': False,
        '_plot_dist_to_verify': False,
    }

    relative_positions = {}
    for i in tqdm(range(num_of_strings)):
        runner = CuttingProfile(**params)
        runner.start()
        for j in range(6):
            # `in` replaces the Python 2-only dict.has_key().
            if j not in runner.relative_positions:
                continue
            if j in relative_positions:
                relative_positions[j] = np.vstack(
                    (relative_positions[j], runner.relative_positions[j]))
            else:
                relative_positions[j] = runner.relative_positions[j]

    if save_result:
        # NOTE: weight_const is taken from the last runner, so this needs
        # at least one run (num_of_strings >= 1).
        sd.save("results/data/cutting_profile/" +
                "frames=%d_beta=%2.2f_" % (frames, beta),
                beta=beta, L=L, frames=frames,
                weight_const=runner.weight_const,
                num_of_strings=num_of_strings,
                relative_positions=relative_positions
                )

    if plot_result:
        # plot_all_points(relative_positions)
        plot_hist(relative_positions)
def start(self):
    """Run one simulation and derive cutting profiles and relative positions.

    Builds ``Main`` from this object's ``L``, ``frames``, ``beta`` and
    ``weight_const``, stores it on ``self.main``, then fills
    ``self.cutting_profiles`` and ``self.relative_positions``.  Saving and
    the two plotting steps are controlled by ``self.save_result``,
    ``self.plot_raw_result`` and ``self._plot_dist_to_verify``.
    """
    seed_string = {'id': 1, 'x': self.L/4, 'y': self.L/2, 'vec': [0, 4]}
    main_kwargs = dict(
        Lx=self.L,
        Ly=self.L,
        size=[3],
        plot=False,
        plot_surface=False,
        frames=self.frames,
        strings=[seed_string],
        beta=self.beta,
        weight_const=self.weight_const,
        # pre_function=self.get_cutting_profiles
    )
    self.main = Main(**main_kwargs)

    self.cutting_profiles = self.get_cutting_profiles()
    self.relative_positions = self.get_relative_positions()

    if self.save_result:
        prefix = ("results/data/cutting_profile/" +
                  "frames=%d_beta=%2.2f_" % (self.frames, self.beta))
        sd.save(prefix,
                beta=self.beta,
                L=self.L,
                frames=self.frames,
                weight_const=self.weight_const,
                cutting_profiles=self.cutting_profiles)

    if self.plot_raw_result:
        self.plot_result()

    if self._plot_dist_to_verify:
        self.plot_dist_to_verify()
def execute_simulation_for_one_beta(beta, num_of_strings, L, frames,
                                    num_of_pairs, plot=True,
                                    save_image=False, save_data=False):
    """Sample distances and path lengths for one ``beta`` and report them.

    ``get_path_length_and_distances`` is called ``num_of_strings`` times;
    the returned distances and path lengths are flattened into two 1-D
    arrays.  Depending on the flags the arrays are saved through
    ``sd.save`` and/or drawn as a 2-D histogram that is either written to
    a time-stamped PNG (``save_image``) or shown interactively.

    Args:
        beta, num_of_strings, L, frames, num_of_pairs: Forwarded to
            ``get_path_length_and_distances`` and stored with the data.
        plot: Show the histogram (only when ``save_image`` is false).
        save_image: Save the histogram to ``results/img/distances/``.
        save_data: Save the raw arrays to ``results/data/distances/``.
    """
    # Single-argument print(...) yields the same output on Python 2 and 3.
    print("beta = %2.2f, frames = %d" % (beta, frames))

    distance_list = []
    path_length = []
    for s in tqdm(range(num_of_strings)):
        d, pl = get_path_length_and_distances(beta, num_of_strings,
                                              L, frames, num_of_pairs)
        distance_list.append(d)
        path_length.append(pl)

    distance_list = np.array(distance_list).flatten()
    path_length = np.array(path_length).flatten()

    if save_data:
        sd.save("results/data/distances/frames=%d_beta=%2.2f_" % (frames, beta),
                beta=beta, num_of_strings=num_of_strings,
                L=L, frames=frames, distance_list=distance_list,
                path_length=path_length)

    if plot or save_image:
        fig, ax = plt.subplots()

        # heatmap
        ax.hist2d(distance_list, path_length, bins=25)
        ax.set_xlabel('Distance')
        ax.set_ylabel('Path length')
        ax.set_title(
            'Path length and distances between two points in the cluster'
            + r'($\beta = %2.2f$)' % beta)

        if save_image:
            result_image_path = "results/img/distances/beta=%2.2f" % beta
            result_image_path += "_" + time.strftime("%y%m%d_%H%M%S")
            result_image_path += ".png"
            plt.savefig(result_image_path)
            plt.close()
            print("[saved] " + result_image_path)
        else:
            plt.show()
def mass_for_beta_one(beta, frames_list, N_r=100, num_of_strings=100):
    """Measure mass-in-radius curves at several string lengths and save them.

    ``Main`` is run ``num_of_strings`` times (at least once) with
    ``calc_mass_in_r`` as ``post_function``.  The non-``None`` results of
    each run are matched to ``frames_list`` by position, stacked per entry,
    sorted by radius and saved through ``save_data.save``.

    NOTE(review): the positional match assumes ``main.post_func_res``
    yields results in the same order as ``frames_list`` — confirm against
    ``Main``.

    Args:
        beta: Forwarded to ``Main``.
        frames_list: String lengths (``N - 3``) at which to measure.
        N_r: Number of log-spaced radii per measurement.
        num_of_strings: Number of simulation runs.
    """
    # Kept under its own name: the original reused `frames` as a loop
    # variable, so later runs were started with the *last* entry of
    # frames_list instead of the maximum.
    max_frames = np.max(frames_list)
    center_sample = int(np.min(frames_list) / 2)
    L = (max_frames + 1) * 2

    def calc_mass_in_r(self, i, s):
        """Return an ``(N_r, 2)`` array of ``[r, M(r)]`` or ``None``."""
        N = len(s.vec) + 1
        if N - 3 not in frames_list:
            return None

        # Index with a tuple: a list of index arrays is no longer treated
        # as a multidimensional index by recent NumPy versions.
        pos = tuple(s.pos.T)
        x, y = self.lattice_X[pos], self.lattice_Y[pos]
        X, Y = np.average(x), np.average(y)
        dist_from_mean = np.sqrt((x - X) ** 2 + (y - Y) ** 2)
        r = np.logspace(1, np.log2(max(dist_from_mean)), num=N_r, base=2.)

        centers_index = sorted(random.sample(range(N), center_sample))

        # Distances from each sampled centre do not depend on the radius,
        # so compute them once instead of once per radius.
        dists_per_center = []
        for c in centers_index:
            index_x, index_y = s.pos[c]
            dists_per_center.append(
                np.sqrt((x - self.lattice_X[index_x, index_y]) ** 2
                        + (y - self.lattice_Y[index_x, index_y]) ** 2))

        M = [np.average([np.count_nonzero(dist < _r)
                         for dist in dists_per_center])
             for _r in r]
        return np.array([r, M]).T

    def run_once():
        """Run one simulation and return its non-None measurements."""
        main = Main(Lx=L, Ly=L, plot=False, frames=max_frames, beta=beta,
                    strings=[{'id': 1, 'x': L / 4, 'y': L / 2, 'vec': [0, 4]}],
                    post_function=calc_mass_in_r)
        return np.array([m for m in main.post_func_res if m is not None])

    _M = run_once()
    Ms = {frames_list[i]: _M[i] for i in range(len(frames_list))}

    for s in tqdm(range(num_of_strings - 1)):
        _M = run_once()
        for i, n_frames in enumerate(frames_list):
            Ms[n_frames] = np.vstack((Ms[n_frames], _M[i]))

    for n_frames in frames_list:
        r, M = Ms[n_frames].T
        sorted_index = np.argsort(r)
        r, M = r[sorted_index], M[sorted_index]
        save_data.save("./results/data/mass_in_r/beta=%2.2f_frames=%d_"
                       % (beta, n_frames),
                       num_of_strings=num_of_strings,
                       N_r=N_r, beta=beta, L=L, frames=n_frames, r=r, M=M)
def vectorize_text(self):
    """Replace ``self.features`` with binary word-presence vectors.

    For every key of ``self.features`` a zero vector with one slot per
    entry of ``self.dictionary`` is created, and the slot of each word
    found for that key is set to 1.  The resulting mapping replaces
    ``self.features`` and is saved to ``words_vector.json``.
    """
    vocabulary_size = len(self.dictionary.keys())  # number of words
    vectors = {}
    for class_id in self.features.keys():
        indicator = np.zeros(vocabulary_size)
        # A word repeated within the same sentence is not carried over:
        # the slot is simply set to 1, never incremented.
        for token in self.features[class_id]:
            indicator[self.dictionary[token]] = 1
        vectors[class_id] = indicator

    self.features = vectors
    save(vectors, "words_vector.json")
def calc_ave_R(num_of_strings=100):
    """Average ``calc_R(beta)`` over ``num_of_strings`` runs and save it.

    ``frames``, ``beta`` and ``L`` are read from module-level names.  The
    per-frame sum is accumulated in an array of length ``frames``, divided
    by the number of runs and written through ``save_data.save``.
    """
    total = np.zeros(frames)
    for _ in tqdm(range(num_of_strings)):
        total += calc_R(beta)
    R_ave = total / float(num_of_strings)

    base = ("./results/data/radius/"
            + "frames=%d_beta=%2.2f_sample=%d_" % (frames, beta, num_of_strings))
    save_data.save(base,
                   frames=frames,
                   beta=beta,
                   L=L,
                   radius_of_rotation=R_ave)
def box_count(beta, frames_list, N_L=20, num_of_strings=100):
    """Collect box-counting results over several runs and save them.

    ``BoxCounting`` is run ``num_of_strings`` times (at least once).  The
    non-``None`` entries of ``bc.main.post_func_res`` of each run are
    matched to ``frames_list`` by position, stacked per entry, sorted by
    box size and saved through ``save_data.save``.

    NOTE(review): the positional match assumes ``post_func_res`` yields
    results in the same order as ``frames_list`` — confirm against
    ``BoxCounting``.

    Args:
        beta: Forwarded to ``BoxCounting``.
        frames_list: Forwarded to ``BoxCounting``; also keys the results.
        N_L: Forwarded to ``BoxCounting``.
        num_of_strings: Number of runs.
    """
    # Kept under its own name: the original reused `frames` as a loop
    # variable, so later runs were started with the *last* entry of
    # frames_list instead of the maximum.
    max_frames = np.max(frames_list)

    def run_once(string_num):
        """Run one box-counting simulation; return it and its results."""
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('string ({}/{})'.format(string_num, num_of_strings))
        bc = BoxCounting(
            frames=max_frames, beta=beta, frames_list=frames_list, N_L=N_L,
            # save_fitting=True,
            # save_fitting_dir="results/img/box_counting/2016-12-01/")
        )
        bc.start()
        counts = np.array([n for n in bc.main.post_func_res if n is not None])
        return bc, counts

    bc, _N = run_once(1)
    Ns = {frames_list[i]: _N[i] for i in range(len(frames_list))}

    for string_num in range(2, num_of_strings + 1):
        bc, _N = run_once(string_num)
        for i, n_frames in enumerate(frames_list):
            Ns[n_frames] = np.vstack((Ns[n_frames], _N[i]))

    for n_frames in frames_list:
        Ls, N = Ns[n_frames].T
        sorted_index = np.argsort(Ls)
        Ls, N = Ls[sorted_index], N[sorted_index]
        # save_data.save("./results/data/box_counting/2017-01-27/" +
        save_data.save("./results/data/box_counting/2017-01-29/" +
                       "beta=%2.2f_frames=%d_" % (beta, n_frames),
                       num_of_strings=num_of_strings,
                       N_L=N_L, beta=beta, L=bc.L, frames=n_frames,
                       Ls=Ls, N=N)
def mass_in_r_for_one_beta(beta, num_of_strings, L, frames, plot=True,
                           optimize=False, save_image=False, save_data=False):
    """Average the mass-in-radius curve over several strings for one ``beta``.

    ``get_mass_in_r_for_one_string`` is called ``num_of_strings`` times;
    the radii returned by one call are passed into the next.  The radii
    and masses are averaged over the runs, then optionally saved, plotted
    on log-log axes, fitted with ``Optimize_powerlaw`` and written to a
    time-stamped PNG.

    Args:
        beta, num_of_strings, L, frames: Simulation parameters.
        plot: Show the figure (only when ``save_image`` is false).
        optimize: Fit a power law to all but the last five points and
            draw it.
        save_image: Save the figure to ``results/img/mass_in_r/``.
        save_data: Save ``r`` and ``M`` to ``results/data/mass_in_r/``.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("beta = %2.2f" % beta)

    r = None
    rs = []
    Ms = []
    for s in tqdm(range(num_of_strings)):
        r, M = get_mass_in_r_for_one_string(L, frames, beta, r)
        rs.append(r)
        Ms.append(M)

    r = np.average(np.array(rs), axis=0)
    M = np.average(np.array(Ms), axis=0)

    if save_data:
        # The boolean parameter `save_data` hides the module of the same
        # name inside this function, so the original `save_data.save(...)`
        # failed with AttributeError on a bool.  Import the module under a
        # different local name instead.
        import save_data as save_data_module
        save_data_module.save("results/data/mass_in_r/beta=%2.2f_" % beta,
                              num_of_strings=num_of_strings,
                              beta=beta, L=L, frames=frames, r=r, M=M)

    if plot or save_image:
        fig, ax = plt.subplots()
        ax.loglog(r, M)
        ax.set_xlabel('Radius $r$ from the center of gravity')
        ax.set_ylabel('Mass in a circle with radius $r$')
        ax.set_title('$r$ vs. $M(r)$')

        if optimize:
            # The last five points are left out of the fit.
            index_stop = len(r) - 5
            optimizer = Optimize_powerlaw(
                args=(r[:index_stop], M[:index_stop]),
                parameters=[0., 2.])
            result = optimizer.fitting()
            print("D = %f" % result['D'])
            ax.loglog(r[:index_stop], optimizer.fitted(r[:index_stop]),
                      lw=2, label='D = %f' % result['D'])
            ax.legend(loc='best')

        if save_image:
            result_image_path = "results/img/mass_in_r/beta=%2.2f" % beta
            result_image_path += "_" + time.strftime("%y%m%d_%H%M%S")
            result_image_path += ".png"
            plt.savefig(result_image_path)
            plt.close()
            print("[saved] " + result_image_path)
        else:
            plt.show()
def execute_simulation_for_one_beta(beta, num_of_strings, L, frames,
                                    num_of_pairs, plot=True,
                                    save_image=False, save_data=False):
    """Sample distances and path lengths for one ``beta`` and report them.

    ``get_path_length_and_distances`` is called ``num_of_strings`` times;
    the returned distances and path lengths are flattened into two 1-D
    arrays.  Depending on the flags the arrays are saved through
    ``sd.save`` and/or drawn as a 2-D histogram that is either written to
    a time-stamped PNG (``save_image``) or shown interactively.

    Args:
        beta, num_of_strings, L, frames, num_of_pairs: Forwarded to
            ``get_path_length_and_distances`` and stored with the data.
        plot: Show the histogram (only when ``save_image`` is false).
        save_image: Save the histogram to ``results/img/distances/``.
        save_data: Save the raw arrays to ``results/data/distances/``.
    """
    # Single-argument print(...) yields the same output on Python 2 and 3.
    print("beta = %2.2f, frames = %d" % (beta, frames))

    distance_list = []
    path_length = []
    for s in tqdm(range(num_of_strings)):
        d, pl = get_path_length_and_distances(beta, num_of_strings,
                                              L, frames, num_of_pairs)
        distance_list.append(d)
        path_length.append(pl)

    distance_list = np.array(distance_list).flatten()
    path_length = np.array(path_length).flatten()

    if save_data:
        sd.save("results/data/distances/frames=%d_beta=%2.2f_" % (frames, beta),
                beta=beta, num_of_strings=num_of_strings,
                L=L, frames=frames, distance_list=distance_list,
                path_length=path_length)

    if plot or save_image:
        fig, ax = plt.subplots()

        # heatmap
        ax.hist2d(distance_list, path_length, bins=25)
        ax.set_xlabel('Distance')
        ax.set_ylabel('Path length')
        ax.set_title(
            'Path length and distances between two points in the cluster'
            + r'($\beta = %2.2f$)' % beta)

        if save_image:
            result_image_path = "results/img/distances/beta=%2.2f" % beta
            result_image_path += "_" + time.strftime("%y%m%d_%H%M%S")
            result_image_path += ".png"
            plt.savefig(result_image_path)
            plt.close()
            print("[saved] " + result_image_path)
        else:
            plt.show()
def most_common_words(connection):
    """Count words in ``itis_kfu.txt`` and store the 100 most frequent ones.

    The text is lower-cased and split on whitespace.  Each of the 100 most
    common tokens is passed to ``save(connection, word, count)`` and
    printed.

    Args:
        connection: Passed unchanged as first argument to ``save``.
    """
    stop_symbols = r'.,:\!/?*-_•–—0123456789&"'

    # The context manager closes the file even if reading fails.
    with open('itis_kfu.txt', encoding="utf-8") as source:
        text = source.read()

    # NOTE(review): `word not in stop_symbols` is a *substring* test on the
    # stop string (it drops e.g. "-" or "12" but keeps "21" or "word,").
    # Kept as-is to preserve existing results — confirm this is intended.
    word_counter = collections.Counter(
        word for word in text.lower().split() if word not in stop_symbols)

    n_print = 100
    print("\nOK. The {} most common words are as follows\n".format(n_print))
    for word, count in word_counter.most_common(n_print):
        save(connection, word, count)
        print(word, ": ", count)
def post(self):
    """Store the submitted form fields and answer with a JSON ``ok``.

    Seven body arguments are read.  ``data_string`` and
    ``sentences_string`` are decoded from JSON, the others are stored as
    received.  The assembled record is handed to ``save`` before the
    response is written and finished.
    """
    read = self.get_body_argument
    data_string = read('data_string')
    sentences_string = read('sentences_string')
    svg_string = read('svg_string')
    major_dim = read('major_name')
    second_dim = read('second_name')
    user_name = read('user_name')
    total_number = read('total_number')
    # logger.info(sentences_string)

    record = {
        'user_name': user_name,
        'total_number': total_number,
        'data': json.loads(data_string),
        'svg_string': svg_string,
        'sentences': json.loads(sentences_string),
        'major_dim': major_dim,
        'second_dim': second_dim,
    }
    save(record)

    self.set_header('Content-Type', 'application/json; charset=UTF-8')
    reply = json.dumps({'message': 'ok'})
    self.write(reply)
    self.finish()
def get_features(self, filter_args=None):
    """Extract features from every loadable ``.wav`` file of every class.

    For each entry of ``self.classes`` the matching sub-directory of
    ``self.audio_path`` is scanned.  Every function name in
    ``self.funcs_`` is applied through ``do_from_name`` and the results
    are concatenated into one feature list per file.  Results of the
    ``"text"`` function are split into words, and unseen words receive the
    next free index in ``self.dictionary``.  Files that cannot be loaded
    or that yield no features are skipped.  The collected features are
    saved to ``features.json``.

    Args:
        filter_args: Optional keyword arguments for ``highpass_filter``;
            when given, the audio is filtered before feature extraction.
    """
    for i, animal in enumerate(self.classes):
        current_path = path.join(self.audio_path, animal)
        sounds = os.listdir(current_path)
        self.features[i] = []
        self.class_to_number[animal] = i
        self.number_to_class[i] = animal

        for sound in sounds:
            if not sound.endswith('.wav'):
                continue
            AUDIO_FILE = path.join(current_path, sound)

            try:
                audio = MonoLoader(filename=AUDIO_FILE)()
            except Exception:
                # Best effort: skip files the loader cannot read.  Unlike
                # a bare `except:`, this lets KeyboardInterrupt and
                # SystemExit propagate.
                continue

            if filter_args:
                # 44100 is presumably the sample rate — TODO confirm.
                audio = highpass_filter(audio, 44100, **filter_args)

            feat_ = []
            for f in self.funcs_:
                aux = do_from_name(f, audio, AUDIO_FILE)
                if f == "text":
                    if aux == -1:
                        # -1 is treated as "no text for this file".
                        continue
                    aux = aux.split()
                    for w in aux:
                        if w not in self.dictionary:
                            self.dictionary[w] = len(self.dictionary)
                feat_.extend(aux)

            if len(feat_) == 0:
                continue
            self.features[i].append(feat_)

    # Shape of the first feature entry of class 0; this raises if class 0
    # ended up without any features.
    self.feat_amount = np.array(self.features[0][0]).shape
    # joblib.dump(self.features, "features.pkl")
    save(self.features, "features.json")
if __name__ == '__main__':
    # For each beta, build `num_of_strings` FilledKagome clusters, collect
    # their `R` attribute, save the raw values and plot the per-beta mean.
    current_time = time.strftime("%y%m%d_%H%M%S")

    L = 2000
    frames = 1000
    num_of_strings = 30
    betas = [0., 5., 10., 15., 20.]

    Rs = []
    for beta in betas:
        R = []
        for s in range(num_of_strings):
            filled_kagome = FilledKagome(beta=beta, L=L, frames=frames)
            R.append(filled_kagome.R)
        Rs.append(np.average(R))

        save_data.save("results/data/filled_kagome_radius/beta=%2.2f_" % beta,
                       beta=beta, num_of_strings=num_of_strings,
                       L=L, frames=frames, R=R)

    fig, ax = plt.subplots()

    # NOTE(review): the original call was `ax.plot(, path_length, bins=25)`,
    # a syntax error that referenced an undefined `path_length`; the labels
    # and title also came from a distance/path-length histogram.  The only
    # data computed here are `betas` and `Rs`, so those are plotted and
    # labelled — confirm this is the intended figure.  The figure is still
    # neither shown nor saved, as before.
    ax.plot(betas, Rs)
    ax.set_xlabel(r'$\beta$')
    ax.set_ylabel('Average $R$')
    ax.set_title(r'Average $R$ of the filled kagome cluster vs. $\beta$')
# Script section: compute and plot the vector correlation for each beta.
# `betas`, `num_of_strings` and `start_time` are used here but assigned
# outside this section.
# betas = [float(i) for i in range(11)]
# betas = [20.]
frames = 1000
L = (frames + 1) * 2
num_of_pairs = 100

fig, ax = plt.subplots()
for beta in betas:
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("beta = %2.2f" % beta)
    Lp, Cs = get_correlation(beta, num_of_strings, L, frames, num_of_pairs)
    ax.plot(Lp, Cs, '.', label=r'$\beta = %2.2f$' % beta)

    # save the data
    save_data.save("results/data/correlation/beta=%2.2f_" % beta,
                   num_of_strings=num_of_strings,
                   beta=beta, L=L, frames=frames, Lp=Lp, Cs=Cs)

ax.set_xlabel('Path length')
ax.set_ylabel('Correlation of the vectors')
ax.set_title('Correlation of the vectors')
ax.legend(loc='best')

# One image for all betas, tagged with the number of strings and start time.
result_image_path = "results/img/correlation/strings=%d" % num_of_strings
result_image_path += "_" + start_time
result_image_path += ".png"
plt.savefig(result_image_path)
plt.close()
print("[saved] " + result_image_path)
# plt.show()
def mass_for_beta_one(beta, frames_list, N_r=100, num_of_strings=100):
    """Measure mass-in-radius curves at several string lengths and save them.

    ``Main`` is run ``num_of_strings`` times (at least once) with
    ``calc_mass_in_r`` as ``post_function``.  The non-``None`` results of
    each run are matched to ``frames_list`` by position, stacked per entry,
    sorted by radius and saved through ``save_data.save``.

    NOTE(review): the positional match assumes ``main.post_func_res``
    yields results in the same order as ``frames_list`` — confirm against
    ``Main``.

    Args:
        beta: Forwarded to ``Main``.
        frames_list: String lengths (``N - 3``) at which to measure.
        N_r: Number of log-spaced radii per measurement.
        num_of_strings: Number of simulation runs.
    """
    # Kept under its own name: the original reused `frames` as a loop
    # variable, so later runs were started with the *last* entry of
    # frames_list instead of the maximum.
    max_frames = np.max(frames_list)
    center_sample = int(np.min(frames_list) / 2)
    L = (max_frames + 1) * 2

    def calc_mass_in_r(self, i, s):
        """Return an ``(N_r, 2)`` array of ``[r, M(r)]`` or ``None``."""
        N = len(s.vec) + 1
        if N - 3 not in frames_list:
            return None

        # Index with a tuple: a list of index arrays is no longer treated
        # as a multidimensional index by recent NumPy versions.
        pos = tuple(s.pos.T)
        x, y = self.lattice_X[pos], self.lattice_Y[pos]
        X, Y = np.average(x), np.average(y)
        dist_from_mean = np.sqrt((x - X)**2 + (y - Y)**2)
        r = np.logspace(1, np.log2(max(dist_from_mean)), num=N_r, base=2.)

        centers_index = sorted(random.sample(range(N), center_sample))

        # Distances from each sampled centre do not depend on the radius,
        # so compute them once instead of once per radius.
        dists_per_center = []
        for c in centers_index:
            index_x, index_y = s.pos[c]
            dists_per_center.append(
                np.sqrt((x - self.lattice_X[index_x, index_y])**2
                        + (y - self.lattice_Y[index_x, index_y])**2))

        M = [np.average([np.count_nonzero(dist < _r)
                         for dist in dists_per_center])
             for _r in r]
        return np.array([r, M]).T

    def run_once():
        """Run one simulation and return its non-None measurements."""
        main = Main(Lx=L,
                    Ly=L,
                    plot=False,
                    frames=max_frames,
                    beta=beta,
                    strings=[{
                        'id': 1,
                        'x': L / 4,
                        'y': L / 2,
                        'vec': [0, 4]
                    }],
                    post_function=calc_mass_in_r)
        return np.array([m for m in main.post_func_res if m is not None])

    _M = run_once()
    Ms = {frames_list[i]: _M[i] for i in range(len(frames_list))}

    for s in tqdm(range(num_of_strings - 1)):
        _M = run_once()
        for i, n_frames in enumerate(frames_list):
            Ms[n_frames] = np.vstack((Ms[n_frames], _M[i]))

    for n_frames in frames_list:
        r, M = Ms[n_frames].T
        sorted_index = np.argsort(r)
        r, M = r[sorted_index], M[sorted_index]
        save_data.save("./results/data/mass_in_r/beta=%2.2f_frames=%d_" %
                       (beta, n_frames),
                       num_of_strings=num_of_strings,
                       N_r=N_r,
                       beta=beta,
                       L=L,
                       frames=n_frames,
                       r=r,
                       M=M)
import time
import datetime
import pythonping
import json
import save_data

# Ping logger: every cycle appends one timestamped round-trip time to the
# JSON object stored in data.txt.  Once the running entry count exceeds
# 10000, save_data.save() is called on every cycle.

# Start counting from the number of entries already stored.
with open("data.txt", "r") as f:
    count = len(json.load(f))

while True:
    count += 1

    with open("data.txt", "r") as f:
        data = json.load(f)

    # Ping first, then timestamp the finished measurement.
    rtt = pythonping.ping('sz.de', size=256, count=1).rtt_avg_ms
    data[str(datetime.datetime.now())] = rtt

    with open("data.txt", "w") as f:
        json.dump(data, f)

    time.sleep(4.9)
    print(count)

    if count > 10000:
        save_data.save()
        time.sleep(3)
def eval_simulation_for_one_beta(beta, num_of_strings=30):
    """Aggregate die-cutting statistics over several strings and save them.

    ``eval_simulation_for_each_string`` is called ``num_of_strings`` times.
    For every key of the module-level ``result_set`` the per-run values are
    grouped by cut size, then reduced per key as described inline, and
    everything is written through ``save_data.save``.

    This function relies on Python 2 behaviour: ``dict.has_key``,
    ``itertools.izip_longest`` and ``map`` returning a list (``len`` is
    applied to its results).

    NOTE(review): ``Ls`` is only assigned inside the three ``if`` branches,
    so the final save raises NameError when none of the three keys is in
    ``result_set``.

    Args:
        beta: Simulation parameter; also used in the output file name.
        num_of_strings: Number of simulated strings; also the divisor of
            the sub-cluster count average.
    """
    # NOTE: current_time is not used below.
    current_time = time.strftime("%y%m%d_%H%M%S")
    frames = 1000
    params = {
        'L': (frames + 2) * 2,
        'frames': frames,
        'beta': beta,
        'plot': False
    }
    # d[result key][cut size] -> list with one value per run that had it.
    d = {k: {} for k in result_set.keys()}
    for s in tqdm(range(num_of_strings)):
        # _Ls, _N_sub = eval_simulation_for_each_string(params)
        _Ls, _res = eval_simulation_for_each_string(params)
        for k in result_set.keys():
            for i, l in enumerate(_Ls):
                if d[k].has_key(l):
                    d[k][l].append(_res[k][i])
                else:
                    d[k][l] = [_res[k][i],]

    if d.has_key('num_of_sub_clusters'):
        # # With the approach below, samples in which L does not exist are
        # # ignored in the result
        # mean = [(k, np.average(v)) for k, v in d.items()]
        # Average taken as if the value were 0 whenever cut size L does not
        # exist in a sample (sum divided by the total number of strings).
        mean = [(k, np.sum(v) / float(num_of_strings))
                for k, v in d['num_of_sub_clusters'].items()]
        Ls, N_sub = np.array(sorted(mean)).T
    else:
        N_sub = []

    if d.has_key('size_dist_of_sub_clusters'):
        size_dist = {}
        for k, v in d['size_dist_of_sub_clusters'].items():
            # Element-wise sum across runs; shorter runs are padded.
            # NOTE(review): padding uses 1 where the commented-out variant
            # used 0 — confirm which is intended.
            # size_dist[k] = map(sum, itertools.izip_longest(*v, fillvalue=0))
            size_dist[k] = map(sum, itertools.izip_longest(*v, fillvalue=1))
        Ls = sorted(size_dist.keys())
        size_dist = [size_dist[k] for k in Ls]
        # Copy the ragged rows into a zero-padded 2-D array.
        S = np.zeros((len(size_dist), max(map(len, size_dist))))
        for i, s in enumerate(size_dist):
            for j, num in enumerate(s):
                S[i][j] = num
        size_dist = S
    else:
        size_dist = []

    import numpy.ma as ma
    def masked_average(arr):
        # Mean over the entries that are not the -1 padding value.
        return ma.array(arr, mask=np.array(arr) == -1).mean()

    if d.has_key('size_dist_ave_of_sub_clusters'):
        size_dist_ave = {}
        for k, v in d['size_dist_ave_of_sub_clusters'].items():
            # Pad shorter runs with -1 so masked_average can ignore them.
            num_when_L = itertools.izip_longest(*v, fillvalue=-1)
            size_dist_ave[k] = map(masked_average, num_when_L)
        Ls = sorted(size_dist_ave.keys())
        size_dist_ave = [size_dist_ave[k] for k in Ls]
        # Copy the ragged rows into a zero-padded 2-D array.
        S = np.zeros((len(size_dist_ave), max(map(len, size_dist_ave))))
        for i, s in enumerate(size_dist_ave):
            for j, num in enumerate(s):
                S[i][j] = num
        size_dist_ave = S
    else:
        size_dist_ave = []

    save_data.save("../results/data/diecutting/beta=%2.2f_" % beta,
                   beta=beta,
                   num_of_strings=num_of_strings,
                   L=params['L'], frames=params['frames'],
                   Ls=Ls, N_sub=N_sub,
                   size_dist=size_dist, size_dist_ave=size_dist_ave)
import docx
import save_data
import os

# Convert every document in `path` to a CSV in `save_path`: each paragraph
# longer than 20 characters is parsed with save_data.match and written
# with save_data.save.

# Raw strings keep the same values as before while avoiding the invalid
# escape sequences '\d' and '\s'.
path = r'.\data_all'
save_path = r'.\save_data'

files = os.listdir(path)
if not os.path.exists(save_path):
    os.makedirs(save_path)
save_data.delete(save_path)

for file in files:
    # os.listdir returns bare names, so the directory test must be made on
    # the entry inside `path`, not relative to the working directory.
    if os.path.isdir(os.path.join(path, file)):
        continue
    doc = docx.Document(path + '/' + file)
    for para in doc.paragraphs:
        data = para.text
        if len(data) > 20:
            r1, r2, r3 = save_data.match(data)
            file_new = file.replace('.docx', '.csv')
            save_data.save(r1, r2, r3, save_path + '/' + file_new)