def testChunks(self):
    arr = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    chunked = tools.chunks(arr, 3)
    i = 0
    total = 0
    for chunk in chunked:
        total += sum(chunk)
        i += 1
    self.assertEqual(total, 45)
    self.assertEqual(i, 3)
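# None of the snippets in this collection define chunks() themselves; a minimal
# sketch consistent with the test above might look like this (an assumption, not
# any project's actual implementation -- a few callers below index the result
# and len() it, so they presumably use a list-returning variant instead).
def chunks(seq, size):
    """Yield successive slices of seq with at most `size` items each."""
    for start in range(0, len(seq), size):
        yield seq[start:start + size]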
def get_school_holidays(self):
    range_name = "school_data!X2:X"
    range_values = self.service.spreadsheets().values().get(
        spreadsheetId=self.spreadsheet_id, range=range_name).execute()
    values = [
        datetime.datetime.strptime(x[0], "%d/%m/%Y")
        for x in range_values.get('values', [])
    ]
    grouped = tools.chunks(values, 2)
    r = list(grouped)
    return r
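# Illustrative only (the dates are made up): the sheet column holds alternating
# start/end rows, so chunking by 2 pairs them into (start, end) holiday ranges,
# using the chunks() sketch above.
import datetime

values = [datetime.datetime(2024, 2, 12), datetime.datetime(2024, 2, 23),
          datetime.datetime(2024, 4, 8), datetime.datetime(2024, 4, 19)]
holidays = list(chunks(values, 2))
# -> [[Feb 12, Feb 23], [Apr 8, Apr 19]] as (start, end) pairs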
def iter_parts(self, parts=range(4)):
    '''Iterate over parts in self.trials
    part 0: trials 1-30
    part 1: trials 31-60
    part 2: trials 61-90
    part 3: trials 91-120'''
    if isinstance(parts, int):
        # allow a single part index as well as an iterable of indices
        parts = [parts]
    parts = list(parts)
    assert all(0 <= t < 4 for t in parts)
    trial_chunks = list(tls.chunks(self.trials, 30))
    for i in parts:
        yield i, trial_chunks[i]
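# Hypothetical usage (the experiment object is an assumption): with a 120-item
# trials list, parts 0 and 2 cover trials 1-30 and 61-90 respectively.
# for part_idx, part_trials in experiment.iter_parts(parts=[0, 2]):
#     print(part_idx, part_trials[0], part_trials[-1])   # 0 1 30, then 2 61 90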
def __construct_data(self):
    indexed_content = []
    for block in self.block_image.blocks:
        itt = ImageToText(block)
        char_index = self.char_database.get(str(itt), 32)
        indexed_content.append(str(char_index))
    line_count = DATA_FIRST_LINE
    data_lines = []
    for data in chunks(indexed_content, 16):
        line = DATA_TEMPLATE.format(line=line_count, content=",".join(data))
        data_lines.append(line)
        line_count += 10
    return data_lines
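# Assumed shape of the constants used above (illustrative, not the project's
# actual values): classic BASIC-style numbered DATA lines, 16 values per line,
# with line numbers advancing by 10.
DATA_FIRST_LINE = 1000
DATA_TEMPLATE = "{line} DATA {content}"
# __construct_data() would then emit e.g. "1000 DATA 32,65,66,...", "1010 DATA ..."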
def run(self, circuits, meas_qubits=None, measure=None, count_chunks=False):
    """
    :param circuits: list of QuantumCircuit or a single QuantumCircuit
    :param meas_qubits: list of qubits that will be measured
    :param measure: optional. By default, tomography is performed after the
        channel transformation. Not implemented yet.
    :return: depends on the measure parameter. By default, a list of density
        matrices.
    """
    if measure is not None:
        raise NotImplementedError

    if isinstance(circuits, QuantumCircuit):
        circuits = [circuits]

    meas_qubits, s_matrix = sort_list_and_transformation_matrix(meas_qubits)
    number_measure_experiments = 3**len(meas_qubits)

    jobs = []
    for qc in circuits:
        qr = qc.qregs[0]
        tomo_circuits = state_tomography_circuits(
            qc, [qr[qubit_number] for qubit_number in meas_qubits])
        jobs.extend(tomo_circuits)

    res = None
    for i, chunk_jobs in enumerate(chunks(jobs, self.max_jobs_per_one)):
        if count_chunks:
            print(f'chunk number: {i + 1}')
        execute_kwargs = {
            'experiments': chunk_jobs,
            'backend': self.backend,
            'shots': self.shots,
            'max_credits': 15
        }
        new_res = execute(**execute_kwargs).result()
        if res is None:
            res = new_res
        else:
            res += new_res

    matrices = []
    for i in range(len(res.results) // number_measure_experiments):
        res_matrix = copy(res)
        res_matrix.results = res.results[i * number_measure_experiments:
                                         (i + 1) * number_measure_experiments]
        rho = StateTomographyFitter(
            res_matrix,
            jobs[i * number_measure_experiments:
                 (i + 1) * number_measure_experiments]).fit()
        rho = np.linalg.inv(s_matrix) @ rho @ s_matrix
        matrices.append(rho)

    return matrices
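# Sizing note for the chunking above (illustrative numbers): each input circuit
# expands into 3**len(meas_qubits) tomography experiments, and the combined job
# list is submitted in pieces of at most self.max_jobs_per_one experiments.
import math

n_circuits, n_meas, max_jobs = 20, 2, 75        # hypothetical values
n_jobs = n_circuits * 3 ** n_meas               # 180 tomography experiments
n_chunks = math.ceil(n_jobs / max_jobs)         # -> 3 separate execute() calls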
                  max_colm_norm=False, max_norm=15.0,
                  norm_gsup=norm_gsup, norm_gh=norm_gh)
eval_fn, eval_fn_tr = fns["eval_fn"], fns["eval_fn_tr"]

# Things to track during training: epoch and minibatch
train_stats = {"tr_error_ep": [], "vl_error_ep": [], "tr_cost_ep": [],
               "tr_error_mn": [], "vl_error_mn": [], "tr_cost_mn": [],
               "current_nb_mb": 0, "best_epoch": 0, "best_mn": 0}
names = []
for l, i in zip(layers, range(len(layers))):
    if l["hint"] is not None:
        names.append(i)
debug = {"grad_sup": [], "grad_hint": [], "penalty": [], "names": names}

# Eval before starting training
l_vl = chunks(range(validx.shape[0]), valid_batch_size)
l_tr = chunks(range(trainx.shape[0]), valid_batch_size)
vl_err_start = np.mean(
    [eval_fn(np.array(l_vl[kk])) for kk in range(len(l_vl))])
tr_err_start = np.mean(
    [eval_fn_tr(np.array(l_tr[kk])) for kk in range(len(l_tr))])
print(vl_err_start, tr_err_start)

# Exp stamp
time_exp = DT.datetime.now().strftime('%m_%d_%Y_%H_%M_%s')
tag_text = "_".join([str(l["hint"]) for l in layers])
h_exp = "_".join([str(e) for e in h_ind])
fold_exp = "exps/" + tag + "_" + str(nbr_sup) + "_" + h_exp + "_" + \
    size_model + "_" + time_exp
if not os.path.exists(fold_exp):
    os.makedirs(fold_exp)
def get(self):
    results = self.credentials.user_playlist(self.user.user_id, self.id,
                                             fields="tracks,next")
    for i, item in enumerate(results['tracks']['items']):
        dtc = {}
        dtc['Title'] = item['track']['name']
        dtc['TrackId'] = item['track']['id']
        dtc['Album'] = item['track']['album']['name']
        dtc['AlbumId'] = item['track']['album']['id']
        dtc['Comments'] = ''
        try:
            dtc['Artist'] = item['track']['album']['artists'][0]['name']
        except IndexError:
            dtc['Artist'] = 'none'
        try:
            # only take the first artist ID
            dtc['ArtistId'] = item['track']['album']['artists'][0]['id']
        except IndexError:
            dtc['ArtistId'] = 'none'
        self.tracks_json[i] = dtc

    # chunk the tracks to avoid hitting the API id count limit per request
    outside_track_list = chunks(self.tracks_json, 20)
    for j, tracks in enumerate(outside_track_list):
        track_ids = []
        for track in tracks:
            track_ids.append(track['TrackId'])
        artist_ids = []
        for track in tracks:
            artist_ids.append(track['ArtistId'])
        album_ids = []
        for track in tracks:
            album_ids.append(track['AlbumId'])

        # get album details
        album_meta = self.credentials.albums(albums=album_ids)
        for i, track in enumerate(tracks):
            track['ReleaseDate'] = album_meta['albums'][i]['release_date']
            track['RecordLabel'] = album_meta['albums'][i]['label']

        # get artist details
        artist_meta = self.credentials.artists(artists=artist_ids)
        for i, track in enumerate(tracks):
            try:
                # tags will always refer to the first artist listed
                tags = '; '.join(artist_meta['artists'][i]['genres'][:4])
                track['ArtistTags'] = tags
            except IndexError:
                track['ArtistTags'] = 'none'

        # get track feature data
        feature_meta = self.credentials.audio_features(tracks=track_ids)
        for i, track in enumerate(tracks):
            track['danceability'] = feature_meta[i]['danceability']
            track['energy'] = feature_meta[i]['energy']
            track['key'] = feature_meta[i]['key']
            track['loudness'] = feature_meta[i]['loudness']
            track['mode'] = feature_meta[i]['mode']
            track['speechiness'] = feature_meta[i]['speechiness']
            track['acousticness'] = feature_meta[i]['acousticness']
            track['instrumentalness'] = feature_meta[i]['instrumentalness']
            track['liveness'] = feature_meta[i]['liveness']
            track['valence'] = feature_meta[i]['valence']
            track['tempo'] = feature_meta[i]['tempo']

        if j == 0:
            main_df = pd.DataFrame.from_dict(tracks)
        else:
            sub_df = pd.DataFrame.from_dict(tracks)
            main_df = pd.concat([main_df, sub_df])

    column_order = ('Title', 'Album', 'Artist', 'ArtistTags', 'ReleaseDate',
                    'RecordLabel', 'Comments', 'TrackId', 'AlbumId', 'ArtistId',
                    'acousticness', 'danceability', 'energy', 'instrumentalness',
                    'key', 'liveness', 'loudness', 'mode', 'speechiness',
                    'tempo', 'valence')
    main_df = main_df.loc[:, list(column_order)]
    return main_df
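# Why the playlist is processed 20 tracks at a time: Spotify's batch lookup
# endpoints cap the number of ids per request (the several-albums endpoint takes
# at most 20 ids at the time of writing), so chunking at 20 keeps every batched
# call in get() under its limit. Inside the loop, the per-batch id lists could
# equally be built with comprehensions, e.g.
# track_ids = [t['TrackId'] for t in tracks].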
def test(nbr_sampler, config):
    data = nbr_sampler.data
    multilabel, device = config.multilabel, config.device
    model, _ = load_model(config, data)
    model.eval()
    # _, prior = torch.unique(data.y[data.train_mask == 1], return_counts=True)
    # prior = prior.to(torch.float32)/prior.sum()
    # prior = prior.to(device)
    city2ix = {c: i for i, c in enumerate(data.cities)}
    if multilabel:
        test_cities = [
            'co_Russia.r1_CentralRussia',
            'co_Russia.r1_CentralRussia.r2_MosObl',
            'co_Russia.r1_CentralRussia.r2_MosObl.ci_Moscow',
            'co_Russia.r1_NorthWest',
            'co_Russia.r1_NorthWest.r2_SpbObl',
            'co_Russia.r1_NorthWest.r2_SpbObl.ci_SaintPetersburg',
            'co_Russia.r1_Urals',
            'co_Russia.r1_Urals.r2_Sverdlovsk',
            'co_Russia.r1_Urals.r2_Sverdlovsk.ci_Yekaterinburg',
            'co_Russia.r1_FarEast',
            'co_Russia.r1_FarEast.r2_Yakutia',
            'co_Russia.r1_FarEast.r2_Yakutia.ci_Yakutsk',
            'co_Russia.r1_Siberia',
            'co_Russia.r1_Siberia.r2_Chita',
            'co_Russia.r1_Siberia.r2_Chita.ci_Chita',
        ]
    else:
        test_cities = [
            'p_Europe.co_Russia.r1_CentralRussia.r2_MosObl.ci_Moscow',
            'p_Europe.co_Russia.r1_NorthWest.r2_SpbObl.ci_SaintPetersburg',
            'p_Europe.co_Russia.r1_Urals.r2_Sverdlovsk.ci_Yekaterinburg',
            'p_Europe.co_Russia.r1_Siberia.r2_Novosibirsk.ci_Novosibirsk',
            'p_Europe.co_Belorussia.r1_MinskObl.ci_Minsk',
            'p_Europe.co_Russia.r1_CentralRussia.r2_Tula.ci_Tula',
            'p_Europe.co_Russia.r1_Siberia.r2_Chita.ci_Chita',
            'p_Europe.co_Russia.r1_CentralRussia.r2_Bryansk.ci_Bryansk',
            'p_Europe.co_Russia.r1_FarEast.r2_Yakutia.ci_Yakutsk',
            # 'p_Europe.co_Russia.r1_CentralRussia.r2_Orel',
        ]
    ixs = [city2ix[c] for c in test_cities]
    if multilabel:
        plt_ixs = list(chunks(ixs, 3))

    probs, preds, y = [], [], []
    with torch.no_grad():
        for data_flow in nbr_sampler(data.test_mask):
            if multilabel:
                y_cur = data.y[data_flow.n_id]
            else:
                y_cur = data.y[data_flow.n_id].numpy()
            x = slice_data_flow(config, data, data_flow)
            logits = model(x, data_flow.to(device))
            probs.append(torch.exp(logits).cpu().detach().numpy())
            pred = logits.max(1)[1]
            preds.append(pred.cpu().detach().numpy())
            y.append(y_cur)

    probs = np.concatenate(probs)
    preds = np.concatenate(preds)
    y = np.concatenate(y)

    if not multilabel:
        ntest = data.test_mask.sum().item()
        accuracy = (preds == y).sum() / ntest
        print(accuracy)
        Y = label_binarize(y, classes=ixs)
        # ids = load_vector("../data/vk/nodes.bin", 'I')
        # df = pd.DataFrame(probs)
        # df.columns = data.cities
        # df.index = [ids[i] for i in data_flow.n_id]
        # df.to_csv('../data/vk/preds_example.csv')
    else:
        Y = y

    df = pd.DataFrame()
    df['geo'] = data.cities
    # choose thresholds such that precision >= precision_min
    for th in [config.precision_min1, config.precision_min2]:
        recalls, thresholds = [], []
        for i in range(Y.shape[1]):
            precision, recall, _thresholds = precision_recall_curve(
                Y[:, i], probs[:, i])
            wix = np.where(precision >= th)[0][0]
            if wix == len(_thresholds):
                thresholds.append(1)
                recalls.append(0)
            else:
                thresholds.append(_thresholds[wix])
                recalls.append(recall[wix])
        df['recall_%.2f' % th] = recalls
        df['threshold_%.2f' % th] = thresholds
    df.to_csv(config.join('thresholds.csv'), index=False)

    def plot_precision_recall(Y, probs, ixs, fname):
        plt.clf()
        for i in ixs:
            precision, recall, _ = precision_recall_curve(Y[:, i], probs[:, i])
            plt.plot(recall, precision, lw=1,
                     label=data.cities[i].split('.')[-1])
        plt.legend(fontsize='small', loc='lower left')
        plt.xlabel("recall")
        plt.ylabel("precision")
        plt.title("precision vs. recall curve")
        plt.savefig(fname, dpi=900)

    fname = config.join('precision_recall%s_all.pdf' % config.postfix)
    plot_precision_recall(Y, probs, ixs, fname)
    if multilabel:
        for i, sub_ixs in enumerate(plt_ixs):
            fname = config.join('precision_recall%s_%s.pdf' %
                                (config.postfix, i))
            plot_precision_recall(Y, probs, sub_ixs, fname)
def pullFeatureForSong(self, feature_name, song_id, pack_size):
    """
    Returns an array with elements of type <feature_name>.

    The song is split into n sample packs, each of size pack_size. The feature
    is evaluated on each sample pack and filled into the returned array (in the
    same order). If no data is already present in the database for the given
    pack_size, the feature will be evaluated for the whole song and added to
    the database.

    @param feature_name: the feature to get
    @param song_id: the song to get the feature for
    @param pack_size: the size of the packets to split the song into
                      (-1 to apply the feature to the whole song)
    """
    logging.debug("song_id " + str(song_id) + ": " + feature_name +
                  " with pack size " + str(pack_size) + " searching...")

    # PULL THE FEATURE IF IT ALREADY EXISTS
    self.cur.execute(
        "SELECT data FROM FeatureData WHERE song_id=%s AND feature_name=%s AND pack_size=%s",
        (song_id, feature_name, pack_size))
    class_ = getattr(features, feature_name)  # prepare the class for instantiation

    packList = []
    row = self.cur.fetchone()
    if row:
        # RETURN THE LIST OF FEATURES
        # In the case that class_.requireFullSong == True, there should only be
        # one row, but it is returned in the same way.
        while row:
            feature = class_.unserialize(row[0])
            packList.append(feature)
            row = self.cur.fetchone()
    else:
        # NO ELEMENTS FOUND, COMPUTE
        raw_data = self.fetchSongData(song_id)  # fetch raw data from file on disk
        raw_chunks = []
        if pack_size == -1:
            raise NotImplementedError("pack_size == -1 not yet supported.")
        else:
            # Split the song into chunks of size pack_size; these will be
            # processed by the feature.
            raw_chunks = tools.chunks(raw_data, pack_size)

        logging.debug("Feature Data not found. Generating feature data for " +
                      str(len(raw_data) // pack_size) + " packs...")

        if class_.requireFullSong:
            # single feature creation for the full song
            feature = class_(raw_chunks)
            packList.append(feature)
            serialized = feature.serialize()
            logging.debug(feature_name + ".value = " + str(feature.value))
            self.cur.execute(
                "INSERT INTO FeatureData (song_id, pack_index, feature_name, pack_size, data) VALUES (%s, %s, %s, %s, %s)",
                (song_id, 0, feature_name, pack_size, serialized))
        else:
            # feature creation per sample pack: iterate and create a feature
            # for each one to save to the DB
            pack_index = 0
            for dataPack in raw_chunks:
                feature = class_(dataPack)
                packList.append(feature)
                serialized = feature.serialize()
                self.cur.execute(
                    "INSERT INTO FeatureData (song_id, pack_index, feature_name, pack_size, data) VALUES (%s, %s, %s, %s, %s)",
                    (song_id, pack_index, feature_name, pack_size, serialized))
                pack_index += pack_size

        self.commit()  # commit the insertions

    return packList
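# A minimal sketch of the feature-class interface the method above assumes
# (the class name and computation are illustrative, not the project's actual
# code): constructed from raw sample data, exposes .value, and round-trips
# through serialize()/unserialize().
import json

class ZeroCrossingRate:          # hypothetical feature class
    requireFullSong = False

    def __init__(self, samples):
        # placeholder computation over one sample pack
        self.value = sum(1 for a, b in zip(samples, samples[1:]) if a * b < 0)

    def serialize(self):
        return json.dumps(self.value)

    @classmethod
    def unserialize(cls, blob):
        obj = cls.__new__(cls)
        obj.value = json.loads(blob)
        return obj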