Example #1
 def testChunks(self):
     arr = [1, 2, 3, 4, 5, 6, 7, 8, 9]
     chunked = tools.chunks(arr, 3)
     i = 0
     total = 0
     for chunk in chunked:
         total += sum(chunk)
         i += 1
     self.assertEqual(total, 45)
     self.assertEqual(i, 3)
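None of the listings include `tools.chunks` itself. A minimal sketch of a helper consistent with the test above (nine elements split into three chunks of three, summing to 45) could look like the following; this is an assumption, not the projects' actual implementation, and some later examples index the result, which suggests their version returns a list rather than a generator.

def chunks(seq, n):
    # Assumed helper: yield successive slices of length n from seq.
    for i in range(0, len(seq), n):
        yield seq[i:i + n]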
Example #3
 def get_school_holidays(self):
     range_name = "school_data!X2:X"
     range_values = self.service.spreadsheets().values().get(
         spreadsheetId=self.spreadsheet_id, range=range_name).execute()
     values = [
         datetime.datetime.strptime(x[0], "%d/%m/%Y")
         for x in range_values.get('values', [])
     ]
     grouped = tools.chunks(values, 2)
     return list(grouped)
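With a chunks helper like the sketch above, grouping the flat column of dates in pairs turns it into (start, end) ranges, one per holiday. A small illustration with made-up dates:

import datetime

dates = [datetime.datetime(2021, 10, 25), datetime.datetime(2021, 10, 31),
         datetime.datetime(2021, 12, 27), datetime.datetime(2022, 1, 9)]
print(list(chunks(dates, 2)))  # [[start1, end1], [start2, end2]]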
Example #4
    def iter_parts(self, parts=range(4)):
        '''Iterate over parts in self.trials
        part 0: trials 1-30
        part 1: trials 31-60
        part 2: trials 61-90
        part 3: trials 91-120'''

        if isinstance(parts, int):
            parts = [parts]
        assert all([0 <= t < 4 for t in parts])

        trial_chunks = list(tls.chunks(self.trials, 30))
        for i in parts:
            yield i, trial_chunks[i]
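A hedged usage sketch: `session` below stands for a hypothetical instance of the class that defines iter_parts, with 120 entries in session.trials.

# Illustrative only: iterate over parts 0 and 2 (trials 1-30 and 61-90).
for part, trials in session.iter_parts(parts=[0, 2]):
    print(part, len(trials))  # each part holds 30 trials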
Example #5
    def __construct_data(self):
        indexed_content = []
        for block in self.block_image.blocks:
            itt = ImageToText(block)
            char_index = self.char_database.get(str(itt), 32)
            indexed_content.append(str(char_index))

        line_count = DATA_FIRST_LINE
        data_lines = []
        for data in chunks(indexed_content, 16):
            line = DATA_TEMPLATE.format(line=line_count,
                                        content=",".join(data))
            data_lines.append(line)
            line_count += 10

        return data_lines
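DATA_TEMPLATE and DATA_FIRST_LINE are defined outside this excerpt. Given the line counter stepping by 10 and the comma-joined chunks of 16 character indices, a plausible (purely assumed) definition is a BASIC-style DATA statement:

DATA_FIRST_LINE = 1000                   # assumed starting line number
DATA_TEMPLATE = "{line} DATA {content}"  # assumed template, e.g. "1000 DATA 65,66,32"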
Example #6
    def run(self,
            circuits,
            meas_qubits=None,
            measure=None,
            count_chunks=False):
        """
        :param circuits: list of QuantumCircuit or QuantumCircuit
        :param meas_qubits: list of qubits that will be measured.
        :param measure: optional. By default after channel transformation we do tomography.
        Not implemented now
        :return: depend on measure parameter. By default, it's list of density matrix
        """
        if measure is not None:
            raise NotImplementedError

        if isinstance(circuits, QuantumCircuit):
            circuits = [circuits]

        meas_qubits, s_matrix = sort_list_and_transformation_matrix(
            meas_qubits)
        number_measure_experiments = 3**len(meas_qubits)

        jobs = []
        for qc in circuits:
            qr = qc.qregs[0]
            tomo_circuits = state_tomography_circuits(
                qc, [qr[qubit_number] for qubit_number in meas_qubits])
            jobs.extend(tomo_circuits)

        res = None
        for i, chunk_jobs in enumerate(chunks(jobs, self.max_jobs_per_one)):
            if count_chunks:
                print(f'chunk number: {i + 1}')
            execute_kwargs = {
                'experiments': chunk_jobs,
                'backend': self.backend,
                'shots': self.shots,
                'max_credits': 15
            }

            new_res = execute(**execute_kwargs).result()

            if res is None:
                res = new_res
            else:
                res += new_res

        matrices = []
        for i in range(len(res.results) // number_measure_experiments):
            start = i * number_measure_experiments
            end = (i + 1) * number_measure_experiments

            res_matrix = copy(res)
            res_matrix.results = res.results[start:end]

            rho = StateTomographyFitter(res_matrix, jobs[start:end]).fit()
            rho = np.linalg.inv(s_matrix) @ rho @ s_matrix
            matrices.append(rho)
        return matrices
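A hedged usage sketch: `runner` is a hypothetical, already-configured instance (backend, shots, max_jobs_per_one) of the class that defines run.

from qiskit import QuantumCircuit

bell = QuantumCircuit(2)
bell.h(0)
bell.cx(0, 1)
# Returns one density matrix per input circuit, over the measured qubits.
density_matrices = runner.run(bell, meas_qubits=[0, 1], count_chunks=True)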
Example #7
    max_colm_norm=False, max_norm=15.0,
    norm_gsup=norm_gsup, norm_gh=norm_gh)

eval_fn, eval_fn_tr = fns["eval_fn"], fns["eval_fn_tr"]
# Things to track during training: epoch and minibatch
train_stats = {"tr_error_ep": [], "vl_error_ep": [], "tr_cost_ep": [],
               "tr_error_mn": [], "vl_error_mn": [], "tr_cost_mn": [],
               "current_nb_mb": 0, "best_epoch": 0, "best_mn": 0}

names = []
for l, i in zip(layers, range(len(layers))):
    if l["hint"] is not None:
        names.append(i)
debug = {"grad_sup": [], "grad_hint": [], "penalty": [], "names": names}
# Eval before start training
l_vl = chunks(range(validx.shape[0]), valid_batch_size)
l_tr = chunks(range(trainx.shape[0]), valid_batch_size)
vl_err_start = np.mean(
    [eval_fn(np.array(l_vl[kk])) for kk in range(len(l_vl))])
tr_err_start = np.mean(
    [eval_fn_tr(np.array(l_tr[kk])) for kk in range(len(l_tr))])
print(vl_err_start, tr_err_start)

# Exp stamp
time_exp = DT.datetime.now().strftime('%m_%d_%Y_%H_%M_%s')
tag_text = "_".join([str(l["hint"]) for l in layers])
h_exp = "_".join([str(e) for e in h_ind])
fold_exp = "exps/" + tag + "_" + str(nbr_sup) + "_" + h_exp + "_" +\
    size_model + "_" + time_exp
if not os.path.exists(fold_exp):
    os.makedirs(fold_exp)
Example #8
    def get(self):

        results = self.credentials.user_playlist(self.user.user_id,
                                                 self.id,
                                                 fields="tracks,next")

        for i, item in enumerate(results['tracks']['items']):
            dtc = {}
            dtc['Title'] = item['track']['name']
            dtc['TrackId'] = item['track']['id']
            dtc['Album'] = item['track']['album']['name']
            dtc['AlbumId'] = item['track']['album']['id']
            dtc['Comments'] = ''
            try:
                dtc['Artist'] = item['track']['album']['artists'][0]['name']
            except IndexError:
                dtc['Artist'] = 'none'
            try:  # only take first artist ID
                dtc['ArtistId'] = item['track']['album']['artists'][0]['id']
            except IndexError:
                dtc['ArtistId'] = 'none'

            self.tracks_json[i] = dtc

        outside_track_list = chunks(
            self.tracks_json,
            20)  # this is to avoid hitting the API id count limit

        for j, tracks in enumerate(outside_track_list):

            track_ids = []
            for track in tracks:
                track_ids.append(track['TrackId'])

            artist_ids = []
            for track in tracks:
                artist_ids.append(track['ArtistId'])

            album_ids = []
            for track in tracks:
                album_ids.append(track['AlbumId'])

            # get album details
            album_meta = self.credentials.albums(albums=album_ids)
            for i, track in enumerate(tracks):
                track['ReleaseDate'] = album_meta['albums'][i]['release_date']
                track['RecordLabel'] = album_meta['albums'][i]['label']

            # Get artist details
            artist_meta = self.credentials.artists(artists=artist_ids)
            for i, track in enumerate(tracks):
                try:  # tags will always refer to first artist listed
                    tags = '; '.join(artist_meta['artists'][i]['genres'][:4])
                    track['ArtistTags'] = tags
                except IndexError:
                    track['ArtistTags'] = 'none'

            # get track features data
            feature_meta = self.credentials.audio_features(tracks=track_ids)
            for i, track in enumerate(tracks):
                track['danceability'] = feature_meta[i]['danceability']
                track['energy'] = feature_meta[i]['energy']
                track['key'] = feature_meta[i]['key']
                track['loudness'] = feature_meta[i]['loudness']
                track['mode'] = feature_meta[i]['mode']
                track['speechiness'] = feature_meta[i]['speechiness']
                track['acousticness'] = feature_meta[i]['acousticness']
                track['instrumentalness'] = feature_meta[i]['instrumentalness']
                track['liveness'] = feature_meta[i]['liveness']
                track['valence'] = feature_meta[i]['valence']
                track['tempo'] = feature_meta[i]['tempo']

            if j == 0:
                main_df = pd.DataFrame.from_dict(tracks)

            else:
                sub_df = pd.DataFrame.from_dict(tracks)
                main_df = pd.concat([main_df, sub_df])

        column_order = ('Title', 'Album', 'Artist', 'ArtistTags',
                        'ReleaseDate', 'RecordLabel', 'Comments', 'TrackId',
                        'AlbumId', 'ArtistId', 'acousticness', 'danceability',
                        'energy', 'instrumentalness', 'key', 'liveness',
                        'loudness', 'mode', 'speechiness', 'tempo', 'valence')

        main_df = main_df.loc[:, list(column_order)]
        return main_df
Example #9
def test(nbr_sampler, config):
    data = nbr_sampler.data
    multilabel, device = config.multilabel, config.device
    model, _ = load_model(config, data)
    model.eval()

    # _, prior = torch.unique(data.y[data.train_mask == 1], return_counts=True)
    # prior = prior.to(torch.float32)/prior.sum()
    # prior = prior.to(device)

    city2ix = {c: i for i, c in enumerate(data.cities)}
    if multilabel:
        test_cities = [
            'co_Russia.r1_CentralRussia',
            'co_Russia.r1_CentralRussia.r2_MosObl',
            'co_Russia.r1_CentralRussia.r2_MosObl.ci_Moscow',
            'co_Russia.r1_NorthWest',
            'co_Russia.r1_NorthWest.r2_SpbObl',
            'co_Russia.r1_NorthWest.r2_SpbObl.ci_SaintPetersburg',
            'co_Russia.r1_Urals',
            'co_Russia.r1_Urals.r2_Sverdlovsk',
            'co_Russia.r1_Urals.r2_Sverdlovsk.ci_Yekaterinburg',
            'co_Russia.r1_FarEast',
            'co_Russia.r1_FarEast.r2_Yakutia',
            'co_Russia.r1_FarEast.r2_Yakutia.ci_Yakutsk',
            'co_Russia.r1_Siberia',
            'co_Russia.r1_Siberia.r2_Chita',
            'co_Russia.r1_Siberia.r2_Chita.ci_Chita',
        ]
    else:
        test_cities = [
            'p_Europe.co_Russia.r1_CentralRussia.r2_MosObl.ci_Moscow',
            'p_Europe.co_Russia.r1_NorthWest.r2_SpbObl.ci_SaintPetersburg',
            'p_Europe.co_Russia.r1_Urals.r2_Sverdlovsk.ci_Yekaterinburg',
            'p_Europe.co_Russia.r1_Siberia.r2_Novosibirsk.ci_Novosibirsk',
            'p_Europe.co_Belorussia.r1_MinskObl.ci_Minsk',
            'p_Europe.co_Russia.r1_CentralRussia.r2_Tula.ci_Tula',
            'p_Europe.co_Russia.r1_Siberia.r2_Chita.ci_Chita',
            'p_Europe.co_Russia.r1_CentralRussia.r2_Bryansk.ci_Bryansk',
            'p_Europe.co_Russia.r1_FarEast.r2_Yakutia.ci_Yakutsk',
            # 'p_Europe.co_Russia.r1_CentralRussia.r2_Orel',
        ]

    ixs = [city2ix[c] for c in test_cities]
    if multilabel:
        plt_ixs = list(chunks(ixs, 3))

    probs, preds, y = [], [], []
    with torch.no_grad():
        for data_flow in nbr_sampler(data.test_mask):
            if multilabel:
                y_cur = data.y[data_flow.n_id]
            else:
                y_cur = data.y[data_flow.n_id].numpy()
            x = slice_data_flow(config, data, data_flow)
            logits = model(x, data_flow.to(device))
            probs.append(torch.exp(logits).cpu().detach().numpy())
            pred = logits.max(1)[1]
            preds.append(pred.cpu().detach().numpy())
            y.append(y_cur)

    probs = np.concatenate(probs)
    preds = np.concatenate(preds)
    y = np.concatenate(y)

    if not multilabel:
        ntest = data.test_mask.sum().item()
        accuracy = (preds == y).sum() / ntest
        print(accuracy)
        Y = label_binarize(y, classes=ixs)

        # ids = load_vector("../data/vk/nodes.bin", 'I')
        # df = pd.DataFrame(probs)
        # df.columns = data.cities
        # df.index = [ids[i] for i in data_flow.n_id]
        # df.to_csv('../data/vk/preds_example.csv')
    else:
        Y = y

        df = pd.DataFrame()
        df['geo'] = data.cities
        # choose thresholds such that precision >= precision_min
        for th in [config.precision_min1, config.precision_min2]:
            recalls, thresholds = [], []
            for i in range(Y.shape[1]):
                precision, recall, _thresholds = precision_recall_curve(
                    Y[:, i], probs[:, i])
                wix = np.where(precision >= th)[0][0]
                if wix == len(_thresholds):
                    thresholds.append(1)
                    recalls.append(0)
                else:
                    thresholds.append(_thresholds[wix])
                    recalls.append(recall[wix])

            df['recall_%.2f' % th] = recalls
            df['threshold_%.2f' % th] = thresholds

        df.to_csv(config.join('thresholds.csv'), index=False)

    def plot_precision_recall(Y, probs, ixs, fname):
        plt.clf()
        for i in ixs:
            precision, recall, _ = precision_recall_curve(Y[:, i], probs[:, i])
            plt.plot(recall,
                     precision,
                     lw=1,
                     label=data.cities[i].split('.')[-1])

        plt.legend(fontsize='small', loc='lower left')
        plt.xlabel("recall")
        plt.ylabel("precision")
        plt.title("precision vs. recall curve")
        plt.savefig(fname, dpi=900)

    fname = config.join('precision_recall%s_all.pdf' % config.postfix)
    plot_precision_recall(Y, probs, ixs, fname)
    if multilabel:
        for i, sub_ixs in enumerate(plt_ixs):
            fname = config.join('precision_recall%s_%s.pdf' %
                                (config.postfix, i))
            plot_precision_recall(Y, probs, sub_ixs, fname)
Example #10
    def pullFeatureForSong(self, feature_name, song_id, pack_size):
        """
            Returns an array with elements of type <feature_name>.

            The song is split into sample packs, each of size pack_size. The feature is
            evaluated on each sample pack and the results fill the returned array (in the same order).

            If no data is already present in the database for the given pack_size, the feature will be evaluated for the whole song and added to the database.

            @param feature_name: the feature to get
            @param song_id: the song to get the feature for
            @param pack_size: the size of the packs to split the song into (-1 to apply the feature to the whole song)
        """

        logging.debug("song_id " + str(song_id) + ":  " + feature_name + " with pack size " + str(pack_size) + " searching...")
        # PULL IF FEATURE THAT ALREADY EXISTS
        self.cur.execute("SELECT data FROM FeatureData WHERE song_id=%s AND feature_name=%s AND pack_size=%s", (song_id, feature_name, pack_size))

        class_ = getattr(features, feature_name)  # prepare the class for instantiation
        packList = []

        row = self.cur.fetchone()

        if row:  # RETURN THE LIST OF FEATURES

            # when class_.requireFullSong is True, there should be only one row,
            # but it is returned in the same way.

            while row:
                feature = class_.unserialize(row[0])
                packList.append(feature)
                row = self.cur.fetchone()

        else:  # NO ELEMENTS FOUND, COMPUTE
            raw_data = self.fetchSongData(song_id)  # fetch raw data from file on disk
            raw_chunks = []

            if pack_size == -1:
                raise "pack_size == -1 not yet supported."
            else:
                raw_chunks = tools.chunks(raw_data, pack_size)  # Split the song into chunks of size pack_size. this will be processed by the feature

            logging.debug("Feature Data not found. Generating feature data for " +
                          str(len(raw_data) / pack_size) + " packs...")


            if class_.requireFullSong: # single feature creation for full song
                feature = class_(raw_chunks)
                packList.append(feature)
                serialized = feature.serialize()
                logging.debug(feature_name + ".value = " + str(feature.value))
                self.cur.execute("INSERT INTO FeatureData (song_id, pack_index, feature_name, pack_size, data) VALUES (%s, %s, %s, %s, %s)",
                                 (song_id, 0, feature_name, pack_size, serialized))

            else: # feature creation for single sample pack
                # iterate and create a feature for each one to save to the DB
                pack_index = 0
                for dataPack in raw_chunks:
                    feature = class_(dataPack)
                    packList.append(feature)
                    serialized = feature.serialize()
                    self.cur.execute("INSERT INTO FeatureData (song_id, pack_index, feature_name, pack_size, data) VALUES (%s, %s, %s, %s, %s)",
                                     (song_id, pack_index, feature_name, pack_size, serialized))
                    pack_index += pack_size

            self.commit()  # commit the insertions

        return packList
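A hedged usage sketch: `db`, the feature name, and the pack size below are illustrative assumptions, not values from the project.

# Illustrative only: `db` is an instance of the class defining pullFeatureForSong, and
# "ZeroCrossingRate" stands for some feature class assumed to exist in the features module.
packs = db.pullFeatureForSong("ZeroCrossingRate", song_id=42, pack_size=4096)
print(len(packs), packs[0].value)  # one feature object per sample pack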