import numpy as np
import pandas as pd


def load_arithmetic_data():
    # Read the equations (inputs) and their answers (outputs) from disk.
    data = pd.read_csv("./data/arithmetic-data.csv")
    equations = list(data['input'])
    answers = list(data['output'])

    # Split every equation and answer into a list of single characters.
    equations_of_chars = [list(str(equation)) for equation in equations]
    answers_of_chars = [list(str(answer)) for answer in answers]

    # Build the character vocabulary: digits 0-9 plus the operators.
    # Indices start at 0 so they can be used directly in the one-hot tensors.
    index_to_char = {i: str(i) for i in range(10)}
    for operator in ['+', '*', '-']:
        index_to_char[len(index_to_char)] = operator
    vocab_size = len(index_to_char)
    char_to_index = {v: k for k, v in index_to_char.items()}

    # The longest sequence on either side determines the padded length.
    max_len = max(
        max(len(equation_of_chars) for equation_of_chars in equations_of_chars),
        max(len(answer_of_chars) for answer_of_chars in answers_of_chars),
    )
    n_equations = len(equations_of_chars)

    # One-hot encode every character of every equation (X) and answer (y).
    X = np.zeros(shape=(n_equations, max_len, vocab_size), dtype='float32')
    y = np.zeros(shape=(n_equations, max_len, vocab_size), dtype='float32')

    for sentence_index in range(n_equations):
        for char_position, char in enumerate(equations_of_chars[sentence_index]):
            X[sentence_index, char_position, char_to_index[char]] = 1

        for char_position, char in enumerate(answers_of_chars[sentence_index]):
            y[sentence_index, char_position, char_to_index[char]] = 1

    return X, y, index_to_char, equations, answers, max_len
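A quick sanity check, assuming ./data/arithmetic-data.csv exists with the 'input' and 'output' columns used above: decode the first one-hot encoded equation in X back into its character string.

import numpy as np

X, y, index_to_char, equations, answers, max_len = load_arithmetic_data()
# For every non-empty character position, find the index of the 1 in the
# one-hot vector and map it back to its character.
decoded = "".join(index_to_char[int(np.argmax(step))]
                  for step in X[0] if step.any())
print(decoded, "==", str(equations[0]))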
Example #2
import numpy as np


def relu(feature_map):
    # Element-wise ReLU: keep positive activations, clamp negatives to zero.
    relu_out = np.zeros(feature_map.shape)
    for map_num in range(feature_map.shape[-1]):
        for r in np.arange(0, feature_map.shape[0]):
            for c in np.arange(0, feature_map.shape[1]):
                relu_out[r, c, map_num] = np.max([feature_map[r, c, map_num], 0])
    return relu_out
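A small usage sketch: on a random 3-D feature map the loop above gives the same result as the vectorized np.maximum call, which makes a convenient cross-check.

import numpy as np

feature_map = np.random.randn(4, 4, 2)   # hypothetical H x W x channels activations
activated = relu(feature_map)
# The per-element loop is equivalent to clamping negatives in one call.
assert np.allclose(activated, np.maximum(feature_map, 0))
print(activated.min())                   # never below zero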
Example #3
import numpy as np


def pooling(feature_map, size=2, stride=2):
    # Max pooling: slide a size x size window with the given stride over each
    # channel and keep the maximum value inside every window.
    pool_out = np.zeros((np.uint16((feature_map.shape[0] - size) / stride + 1),
                         np.uint16((feature_map.shape[1] - size) / stride + 1),
                         feature_map.shape[-1]))
    for map_num in range(feature_map.shape[-1]):
        r2 = 0
        for r in np.arange(0, feature_map.shape[0] - size + 1, stride):
            c2 = 0
            for c in np.arange(0, feature_map.shape[1] - size + 1, stride):
                pool_out[r2, c2, map_num] = np.max(feature_map[r:r + size, c:c + size, map_num])
                c2 = c2 + 1
            r2 = r2 + 1
    return pool_out
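For example, a 6x6 single-channel feature map pooled with a 2x2 window and stride 2 comes out 3x3; each output cell is the maximum of its window.

import numpy as np

feature_map = np.arange(36, dtype=float).reshape(6, 6, 1)  # one channel, values 0..35
pooled = pooling(feature_map, size=2, stride=2)
print(pooled.shape)     # (3, 3, 1)
print(pooled[0, 0, 0])  # 7.0 -- max of the top-left 2x2 window (0, 1, 6, 7)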
Example #4
import json
import os

import numpy as np
import pandas as pd


def save(stories, query, error=False, exc="", name=""):
    if error:
        print("Ran into error!")
    print(f"fetched {len(stories)} total!")

    # Remember where to resume from on the next run.
    if stories:
        query["last_processed_id"] = stories[-1]["processed_stories_id"]
    else:
        query["last_processed_id"] = 0

    # Chuck the whole thing into a DataFrame.
    df = pd.DataFrame(stories)
    DATA_FILENAME = f"{name}_us_mainstream_stories.tsv"
    METADATA_FILENAME = f"{name}_metadata.json"

    # Append without headers if the file exists already, otherwise create a new file.
    if os.path.exists(DATA_FILENAME):
        with open(DATA_FILENAME, "at") as f:
            df.to_csv(f, sep="\t", header=False)
    else:
        with open(DATA_FILENAME, "wt") as f:
            df.to_csv(f, sep="\t", header=True)

    print(f"Saved to {DATA_FILENAME}")

    # Record the latest publish date in this batch (full-file rescan left disabled).
    # df_all = pd.read_csv(DATA_FILENAME, sep='\t')
    latest_date = str(np.max(pd.to_datetime(df["publish_date"])).date())
    new_metadata = {
        "error": error,
        "exc": exc,
        "last_query": query,
        "latest": latest_date,
    }

    with open(METADATA_FILENAME, "wt") as f:
        f.write(json.dumps(new_metadata))
    print(f"Metadata saved to {METADATA_FILENAME}")
Example #5
File: utilty.py  Project: atzanbar/ass1
import numpy as np


def softmax(x):
    # Subtract the max before exponentiating for numerical stability.
    m = np.max(x)
    e = np.exp(x - m)
    return e / e.sum()
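The max-subtraction leaves the result mathematically unchanged but avoids overflow for large scores, and the output is always a probability vector:

import numpy as np

scores = np.array([1000.0, 1001.0, 1002.0])  # naive np.exp would overflow here
probs = softmax(scores)
print(probs)        # ~[0.090, 0.245, 0.665]
print(probs.sum())  # 1.0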
Example #6
    def query(self, n, model, train_dataset, pool_dataset, budget=10000):
        # Core-set selection: binary-search the smallest feasible covering radius
        # with a facility-location model, then greedily pick n k-center samples.
        device = model.state_dict()['softmax.bias'].device

        full_dataset = ConcatDataset([pool_dataset, train_dataset])
        pool_len = len(pool_dataset)

        self.embeddings = self.get_embeddings(model, device, full_dataset)

        # Calc distance matrix
        num_images = self.embeddings.shape[0]
        dist_mat = self.calc_distance_matrix(num_images)

        # We need k centers; start from the greedy solution's covering radius.
        upper_bound = gb.UB
        lower_bound = upper_bound / 2.0
        max_dist = upper_bound

        _x, _y = np.where(dist_mat <= max_dist)
        _distances = dist_mat[_x, _y]
        subset = [0]
        model = solve_fac_loc(_x, _y, subset, num_images, budget)
        # model.setParam( 'OutputFlag', False )
        x, y, z = model.__data
        delta = 1e-7

        while upper_bound - lower_bound > delta:
            print("State", upper_bound, lower_bound)
            current_radius = (upper_bound + lower_bound) / 2.0

            violate = np.where(_distances > current_radius)  # Point distances which violate the radius

            new_max_d = np.min(_distances[_distances >= current_radius])
            new_min_d = np.max(_distances[_distances <= current_radius])

            print("If it succeeds, new max is:", new_max_d, new_min_d)

            for v in violate[0]:
                x[_x[v], _y[v]].UB = 0  # disable edges whose distance violates the radius

            model.update()
            model.optimize()

            if model.getAttr(gb.GRB.Attr.Status) == gb.GRB.INFEASIBLE:
                failed = True
                print("Infeasible")
            elif sum([z[i].X for i in range(len(z))]) > 0:
                failed = True
                print("Failed")
            else:
                failed = False

            if failed:
                lower_bound = max(current_radius, new_max_d)
                # failed so put edges back
                for v in violate[0]:
                    x[_x[v], _y[v]].UB = 1
            else:
                print("solution found", current_radius, lower_bound, upper_bound)
                upper_bound = min(current_radius, new_min_d)
                model.write("s_{}_solution_{}.sol".format(budget, current_radius))

        idxs_labeled = np.arange(start=pool_len, stop=pool_len + len(train_dataset))

        # Perform kcenter greedy
        self.update_distances(idxs_labeled, idxs_labeled, only_new=False, reset_dist=True)
        sel_ind = []
        for _ in range(n):
            ind = np.argmax(self.min_distances)  # Get sample with highest distance
            assert ind not in idxs_labeled, "Core-set picked index already labeled"
            self.update_distances([ind], idxs_labeled, only_new=True, reset_dist=False)
            sel_ind.append(ind)

        assert len(set(sel_ind)) == len(sel_ind), "Core-set picked duplicate samples"

        remaining_ind = list(set(np.arange(pool_len)) - set(sel_ind))

        return sel_ind, remaining_ind
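A standalone sketch of the greedy k-center step performed by the final loop (an illustration only, not the class's update_distances/min_distances bookkeeping): repeatedly pick the point farthest from the current set of centers.

import numpy as np

def greedy_k_center(embeddings, labeled_idx, n):
    # min_dist[i] = distance from point i to its nearest already-chosen center
    min_dist = np.min(
        np.linalg.norm(embeddings[:, None, :] - embeddings[None, labeled_idx, :], axis=-1),
        axis=1,
    )
    selected = []
    for _ in range(n):
        ind = int(np.argmax(min_dist))             # farthest point from all centers
        selected.append(ind)
        new_dist = np.linalg.norm(embeddings - embeddings[ind], axis=1)
        min_dist = np.minimum(min_dist, new_dist)  # refresh nearest-center distances
    return selected

emb = np.random.randn(100, 8)                      # hypothetical embedding matrix
print(greedy_k_center(emb, labeled_idx=[0, 1], n=5))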