Example #1
    def predict(self, passage: str, question: str):
        example = input_to_squad_example(passage, question)
        features = squad_examples_to_features(example, self.tokenizer, self.max_seq_length,
                                              self.doc_stride, self.max_query_length)
        all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
        all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long)
        dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids,
                                all_example_index)
        eval_sampler = SequentialSampler(dataset)
        eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=1)
        all_results = []
        for batch in eval_dataloader:
            batch = tuple(t.to(self.device) for t in batch)
            with torch.no_grad():
                inputs = {'input_ids':      batch[0],
                          'attention_mask': batch[1],
                          'token_type_ids': batch[2]}
                example_indices = batch[3]
                outputs = self.model(**inputs)

            for i, example_index in enumerate(example_indices):
                eval_feature = features[example_index.item()]
                unique_id = int(eval_feature.unique_id)
                result = RawResult(unique_id    = unique_id,
                                    start_logits = to_list(outputs[0][i]),
                                    end_logits   = to_list(outputs[1][i]))
                all_results.append(result)
        answer = get_answer(example, features, all_results, self.n_best_size,
                            self.max_answer_length, self.do_lower_case)
        return answer
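
In this snippet to_list converts a PyTorch logits tensor into a plain Python list before it is stored in a RawResult. A minimal sketch of what such a helper might look like (an assumption inferred from the call sites above, following the pattern used in the Hugging Face SQuAD example scripts; it is not code taken from this repository):

import torch

def to_list(tensor):
    # Detach from the autograd graph, move to CPU and convert to a nested Python list.
    return tensor.detach().cpu().tolist()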
Example #2
def parse_system_file_ext(file):
    data_list = []

    with open(file, 'r') as f:
        header = f.readline()
        header_list = utils.to_list(header)
        while True:
            line = f.readline()
            if not line:
                # print("Reach {} EOF".format(f))
                break
            info = utils.to_list(line)
            # info = line.split(',')

            # error check
            if len(header_list) != len(info):
                print(line, len(header_list), len(info))
                for i in range(0, min(len(info), len(header_list))):
                    print(file, header_list[i], info[i])
                    exit(0)

            info_dict = {}
            for i in range(0, len(header_list)):
                info_dict[header_list[i]] = info[i]
            data_list.append(info_dict)
    res = {'header': header_list, 'data': data_list}
    # print(file, len(data_list))
    return res
Example #3
    def __init__(self, experiment, target_disease):
        super(PhyloDAP, self).__init__(experiment=experiment)
        self._disease_name = target_disease
        self.type_data = settings.TYPE_DATA
        self.total_nb_samples = settings.NB_SAMPLES
        self.nb_filters = to_list(settings.nb_convolutional_filters)
        self.phylo_neighbours = to_list(settings.nb_phylo_neighbours)

        self._do_serialisation = False  # Serialization does not work with our Keras layer
Example #4
 def __init__(self, experiment, target_disease):
     super(PhyloDAP, self).__init__(experiment=experiment)
     self._disease_name = target_disease
     self.type_data = settings.TYPE_DATA
     self.total_nb_samples = settings.NB_SAMPLES
     self.nb_filters = to_list(settings.nb_convolutional_filters)
     self.phylo_neighbours = to_list(settings.nb_phylo_neighbours)
     self._path_weights = os.path.join(os.path.abspath(os.path.curdir), 'weights', 
                                       '{}_training_weights.hdf5'.format(self._disease_name.lower()))
     
     self._do_serialisation = False  # Serialization does not work with our Keras layer
Example #5
 def __init__(self, scenarios, b, c, h, f=None, m=None, durability=None):
     self.root = defaultdict(ScenarioTree.Node)
     self.model = Model()
     self.variables = {}
     self.obj_sum = []
     self.t = len(scenarios[0][0])
     self.c = utils.to_list(c, self.t)
     self.b = utils.to_list(b, self.t)
     self.h = utils.to_list(h, self.t)
     self.f = None if f is None else utils.to_list(f, self.t)
     self.M = self.find_m(scenarios, durability) if m is None else m
     self.durability = durability
     self.start_build_tree(scenarios)
     self.start_build_model()
Example #6
def build_scenario_structs(scenarios, b, c, h, f, durability):
    b = utils.to_list(b, len(scenarios[0][0]))
    c = utils.to_list(c, len(scenarios[0][0]))
    h = utils.to_list(h, len(scenarios[0][0]))
    f = None if f is None else utils.to_list(f, len(scenarios[0][0]))
    scenario_structs = []
    for scenario, prob in scenarios:
        scenario_structs.append(
            ScenarioStruct(scenario=scenario,
                           prob=prob,
                           b=b,
                           c=c,
                           h=h,
                           f=f,
                           durability=durability))
    return scenario_structs
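
Examples #5 and #6 call a two-argument utils.to_list(value, t) on the per-period cost parameters b, c, h and f over a horizon of t periods. Judging from those call sites it appears to broadcast a scalar to a list with one entry per period and pass an existing sequence through unchanged; the sketch below is purely an assumption based on that usage, not code from the repository:

def to_list(value, t):
    # Keep an existing sequence as a plain list; otherwise repeat the scalar
    # once per period (assumed behaviour, inferred from the call sites above).
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value] * t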
Example #7
 def single_rel_sampler(self, batch_id, relation_id, link_info, pos_neg=1, remove_false_neg=True):
     '''
     It is acceptable not to remove the "false negatives", since they make up only a small fraction of the samples.
     '''
     batch_size = self.batch_sizes[pos_neg][relation_id]
     if pos_neg:
         # pos_neg is 1, positive sampling
         positive_samples = self.link_info[relation_id][:2, batch_id*batch_size : (batch_id+1)*batch_size]
         samples_from = positive_samples[0, :]
         samples_to   = positive_samples[1, :]
     else:
         # pos_neg is 0, negative sampling
         known_set = self.known_positive_links[relation_id]
         samples_from = np.random.choice(self.n_entities, batch_size)
         samples_to = np.random.choice(self.n_entities, batch_size)
         if remove_false_neg:
             # get the pairs as set and remove "false negatives" (those that are positive)
             raw_negative_pair_set = pair_set(list(samples_from), list(samples_to))
             filtered_negative_pairs = list(raw_negative_pair_set - known_set - self.self_loop_links)
             # extract the from and to information
             samples_from = from_list(filtered_negative_pairs)
             samples_to = to_list(filtered_negative_pairs)
     i_label = 0 if self.separate_relations else relation_id
     samples_relation = [i_label] * len(samples_from)
     return samples_from, samples_relation, samples_to
Example #8
def create_tokenizer():
    """
    Build a tokenizer from the image names in the training set and their corresponding cleaned captions.
    :return: the generated tokenizer
    """
    train_image_names = utils.load_image_names('Flickr_8k.trainImages.txt')
    train_descriptions = utils.load_clean_captions('descriptions.txt',
                                                   train_image_names)
    lines = utils.to_list(train_descriptions)
Example #9
def evaluate(real_scenario, xts, goods_info):
    t = len(real_scenario)
    b = utils.to_list(goods_info['b'], t)
    c = utils.to_list(goods_info['c'], t)
    h = utils.to_list(goods_info['h'], t)
    f = None if goods_info['f'] is None else utils.to_list(goods_info['f'], t)
    durability = goods_info['durability']
    yt = 0
    fixed_cost = 0
    ordering_cost = np.sum(np.array(c) * (np.array(xts)))
    holding_cost = 0
    shortage_cost = 0
    leftovers = np.zeros(durability) if durability is not None else None
    if f is not None:
        fixed_cost = np.sum(np.array(f) * (np.array(xts) > 0))
    for i in range(0, t):
        xt = xts[i]
        dt = real_scenario[i]
        holding_cost += h[i] * max(0, (xt + yt - dt))
        shortage_cost += b[i] * max(0, (dt - xt - yt))
        if durability is not None and i >= durability:
            yt = max(xt + yt - dt - leftovers[-1], 0)
        else:
            yt = max(xt + yt - dt, 0)
        if durability is not None:
            lt = dt
            leftovers = np.roll(leftovers, 1)
            for j in range(durability - 1, 0, -1):
                if lt == 0:
                    break
                dec = min(lt, leftovers[j])
                leftovers[j] -= dec
                lt -= dec
            leftovers[0] = 0
            leftovers[0] = min(xt, max(yt - np.sum(leftovers), 0))
            yt = max(0, np.sum(leftovers))

    cost = fixed_cost + ordering_cost + holding_cost + shortage_cost
    return cost, fixed_cost, ordering_cost, holding_cost, shortage_cost
Example #10
    def __init__(self, state, num_steps, **kwargs):
        state_list = utils.to_list(state)
        #print state_list
        self.num_steps = num_steps
        self.range_step = range(num_steps)
        self.num_states = len(state_list)
        self.range_state = range(self.num_states)

        self.current_state = [deque(maxlen=num_steps) for _ in self.range_state]
        self.old_state = [deque(maxlen=num_steps) for _ in self.range_state]
        for _ in self.range_step:
            for qidx in self.range_state:
                self.current_state[qidx].append(state_list[qidx])
                self.old_state[qidx].append(state_list[qidx])
        #self.current_state.append(state_list)
        #print self.current_state
        #self.num_states = len(current_state[-1]) if utils.multi_state(state) else 1
        self.num_states = len(self.current_state[-1])
        self.state_dims = [len(s.shape) for s in state_list]
Example #11
 def __init__(self, state, **kwargs):
     self.current_state = utils.to_list(state)
     self.old_state = utils.to_list(state)
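
Examples #10, #11, #16 and #20 call utils.to_list(state) on an environment state before iterating over its components (note the per-component indexing and the .shape access in Example #10). It presumably wraps a single state array in a one-element list and turns a tuple of components into a plain list; the sketch below is an assumption based on that usage only:

def to_list(state):
    # Normalise the state to a list of components: convert a tuple/list as-is,
    # wrap anything else in a one-element list (assumed behaviour, inferred from usage).
    if isinstance(state, (list, tuple)):
        return list(state)
    return [state]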
Example #12
def get_countries_by_ids(countries: Countries, country_ids: str) -> pd.DataFrame:
    ids = utils.to_list(country_ids)
    return countries.last_day.iloc[ids]  # last_df[last_day.index.isin(df_def_countries['Country'])]
Example #13
def ward(ward_number):
	headers = mongo.db.streetlights.aggregate(utils.ward_pipeline(ward_number))
	streetlights = mongo.db.streetlights.find({"ward": ward_number})

	return render_template('index.html', title = "Ward", headers = headers["result"][0], streetlights = utils.to_list(streetlights))
Example #14
def dump_json(path_feature_map, filepath):
    path_feature_map = to_list(path_feature_map)
    json.dump(path_feature_map, open(filepath, 'w'))
Example #15
def evaluate(args, model, tokenizer, prefix=""):
    dataset, examples, features = load_and_cache_examples(args,
                                                          tokenizer,
                                                          evaluate=True,
                                                          output_examples=True)

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)

    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # multi-gpu evaluate
    if args.n_gpu > 1 and not isinstance(model, torch.nn.DataParallel):
        model = torch.nn.DataParallel(model)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)

    all_results = []
    start_time = timeit.default_timer()

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)

        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
            }

            example_indices = batch[3]

            outputs = model(**inputs)

        for i, example_index in enumerate(example_indices):
            eval_feature = features[example_index.item()]
            unique_id = int(eval_feature.unique_id)

            # [A] bug fix
            output = [
                to_list(output[i]) for output in (outputs[0], outputs[1])
            ]

            start_logits, end_logits = output
            result = SquadResult(unique_id, start_logits, end_logits)

            all_results.append(result)

    evalTime = timeit.default_timer() - start_time
    logger.info("  Evaluation done in total %f secs (%f sec per example)",
                evalTime, evalTime / len(dataset))

    # Compute predictions
    output_prediction_file = os.path.join(args.output_dir,
                                          "predictions_{}.json".format(prefix))
    output_nbest_file = os.path.join(
        args.output_dir, "nbest_predictions_{}.json".format(prefix))

    if args.version_2_with_negative:
        output_null_log_odds_file = os.path.join(
            args.output_dir, "null_odds_{}.json".format(prefix))
    else:
        output_null_log_odds_file = None

    predictions = compute_predictions_logits(
        examples,
        features,
        all_results,
        args.n_best_size,
        args.max_answer_length,
        args.do_lower_case,
        output_prediction_file,
        output_nbest_file,
        output_null_log_odds_file,
        args.verbose_logging,
        args.version_2_with_negative,
        args.null_score_diff_threshold,
        tokenizer,
    )

    # Compute the F1 and exact scores.
    results = squad_evaluate(examples, predictions)
    return results
Example #16
 def update(self, state):
     state_list = utils.to_list(state)
     for qidx, s in enumerate(state_list):
         self.update_deque(qidx, state_list)
Example #17
def district(district_name):
	headers = mongo.db.streetlights.aggregate(utils.district_pipeline(district_name))
	streetlights = mongo.db.streetlights.find({"district": district_name})

	print(headers)
	return render_template('index.html', title = "District" , headers = headers["result"][0], streetlights = utils.to_list(streetlights))
Example #18
    def predict(self, passages: list, question: str):
        examples = input_to_squad_example(passages, question)
        features = squad_examples_to_features(examples,
                                              self.tokenizer,
                                              self.max_seq_length,
                                              self.doc_stride,
                                              self.max_query_length,
                                              vsl=self.vsl)
        if not self.use_tf and not self.use_onnx_runtime:
            torch_input_ids = torch.tensor([f.input_ids for f in features],
                                           dtype=torch.long).to(self.device)
            torch_input_mask = torch.tensor([f.input_mask for f in features],
                                            dtype=torch.long).to(self.device)
            torch_segment_ids = torch.tensor([f.segment_ids for f in features],
                                             dtype=torch.long).to(self.device)
            torch_example_index = torch.arange(torch_input_ids.size(0),
                                               dtype=torch.long).to(
                                                   self.device)
        all_results = []
        if self.use_tf:
            if self.tf_onnx:
                # this is the TF graph converted from ONNX
                inputs = {
                    'input_ids:0': [f.input_ids for f in features],
                    'attention_mask:0': [f.input_mask for f in features],
                    'token_type_ids:0': [f.segment_ids for f in features]
                }
                start_logits, end_logits = self.model.run(
                    ['Squeeze_49:0', 'Squeeze_50:0'], feed_dict=inputs)
            else:
                # this is the original TF graph
                inputs = {
                    'input_ids:0': [f.input_ids for f in features],
                    'input_mask:0': [f.input_mask for f in features],
                    'segment_ids:0': [f.segment_ids for f in features]
                }
                start_logits, end_logits = self.model.run(
                    ['start_logits:0', 'end_logits:0'], feed_dict=inputs)
            example_indices = np.arange(len(features))
            outputs = [start_logits, end_logits]
        elif self.use_onnx_runtime:
            inputs = {
                self.model.get_inputs()[0].name:
                np.array([f.input_ids for f in features]),
                self.model.get_inputs()[1].name:
                np.array([f.input_mask for f in features]),
                self.model.get_inputs()[2].name:
                np.array([f.segment_ids for f in features])
            }
            output_names = [
                self.model.get_outputs()[0].name,
                self.model.get_outputs()[1].name
            ]
            example_indices = np.arange(len(features))
            outputs = self.model.run(output_names, inputs)
        else:
            example_indices = torch_example_index
            if self.use_jit:
                outputs = self.model(torch_input_ids, torch_input_mask,
                                     torch_segment_ids)
            else:
                with torch.no_grad():
                    inputs = {
                        'input_ids': torch_input_ids,
                        'attention_mask': torch_input_mask,
                        'token_type_ids': torch_segment_ids
                    }
                    outputs = self.model(**inputs)

        for i, example_index in enumerate(example_indices):
            eval_feature = features[example_index.item()]
            unique_id = int(eval_feature.unique_id)
            result = RawResult(unique_id=unique_id,
                               start_logits=to_list(outputs[0][i]),
                               end_logits=to_list(outputs[1][i]))
            all_results.append(result)
        answers = get_answer(examples, features, all_results, self.n_best_size,
                             self.max_answer_length, self.do_lower_case)
        return answers
Example #19
from Khorus.Choori.decorators import privileges, retrieve
from Khorus.crud.order import bp, orders
from Khorus.crud.order.design.schema import schema, obj, status as statuses
from sanic.response import json
from bson import ObjectId
from datetime import datetime, timedelta
from temp import orders as orders_cache, users as users_cache
from utils import to_list, dot_notation
from copy import deepcopy

retrieval = to_list(schema)
minus = [(feature[1][0][1:], feature[1][1], feature[0])
         for feature in retrieval if '-' in feature[1][0]]
retrieval = [(feature[1][0][1:], feature[1][1], feature[0])
             for feature in retrieval if '$' in feature[1][0]]


@bp.route('/', methods=['POST'])
@bp.route('/<_id>', methods=['POST'])
@privileges('dev', 'operator', 'applicator')
@retrieve(*[
    '<{key}:{type}:$form:k>'.format(key=feature[0], type=feature[1])
    for feature in retrieval
])
async def _put(request, payload, _id=None, **kwargs):
    options = []
    d = deepcopy(obj)
    for pay, _type, address in minus:
        _d, key, flag = dot_notation(d, address)
        _d[key] = payload[pay]
Example #20
 def update(self, state):
     self.old_state = self.current_state
     self.current_state = utils.to_list(state)