Example #1
    def create_train_features(self, examples):
        # Build SQuAD-style training features: one row per contiguous
        # annotated span, with the covered text as the "answer".
        features = {
            "answers": [],
            "context": [],
            "id": [],
            "question": [],
            "title": [],
        }
        example_id = 0  # avoid shadowing the built-in id()
        for row_number in range(len(examples["text"])):
            context = examples["text"][row_number]
            question = "offense"
            title = context.split(" ")[0]
            # "spans" is a stringified list of character offsets;
            # ast.literal_eval (requires `import ast`) is a safer
            # drop-in for eval() on data read from disk.
            span = ast.literal_eval(examples["spans"][row_number])
            contiguous_spans = _contiguous_ranges(span)
            for start, end in contiguous_spans:
                features["answers"].append({
                    "answer_start": [start],
                    "text": [context[start : end + 1]],
                })
                features["context"].append(context)
                features["id"].append(str(example_id))
                features["question"].append(question)
                features["title"].append(title)
                example_id += 1

        return features
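Note: every example on this page depends on a _contiguous_ranges helper that is not shown here. A minimal sketch of the assumed behavior, grouping a sorted list of character offsets into inclusive (start, end) pairs; the real implementation may differ:

# Hypothetical sketch of the _contiguous_ranges helper assumed by
# these examples.
def _contiguous_ranges(offsets):
    """Group sorted character offsets into inclusive (start, end) pairs.

    e.g. [3, 4, 5, 9, 10] -> [(3, 5), (9, 10)]
    """
    ranges = []
    for offset in sorted(offsets):
        if ranges and offset == ranges[-1][1] + 1:
            ranges[-1][1] = offset           # extend the current run
        else:
            ranges.append([offset, offset])  # start a new run
    return [tuple(r) for r in ranges]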
Example #2
    def create_train_features(self, examples):
        # Variant that records start/end character positions directly
        # instead of SQuAD-style answer dicts (one list per example).
        features = {
            "context": [],
            "id": [],
            "question": [],
            "title": [],
            "start_positions": [],
            "end_positions": [],
        }
        example_id = 0  # avoid shadowing the built-in id()
        for row_number in range(len(examples["text"])):
            context = examples["text"][row_number]
            question = "offense"
            title = context.split(" ")[0]
            start_positions = []
            end_positions = []
            # "spans" is a stringified list of character offsets;
            # ast.literal_eval (requires `import ast`) is a safer
            # drop-in for eval().
            span = ast.literal_eval(examples["spans"][row_number])
            contiguous_spans = _contiguous_ranges(span)
            for start, end in contiguous_spans:
                start_positions.append(start)
                end_positions.append(end)

            features["context"].append(context)
            features["id"].append(str(example_id))
            features["question"].append(question)
            features["title"].append(title)
            features["start_positions"].append(start_positions)
            features["end_positions"].append(end_positions)
            example_id += 1

        return features
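A hypothetical round trip for this variant, assuming the input dict mirrors the dataset's "text"/"spans" columns; `processor` stands in for whatever object defines create_train_features:

# Hypothetical usage (names are illustrative, not from the source).
examples = {
    "text": ["you are a fool"],
    "spans": ["[10, 11, 12, 13]"],  # character offsets of "fool"
}
features = processor.create_train_features(examples)
# features["start_positions"] -> [[10]]
# features["end_positions"]   -> [[13]]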
Example #3
def clean_predicted_text(text, offsets):
    """Strip punctuation and whitespace from both ends of each predicted
    contiguous offset run.

    Requires `import string`; is_whitespace is an external helper.
    """
    new_offsets = []
    pred_ranges = _contiguous_ranges(offsets)
    for start, end in pred_ranges:
        # Shrink the run from both ends until neither boundary
        # character is punctuation or whitespace.
        while start < end:
            start_is_junk = (text[start] in string.punctuation
                             or is_whitespace(text[start]))
            end_is_junk = (text[end] in string.punctuation
                           or is_whitespace(text[end]))
            if not (start_is_junk or end_is_junk):
                break
            if start_is_junk:
                start += 1
            if end_is_junk:
                end -= 1
        new_offsets += list(range(start, end + 1))
    return new_offsets
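A quick sanity check of the trimming behavior (hypothetical values; assumes the _contiguous_ranges sketch above and an is_whitespace helper that flags spaces):

text = "you are a fool!"
offsets = [9, 10, 11, 12, 13, 14]    # " fool!" with the space and "!"
clean_predicted_text(text, offsets)  # -> [10, 11, 12, 13], i.e. "fool"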
Example #4
def get_text_spans(text, offsets):
    # Map a set of character offsets back to the substrings they cover.
    text_spans = []
    ranges = _contiguous_ranges(offsets)
    for start, end in ranges:
        text_spans.append(text[start : end + 1])
    return text_spans
Example #5

def get_text_from_preds(text, pred):
    # Same as get_text_spans, applied to a list of predicted offsets.
    text_spans = []
    ranges = _contiguous_ranges(pred)
    for start, end in ranges:
        text_spans.append(text[start : end + 1])
    return text_spans
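Examples #4 and #5 are functionally identical; a hypothetical call:

get_text_spans("you are a fool", [0, 1, 2, 10, 11, 12, 13])
# -> ["you", "fool"]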
Example #6
        # Create the per-sample output directory if needed.
        sample_dir = os.path.join(ig_config.out_dir,
                                  str(ig_config.sample_index))
        os.makedirs(sample_dir, exist_ok=True)
        with open(ig_config.word_out_file, "wb") as f:
            pkl.dump(importances["word_importances"], f)
        with open(ig_config.token_out_file, "wb") as f:
            pkl.dump(importances["token_importances"], f)

        # Note: this rebinds `importances` from the dict to the
        # (words, importances, word_wise_offsets) tuple just dumped.
        words, importances, word_wise_offsets = importances["word_importances"]

    else:
        with open(ig_config.word_out_file, "rb") as f:
            words, importances, word_wise_offsets = pkl.load(f)

    # Ground-truth and predicted spans are stored as stringified offset
    # lists; ast.literal_eval (requires `import ast`) is a safer drop-in
    # for eval().
    ground_spans = _contiguous_ranges(
        ast.literal_eval(
            pd.read_csv(ig_config.ground_truths_file)["spans"][
                ig_config.sample_index]))

    predicted_spans = _contiguous_ranges(
        ast.literal_eval(
            pd.read_csv(ig_config.predictions_file, header=None,
                        sep="\t")[1][ig_config.sample_index]))

    ground_text_spans = []
    predicted_text_spans = []
    if ignore_first_word:
        for span in ground_spans:
            ground_text_spans.append(text[1][span[0]:span[1] + 1])
        for span in predicted_spans:
            predicted_text_spans.append(text[1][span[0]:span[1] + 1])
    else: