示例#1
0
def load_sentence(sent_dict):
    """Build a ``Sentence`` from its dictionary representation.

    Args:
        sent_dict: Mapping that provides the keys ``"tokens"``,
            ``"pub_time"`` and ``"raw"``.

    Returns:
        A ``Sentence`` constructed from the raw text, the loaded tokens, the
        publication time (parsed with ``arrow`` and passed through
        ``utils.strip_to_date``), the resolved time and its level.

        The level is ``"d"``, ``"m"`` or ``"y"`` depending on which letter
        the token time format contains (checked in that order), or ``None``
        when the tokens yield no time or the format has none of those
        letters. A day-level time is a single ``datetime.datetime``; a
        month- or year-level time is a ``(start, end)`` pair of
        ``datetime.datetime`` values built from the year/month/day of the
        corresponding arrow span bounds.
    """
    tokens = load_tokens(sent_dict["tokens"])
    pub_time = utils.strip_to_date(arrow.get(sent_dict["pub_time"]))

    resolved = Sentence.get_time(tokens)
    level = None
    if resolved:
        resolved = arrow.get(resolved)
        fmt = Sentence.get_time_format(tokens)
        if "d" in fmt:
            level = "d"
            resolved = datetime.datetime(
                resolved.year, resolved.month, resolved.day
            )
        else:
            # Month takes precedence over year when both letters are present.
            if "m" in fmt:
                frame, level = "month", "m"
            elif "y" in fmt:
                frame, level = "year", "y"
            else:
                # Unrecognised format: keep the arrow value and a None level.
                frame = None

            if frame is not None:
                first, last = resolved.span(frame)
                resolved = (
                    datetime.datetime(first.year, first.month, first.day),
                    datetime.datetime(last.year, last.month, last.day),
                )

    return Sentence(sent_dict["raw"], tokens, pub_time, resolved, level)
示例#2
0
def load_sentence(sent_dict):
    """Turn a serialized sentence dictionary into a ``Sentence`` object.

    Args:
        sent_dict: Mapping with the keys ``'tokens'``, ``'pub_time'`` and
            ``'raw'``.

    Returns:
        ``Sentence(raw, tokens, pub_time, time, time_level)`` where
        ``time_level`` is ``'d'``, ``'m'``, ``'y'`` or ``None``. For ``'d'``
        the time is one ``datetime.datetime``; for ``'m'`` and ``'y'`` it is
        a ``(start, end)`` tuple of ``datetime.datetime`` values taken from
        the arrow span of that month or year. When the tokens carry no time,
        the falsy value returned by ``Sentence.get_time`` is passed through
        unchanged.
    """

    def _date_only(moment):
        # Keep only the calendar date of an arrow/datetime-like value.
        return datetime.datetime(moment.year, moment.month, moment.day)

    tokens = load_tokens(sent_dict['tokens'])
    pub_time = utils.strip_to_date(arrow.get(sent_dict['pub_time']))

    time_value = Sentence.get_time(tokens)
    granularity = None

    if time_value:
        time_value = arrow.get(time_value)
        time_format = Sentence.get_time_format(tokens)

        if 'd' in time_format:
            time_value = _date_only(time_value)
            granularity = 'd'
        elif 'm' in time_format or 'y' in time_format:
            # Prefer the finer (month) granularity when both are present.
            granularity = 'm' if 'm' in time_format else 'y'
            unit = 'month' if granularity == 'm' else 'year'
            span_start, span_end = time_value.span(unit)
            time_value = (_date_only(span_start), _date_only(span_end))

    return Sentence(sent_dict['raw'], tokens, pub_time, time_value, granularity)
示例#3
0
    def temporal_graph(self, X, times):
        """Build a sparse similarity graph between items that are close in time.

        Items are grouped by day (each timestamp is first passed through
        ``utils.strip_to_date``). A window of at most ``self.max_days + 1``
        consecutive days slides forward one day at a time, and cosine
        similarities are written into the result only for items that fall
        into a common window.

        Args:
            X: Feature matrix with one row per item, either a dense array or
                a scipy sparse matrix. ``X.shape[0]`` is the number of items.
            times: One timestamp per row of ``X``, in row order.

        Returns:
            A ``scipy.sparse.csr_matrix`` of shape ``(n_items, n_items)``.
            For the first window (and for single-day windows) ``S[i, j]`` is
            set for each pair where ``j`` comes after ``i`` in the window's
            item order. For every later window, ``S[i, j]`` is set for each
            window item ``i`` and each item ``j`` of the window's newest day.

        Raises:
            ValueError: If ``times`` is empty (raised by ``min``/``max``).
        """
        times = [utils.strip_to_date(t) for t in times]
        time_to_ixs = collections.defaultdict(list)
        for ix, day in enumerate(times):
            time_to_ixs[day].append(ix)

        n_items = X.shape[0]
        S = sparse.lil_matrix((n_items, n_items))
        start, end = min(times), max(times)
        total_days = (end - start).days + 1

        # Rows are re-stacked with the routine matching X's storage type.
        stack_rows = sparse.vstack if sparse.issparse(X) else np.vstack

        for n in range(total_days + 1):
            t = start + datetime.timedelta(days=n)
            # The window shrinks near the end of the covered range.
            window_size = min(self.max_days + 1, total_days + 1 - n)
            window = [
                t + datetime.timedelta(days=k) for k in range(window_size)
            ]
            window_indices = [i for day in window for i in time_to_ixs[day]]

            if n == 0 or len(window) == 1:
                # Nothing has been compared yet inside this window: compute
                # all pairwise similarities among its items.
                if not window_indices:
                    continue

                S_n = cosine_similarity(
                    stack_rows([X[i] for i in window_indices])
                )
                n_window = len(window_indices)
                for i_n, i_x in enumerate(window_indices):
                    # Row/column k of S_n belongs to window_indices[k], so
                    # the global column must be looked up with the same
                    # offset j_n that indexes S_n. Zipping window_indices
                    # from its start against range(i_n + 1, ...) would pair
                    # each similarity with the wrong item.
                    for j_n in range(i_n + 1, n_window):
                        S[i_x, window_indices[j_n]] = S_n[i_n, j_n]
            else:
                # Only the newest day is new to the window; window_indices
                # holds the previous items plus the new ones.
                new_indices = time_to_ixs[window[-1]]
                if not new_indices:
                    continue

                X_prev = stack_rows([X[i] for i in window_indices])
                X_new = stack_rows([X[i] for i in new_indices])

                S_n = cosine_similarity(X_prev, X_new)
                for i_n, i_x in enumerate(window_indices):
                    for j_n, j_x in enumerate(new_indices):
                        S[i_x, j_x] = S_n[i_n, j_n]

        return sparse.csr_matrix(S)
示例#4
0
def get_input_time_span(ref_dates, extension):
    """Return the reference date range widened by ``extension`` days.

    Args:
        ref_dates: Collection of reference dates; its minimum and maximum
            are each passed through ``utils.strip_to_date``.
        extension: Number of days to subtract from the earliest date and to
            add to the latest date.

    Returns:
        A ``(input_start, input_end)`` tuple.
    """
    earliest = utils.strip_to_date(min(ref_dates))
    latest = utils.strip_to_date(max(ref_dates))
    margin = datetime.timedelta(days=extension)
    return earliest - margin, latest + margin