Example #1
def parse_input_data(cache_file, url, cookie_file):
    """
    Calls the utility function to load either a cached data file or pull it from the AoC server, then splits it
    on newlines, strips the lines and extracts the password rules.

    :param cache_file:      The file in which the input data is stored
    :param url:             The URL to the AoC page
    :param cookie_file:     The path to the cookie file (only used if the cache file isn't found)
    :return: The input data as a list of rows, where each row is (min_val, max_val, required_letter, password)
    """

    raw_data = utils.load_input_data(cache_file, url, cookie_file)

    def extract_info(line):
        m = re.match(r'^(\d+)-(\d+)\s(\w+):\s([^\s]+)\s*$', line)
        if m is None:
            return None

        min_val = int(m.group(1))
        max_val = int(m.group(2))
        req_letter = m.group(3)
        password = m.group(4)

        return (min_val, max_val, req_letter, password)

    data = [extract_info(line) for line in raw_data.split("\n")]
    return [d for d in data if d is not None]
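
A minimal sketch (not from the original) of extract_info's regex on a typical AoC Day 2 password line:

import re

m = re.match(r'^(\d+)-(\d+)\s(\w+):\s([^\s]+)\s*$', "1-3 a: abcde")
print(m.groups())  # ('1', '3', 'a', 'abcde')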
Example #2
def parse_input_data(cache_file, url, cookie_file):
    """
    Calls the utility function to load either a cached data file or pull it from the AoC server, then splits it
    on newlines, strips the lines and extracts the integer values.

    :param cache_file:      The file in which the input data is stored
    :param url:             The URL to the AoC page
    :param cookie_file:     The path to the cookie file (only used if the cache file isn't found)
    :return: The input data as a list of ints
    """
    raw_data = utils.load_input_data(cache_file, url, cookie_file)
    return [int(r.strip()) for r in raw_data.split("\n") if r.strip() != ""]
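
A minimal sketch (not from the original) of the strip-and-filter pipeline above, applied to a raw blob with a blank line and a trailing newline:

raw = "12\n7\n\n42\n"
print([int(r.strip()) for r in raw.split("\n") if r.strip() != ""])  # [12, 7, 42]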
Example #3
def get_x_y(features, label, data=None, reduce_classes=True, path=None):
    """
    Builds the feature matrix X and the label vector y, honoring the optional
    'combined_label'/'combined_from' and 'reduce_class_label' entries in the
    global config.
    """
    config = load_global_config()
    if data is None:
        data = load_input_data(path)

    try:
        combined_label = config['combined_label']
        combined_from = config['combined_from']
    except KeyError:
        X, y = extract_features_and_label(
            data,
            label,
            features,
        )
    else:
        if label == combined_label:
            # The combined label is derived rather than stored: reduce the
            # first source column to a binary class and OR it elementwise
            # with the second.
            X, _ = extract_features_and_label(
                data,
                features=features,
            )
            class1 = data[combined_from[0]]
            class2 = data[combined_from[1]]
            y = lvddf_to_1_class(class1).combine(
                class2, lambda val1, val2: val1 or val2)
        else:
            X, y = extract_features_and_label(
                data,
                label,
                features,
            )

    try:
        reduce_class_label = config['reduce_class_label']
    except KeyError:
        pass
    else:
        # Optionally collapse the configured label down to a single binary class.
        if label == reduce_class_label and reduce_classes:
            y = lvddf_to_1_class(y)

    return X, y
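
A minimal sketch (not from the original) of the elementwise OR behind the combined label above; lvddf_to_1_class is project-specific and assumed to return a binary Series, so plain 0/1 Series stand in for its output here:

import pandas as pd

# 0/1 Series standing in for lvddf_to_1_class(class1) and class2
class1 = pd.Series([0, 1, 0])
class2 = pd.Series([0, 0, 1])
combined = class1.combine(class2, lambda val1, val2: val1 or val2)
print(list(combined))  # [0, 1, 1]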
Example #4
def task_1(raw_data):
    data = list(utils.parse_int_data(raw_data))
    freqs = get_difference_frequency(data)
    prod_1x3 = freqs[1] * freqs[3]
    print("Joltage differences: ", freqs)
    print(f"Product of 1 and 3 differences: {prod_1x3}")
    return prod_1x3


def task_2(raw_data):
    data = list(utils.parse_int_data(raw_data))
    # The device's built-in adapter is rated 3 jolts above the highest adapter.
    tgt = max(data) + 3
    data += [tgt]
    data.sort()

    # scores[d] counts the arrangements ending at adapter d; an adapter can
    # follow any adapter 1, 2 or 3 jolts below it.
    scores = defaultdict(int)
    scores[0] = 1
    for d in data:
        scores[d] = scores[d - 3] + scores[d - 2] + scores[d - 1]

    print(f"Total number of valid arrangements is {scores[data[-1]]}")


if __name__ == "__main__":
    input_data = utils.load_input_data("cached_input.txt", INPUT_URL, '../session_cookie.txt')
    print("Task 1:")
    task_1(input_data)

    print()
    print("Task 2:")
    task_2(input_data)
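
A minimal sketch (not from the original): the counting DP from task_2 run on the small sample from AoC 2020 Day 10, which has 8 valid adapter arrangements.

from collections import defaultdict

sample = [16, 10, 15, 5, 1, 11, 7, 19, 6, 12, 4]
sample += [max(sample) + 3]  # the device's built-in adapter
sample.sort()

scores = defaultdict(int)
scores[0] = 1
for d in sample:
    scores[d] = scores[d - 3] + scores[d - 2] + scores[d - 1]

print(scores[sample[-1]])  # 8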
Example #5
    :param pattern_len: size of the pattern matrix
    :param text_len: size of the text matrix
    :param patterns: the pattern as a 2-D array
    :param text: the text as a 2-D array
    :return: the positions of all matching matrices in the text, as tuples
    '''
    ret = []

    pattern_arr = [list(v) for v in patterns]
    text_arr = [list(v) for v in text]
    for i in range(text_len - pattern_len + 1):
        for j in range(text_len - pattern_len + 1):
            # Compare the pattern_len x pattern_len window anchored at (i, j)
            # with the pattern; record its bottom-right corner on a match.
            matched = [e[j:j + pattern_len]
                       for e in text_arr[i:i + pattern_len]] == pattern_arr
            if matched:
                ret.append((i + pattern_len - 1, j + pattern_len - 1))

    return ret


if __name__ == "__main__":
    data = load_input_data(args.input)
    outputs = load_output_data(args.output)

    check_result = checker_naive(*data)
    print(check_result)

    if check_result == outputs:
        save_check_data(args.check, "yes")
    else:
        save_check_data(args.check, "no")
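
A minimal sketch (not from the original), assuming checker_naive takes its arguments in docstring order (pattern_len, text_len, patterns, text): a 2x2 pattern found once in a 3x3 text, reported by its bottom-right corner.

# argument order is an assumption inferred from the docstring above
patterns = ["ab", "cd"]
text = ["abx", "cdx", "xxx"]
print(checker_naive(2, 3, patterns, text))  # [(1, 1)]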
Example #6
    )


def format_columns(data: DataFrame) -> DataFrame:
    data_new = data.copy()
    data_new.columns = [column.upper() for column in data.columns]
    return data_new


def select_features(data: DataFrame, features: Iterable[str]) -> DataFrame:
    return data[[feature.upper() for feature in features]]


def impute_missing(data: DataFrame) -> DataFrame:
    # SimpleImputer returns a bare ndarray, so rebuild the DataFrame with the
    # original column labels and index.
    return DataFrame(SimpleImputer().fit_transform(data), columns=data.columns, index=data.index)


config = load_global_config()
data_all = load_input_data()
data_without_cardiac_events = data_all[data_all['HCAR2'] != 1]

datasets_all = get_datasets(data_all)
datasets_without_cardiac_events = get_datasets(data_without_cardiac_events)

datasets_to_report = [
    (
        'All—correlated >0.8 removed', 'all_without_correlation',
        datasets_all['clustering_correlated_removed']
    ),
]
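
A minimal sketch (not from the original) exercising impute_missing in isolation; SimpleImputer defaults to mean imputation, so each NaN becomes its column mean while the index and column labels survive the round trip:

import numpy as np
from pandas import DataFrame

df = DataFrame({'A': [1.0, np.nan, 3.0], 'B': [4.0, 5.0, np.nan]})
print(impute_missing(df))  # A: 1.0, 2.0, 3.0 / B: 4.0, 5.0, 4.5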
Example #7
import pandas as pd
from utils import one_hot_encoder, load_input_data, \
    build_classifier

# ----------------------
# Load input data
# ----------------------

# Class labels:
# antigen binder = 1, non-binder = 0

# Load non-binding sequences
ab_neg_files = [
    'mHER_H3_1_Ab.txt', 'mHER_H3_1_AgN.txt', 'mHER_H3_2_Ab.txt',
    'mHER_H3_2_AgN.txt', 'mHER_H3_3_Ab.txt', 'mHER_H3_3_AgN.txt'
]
mHER_H3_AgNeg = load_input_data(ab_neg_files, Ag_class=0)

# Load binding sequences
ab_pos_files = [
    'mHER_H3_1_2Ag647.txt', 'mHER_H3_1_2Ag488.txt', 'mHER_H3_2_2Ag647.txt',
    'mHER_H3_2_2Ag488.txt', 'mHER_H3_3_2Ag647.txt', 'mHER_H3_3_2Ag488.txt'
]
mHER_H3_AgPos = load_input_data(ab_pos_files, Ag_class=1)

# Fuse Ag positive and negative sequences
Ag_combined = pd.concat([mHER_H3_AgPos, mHER_H3_AgNeg])
Ag_combined = Ag_combined.drop_duplicates(subset='AASeq')
# Shuffle so positive and negative sequences are interleaved
Ag_combined = Ag_combined.sample(frac=1).reset_index(drop=True)

# Save sequences and labels
X = Ag_combined.AASeq
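
A minimal sketch (not from the original) of the shuffle idiom used above: sample(frac=1) draws every row in random order, and reset_index(drop=True) renumbers the shuffled rows from zero.

import pandas as pd

toy = pd.DataFrame({'AASeq': ['AAA', 'BBB', 'CCC']})
print(toy.sample(frac=1).reset_index(drop=True))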