def cleanTrainSet():
    from helper import load_file
    data = load_file("train_set.csv")
    data = parseDatasetDate(data)
    data = OHEDataset(data)
    
    return data
def cleanBill2():
    from helper import load_file
    from pandas import merge
    from CleanComponents import cleanComponents
    from sklearn.decomposition import PCA
    from pandas import DataFrame
    
    billOfComponents = load_file("bill_of_materials.csv")
    billOfComponents['tube_assembly_id'] = billOfComponents.index
    
    components = cleanComponents()
    components['component_id'] = components.index
    names = components.columns.values
    
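    # bill_of_materials has up to 8 component slots per tube assembly; suffix the
    # component columns with the slot number and merge them in one slot at a time.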
    for i in range(1, 9):
        cols = names + "_" + str(i)
        components.columns = cols
        billOfComponents = merge(billOfComponents, components, how='left', on="component_id_" + str(i))
        billOfComponents = billOfComponents.drop("component_id_" + str(i), axis=1)
    
    billOfComponents.index = billOfComponents['tube_assembly_id']
    billOfComponents = billOfComponents.drop("tube_assembly_id", axis=1)
    billOfComponents = billOfComponents.fillna(0)
    
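    # Collapse the wide merged component matrix down to 20 principal components.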
    pca = PCA(n_components=20)
    transformed = pca.fit_transform(billOfComponents)
    
    billOfComponents = DataFrame(transformed, index=billOfComponents.index)
    cols = ["pca_" + str(i) for i in billOfComponents.columns.values]

    billOfComponents.columns = cols
    
    return billOfComponents
def loadComponentBase():
    from helper import load_file
    from pandas import merge
    components = load_file("components.csv")
    components['component_id'] = components.index
    components.columns = ['component_name', 'component_type_id', 'component_id']
    
    typeComponents = load_file("type_component.csv")
    typeComponents['component_type_id'] = typeComponents.index
    typeComponents.columns = ['component_type_name', 'component_type_id']

    components = merge(components, typeComponents, how='left', on="component_type_id")
    components.index = components.component_id
    
    components = components.fillna('Unnamed')
    
    return components
def cleanTestSet():
    from helper import load_file
    data = load_file("test_set.csv")
    data['id'] = data.index
    data.index = data['tube_assembly_id']
    
    data = data.drop('tube_assembly_id', axis=1)
    
    data = parseDatasetDate(data)
    data = OHEDataset(data)
    
    return data
def loadTubes():
    from helper import load_file
    from pandas import merge

    tube = load_file("tube.csv")
    tube['tube_assembly_id'] = tube.index
    
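    # Merge the end-form table twice, once per physical tube end (end_a, end_x),
    # renaming its columns so the two merges don't collide.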
    tube_end = load_file("tube_end_form.csv")    
    tube_end['end_id'] = tube_end.index

    tube_end.columns = ['forming_a', 'end_id']
    tube = merge(tube, tube_end, how='left', left_on="end_a", right_on="end_id")    

    tube_end.columns = ['forming_x', 'end_id']
    tube = merge(tube, tube_end, how='left', left_on="end_x", right_on="end_id")    
    
    tube.index = tube.tube_assembly_id
    
    tube = tube.drop(["tube_assembly_id", "end_id_x", "end_id_y"], axis=1)
    return tube
def loadComponentSpecifics(components):
    from helper import load_file

    files = ["comp_adaptor.csv", "comp_boss.csv", "comp_elbow.csv", "comp_float.csv", "comp_hfl.csv", "comp_nut.csv", "comp_other.csv", "comp_sleeve.csv", "comp_straight.csv", "comp_tee.csv", "comp_threaded.csv"]
    
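    # Each comp_*.csv carries the attributes of one component family.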
    for file in files:
        data = load_file(file)
        data['component_id'] = data.index
        data['from_file'] = file
        
        if 'component_type_id' in data.columns:
            data = data.drop('component_type_id', axis=1)

        data = data.fillna(0)
        components = specificMerge(data, components, "component_id")    
    
    components.index = components.component_id
    components = components.drop("component_id", axis=1)
    
    return components
def processBill():
    from helper import load_file
    from pandas import concat

    billOfComponents = load_file("bill_of_materials.csv")
    billOfComponents['tube_assembly_id'] = billOfComponents.index
    
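    # Unpivot the 8 (quantity_i, component_id_i) slot pairs into long format.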
    frames = [billOfComponents[['tube_assembly_id', 'quantity_' + str(i+1), 'component_id_' + str(i+1)]] for i in range(8)]
    for df in frames:
        df.columns = ['tube_assembly_id', 'quantity', 'component_id']    
    
    billOfComponents = concat(frames)
    billOfComponents.quantity = billOfComponents.quantity.fillna(0)
    billOfComponents.component_id = billOfComponents.component_id.fillna("None")
    
    billOfComponents['component_number'] = (billOfComponents.quantity != 0).astype(int)
    
    billOfComponents.index = billOfComponents.tube_assembly_id
    billOfComponents = billOfComponents.drop('tube_assembly_id', axis=1)
    
    return billOfComponents
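
A minimal driver for the loaders above might look like this (a hypothetical sketch: it assumes load_file indexes train_set.csv by tube_assembly_id, as cleanTestSet suggests, and that the Caterpillar Tube Pricing CSVs are on helper's search path):

def buildFeatures():
    # Hypothetical driver: join the cleaned train set with tube geometry
    # and the PCA-reduced bill of materials, all keyed by tube_assembly_id.
    train = cleanTrainSet()
    features = train.join(loadTubes()).join(cleanBill2())
    return features.fillna(0)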
Example #8
    max_source_len = max(source_lens)
    max_target_len = max(target_lens)

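    # Pad every sequence up to the batch-wide max length (targets end with <EOS>).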
    for p in ps:
        source_seq = [w2i_source[w] for w in doc_source[p].split()] + [w2i_source["<PAD>"]] * (
                max_source_len - len(doc_source[p].split()))
        target_seq = [w2i_target[w] for w in doc_target[p].split()] + [w2i_target["<PAD>"]] * (
                max_target_len - 1 - len(doc_target[p].split())) + [w2i_target["<EOS>"]]
        source_batch.append(source_seq)
        target_batch.append(target_seq)
    return source_batch, source_lens, target_batch, target_lens


if __name__ == '__main__':
    print('loading data ...')
    doc_source = helper.load_file('./data/small_vocab_en.txt')
    doc_target = helper.load_file('./data/small_vocab_fr.txt')
    s_token2idx, s_idx2token = helper.load_vocab('./data/small_vocab_en.txt', helper.SOURCE_CODES)
    t_token2idx, t_idx2token = helper.load_vocab('./data/small_vocab_fr.txt', helper.TARGET_CODES)
    print('building model...')
    config = config()
    config.source_vocab_size = len(s_token2idx)
    config.target_vocab_size = len(t_token2idx)
    model = Seq2seq(config, t_token2idx, useTeacherForcing=True)
    batches = 10000
    print_every = 100
    print('run model...')
    with tf.Session() as sess:
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        losses = []
Example #9
def loadSpecs():
    from helper import load_file

    specs = load_file("specs.csv")
    return specs
Example #10
helper.step_window = args.win

### ---------- Import Relevant Libraries ----------

import numpy as np
from keras.optimizers import RMSprop
import sys
import random
import re
from helper import load_file, create_model, sequence_length, add_temperature

### ---------- Load Text File and Build Vocabulary ----------

# note that the data length for training (train.py) and predictions (predict.py) must be the same

all_words, unique_words = load_file(args.data)
total_num_words = len(all_words)
len_vocab = len(unique_words)

print('\n----------------------------')
print("> Total number of words:\t" + str(total_num_words))
print("> Length of vocabulary:\t\t" + str(len_vocab))
print('----------------------------')

word_to_int = dict((c, i) for i, c in enumerate(unique_words))
int_to_word = dict((i, c) for i, c in enumerate(unique_words))

### ---------- Define Model ----------

num_layers = 1
drop_out_rate = 0.2
Example #11
def merge(path, branch):
    git_config(path)
    click.secho('Merging branch')
    click.secho('Working directory: %s' % (path))
    if branch is None:
        click.secho('Missing argument: branch', fg='red')
        sys.exit(1)
    click.secho('Branch name: %s' % (branch))
    # Read the config file
    yml_path = os.path.join(path, '.gitcli.yml')
    merge_ignores = []
    conflict_resolve_by_self_files = []
    conflict_resolve_by_others_files = []
    content = load_file(yml_path)
    if content is not None:
        temp = yaml.load(content, Loader=yaml.FullLoader)
        if 'merge_ignores' in temp:
            merge_ignores.extend(temp['merge_ignores'])
        if 'conflict_resolve_by_self_files' in temp:
            conflict_resolve_by_self_files.extend(
                temp['conflict_resolve_by_self_files'])
        if 'conflict_resolve_by_others_files' in temp:
            conflict_resolve_by_others_files.extend(
                temp['conflict_resolve_by_others_files'])
    click.secho('Merge-ignore files configured in .gitcli.yml: %s' % (merge_ignores))
    click.secho('Files configured in .gitcli.yml to resolve conflicts with our side: %s' %
                (conflict_resolve_by_self_files))
    click.secho('Files configured in .gitcli.yml to resolve conflicts with their side: %s' %
                (conflict_resolve_by_others_files))
    errCode, stdMsg, errMsg = run_command(
        'git merge %s --no-commit --no-ff' % (branch), path)
    if errCode == 0 and stdMsg == 'Already up to date.\n':
        click.secho('Nothing to merge', fg='green')
        return
    for merge_ignore in merge_ignores:
        errCode, stdMsg, errMsg = run_command(
            'git checkout HEAD -- %s && git reset HEAD %s' %
            (merge_ignore, merge_ignore), path)
        if errCode == 0:
            click.secho('Ignored file during merge: %s' % (merge_ignore))
        else:
            click.secho('Ignored file during merge: %s %s' % (merge_ignore, errMsg))
    errCode, stdMsg, errMsg = run_command('git clean -df', path)
    if errCode == 0:
        click.secho('Cleaned untracked files', fg='green')
    else:
        click.secho('Failed to clean untracked files', fg='red')
    # List the conflicted files
    errCode, stdMsg, errMsg = run_command(
        'GIT_PAGER='
        ' git diff --name-only --diff-filter=U', path)
    conflict_files = stdMsg.split('\n')
    while '' in conflict_files:
        conflict_files.remove('')
    cannot_fix_conflict_files = []
    is_resolve_conflict = False
    if len(conflict_files) != 0:
        # Resolve the conflicts
        click.secho('Conflicted files:\n%s' % (stdMsg))
        for conflict_file in conflict_files:
            conflict_file_path = os.path.join(path, conflict_file)
            if conflict_file in conflict_resolve_by_self_files:
                # Resolve using our side
                newcontent = ''
                file_obj = open(conflict_file_path)
                all_lines = file_obj.readlines()
                in_conflict_head_block = False
                in_conflict_others_block = False
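                # Scan the conflict markers, keeping only lines from our side
                # (the block between "<<<<<<<" and "=======").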
                for line in all_lines:
                    if line.startswith('<<<<<<<'):
                        in_conflict_head_block = True
                        continue
                    elif line.startswith('======='):
                        in_conflict_head_block = False
                        in_conflict_others_block = True
                        continue
                    elif line.startswith('>>>>>>>'):
                        in_conflict_others_block = False
                        continue
                    if in_conflict_head_block and not in_conflict_others_block:
                        newcontent += line
                    elif not in_conflict_head_block and in_conflict_others_block:
                        pass
                    else:
                        newcontent += line
                write_file(conflict_file_path, newcontent)
                is_resolve_conflict = True
                click.secho('Resolved conflict using our side: %s' % (conflict_file), fg='green')
            elif conflict_file in conflict_resolve_by_others_files:
                # Resolve using their side
                newcontent = ''
                file_obj = open(conflict_file_path)
                all_lines = file_obj.readlines()
                in_conflict_head_block = False
                in_conflict_others_block = False
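                # Scan the conflict markers, keeping only lines from their side
                # (the block between "=======" and ">>>>>>>").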
                for line in all_lines:
                    if line.startswith('<<<<<<<'):
                        in_conflict_head_block = True
                        continue
                    elif line.startswith('======='):
                        in_conflict_head_block = False
                        in_conflict_others_block = True
                        continue
                    elif line.startswith('>>>>>>>'):
                        in_conflict_others_block = False
                        continue
                    if in_conflict_head_block and not in_conflict_others_block:
                        pass
                    elif not in_conflict_head_block and in_conflict_others_block:
                        newcontent += line
                    else:
                        newcontent += line
                write_file(conflict_file_path, newcontent)
                is_resolve_conflict = True
                click.secho('Resolved conflict using their side: %s' % (conflict_file), fg='green')
            else:
                cannot_fix_conflict_files.append(conflict_file)
        if len(cannot_fix_conflict_files) > 0:
            click.secho('Unresolvable conflicted files: %s' % (cannot_fix_conflict_files),
                        fg='red')
            sys.exit(1)
    all_modify_files = []
    # List unstaged modified files
    errCode, stdMsg, errMsg = run_command('GIT_PAGER='
                                          ' git diff --name-only', path)
    unstaged_modify_files = stdMsg.split('\n')
    while '' in unstaged_modify_files:
        unstaged_modify_files.remove('')
    for modify_file in unstaged_modify_files:
        errCode, stdMsg, errMsg = run_command('git add \'%s\'' % (modify_file),
                                              path)
        if errCode != 0:
            click.secho('Failed to stage file: %s' % (errMsg), fg='red')
            sys.exit(1)
    all_modify_files.extend(unstaged_modify_files)
    # Include modified files already in the staging area
    errCode, stdMsg, errMsg = run_command(
        'GIT_PAGER='
        ' git diff --cached --name-only', path)
    staged_modify_files = stdMsg.split('\n')
    while '' in staged_modify_files:
        staged_modify_files.remove('')
    all_modify_files.extend(staged_modify_files)
    # Report when nothing was modified (merges that only resolved conflicts are exempt)
    if len(all_modify_files) == 0 and not is_resolve_conflict:
        click.secho('No files modified', fg='green')
    # git commit
    errCode, stdMsg, errMsg = run_command(
        'git commit -m \'gitcli: merge from %s\'' % (branch), path)
    if errCode == 0:
        click.secho('Commit succeeded', fg='green')
    else:
        click.secho('Commit failed %s' % (errMsg), fg='red')
    # git push
    errCode, stdMsg, errMsg = run_command('git push', path)
    if errCode == 0:
        click.secho('Merge pushed successfully', fg='green')
    else:
        click.secho('Failed to push merge: %s' % (errMsg), fg='red')
        sys.exit(1)
Example #12
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
from helper import load_file

import base64
import pprint

def AES(msg, key):
    # Encrypt msg under AES in ECB mode (msg length must be a multiple of 16 bytes).
    cipher = Cipher(algorithms.AES(key), modes.ECB(), backend=default_backend())
    encryptor = cipher.encryptor()

    return encryptor.update(msg) + encryptor.finalize()

def AES_decryptor(ciphertext, key):
    cipher = Cipher(algorithms.AES(key), modes.ECB(), backend=default_backend())
    decryptor = cipher.decryptor()

    return decryptor.update(ciphertext) + decryptor.finalize()

if __name__ == "__main__":
    
    key = b"YELLOW SUBMARINE"
    ciphertext = "".join(load_file("ch7.txt"))
    ciphertext = base64.b64decode(ciphertext)
    msg = AES_decryptor(ciphertext, key).decode()

    print(msg.rstrip())
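
A quick round-trip check of the two helpers above (a sketch; AES-ECB requires the plaintext length to be a multiple of the 16-byte block size):

    key = b"YELLOW SUBMARINE"
    msg = b"sixteen byte msg"  # exactly one AES block
    assert AES_decryptor(AES(msg, key), key) == msg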
Example #13
            repDict[substring] += 1
        else:
            repDict[substring] = 0

    return sum(repDict.values())


def ecb_detector(cipherText, block):
    # ECB encrypts blocks independently, so any repeated block betrays it.
    return reps(cipherText, block) > 0


if __name__ == "__main__":

    block = 16
    cipherText = load_file("ch8.txt")

    for cp in cipherText:

        ecb = ecb_detector(cp, block)

        if ecb:
            ecb_encoded = cp
            break

    print("ECB ENCODED MESSAGE:\n", ecb_encoded, "\n")
Example #14
"""
Challenge4: Detect single-character XOR
    Given a file of hex encoded string that has been xor'd 
    agains a single character. The goal is to  
    find and decrypt the message. 
"""
from challenge3 import breakSingleXOR
from helper import load_file

# decode the lines in the file
fileList = load_file("ch4.txt")
fileList = list(bytes.fromhex(hexString) for hexString in fileList)

# first for each line return the broken XOR


def detectXOR(cipherList):

    brokenList = []

    for cipher in cipherList:
        brokenList.append(breakSingleXOR(cipher))

    # each candidate ends with its score; return the highest-scoring break
    key = lambda x: x[-1]
    return sorted(brokenList, key=key, reverse=True)[0]


if __name__ == "__main__":