def convert_text(input_path, output_path, vocab):
    """Write a copy of a text file with every token mapped by ``filter_word``.

    For each line obtained from ``data_utils.read(input_path)`` the newline
    is replaced by an ``<eos>`` token, the line is split on whitespace and
    each token is passed through ``filter_word(word, vocab)``. The resulting
    tokens are joined with single spaces and written to ``output_path``, one
    output line per input line.

    :param input_path: path handed to ``data_utils.read``
    :param output_path: path of the file to (over)write
    :param vocab: vocabulary object forwarded unchanged to ``filter_word``
    """
    # The ``with`` block closes the file, so no explicit close() is needed.
    with open(output_path, 'w') as output:
        for line in data_utils.read(input_path):
            tokens = line.replace("\n", " <eos>").split()
            words = [filter_word(word, vocab) for word in tokens]
            output.write(" ".join(words) + "\n")
def create_vocab(path, vocab_size):
    """Build word<->index mappings for the most frequent tokens of a corpus.

    Every line from ``data_utils.read(path)`` has its newline replaced by an
    ``<eos>`` token and is split on whitespace; token occurrences are counted
    and the ``vocab_size`` most frequent tokens are kept.

    :param path: path handed to ``data_utils.read``
    :param vocab_size: maximum number of tokens to keep
    :return: tuple ``(w2idx, idx2w)`` - token -> index and index -> token
        dictionaries, where index 0 is the most frequent token
    """
    counter = defaultdict(int)
    for line in data_utils.read(path):
        for word in line.replace("\n", " <eos>").split():
            counter[word] += 1

    # Sort by descending count; ties are broken by the token itself so the
    # resulting order is deterministic.
    count_pairs = sorted(counter.items(),
                         key=lambda x: (-x[1], x[0]))[:vocab_size]
    words = [w for (w, _) in count_pairs]

    # Report the number of distinct tokens and the least frequent kept entry.
    # Indexing with ``vocab_size - 1`` raised IndexError whenever the corpus
    # had fewer distinct tokens than ``vocab_size``; the last kept pair is the
    # same element whenever that index was valid.
    last_kept = count_pairs[-1] if count_pairs else None
    print(len(counter), last_kept)

    w2idx = dict(zip(words, range(len(words))))
    idx2w = dict(zip(range(len(words)), words))
    return w2idx, idx2w
def update_function(cfg, path_to_zip_file):
    """Update the code and configuration of an existing Lambda function.

    The content read from ``path_to_zip_file`` is uploaded and published with
    ``update_function_code``; afterwards the function configuration is
    rewritten from ``cfg`` with ``update_function_configuration``.

    :param cfg: configuration mapping queried with ``cfg.get(...)``
    :param path_to_zip_file: path of the deployment archive to upload
    """
    print('Updating your Lambda function')
    zip_payload = read(path_to_zip_file)

    access_key = cfg.get('aws_access_key_id')
    secret_key = cfg.get('aws_secret_access_key')

    account_id = get_account_id(access_key, secret_key)
    role = get_role_name(account_id,
                         cfg.get('role', 'lambda_basic_execution'))
    client = get_client('lambda', access_key, secret_key, cfg.get('region'))

    # Step 1: replace the deployed code and publish a new version.
    client.update_function_code(
        FunctionName=cfg.get('function_name'),
        ZipFile=zip_payload,
        Publish=True
    )

    # Step 2: push the configuration, falling back to defaults where the
    # config file does not provide a value.
    vpc_config = {
        'SubnetIds': cfg.get('subnet_ids', []),
        'SecurityGroupIds': cfg.get('security_group_ids', []),
    }
    config_kwargs = dict(
        FunctionName=cfg.get('function_name'),
        Role=role,
        Handler=cfg.get('handler'),
        Description=cfg.get('description'),
        Timeout=cfg.get('timeout', 15),
        MemorySize=cfg.get('memory_size', 512),
        VpcConfig=vpc_config
    )

    if 'environment_variables' in cfg:
        # Pass a fresh copy of the configured variables.
        variables = dict(cfg.get('environment_variables').items())
        config_kwargs['Environment'] = {'Variables': variables}

    client.update_function_configuration(**config_kwargs)
def pip_install_to_target(path, src, local_package=None):
    """Install the project's requirement packages into the given path.

    The lines of ``requirements.txt`` found in ``src`` are gathered, any
    ``local_package`` entries are appended, and the resulting list is handed
    to ``_install_packages`` together with ``path``.

    :param str path: Path to install the packages to.
    :param str src: Directory of the project which shall be deployed; its
                    ``requirements.txt`` (if present) lists the packages to
                    install.
    :param local_package: The path (or a list/tuple of paths) to a local
                          package which should be included in the deploy as
                          well (and/or is not available on PyPI).
    """
    packages = []
    print(path)

    # The listings themselves are discarded; the calls are kept because they
    # raise early when either directory is missing or unreadable.
    os.listdir(path)
    os.listdir(src)

    requirements_path = os.path.join(src, 'requirements.txt')
    if os.path.exists(requirements_path):
        print('Gathering requirement packages')
        data = read(requirements_path)
        packages.extend(data.splitlines())
    else:
        print('Please Specify A Requirements.txt file')

    if not packages:
        print('No dependency packages installed!')

    if local_package is not None:
        # Accept a single path as well as a list/tuple of paths.
        if not isinstance(local_package, (list, tuple)):
            local_package = [local_package]
        packages.extend(local_package)

    _install_packages(path, packages)
def create_corpus(input_path, output_path, vocab):
    """Split data to create training, validation and test corpus.

    Lines from ``data_utils.read(input_path)`` are converted with
    ``convert_line(line, vocab)`` and distributed by their zero-based index:
    index % 10 == 0 goes to ``valid.txt``, index % 10 == 1 goes to
    ``test.txt`` and every other line goes to ``train.txt``. Training lines
    are shuffled before they are written.

    :param input_path: path handed to ``data_utils.read``
    :param output_path: directory in which the three files are created
    :param vocab: vocabulary object forwarded unchanged to ``convert_line``
    """
    train = []
    # Context managers guarantee the three files are closed even when reading
    # or converting a line raises.
    with open(output_path + "/train.txt", 'w') as f_train, \
            open(output_path + "/valid.txt", 'w') as f_valid, \
            open(output_path + "/test.txt", 'w') as f_test:
        for nlines, line in enumerate(data_utils.read(input_path)):
            converted = " ".join(convert_line(line, vocab)) + "\n"
            remainder = nlines % 10
            if remainder == 0:
                f_valid.write(converted)
            elif remainder == 1:
                f_test.write(converted)
            else:
                # Buffered so the training set can be shuffled as a whole.
                train.append(converted)

        shuffle(train)
        f_train.writelines(train)
parser.add_argument('--nwords', type=int, default=100000000, required=False,
                    help='How many words to process')
parser.add_argument(
    '--min_freq', type=int, default=5, required=False,
    help='Minimal frequency of paradigm to be included in the dictionary')

args = parser.parse_args()

# Count how often each (field 1, field 2, field 3, field 5) combination
# occurs in the tab-separated input.
nwords = 0
paradigms = defaultdict(int)
for line in data_utils.read(args.input):
    fields = line.split("\t")
    # Skip blank lines and rows that have no second column.
    if line.strip() == "" or len(fields) < 2:
        continue

    # The key reads columns up to index 5, so only rows with at least six
    # columns can contribute a paradigm; shorter rows used to raise
    # IndexError here. They still count towards the processed-word total.
    if len(fields) > 5 and fields[1].isalpha():
        paradigms[(fields[1], fields[2], fields[3], fields[5])] += 1
    nwords += 1

    # Stop once the counter exceeds the requested number of words.
    if nwords > args.nwords:
        break

# Write every paradigm whose count is strictly greater than --min_freq as
# tab-separated key fields followed by the count.
with open(args.output, 'w') as f:
    for paradigm, freq in paradigms.items():
        if freq > args.min_freq:
            f.write("\t".join(paradigm) + "\t" + str(freq) + "\n")
def main(src, path_to_zip_file):
    """
    @Block
    :desc: Register and upload a function to AWS Lambda.

    :param src: path to lambda function
    :type src: string
    :example src: my_lambda_function

    :param path_to_zip_file: path to the zip file to deploy
    :type path_to_zip_file: string
    :example path_to_zip_file: my_lambda_function/dist/myzip.zip
    """
    print('Creating your new Lambda function')

    # NOTE(review): yaml.load can construct arbitrary objects; confirm the
    # config file is trusted or switch the loader to yaml.safe_load.
    cfg = read(os.path.join(src, 'config.yaml'), loader=yaml.load)

    # An already-registered function is updated in place instead of created.
    if function_exists(cfg, cfg.get('function_name')):
        update_function(cfg, path_to_zip_file)
        return

    zip_payload = read(path_to_zip_file)
    access_key = cfg.get('aws_access_key_id')
    secret_key = cfg.get('aws_secret_access_key')

    account_id = get_account_id(access_key, secret_key)
    role = get_role_name(account_id,
                         cfg.get('role', 'lambda_basic_execution'))
    client = get_client('lambda', access_key, secret_key, cfg.get('region'))

    # The LAMBDA_FUNCTION_NAME environment variable, when set to a non-empty
    # value, takes precedence over the name from the config file.
    func_name = (os.environ.get('LAMBDA_FUNCTION_NAME') or
                 cfg.get('function_name'))
    print('Creating lambda function with name: {}'.format(func_name))

    create_kwargs = dict(
        FunctionName=func_name,
        Runtime=cfg.get('runtime', 'python2.7'),
        Role=role,
        Handler=cfg.get('handler'),
        Code={'ZipFile': zip_payload},
        Description=cfg.get('description'),
        Timeout=cfg.get('timeout', 15),
        MemorySize=cfg.get('memory_size', 512),
        Publish=True
    )

    if 'environment_variables' in cfg:
        # Pass a fresh copy of the configured variables.
        variables = dict(cfg.get('environment_variables').items())
        create_kwargs['Environment'] = {'Variables': variables}

    client.create_function(**create_kwargs)
def main(src, local_package=None):
    """
    @Block
    :desc: This block bundles a lambda_func into a zipfile for deployment on
        aws. It installs all of the requirements listed within the
        requirements.txt file of the directory, and uploads them to a
        directory called dist within the lambda function

    :param src: the path to the lambda function
    :type src: string
    :example src: build_lambda_function

    :param local_package: A boolean indicating whether or not to install the
        local package
    :type local_package: boolean
    :example local_package: True

    :return: Path to zip file created
    """
    # Resolve a relative ``src`` against the current working directory. The
    # previous test (`'ec2-user' or 'Users' not in src`) was always true; the
    # join left absolute paths untouched, which is what isabs() states
    # explicitly.
    if not os.path.isabs(src):
        src = os.path.join(os.getcwd(), src)

    # Load and parse the config file.
    # NOTE(review): yaml.load can construct arbitrary objects; confirm the
    # config file is trusted or switch the loader to yaml.safe_load.
    path_to_config_file = os.path.join(src, 'config.yaml')
    cfg = read(path_to_config_file, loader=yaml.load)

    # Get the absolute path to the output directory and create it if it
    # doesn't already exist.
    dist_directory = cfg.get('dist_directory', 'dist')
    path_to_dist = os.path.join(src, dist_directory)
    mkdir(path_to_dist)

    # Combine the name of the Lambda function with the current timestamp to
    # use for the output filename.
    function_name = cfg.get('function_name')
    output_filename = '{0}-{1}.zip'.format(timestamp(), function_name)

    # Remembered so the caller's working directory can be restored after the
    # temporary "cd" needed for archiving.
    original_cwd = os.getcwd()

    with make_temp_directory(prefix="aws-lambda") as path_to_temp:
        print("PATH TO TEMP {}".format(path_to_temp))
        # NOTE(review): ``local_package`` is forwarded unchanged to
        # pip_install_to_target, which treats it as a package path (or a list
        # of paths) - confirm against the boolean described in the docstring.
        pip_install_to_target(path_to_temp, src, local_package=local_package)

        # Hack for Zope.
        if 'zope' in os.listdir(path_to_temp):
            print('Zope packages detected; fixing Zope package paths to '
                  'make them importable.')
            # Touch.
            with open(os.path.join(path_to_temp, 'zope/__init__.py'), 'wb'):
                pass

        # Gracefully handle whether ".zip" was included in the filename or
        # not.
        if not output_filename.endswith('.zip'):
            output_filename = '{0}.zip'.format(output_filename)

        files = []
        dirs = []
        print(dist_directory)
        print("CURRENT DIRECTORY {}".format(os.getcwd()))
        for filename in os.listdir(src):
            full_path = os.path.join(src, filename)
            # The entry must be tested inside ``src``; testing the bare name
            # resolved it against the working directory, so subdirectories of
            # ``src`` were treated as plain files and passed to copyfile.
            if os.path.isdir(full_path):
                if filename == dist_directory:
                    continue
                dirs.append(full_path)
            else:
                if filename == '.DS_Store':
                    continue
                if filename == 'config.yaml':
                    continue
                if 'dist' in filename:
                    continue
                if '.pyc' in filename:
                    continue
                files.append(full_path)

        print("PATH TO TEMP {}".format(path_to_temp))
        for f in files:
            print("FILE IN THIS DIRECTORY {}".format(f))
            print("PATH TO TEMP {}".format(path_to_temp))
            print("CURRENT DIRECTORY {}".format(os.getcwd()))
            print("LIST DIRECTORY {}".format(os.listdir(src)))
            _, filename = os.path.split(f)

            # Copy handler file into root of the packages folder.
            print(os.path.join(path_to_temp, filename))
            copyfile(f, os.path.join(path_to_temp, filename))

        for d in dirs:
            print("This is the directory we are interested in {}".format(d))
            _, dirname = os.path.split(d)
            print("This is the name we are copying too {}".format(dirname))
            copytree(d, os.path.join(path_to_temp, dirname))

        # Zip them together into a single file.
        # TODO: Delete temp directory created once the archive has been
        # compiled.
        try:
            # "cd" into the temp directory so the archive is built from './'.
            os.chdir(path_to_temp)
            print("CURRENT DIRECTORY {}".format(os.getcwd()))
            path_to_zip_file = archive('./', path_to_dist, output_filename)
        finally:
            # Always go back to where the caller started, even when archiving
            # fails, instead of guessing a location relative to ``src``.
            os.chdir(original_cwd)
        print("CURRENT DIRECTORY {}".format(os.getcwd()))

    return path_to_zip_file