def script_to_colab(name, description, instructions, tasks, parameters=None):
    """Render a recipe as a step-by-step Colab notebook.

    The notebook is assembled in numbered sections: install dependencies, set
    the cloud project, set client credentials, optionally enter recipe
    parameters (only when the tasks declare fields), and execute the recipe.

    Args:
      name: Recipe name, passed to Colab() and used in section headers.
      description: Text shown at the top of the parameters section.
      instructions: Items passed to colab.list() in the parameters section.
      tasks: Recipe tasks; scanned for fields and embedded as TASKS.
      parameters: Optional values forwarded to fields_to_string(). Defaults to
        a fresh empty dict on every call.

    Returns:
      The result of colab.render() for the assembled notebook.
    """
    # A literal {} default would be shared across calls; build one per call.
    if parameters is None:
        parameters = {}

    colab = Colab(name)

    colab.header('1. Install Dependencies')
    colab.paragraph(
        'First install the libraries needed to execute recipes, this only needs to be done once, then click play.'
    )
    colab.code('!pip install git+https://github.com/google/starthinker')

    colab.header('2. Get Cloud Project ID')
    colab.paragraph(
        'To run this recipe [requires a Google Cloud Project](https://github.com/google/starthinker/blob/master/tutorials/cloud_project.md), this only needs to be done once, then click play.'
    )
    #colab.image('Client Project ID', 'https://github.com/google/starthinker/raw/master/tutorials/images/cloud_project.png')
    colab.code("CLOUD_PROJECT = 'PASTE PROJECT ID HERE'")
    colab.code('\nprint("Cloud Project Set To: %s" % CLOUD_PROJECT)')

    colab.header('3. Get Client Credentials')
    #colab.image('Client Credentials', 'https://github.com/google/starthinker/raw/master/tutorials/images/cloud_client_installed.png')
    colab.paragraph(
        'To read and write to various endpoints requires [downloading client credentials](https://github.com/google/starthinker/blob/master/tutorials/cloud_client_installed.md), this only needs to be done once, then click play.'
    )
    colab.code("CLIENT_CREDENTIALS = 'PASTE CREDENTIALS HERE'")
    colab.code('\nprint("Client Credentials Set To: %s" % CLIENT_CREDENTIALS)')

    # The parameters section only exists when the recipe declares fields,
    # which is why the execute section below is numbered 4 or 5.
    fields = json_get_fields(tasks)
    if fields:
        colab.header('4. Enter %s Parameters' % name)
        colab.paragraph(description)
        colab.list(instructions)
        colab.paragraph(
            'Modify the values below for your use case, can be done multiple times, then click play.'
        )
        colab.code('FIELDS = %s' % fields_to_string(fields, parameters))
        colab.code('\nprint("Parameters Set To: %s" % FIELDS)')

    colab.header('%d. Execute %s' % (5 if fields else 4, name))
    colab.paragraph(
        'This does NOT need to be modified unless you are changing the recipe, click play.'
    )
    colab.code('from starthinker.util.project import project')
    colab.code('from starthinker.script.parse import json_set_fields')
    colab.code('')
    colab.code("USER_CREDENTIALS = '/content/user.json'")
    colab.code('')
    colab.code('TASKS = %s' % dict_to_string(json_set_auths(tasks, 'user'), skip=('field', )))
    colab.code('')
    if fields:
        colab.code('json_set_fields(TASKS, FIELDS)')
    colab.code('')
    colab.code(
        "project.initialize(_recipe={ 'tasks':TASKS }, _project=CLOUD_PROJECT, _user=USER_CREDENTIALS, _client=CLIENT_CREDENTIALS, _verbose=True, _force=True)"
    )
    colab.code('project.execute(_force=True)')

    return colab.render()
def script_interactive():
    """Prompt for each recipe field on the console and write the recipe.

    Reads the template path from sys.argv[1] and, when present, the output
    path from sys.argv[2]; otherwise the output path is asked for as the first
    step. Every field found by json_get_fields() is then prompted for in turn.
    Blank answers are left out of the collected values so that defaults can
    apply. The result is handed to script_write().
    """
    from_json = sys.argv[1]
    # Accept the output path whenever it is supplied, even with extra arguments.
    to_json = sys.argv[2] if len(sys.argv) > 2 else ''

    script = get_project(from_json)

    # parse fields and constants into parameters
    fields = json_get_fields(script)

    # Step 1 is the output path, so the total is one more than the field count.
    total = len(fields) + 1

    if to_json:
        print('\n(1 of %d) From %s template create recipe: %s\n' %
              (total, from_json, to_json))
    else:
        print('\n(1 of %d) Recipe file to create from %s template.\n' %
              (total, from_json))
        to_json = input("Full Path TO JSON File:")

    args = {}
    for step, field in enumerate(fields, start=2):
        print('\n(%d of %d) %s' % (step, total, field.get('description', '')))
        if 'default' in field:
            print(' ( Default to "%s" if blank. )' % field['default'])
        print('\n')

        answer = input("%s ( %s ):" % (field['name'], field['kind']))
        print('\n')

        # skip blanks ( they should have defaults )
        if answer:
            args[field['name']] = answer

    script_write(script, args, to_json)
def script_to_dag(dag_name, title, description, instructions, tasks, parameters=None):
    """Fill AIRFLOW_TEMPLATE with the details of a recipe.

    Args:
      dag_name: Name substituted into the last slot of the template.
      title: Recipe title.
      description: Recipe description.
      instructions: Iterable of instruction strings, joined one per line.
      tasks: Recipe tasks; scanned for fields and serialized into the template.
      parameters: Optional values forwarded to fields_to_string(). Defaults to
        a fresh empty dict on every call.

    Returns:
      The AIRFLOW_TEMPLATE string with all six positional slots filled.
    """
    # A literal {} default would be shared across calls; build one per call.
    if parameters is None:
        parameters = {}

    return AIRFLOW_TEMPLATE % (
        title,
        description,
        '\n'.join(instructions),
        fields_to_string(json_get_fields(tasks), parameters),
        dict_to_string(tasks, skip=('fields',)),
        dag_name,
    )
def __init__(self, sequence, script, values, constants, *args, **kwargs):
    """Build one form field per script variable not supplied by a constant.

    Args:
      sequence: Initial value for the existing 'script_sequence' field.
      script: Object providing get_tag() and get_task(); stored on the form.
      values: Mapping of previously entered values, keyed by variable name.
      constants: Container of variable names that come from recipe constants;
        those variables get no form field.
      *args: Forwarded to the parent form constructor.
      **kwargs: Forwarded to the parent form constructor.
    """
    self.script = script
    self.constants = constants
    super(ScriptJsonForm, self).__init__(*args, **kwargs)

    self.fields['script_sequence'].initial = sequence

    # The original bound this result to an unused local; the call itself is
    # kept so any effect it has is preserved.
    script.get_tag()

    self.variables = json_get_fields(script.get_task())
    for variable in self.variables:
        key = variable['name']

        # skip inputs that come from recipe constants
        if key in constants:
            continue

        self.fields[key] = get_field_kind(variable)
        form_field = self.fields[key]

        # Precedence: stored value, then the variable's 'value', then its
        # 'default', then an empty string.
        fallback = variable.get('value', variable.get('default', ''))
        form_field.initial = values.get(key, fallback)
        form_field.required = variable.get('required', False)
        form_field.help_text = variable.get('description', '')
def script_test_include():
    """Print an "include" task skeleton for the script named in sys.argv[1].

    The output lists every field found by json_get_fields() as a parameter of
    the include. String values are JSON-encoded so that quotes or backslashes
    in a path or description cannot produce invalid JSON.
    """
    import json  # local import: the block must work even if the file lacks it

    def quoted(value):
        # JSON string literal, including the surrounding double quotes.
        return json.dumps(value, ensure_ascii=False)

    script = get_project(sys.argv[1])

    # parse fields and constants into parameters
    parameter_lines = [
        ' %s:{"field":{ "name":%s, "kind":%s, "description":%s }}' % (
            quoted(field['name']),
            quoted(field['name']),
            quoted(field['kind']),
            quoted(field.get('description', '')),
        )
        for field in json_get_fields(script)
    ]

    print(' { "include":{')
    print(' "script":%s,' % quoted(sys.argv[1]))
    print(' "parameters":{')
    print(',\n'.join(parameter_lines))
    print(' }')
    print(' }}')
def script_to_dag(dag_name, title, description, instructions, script, parameters=None):
    """Fill the named placeholders of AIRFLOW_TEMPLATE for a recipe.

    Args:
      dag_name: Value for the 'dag' placeholder.
      title: Value for the 'title' placeholder.
      description: Value for the 'description' placeholder.
      instructions: Iterable of instruction strings, rendered as ' - ' bullets.
      script: Recipe JSON; scanned for fields and serialized as 'recipe'.
      parameters: Optional values forwarded to fields_to_string(). Defaults to
        a fresh empty dict on every call.

    Returns:
      The formatted AIRFLOW_TEMPLATE string.
    """
    # A literal {} default would be shared across calls; build one per call.
    if parameters is None:
        parameters = {}

    return AIRFLOW_TEMPLATE.format(
        title=title,
        description=description,
        instructions=' - ' + '\n - '.join(instructions),
        inputs=fields_to_string(json_get_fields(script), parameters),
        recipe=dict_to_string(script, skip=('fields', )),
        dag=dag_name,
    )
def get_fields_from_gtech_folder():
    """Collect field defaults from every script under TEST_DIRECTORY_PATH.

    Returns:
      Dict keyed by script name (the filename up to its first '.'), mapping
      each field name to its 'default', or '' when the field has none. Scripts
      that declare no fields do not appear in the result.
    """
    fields = {}
    for root, _dirs, files in os.walk(TEST_DIRECTORY_PATH):
        for filename in files:
            # os.walk descends into subdirectories, so the path must be built
            # from the directory currently being visited, not from the top.
            script = script_read(os.path.join(root, filename))
            script_name = filename.split('.')[0]
            for field in json_get_fields(script):
                script_fields = fields.setdefault(script_name, {})
                script_fields[field['name']] = field.get('default', '')
    return fields
def script_commandline():
    """Turn a recipe template into a recipe using command line arguments.

    The template path is taken from sys.argv[1] before argparse runs, because
    the remaining arguments are defined by the fields inside that template.
    The parsed values are passed to script_write() with no output path.
    """
    # assemble parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('json', help='JSON recipe template to configure.', default=None)

    # Without a template path there is nothing to read; show usage and exit
    # instead of failing with an IndexError on sys.argv[1].
    if len(sys.argv) < 2:
        parser.error('a JSON recipe template path is required')

    script = script_read(sys.argv[1])

    # parse fields and constants into parameters
    for field in json_get_fields(script):
        parser_add_field(parser, field)

    # run new arg parser with parameters from script
    args = vars(parser.parse_args())

    # always write to STDOUT, caller should redirect output to a JSON file
    script_write(script, args)
def script_interactive():
    """Prompt for each recipe field on the console and write the recipe.

    Asks for the output path first, then for every field found by
    json_get_fields(). Blank answers are left out of the collected values so
    that defaults can apply. The result is handed to script_write().

    Written to run on both Python 2 and Python 3: print is called with a
    single argument and the line reader is chosen at runtime.
    """
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    script = script_read()

    # parse fields and constants into parameters
    fields = json_get_fields(script)

    # Step 1 is the output path, so the total is one more than the field count.
    total = len(fields) + 1

    print('\n(1 of %d) Recipe file to create from %s template.\n' % (total, sys.argv[1]))
    filepath = read_line("Full Path TO JSON File:")
    print('\n')

    args = {}
    for count, field in enumerate(fields):
        print('\n(%d of %d) %s\n' % (count + 2, total, field.get('description', '')))
        if 'default' in field:
            print('Will default to %s if blank.\n' % field['default'])

        answer = read_line("%s ( %s ):" % (field['name'], field['kind']))
        print('\n')

        # skip blanks ( they should have defaults )
        if answer:
            args[field['name']] = answer

    script_write(script, args, filepath)
def parse_json(filepath):
    """Build the markdown documentation page for one script template.

    Loads the JSON at filepath, takes its 'script' section as the template
    parameters, adds the derived 'path', 'instructions', 'authors', 'fields'
    and 'tasks' entries, and renders the markdown document.

    Uses syntax valid on both Python 2 and Python 3.

    Args:
      filepath: Path of the script JSON file to document.

    Returns:
      The rendered markdown string. The previously visible code built this
      value and then dropped it; returning it is harmless for callers that
      ignore the result.
    """
    print('PROCESSING: %s' % filepath)

    with open(filepath) as f:
        try:
            script = json.load(f)
        except ValueError as e:  # json decode errors are ValueError subclasses
            print('JSON ERROR %s %s' % (filepath, e))
            exit()

    params = script['script']
    params['path'] = filepath.replace(EXECUTE_PATH, '/')
    params['instructions'] = '- ' + '\n- '.join(script['script'].get('instructions', []))
    params['authors'] = ', '.join(script['script'].get('authors', []))

    field_lines = []
    for field in json_get_fields(script):
        line = '- %s (%s) %s' % (field['name'], field['kind'], field.get('description', ''))
        if field.get('default', ''):
            line += 'Default: %s' % str(field['default'])
        field_lines.append(line)
    params['fields'] = '\n'.join(field_lines).strip()

    # Each task is a single-key dict; the key is the task name. dict.keys()
    # is not indexable on Python 3, so take the first key via the iterator.
    task_names = sorted(set(next(iter(task)) for task in script['tasks']))
    params['tasks'] = '- ' + '\n- '.join(
        '[/task/%s](/task/%s)' % (task, task) for task in task_names)

    doc = '''## [%(title)s](%(path)s)

%(description)s

Maintained and supported by: %(authors)s

### Fields

%(fields)s

### Instructions

%(instructions)s

### Task Code Used

Each task in the %(title)s recipe maps to the following stand alone python code modules:

%(tasks)s

### Quick Command Line

To see all required parameters and generate a recipe from this script template run:

`python script/run.py %(path)s -h`

`python script/run.py %(path)s [all required parameters] > projects/recipe.json`

After [getting Google Cloud Credentials](/auth/README.md), execute the recipe created run the following:

`python all/run.py projects/recipe.json -u [user credentials path] -s [service credentials path]`

Any two or more recipes can be combined by copying and pasting task JSON into the task [...] list. All tasks execute in sequence.

''' % params

    return doc
def initialize_tests(scripts, tests):
    """Update the test config file and generate runnable recipes.

    Merges the fields declared by each script with the values already stored
    in CONFIG_FILE, saves the merged values back, then writes one recipe per
    selected script into RECIPE_DIRECTORY with those values applied and all
    includes expanded.

    Args:
      scripts: Iterable of (filename, script JSON) pairs.
      tests: Collection of script names to generate recipes for; recipes are
        generated for every script when this is empty.

    Returns:
      List of recipe filenames written to RECIPE_DIRECTORY.
    """
    # Iterated twice below, so tolerate one-shot iterables such as generators.
    scripts = list(scripts)

    # Get old fields from the config file
    print('UPDATE CONFIG')

    old_fields = {}
    if os.path.exists(CONFIG_FILE):
        with open(CONFIG_FILE, 'r') as f:
            old_fields = json.load(f)

    # Get new fields from test files and merge in old values
    fields = {}
    for filename, script in scripts:
        script_name = filename.split('.')[0]
        old_values = old_fields.get(script_name, {})

        for field in json_get_fields(script):
            field_name = field["name"]
            values = fields.setdefault(script_name, {})
            values[field_name] = old_values.get(field_name, field.get("default", ''))
            values['%s_description' % field_name] = '(%s) %s' % (
                field.get('kind', 'string'),
                field.get('description', 'No description.'))

            if field_name not in old_values:
                print('NEW FIELD ADDED', script_name, field_name)

    # Save field values to config file
    if fields:
        # Serialize first so a failure cannot leave a truncated config file.
        payload = json.dumps(fields, sort_keys=True, indent=2)
        with open(CONFIG_FILE, "w") as f:
            f.write(payload)
    else:
        print('WARNING CONFIGURATION IS EMPTY, CHECK YOUR PATHS!')

    # Create recipe directory
    print('GENERATE RECIPES')
    os.makedirs(RECIPE_DIRECTORY, exist_ok=True)

    # Create recipes from scripts
    recipes = []
    for filename, script in scripts:
        name = filename.split('.')[0]
        if tests and name not in tests:
            continue

        # Set config field values into the script
        json_set_fields(script, fields.get(name, {}))

        # Expand all includes to full recipe
        json_expand_includes(script)

        with open(os.path.join(RECIPE_DIRECTORY, filename), 'w') as f:
            f.write(json.dumps(script, sort_keys=True, indent=2))

        recipes.append(filename)

    # Create log directory (existing logs are left untouched here)
    os.makedirs(LOG_DIRECTORY, exist_ok=True)

    # Display instructions
    print("")
    print("------")
    print("------------")
    print("------------------------")
    print(
        "Some tests require custom values. Update the necessary fields for the tests you wish to run."
    )
    print("EDIT: " + CONFIG_FILE)
    print("------------------------")
    print(
        "Some tests require external assets. Join the following group to gain access."
    )
    print("VISIT: https://groups.google.com/forum/#!forum/starthinker-assets")
    print("------------------------")
    print("------------")
    print("------")
    print("")

    # Pause so the instructions can be read before further output.
    sleep(3)

    return recipes
def configure_tests(tests, runs, skips, test_run_id):
    """Initialize the starthinker_assets/tests.json variable harness.

    Read all existing tests from tests/scripts/*.json and create a dictionary
    of each script and its fields. Save that dictionary to a test harness file
    where the developer can configure tests. Then read the test harness and
    create recipe files that can be run. Write those files to
    tests/recipes/*.json for execution in a later step.

    Args:
      tests: Iterable of (filename, json) pairs containing all the tests.
      runs: List of test names that will be run, all will run if blank.
      skips: List of tests to skip.
      test_run_id: String added as a field to each test, used for namespacing.

    Returns:
      List of recipe filenames written to RECIPE_DIRECTORY, where all fields
      have values from the test harness.
    """
    # Iterated twice below, so tolerate one-shot iterables such as generators.
    tests = list(tests)

    # Get old fields from the config file
    print('UPDATE CONFIG')

    old_fields = {}
    if os.path.exists(CONFIG_FILE):
        with open(CONFIG_FILE, 'r') as f:
            old_fields = json.load(f)

    # Get new fields from test files and merge in old values
    fields = {}
    for filename, script in tests:
        script_name = filename.split('.')[0]
        old_values = old_fields.get(script_name, {})

        for field in json_get_fields(script):
            field_name = field['name']

            # The run id is injected per run below, never stored in the config.
            if field_name == 'test_run_id':
                continue

            values = fields.setdefault(script_name, {})
            values[field_name] = old_values.get(field_name, field.get('default', ''))
            values['%s_description' % field_name] = '(%s) %s' % (
                field.get('kind', 'string'),
                field.get('description', 'No description.'))

            if field_name not in old_values:
                print('NEW FIELD ADDED', script_name, field_name)

    # Save field values to config file
    if fields:
        # Serialize first so a failure cannot leave a truncated config file.
        payload = json.dumps(fields, sort_keys=True, indent=2)
        with open(CONFIG_FILE, 'w') as f:
            f.write(payload)

        if test_run_id:
            # Inject the test run ID to the list of field values that were read
            # from the test config file. This is done in memory only, after the
            # file is written, so that concrete test run values are replaced
            # every time a test runs.
            for script_name in fields:
                fields[script_name]['test_run_id'] = test_run_id
    else:
        print('WARNING CONFIGURATION IS EMPTY, CHECK YOUR PATHS!')

    # Create recipe directory
    print('GENERATE RECIPES')
    os.makedirs(RECIPE_DIRECTORY, exist_ok=True)

    # Create recipes from tests
    recipes = []
    for filename, script in tests:
        name = filename.split('.')[0]
        if runs and name not in runs:
            continue
        if name in skips:
            continue

        # Set config field values into the script
        json_set_fields(script, fields.get(name, {}))

        # Expand all includes to full recipe
        json_expand_includes(script)

        with open(os.path.join(RECIPE_DIRECTORY, filename), 'w') as f:
            f.write(json.dumps(script, sort_keys=True, indent=2))

        recipes.append(filename)

    # Create log directory (existing logs are left untouched here)
    os.makedirs(LOG_DIRECTORY, exist_ok=True)

    # Display instructions
    print('')
    print('------')
    print('------------')
    print('------------------------')
    print(
        'Some tests require custom values. Update the necessary fields for the tests you wish to run.'
    )
    print('EDIT: ' + CONFIG_FILE)
    print('------------------------')
    print(
        'Some tests require external assets. Join the following group to gain access.'
    )
    print('VISIT: https://groups.google.com/forum/#!forum/starthinker-assets')
    print('------------------------')
    print('------------')
    print('------')
    print('')

    # Pause so the instructions can be read before further output.
    sleep(3)

    return recipes
def initialize_tests():
    """Refresh the test config file from the scripts returned by load_tests().

    Merges the fields declared by each script with the values already stored
    in CONFIG_FILE, keeping stored values and falling back to each field's
    default, then saves the result and prints editing instructions.

    Args:
      None

    Returns:
      None
    """
    # Get old fields from the config file
    print('UPDATE CONFIG')

    old_fields = {}
    if os.path.exists(CONFIG_FILE):
        with open(CONFIG_FILE, 'r') as f:
            old_fields = json.load(f)

    # Get new fields from test files and merge in old values
    fields = {}
    for filename, script in load_tests():
        script_name = filename.split('.')[0]
        old_values = old_fields.get(script_name, {})

        for field in json_get_fields(script):
            field_name = field["name"]
            values = fields.setdefault(script_name, {})
            values[field_name] = old_values.get(field_name, field.get("default", ''))
            values['%s_description' % field_name] = '(%s) %s' % (
                field.get('kind', 'string'),
                field.get('description', 'No description.'))

            if field_name not in old_values:
                print('NEW FIELD ADDED', script_name, field_name)

    # Save field values to config file
    if fields:
        # Serialize first so a failure cannot leave a truncated config file.
        payload = json.dumps(fields, sort_keys=True, indent=2)
        with open(CONFIG_FILE, "w") as f:
            f.write(payload)
    else:
        print('WARNING CONFIGURATION IS EMPTY, CHECK YOUR PATHS!')

    # Display instructions
    print("")
    print("------")
    print("------------")
    print("------------------------")
    print(
        "Some tests require custom values. Update the necessary fields for the tests you wish to run."
    )
    print("EDIT: " + CONFIG_FILE)
    print("------------------------")
    print(
        "Some tests require external assets. Join the following group to gain access."
    )
    print("VISIT: https://groups.google.com/forum/#!forum/starthinker-assets")
    print("------------------------")
    print("------------")
    print("------")
    print("")

    # Pause so the instructions can be read before further output.
    sleep(3)