def __eval_fexpl(self, u, t):
    """
    Helper routine to evaluate the explicit part of the RHS

    Args:
        u: current values (not used here)
        t: current time

    Returns:
        explicit part of RHS
    """

    fexpl = mesh(self.nvars)

    # Copy values of u into pyClaw state object
    self.state.q[0, :, :] = u.values[0, :, :]

    # Evaluate right hand side
    self.solver.before_step(self.solver, self.state)
    tmp = self.solver.dqdt(self.state)
    fexpl.values[0, :, :] = unflatten(tmp, 1, self.nvars[1], self.nvars[2])

    return fexpl
def solve_system(self, rhs, factor, u0, t):
    """
    Simple linear solver for (I-dtA)u = rhs

    Args:
        rhs: right-hand side for the nonlinear system
        factor: abbrev. for the node-to-node stepsize (or any other factor required)
        u0: initial guess for the iterative solver (not used here so far)
        t: current time (e.g. for time-dependent BCs)

    Returns:
        solution as mesh
    """

    b = rhs.values.flatten()
    # NOTE: A = -M, therefore solve Id + factor*M here
    sol, info = LA.gmres(self.Id + factor * self.c_s * self.M, b,
                         x0=u0.values.flatten(), tol=1e-13,
                         restart=10, maxiter=20)
    me = mesh(self.nvars)
    me.values = unflatten(sol, 3, self.N[0], self.N[1])

    return me
def test_again(self):
    self.assertDictEqual(
        unflatten({
            'a': 1,
            'b': {
                0: 'c',
                1: {
                    0: 'd',
                    1: {'e': {'f': -1, 'g': 'h'}}
                }
            }
        }),
        {'a': 1, 'b': ['c', ['d', {'e': {'f': -1, 'g': 'h'}}]]})
def solve_system(self, rhs, factor, u0, t):
    """
    Simple linear solver for (I-dtA)u = rhs

    Args:
        rhs: right-hand side for the nonlinear system
        factor: abbrev. for the node-to-node stepsize (or any other factor required)
        u0: initial guess for the iterative solver (not used here so far)
        t: current time (e.g. for time-dependent BCs)

    Returns:
        solution as mesh
    """

    b = rhs.values.flatten()
    cb = Callback()

    sol, info = LA.gmres(self.Id - factor * self.M, b,
                         x0=u0.values.flatten(), tol=self.gmres_tol,
                         restart=self.gmres_restart,
                         maxiter=self.gmres_maxiter, callback=cb)

    # If this is a dummy call with factor==0.0, do not log because it should
    # not be counted as a solver call
    if factor != 0.0:
        self.logger.add(cb.getcounter())

    me = mesh(self.nvars)
    me.values = unflatten(sol, 4, self.N[0], self.N[1])

    return me
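# A minimal, self-contained sketch of the GMRES pattern used in solve_system
# above, assuming LA is scipy.sparse.linalg; the matrix M, the factor and the
# right-hand side b below are stand-ins, not values from the solvers above.
import numpy as np
import scipy.sparse as sp
import scipy.sparse.linalg as LA

N = 100
Id = sp.identity(N, format='csr')
# Simple 1D Laplacian-like stencil as a stand-in system matrix
M = sp.diags([-2.0, 1.0, 1.0], [0, -1, 1], shape=(N, N), format='csr')
b = np.ones(N)
factor = 0.1

# Solve (Id - factor*M) x = b with restarted GMRES, as the solvers above do
x, info = LA.gmres(Id - factor * M, b, x0=np.zeros(N), restart=10, maxiter=20)
assert info == 0  # info == 0 signals convergence in SciPy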
def classify(self, data, model, output, parse=None, goldData=None):
    print("--------- Rule based unmerging ---------", file=sys.stderr)
    model = self.openModel(model, "r")
    exampleFileName = output + ".examples.gz"
    self.buildExamples(model, [data], [exampleFileName], [goldData])
    if parse is None:
        parse = self.getStr("parse", model)
    unmergedXML = unflatten(xml, parse, parse)
    STFormat.ConvertXML.toSTFormat(unmergedXML, "rulebased-unmerging-geniaformat",
                                   getA2FileTag(options.task, subTask))
    # Evaluation of the Shared Task format
    if self.stEvaluator is not None:
        # TODO: Store task/subtask in model
        self.stEvaluator.evaluate(output + ".tar.gz")
def resolve(self, app_config):
    jsonpath_expr = parse(f'$..{self.key}.`parent`')
    results = jsonpath_expr.find(app_config)
    count = len(results)
    if count > 0:
        logging.info(f'Needs to resolve {count} values by {self.key} module')
        provider = self.provider()
        resolved = {}
        # Merge each fetched value back into a nested dict keyed by its full JSONPath
        for match in results:
            merge(resolved,
                  unflatten({f'{match.full_path}': self.fetch(match.value[self.key], provider)}),
                  strategy=Strategy.ADDITIVE)
        return merge(nested_delete(app_config, self.key), resolved,
                     strategy=Strategy.ADDITIVE)
    else:
        return app_config
def new_credential_builder(self, new_credential: dict, unflatten_dict: dict) -> dict:
    """
    Update and return the new_credential.

    Args:
        new_credential: credential dict to be updated and returned
        unflatten_dict: dict with traversal path as key and match_value as value

    Return: dict
    """
    new_credential.update(unflatten(unflatten_dict))
    return new_credential
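# A hypothetical illustration of new_credential_builder, assuming the
# unflatten in scope expands dot-separated traversal paths into nested dicts;
# the credential fields below are made up.
cred = {'id': 'cred-1'}
flat = {'attrs.name': 'Alice', 'attrs.degree': 'BSc'}
# cred.update(unflatten(flat)) would then leave cred as:
# {'id': 'cred-1', 'attrs': {'name': 'Alice', 'degree': 'BSc'}}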
def csv2jsonl(json_eng, csv_eng):
    # Datasets whose rows contain nested structures that must be rebuilt
    nested_datasets = ['MultiRC', 'WSC', 'ReCoRD']
    for file in os.listdir(csv_eng):
        save_to = os.path.join(json_eng, file[:-4] + '.jsonl')
        if os.path.join(csv_eng, file).split('/')[-2] in nested_datasets:
            df = pd.read_csv(os.path.join(csv_eng, file))
            with jsonlines.open(save_to, mode='w') as writer:
                for sample in df.iterrows():
                    sample = sample[1].dropna()
                    sample = unflatten(sample.to_dict())
                    writer.write(sample)
        else:
            df = pd.read_csv(os.path.join(csv_eng, file), encoding='utf-8')
            df.to_json(path_or_buf=save_to, orient='records', lines=True,
                       force_ascii=False)
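# A hypothetical example of the per-row round trip in csv2jsonl: a flattened
# CSV row (dot-separated column names) rebuilt into a nested JSONL record.
# The column names below are made up, not taken from the actual datasets.
row = {'idx': 0, 'passage.text': 'Some passage.',
       'passage.questions.0.question': 'Why?'}
# Depending on the unflatten variant in use, unflatten(row) would rebuild
# something like:
# {'idx': 0, 'passage': {'text': 'Some passage.',
#                        'questions': [{'question': 'Why?'}]}}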
def test_simple(self):
    self.assertDictEqual(
        unflatten({
            'a': 1,
            'b[0]': 'c',
            'b[1][0]': 'd',
            'b[1][1][e][f]': -1,
            'b[1][1][e][g]': 'h'
        }),
        {'a': 1, 'b': ['c', ['d', {'e': {'f': -1, 'g': 'h'}}]]})
def __eval_fimpl(self, u, t):
    """
    Helper routine to evaluate the implicit part of the RHS

    Args:
        u: current values
        t: current time (not used here)

    Returns:
        implicit part of RHS
    """

    temp = u.values.flatten()
    temp = self.M.dot(temp)
    fimpl = mesh(self.nvars, val=0.0)
    fimpl.values = unflatten(temp, 4, self.N[0], self.N[1])

    return fimpl
def test_dot_colon(self):
    self.assertDictEqual(
        unflatten({
            'a': 1,
            'b:0': 'c',
            'b:1:0': 'd',
            'b:1:1.e.f': -1,
            'b:1:1.e.g': 'h'
        }, split=dot_colon_split),
        {'a': 1, 'b': ['c', ['d', {'e': {'f': -1, 'g': 'h'}}]]})
def __eval_fimpl(self, u, t):
    """
    Helper routine to evaluate the implicit part of the RHS

    Args:
        u: current values
        t: current time (not used here)

    Returns:
        implicit part of RHS
    """

    temp = u.values.flatten()
    temp = self.M.dot(temp)
    fimpl = mesh(self.nvars, val=0.0)
    # NOTE: M = -A, therefore add a minus here
    fimpl.values = unflatten(-self.c_s * temp, 3, self.N[0], self.N[1])

    return fimpl
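# Note that unflatten in these PDE snippets is a reshape helper for flat state
# vectors, not the dict-unflattening function used elsewhere in this file. A
# minimal sketch of such a helper, assuming it maps a flat vector back to
# (ncomp, Nx, Ny); the name unflatten_state is made up for illustration.
import numpy as np

def unflatten_state(vec, ncomp, nx, ny):
    # Inverse of vec = values.flatten(): restore the component-major layout
    return vec.reshape((ncomp, nx, ny))

v = np.arange(3 * 4 * 5, dtype=float)
assert unflatten_state(v, 3, 4, 5).shape == (3, 4, 5)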
def __eval_fexpl(self, u, t):
    """
    Helper routine to evaluate the explicit part of the RHS

    Args:
        u: current values (not used here)
        t: current time

    Returns:
        explicit part of RHS
    """

    # Evaluate right hand side
    fexpl = mesh(self.nvars, val=0.0)
    temp = u.values.flatten()
    temp = self.D_upwind.dot(temp)
    fexpl.values = unflatten(temp, 4, self.N[0], self.N[1])

    return fexpl
def __eval_fexpl(self, u, t):
    """
    Helper routine to evaluate the explicit part of the RHS

    Args:
        u: current values (not used here)
        t: current time

    Returns:
        explicit part of RHS
    """

    # Evaluate right hand side
    fexpl = mesh(self.nvars)
    temp = u.values.flatten()
    temp = self.D_upwind.dot(temp)
    # NOTE: M_adv = -D_upwind, therefore add a minus here
    fexpl.values = unflatten(-self.u_adv * temp, 3, self.N[0], self.N[1])

    return fexpl
def test_unflatten(label, flattened, unflattened):
    assert unflatten(flattened) == unflattened
def test_unflatten_mixed_node_types(keys):
    with pytest.raises(ValueError) as ctx:
        unflatten((key, {'val_for_key': key}) for key in keys)
    assert str(ctx.value).startswith("conflicting types")
def test_unflatten_nonstring_key():
    with pytest.raises(TypeError) as ctx:
        # No assert needed on the return value: the call is expected to raise
        unflatten([(42, 'val')])
    assert "must be strings" in str(ctx.value)
    uimex = rkimex.timestep(uimex, dt_imex)

# call main function to get things done...
print("Running SDC...")
uend, stats = mp.run_pfasst(MS, u0=uinit, t0=t0, dt=dt, Tend=Tend)

# For the reference solution, tighten the GMRES tolerance
P.gmres_tol_limit = 1e-10
rkimexref = rk_imex(P, 5)
uref = np.copy(u0)
dt_ref = dt / 10.0
print("Running RK-IMEX reference....")
for i in range(0, 10 * Nsteps):
    uref = rkimexref.timestep(uref, dt_ref)

udirk = unflatten(udirk, 4, P.N[0], P.N[1])
uimex = unflatten(uimex, 4, P.N[0], P.N[1])
uref = unflatten(uref, 4, P.N[0], P.N[1])

np.save('xaxis', P.xx)
np.save('sdc', uend.values)
np.save('dirk', udirk)
np.save('rkimex', uimex)
np.save('uref', uref)

print(" #### Logging report for DIRK-%1i #### " % dirkp.order)
print("Number of calls to implicit solver: %5i" % dirkp.logger.solver_calls)
print("Total number of GMRES iterations: %5i" % dirkp.logger.iterations)
print("Average number of iterations per call: %6.3f" %
      (float(dirkp.logger.iterations) / float(dirkp.logger.solver_calls)))
print(" ")
print(" #### Logging report for RK-IMEX-%1i #### " % rkimex.order)
import json
import sys

import boto3
import toml
import yaml
from ec2_metadata import ec2_metadata
# Properties is assumed to come from the jproperties package
from jproperties import Properties

client = boto3.client('secretsmanager', region_name=ec2_metadata.region)

if len(sys.argv) == 1:
    print("No secrets to be mounted, exiting")
    sys.exit(0)

secret_names = sys.argv[1].split(",")
out_directory = sys.argv[2]
file_type = sys.argv[3]

# Map secret names to values, turning path separators into dotted keys
values = {
    secret["Name"].replace('/', '.'): secret["SecretString"]
    for secret in map(lambda name: client.get_secret_value(SecretId=name),
                      secret_names)
}

file_name = "%s/secrets.%s" % (out_directory, file_type)
if file_type == "yaml":
    yaml.dump(unflatten(values), open(file_name, 'w'), explicit_start=True)
elif file_type == "json":
    json.dump(unflatten(values), open(file_name, 'w'))
elif file_type == "toml":
    toml.dump(unflatten(values), open(file_name, 'w'))
else:
    properties = Properties()
    properties.properties = values
    with open(file_name, "wb") as out_file:
        properties.store(out_file, strict=True)
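# A hypothetical illustration of the key transformation above: secret names
# like 'app/db/password' become dotted keys, which unflatten expands into the
# nested mapping that yaml.dump/json.dump/toml.dump write out. The names and
# values below are made up.
values = {'app.db.password': 's3cr3t', 'app.db.user': 'admin'}
# unflatten(values) would produce:
# {'app': {'db': {'password': 's3cr3t', 'user': 'admin'}}}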
def main():
    if len(sys.argv) < 2:
        sys.exit('Provide the path to the exported CSV file you would like to import.')
    export_path = sys.argv[1]
    with open(export_path, 'r') as csvfile:
        data = csv.reader(csvfile)
        header = next(data)
        if len(header) < 3:
            sys.exit('The header for the third column must be a language code.')
        language = header[2]

        # Make sure the folder exists.
        language_folder = os.path.join('translations', language)
        if not os.path.isdir(language_folder):
            os.mkdir(language_folder)

        yaml_files = {}
        for row in data:
            key_string = row[0]
            key_parts = key_string.split(':')
            filename = key_parts[0]
            key_flat = key_parts[1]
            # For now replace dots with something recognizable that we can
            # replace later. This is because dots mess up the "unflatten"
            # library.
            key_flat = key_flat.replace('.', '^^^')
            # Along the same lines, we now put dots where we actually want
            # dots. The export script uses a separation string of "---"
            # instead of dots, so now let's replace those, to prepare for
            # unflattening.
            key_flat = key_flat.replace('---', '.')
            translation = row[2]
            if filename not in yaml_files:
                # Start with an empty dict.
                yaml_files[filename] = {}
                # But also check to see if there is existing data.
                filepath = os.path.join(language_folder, filename + '.yml')
                if os.path.isfile(filepath):
                    with open(filepath, 'r') as infile:
                        existing = yaml.safe_load(infile)
                    if existing:
                        yaml_files[filename] = existing
            # Unflatten and merge the data into our yaml_files dict.
            unflattened = unflatten({key_flat: translation})
            yaml_files[filename] = merge_dicts(unflattened, yaml_files[filename])

    # Put the dots back into the keys.
    yaml_files = change_keys(yaml_files, lambda key: key.replace('^^^', '.'))

    # Loop through the yaml_files dict and write any changes to file.
    for yaml_file in yaml_files:
        yaml_path = os.path.join(language_folder, yaml_file + '.yml')
        with open(yaml_path, 'w') as outfile:
            yaml.dump(yaml_files[yaml_file], outfile,
                      default_flow_style=False, allow_unicode=True)
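# A small illustration of the dot-escaping trick in main() above: literal dots
# in keys are hidden before unflattening (so they are not treated as
# separators) and restored afterwards. The key below is made up.
key = 'homepage---title.text'  # '---' marks nesting, '.' is a literal dot
key = key.replace('.', '^^^').replace('---', '.')
assert key == 'homepage.title^^^text'
# unflatten({key: 'Hello'}) then nests on the real dots only:
# {'homepage': {'title^^^text': 'Hello'}}
# and change_keys later restores the literal dot:
# {'homepage': {'title.text': 'Hello'}}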
def test_unflatten_missing_array_key():
    with pytest.raises(ValueError) as ctx:
        unflatten({'a[1]': 'a1'})
    assert str(ctx.value).startswith('missing key')
    assert 'a[0]' in str(ctx.value)
def get_nested_dict(dictionary):
    nested_dict = unflatten(dictionary)
    return nested_dict
def uploadDocuments():
    """
    Perform a merge between DynamoDB documents and topics from Comprehend.
    Then upload documents on CloudSearch. Both add new documents and update.
    """
    # Parse CSV
    df = pd.read_csv('doc-topics.csv',
                     dtype={"docname": str, "topic": str, "proportion": float})
    df = df[df.proportion > 0.1]

    # Format document and topics table for easier merging.
    results = []
    for docname, bag in df.groupby(["docname"]):
        contents_df = bag.drop(["docname", 'proportion'], axis=1)
        subset = [OrderedDict(row) for i, row in contents_df.iterrows()]
        results.append(OrderedDict([("id", docname), ("topics", subset)]))
    for result in results:
        topics = [i['topic'] for i in result['topics']]
        result['fields'] = {'topics': topics}
        del result['topics']

    # Create topic file.
    with open("topicFile.json", 'w', encoding="utf-8") as topics_file:
        topics_file.write(json.dumps(results))
    print('Topics file created.')

    # Fetch all data to reindex
    result_items = []
    response = allScraped_table.scan(IndexName="last_update-id-index")
    result_items.extend(response['Items'])
    # Perform scan through all the table.
    while 'LastEvaluatedKey' in response:
        response = allScraped_table.scan(
            IndexName="last_update-id-index",
            ExclusiveStartKey=response['LastEvaluatedKey'])
        result_items.extend(response['Items'])

    # Format DynamoDB articles.
    batch = []
    for i in result_items:
        # Build doc
        doc = {'id': i['id'], 'type': 'add', 'fields': {}}
        doc['fields']['title'] = i['title']
        doc['fields']['authors'] = i['authors']
        doc['fields']['abstract'] = i['abstract']
        doc['fields']['release_date'] = i['release_date']
        doc['fields']['article_type'] = i['article_type']
        # Prevent optional data from adding unwanted objects.
        if i['file_url'] is not None:
            doc['fields']['file_url'] = i['file_url']
        if i['keywords'] is not None:
            doc['fields']['keywords'] = i['keywords']
        if i['fulltext'] is not None:
            doc['fields']['fulltext'] = i['fulltext']
        doc['fields']['last_update'] = int(i['last_update'])
        batch.append(doc)

    # Create document file.
    with open("docFile.json", 'w', encoding="utf-8") as docs_file:
        docs_file.write(json.dumps(batch))
    print('Documents file created.')

    print('Start merging both files.')
    # Open and flatten documents
    with open('docFile.json') as f:
        data = json.load(f)
    doc_df = json_normalize(data)
    # Open and flatten topics
    with open('topicFile.json') as f:
        data = json.load(f)
    topic_df = json_normalize(data)
    # Add topics to data
    results = doc_df.merge(topic_df, how='inner', on='id')
    print('Merging done. Start jsonify.')

    # Reformat json for CloudSearch API.
    docCount = 0
    itemsCount = 0
    result_items = results.to_dict('records')
    batch = []
    for r in result_items:
        item = unflatten(r)
        # Treat NaN cells: NaN != NaN, so this detects values the merge left
        # empty (see the short demonstration after this script).
        if item['fields']['file_url'] != item['fields']['file_url']:
            del item['fields']['file_url']
        if item['fields']['keywords'] != item['fields']['keywords']:
            del item['fields']['keywords']
        if item['fields']['fulltext'] != item['fields']['fulltext']:
            del item['fields']['fulltext']
        batch.append(item)
        itemsCount += 1
        # Separate upload file in smaller fragments to avoid OS socket exception.
        if itemsCount > 4000 or r == result_items[len(result_items) - 1]:
            # Create file
            with open("updateTopic_" + str(docCount) + ".json", 'w',
                      encoding="utf-8") as updateCloudSearch_file:
                updateCloudSearch_file.write(json.dumps(batch))
            print("Update file n°" + str(docCount) + " complete with "
                  + str(itemsCount) + " documents.")
            docCount += 1
            itemsCount = 0
            batch = []

    # Start indexing.
    if len(result_items) > 0:
        print("Start indexing.")
        # Upload every fragment written above (the original hard-coded
        # range(4) assumed exactly four fragments).
        for doc in range(docCount):
            # Call upload
            docEd = 'http://doc-micorr-test-yzjuar4kajhkoii2hgziiq5vxy.us-east-1.cloudsearch.amazonaws.com'
            updateFile = "updateTopic_" + str(doc) + ".json"
            run(["aws", "cloudsearchdomain", "--endpoint-url", docEd,
                 "upload-documents", "--content-type", "application/json",
                 "--documents", updateFile])
    else:
        print("Nothing to index.")
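# The NaN checks in uploadDocuments rely on the fact that NaN is the only
# value that compares unequal to itself, which is how pandas marks cells the
# inner merge left empty. A minimal demonstration:
import math

x = float('nan')
assert x != x          # only NaN fails this self-comparison
assert math.isnan(x)   # the explicit check it stands in for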