Example #1
    def __eval_fexpl(self, u, t):
        """
        Helper routine to evaluate the explicit part of the RHS

        Args:
            u: current values (not used here)
            t: current time

        Returns:
            explicit part of RHS
        """

        fexpl = mesh(self.nvars)

        # Copy values of u into pyClaw state object
        self.state.q[0, :, :] = u.values[0, :, :]

        # Evaluate right hand side
        self.solver.before_step(self.solver, self.state)
        tmp = self.solver.dqdt(self.state)

        fexpl.values[0, :, :] = unflatten(tmp, 1, self.nvars[1], self.nvars[2])

        # Copy values of u into pyClaw state object
        #self.state.q[0,:,:] = u.values[1,:,:]

        # Evaluate right hand side
        #tmp = self.solver.dqdt(self.state)
        #fexpl.values[1,:,:] = tmp.reshape(self.nvars[1:])

        return fexpl
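
In the pySDC-style examples here and below (Examples #3, #5, #11 through #16), unflatten reshapes a flat solution vector back into a (dim, Nx, Ny) array. A minimal sketch of such a helper in plain NumPy, under the assumption that this is all it does (the project's own implementation may differ):

import numpy as np

def unflatten(uin, dim, nx, ny):
    # Reshape a flat vector of length dim*nx*ny into a (dim, nx, ny) array.
    return np.asarray(uin).reshape(dim, nx, ny)

# A flat vector with 1*4*3 entries becomes a (1, 4, 3) array, matching
# the call unflatten(tmp, 1, self.nvars[1], self.nvars[2]) above.
assert unflatten(np.arange(12.0), 1, 4, 3).shape == (1, 4, 3)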
Example #3
    def solve_system(self, rhs, factor, u0, t):
        """
        Simple linear solver for (I-dtA)u = rhs

        Args:
            rhs: right-hand side for the linear system
            factor: abbrev. for the node-to-node stepsize (or any other factor required)
            u0: initial guess for the iterative solver (not used here so far)
            t: current time (e.g. for time-dependent BCs)

        Returns:
            solution as mesh
        """

        b = rhs.values.flatten()
        # NOTE: A = -M, therefore solve Id + factor*M here
        sol, info = LA.gmres(self.Id + factor * self.c_s * self.M,
                             b,
                             x0=u0.values.flatten(),
                             tol=1e-13,
                             restart=10,
                             maxiter=20)
        me = mesh(self.nvars)
        me.values = unflatten(sol, 3, self.N[0], self.N[1])

        return me
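
LA here is presumably scipy.sparse.linalg (note that newer SciPy releases rename the tol= keyword to rtol=). A self-contained sketch of the same pattern on a small sparse system, with illustrative sizes and values only:

import numpy as np
import scipy.sparse as sp
import scipy.sparse.linalg as LA

n = 16
M = sp.random(n, n, density=0.2, format='csr', random_state=0)
Id = sp.identity(n, format='csr')
b = np.ones(n)
factor = 0.1

# Mirrors the solve_system call above: solve (Id + factor*M) x = b.
# info == 0 signals that GMRES converged.
sol, info = LA.gmres(Id + factor * M, b, x0=np.zeros(n), restart=10, maxiter=20)
print(info, np.linalg.norm((Id + factor * M) @ sol - b))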
Example #4
 def test_again(self):
     self.assertDictEqual(
         unflatten({
             'a': 1,
             'b': {
                 0: 'c',
                 1: {
                     0: 'd',
                     1: {
                         'e': {
                             'f': -1,
                             'g': 'h'
                         }
                     }
                 }
             }
         }), {
             'a': 1,
             'b': ['c', ['d', {
                 'e': {
                     'f': -1,
                     'g': 'h'
                 }
             }]]
         })
Example #5
    def solve_system(self,rhs,factor,u0,t):
        """
        Simple linear solver for (I-dtA)u = rhs

        Args:
            rhs: right-hand side for the linear system
            factor: abbrev. for the node-to-node stepsize (or any other factor required)
            u0: initial guess for the iterative solver (not used here so far)
            t: current time (e.g. for time-dependent BCs)

        Returns:
            solution as mesh
        """

        b         = rhs.values.flatten()
        cb        = Callback()

        sol, info = LA.gmres(self.Id - factor * self.M,
                             b,
                             x0=u0.values.flatten(),
                             tol=self.gmres_tol,
                             restart=self.gmres_restart,
                             maxiter=self.gmres_maxiter,
                             callback=cb)
        # If this is a dummy call with factor==0.0, do not log because it should not be counted as a solver call
        if factor != 0.0:
            # print("SDC: Number of GMRES iterations: %3i --- Final residual: %6.3e" % (cb.getcounter(), cb.getresidual()))
            self.logger.add(cb.getcounter())
        me        = mesh(self.nvars)
        me.values = unflatten(sol, 4, self.N[0], self.N[1])

        return me
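
The Callback object above is project-specific. A minimal sketch of an iteration counter compatible with SciPy's gmres callback hook, inferring the interface from the getcounter()/getresidual() calls (an assumption):

class Callback:
    def __init__(self):
        self.counter = 0
        self.residual = None

    def __call__(self, rk):
        # With the default callback_type, SciPy invokes the callback once per
        # iteration with the (preconditioned) residual norm.
        self.counter += 1
        self.residual = rk

    def getcounter(self):
        return self.counter

    def getresidual(self):
        return self.residual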
Example #6
 def classify(self, data, model, output, parse=None, goldData=None):
     print("--------- Rule based unmerging ---------", file=sys.stderr)
     model = self.openModel(model, "r")
     exampleFileName = output+".examples.gz"
     self.buildExamples(model, [data], [exampleFileName], [goldData])
     if parse is None:
         parse = self.getStr("parse", model)
     unmergedXML = unflatten(xml, parse, parse)
     STFormat.ConvertXML.toSTFormat(unmergedXML, "rulebased-unmerging-geniaformat", getA2FileTag(options.task, subTask))
     # Evaluation of the Shared Task format
     if self.stEvaluator is not None:
         # TODO: Store task/subtask in model
         self.stEvaluator.evaluate(output+".tar.gz")
Example #7
 def resolve(self, app_config):
     jsonpath_expr = parse(f'$..{self.key}.`parent`')
     results = jsonpath_expr.find(app_config)
     count = len(results)
     if count > 0:
         logging.info(f'Needs to resolve {count} values by {self.key} module')
         provider = self.provider()
         resolved = {}
         for match in results:
             merge(resolved,
                   unflatten({f'{match.full_path}': self.fetch(match.value[self.key], provider)}),
                   strategy=Strategy.ADDITIVE)
         return merge(nested_delete(app_config, self.key), resolved, strategy=Strategy.ADDITIVE)
     else:
         return app_config
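
The helpers in this resolver are not imported in the excerpt. Plausible sources, inferred from the names and call patterns (assumptions, not confirmed by the source):

from jsonpath_ng.ext import parse        # supports the `parent` extension used above
from mergedeep import merge, Strategy    # merge(..., strategy=Strategy.ADDITIVE)
from nested_lookup import nested_delete  # nested_delete(app_config, self.key)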
Example #8
    def new_credential_builder(
        self, new_credential: dict, unflatten_dict: dict
    ) -> dict:
        """
        Update and return the new_credential.

        Args:
            new_credential: credential dict to be updated and returned
            unflatten_dict: dict with traversal path as key and match_value as value
        Return:
            dict

        """
        new_credential.update(unflatten(unflatten_dict))
        return new_credential
Example #9
def csv2jsonl(json_eng, csv_eng):
    nested_datasets = ['MultiRC', 'WSC', 'ReCoRD']

    for file in os.listdir(csv_eng):
        save_to = os.path.join(json_eng, file[:-4] + '.jsonl')

        if os.path.join(csv_eng, file).split('/')[-2] in nested_datasets:
            df = pd.read_csv(os.path.join(csv_eng, file))
            with jsonlines.open(save_to, mode='w') as writer:
                for sample in df.iterrows():
                    sample = sample[1].dropna()
                    sample = unflatten(sample.to_dict())
                    writer.write(sample)
        else:
            df = pd.read_csv(os.path.join(csv_eng, file), encoding='utf-8')
            df.to_json(path_or_buf=save_to, orient='records', lines=True, force_ascii=False)  # force_ascii
Example #10
 def test_simple(self):
     self.assertDictEqual(
         unflatten({
             'a': 1,
             'b[0]': 'c',
             'b[1][0]': 'd',
             'b[1][1][e][f]': -1,
             'b[1][1][e][g]': 'h'
         }), {
             'a': 1,
             'b': ['c', ['d', {
                 'e': {
                     'f': -1,
                     'g': 'h'
                 }
             }]]
         })
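
Examples #4, #10, #13, and the error tests in Examples #20, #21, and #25 exercise a dict-style unflatten that parses bracketed keys into nested dicts and lists. A simplified, self-contained sketch of the core idea (the real package also handles conflicting types, missing indices, and custom splitters, as those tests show):

import re

def unflatten_simple(flat):
    # Stage 1: build a tree of nested dicts, keeping list positions as ints.
    tree = {}
    for key, value in flat.items():
        tokens = [int(t) if t.isdigit() else t
                  for t in re.findall(r'[^\[\]]+', key)]
        node = tree
        for tok in tokens[:-1]:
            node = node.setdefault(tok, {})
        node[tokens[-1]] = value

    # Stage 2: turn dicts whose keys are all ints into lists.
    def listify(node):
        if not isinstance(node, dict):
            return node
        if node and all(isinstance(k, int) for k in node):
            return [listify(node[i]) for i in sorted(node)]
        return {k: listify(v) for k, v in node.items()}

    return listify(tree)

assert unflatten_simple({'a': 1, 'b[0]': 'c', 'b[1][0]': 'd'}) == \
    {'a': 1, 'b': ['c', ['d']]}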
Example #11
    def __eval_fimpl(self,u,t):
        """
        Helper routine to evaluate the implicit part of the RHS

        Args:
            u: current values
            t: current time (not used here)

        Returns:
            implicit part of RHS
        """

        temp         = u.values.flatten()
        temp         = self.M.dot(temp)
        fimpl        = mesh(self.nvars,val=0.0)
        fimpl.values = unflatten(temp, 4, self.N[0], self.N[1])
        
        return fimpl
Example #12
    def __eval_fimpl(self,u,t):
        """
        Helper routine to evaluate the implicit part of the RHS

        Args:
            u: current values
            t: current time (not used here)

        Returns:
            implicit part of RHS
        """

        temp = u.values.flatten()
        temp = self.M.dot(temp)
        fimpl = mesh(self.nvars,val=0)
        # NOTE: M = -A, therefore add a minus here
        fimpl.values = unflatten(-self.c_s*temp, 3, self.N[0], self.N[1])
        
        return fimpl
Example #13
 def test_dot_colon(self):
     self.assertDictEqual(
         unflatten(
             {
                 'a': 1,
                 'b:0': 'c',
                 'b:1:0': 'd',
                 'b:1:1.e.f': -1,
                 'b:1:1.e.g': 'h'
             },
             split=dot_colon_split), {
                 'a': 1,
                 'b': ['c', ['d', {
                     'e': {
                         'f': -1,
                         'g': 'h'
                     }
                 }]]
             })
Example #14
    def __eval_fimpl(self,u,t):
        """
        Helper routine to evaluate the implicit part of the RHS

        Args:
            u: current values
            t: current time (not used here)

        Returns:
            implicit part of RHS
        """

        temp = u.values.flatten()
        temp = self.M.dot(temp)
        fimpl = mesh(self.nvars,val=0.0)
        # NOTE: M = -A, therefore add a minus here
        fimpl.values = unflatten(-self.c_s*temp, 3, self.N[0], self.N[1])
        
        return fimpl
Example #15
    def __eval_fexpl(self,u,t):
        """
        Helper routine to evaluate the explicit part of the RHS

        Args:
            u: current values (not used here)
            t: current time

        Returns:
            explicit part of RHS
        """
        
        # Evaluate right hand side
        fexpl        = mesh(self.nvars,val=0.0)
        temp         = u.values.flatten()
        temp         = self.D_upwind.dot(temp)
        fexpl.values = unflatten( temp, 4, self.N[0], self.N[1])
              
        return fexpl
Example #16
    def __eval_fexpl(self,u,t):
        """
        Helper routine to evaluate the explicit part of the RHS

        Args:
            u: current values (not used here)
            t: current time

        Returns:
            explicit part of RHS
        """
        
        # Evaluate right hand side
        fexpl = mesh(self.nvars)
        temp  = u.values.flatten()
        temp  = self.D_upwind.dot(temp)
        # NOTE: M_adv = -D_upwind, therefore add a minus here
        fexpl.values = unflatten(-self.u_adv*temp, 3, self.N[0], self.N[1])
              
        #fexpl.values = np.zeros((3, self.N[0], self.N[1]))
        return fexpl
Example #19
def test_unflatten(label, flattened, unflattened):
    assert unflatten(flattened) == unflattened
Example #20
def test_unflatten_mixed_node_types(keys):
    with pytest.raises(ValueError) as ctx:
        unflatten((key, {'val_for_key': key}) for key in keys)
    assert str(ctx.value).startswith("conflicting types")
Example #21
def test_unflatten_nonstring_key():
    with pytest.raises(TypeError) as ctx:
        assert unflatten([(42, 'val')])
    assert "must be strings" in str(ctx.value)
Example #22
      uimex = rkimex.timestep(uimex, dt_imex)

    # call main function to get things done...
    print("Running SDC...")
    uend,stats = mp.run_pfasst(MS,u0=uinit,t0=t0,dt=dt,Tend=Tend)

    # For reference solution, increase GMRES tolerance
    P.gmres_tol_limit = 1e-10
    rkimexref = rk_imex(P, 5)
    uref      = np.copy(u0)
    dt_ref    = dt/10.0
    print("Running RK-IMEX reference....")
    for i in range(0,10*Nsteps):
      uref = rkimexref.timestep(uref, dt_ref)
  
    udirk = unflatten(udirk, 4, P.N[0], P.N[1])
    uimex = unflatten(uimex, 4, P.N[0], P.N[1])
    uref  = unflatten(uref,  4, P.N[0], P.N[1])

    np.save('xaxis', P.xx)
    np.save('sdc', uend.values)
    np.save('dirk', udirk)
    np.save('rkimex', uimex)
    np.save('uref', uref)
    
    print(" #### Logging report for DIRK-%1i #### " % dirkp.order)
    print("Number of calls to implicit solver: %5i" % dirkp.logger.solver_calls)
    print("Total number of GMRES iterations: %5i" % dirkp.logger.iterations)
    print("Average number of iterations per call: %6.3f" % (float(dirkp.logger.iterations)/float(dirkp.logger.solver_calls)))
    print(" ")
    print(" #### Logging report for RK-IMEX-%1i #### " % rkimex.order)
Example #23
import json
import sys

import boto3
import toml
import yaml
from ec2_metadata import ec2_metadata

# Assumed sources for the remaining helpers (not shown in the original excerpt):
from jproperties import Properties
from unflatten import unflatten
client = boto3.client('secretsmanager', region_name=ec2_metadata.region)

if len(sys.argv) == 1:
    print("No secrets to be mounted, exiting")
    sys.exit(0)

secret_names = sys.argv[1].split(",")
out_directory = sys.argv[2]
file_type = sys.argv[3]

values = {
    secret["Name"].replace('/', '.'): secret["SecretString"]
    for secret in map(lambda name: client.get_secret_value(SecretId=name),
                      secret_names)
}
file_name = "%s/secrets.%s" % (out_directory, file_type)

if file_type == "yaml":
    yaml.dump(unflatten(values), open(file_name, 'w'), explicit_start=True)
elif file_type == "json":
    json.dump(unflatten(values), open(file_name, 'w'))
elif file_type == "toml":
    toml.dump(unflatten(values), open(file_name, 'w'))
else:
    properties = Properties()
    properties.properties = values

    with open(file_name, "wb") as out_file:
        properties.store(out_file, strict=True)
Example #24
def main():

    if len(sys.argv) < 2:
        sys.exit(
            'Provide the path to the exported CSV file you would like to import.'
        )

    export_path = sys.argv[1]
    with open(export_path, 'r') as csvfile:
        data = csv.reader(csvfile)
        header = next(data)
        if len(header) < 3:
            sys.exit(
                'The header for the third column must be a language code.')
        language = header[2]
        # Make sure the folder exists.
        language_folder = os.path.join('translations', language)
        if not os.path.isdir(language_folder):
            os.mkdir(language_folder)

        yaml_files = {}

        for row in data:
            key_string = row[0]
            key_parts = key_string.split(':')
            filename = key_parts[0]
            key_flat = key_parts[1]

            # For now replace dots with something recognizable that we can
            # replace later. This is because dots mess up the "unflatten"
            # library.
            key_flat = key_flat.replace('.', '^^^')

            # Along the same lines, we now put dots where we actually want dots.
            # The export script uses a separation string of "---" instead of
            # dots, so now let's replace those, to prepare for unflattening.
            key_flat = key_flat.replace('---', '.')

            translation = row[2]

            if filename not in yaml_files:
                # Start with an empty dict.
                yaml_files[filename] = {}
                # But also check to see if there is existing data.
                filepath = os.path.join(language_folder, filename + '.yml')
                if os.path.isfile(filepath):
                    with open(filepath, 'r') as infile:
                        existing = yaml.safe_load(infile)
                        if existing:
                            yaml_files[filename] = existing

            # Unflatten and merge the data into our yaml_files dict.
            unflattened = unflatten({key_flat: translation})
            yaml_files[filename] = merge_dicts(unflattened,
                                               yaml_files[filename])

        # Put the dots back into the keys.
        yaml_files = change_keys(yaml_files,
                                 lambda key: key.replace('^^^', '.'))

        # Loop through the yaml_files dict and write any changes to file.
        for yaml_file in yaml_files:
            yaml_path = os.path.join(language_folder, yaml_file + '.yml')
            with open(yaml_path, 'w') as outfile:
                yaml.dump(yaml_files[yaml_file],
                          outfile,
                          default_flow_style=False,
                          allow_unicode=True)
Example #25
def test_unflatten_missing_array_key():
    with pytest.raises(ValueError) as ctx:
        unflatten({'a[1]': 'a1'})
    assert str(ctx.value).startswith('missing key')
    assert 'a[0]' in str(ctx.value)
Example #26
def get_nested_dict(dictionary):
    nested_dict = unflatten(dictionary)

    return nested_dict
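
A usage sketch, assuming the dict-style unflatten with dot-separated keys:

nested = get_nested_dict({'db.host': 'localhost', 'db.port': 5432})
# -> {'db': {'host': 'localhost', 'port': 5432}}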
Example #27
def uploadDocuments():
    """
    Perform a merge between DynamoDB documents and topics form Comprehend.
    Then upload documents on Cloudsearch. Both add new documents and update.
    """

    # Parse CSV
    df = pd.read_csv('doc-topics.csv',
                     dtype={
                         "docname": str,
                         "topic": str,
                         "proportion": float
                     })

    df = df[df.proportion > 0.1]

    # Format the document and topics table for easier merging.
    results = []
    for docname, bag in df.groupby("docname"):
        contents_df = bag.drop(["docname", 'proportion'], axis=1)
        subset = [OrderedDict(row) for i, row in contents_df.iterrows()]
        results.append(OrderedDict([("id", docname), ("topics", subset)]))
    for result in results:
        topics = []
        for i in result['topics']:
            topics.append(i['topic'])

        result['fields'] = {}
        result['fields']['topics'] = topics
        del result['topics']
        #print(json.dumps(result, indent=4))

    # Create topic file.
    with open("topicFile.json", 'w', encoding="utf-8") as topics_file:
        topics_file.write(json.dumps(results))
    print('Topics file created.')

    # Fetch all data to reindex
    result_items = []
    response = allScraped_table.scan(IndexName="last_update-id-index")
    result_items.extend(response['Items'])

    # Perform scan through all the table.
    while 'LastEvaluatedKey' in response:
        response = allScraped_table.scan(
            IndexName="last_update-id-index",
            ExclusiveStartKey=response['LastEvaluatedKey'])
        result_items.extend(response['Items'])

    # Format DynamoDB articles.
    batch = []
    for i in result_items:
        # Build doc
        doc = {}

        doc['id'] = i['id']
        doc['type'] = 'add'
        doc['fields'] = {}

        doc['fields']['title'] = i['title']
        doc['fields']['authors'] = i['authors']
        doc['fields']['abstract'] = i['abstract']
        doc['fields']['release_date'] = i['release_date']
        doc['fields']['article_type'] = i['article_type']

        # Prevent optional data from adding unwanted objects.
        if i['file_url'] is not None:
            doc['fields']['file_url'] = i['file_url']

        if i['keywords'] is not None:
            doc['fields']['keywords'] = i['keywords']

        if i['fulltext'] is not None:
            doc['fields']['fulltext'] = i['fulltext']

        doc['fields']['last_update'] = int(i['last_update'])

        batch.append(doc)

    # Create document file.
    with open("docFile.json", 'w', encoding="utf-8") as docs_file:
        docs_file.write(json.dumps(batch))
    print('Documents file created.')

    print('Start merging both files.')

    # Open documents
    with open('docFile.json') as f:
        data = json.load(f)
    # Flatten data
    doc_df = json_normalize(data)
    #print("doc_df :\n" + doc_df.head(3).to_string())

    # Open topics
    with open('topicFile.json') as f:
        data = json.load(f)
    # Flatten topics
    topic_df = json_normalize(data)
    #print("topic_df :\n" + topic_df.head(3).to_string())

    # Add topics to data
    results = doc_df.merge(topic_df, how='inner', on='id')
    #print("results :\n" + results.head(3).to_string())

    print('Merging done. Start jsonify.')

    # Reforme json for CloudSearch API.
    docCount = 0
    itemsCount = 0

    result_items = results.to_dict('records')
    batch = []
    for r in result_items:
        item = unflatten(r)

        # Treat NaN cells
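        # (NaN is the only value that compares unequal to itself, so
        # x != x is True exactly for NaN cells.)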
        if item['fields']['file_url'] != item['fields']['file_url']:
            del item['fields']['file_url']

        if item['fields']['keywords'] != item['fields']['keywords']:
            del item['fields']['keywords']

        if item['fields']['fulltext'] != item['fields']['fulltext']:
            del item['fields']['fulltext']
        """
        # Test empty keywords list
        if not item['fields']['keywords']:
            del item['fields']['keywords']
        """

        batch.append(item)
        itemsCount += 1

        # Split the upload into smaller files to avoid an OS socket exception.
        if itemsCount > 4000 or r == result_items[-1]:
            # Create file
            updateCloudSearch_file = open("updateTopic_" + str(docCount) +
                                          ".json",
                                          'w',
                                          encoding="utf-8")
            updateCloudSearch_file.write(json.dumps(batch))
            print("Update file n°" + str(docCount) + " complete with " +
                  str(itemsCount) + " documents.")
            updateCloudSearch_file.close()

            docCount += 1
            itemsCount = 0
            batch = []

    # Start indexing.
    if len(result_items) > 0:
        print("Start indexing.")
        for doc in range(docCount):
            #print("Upload file n°" + str(doc) + " with " + str(itemsCount) + " documents.")
            # Call upload
            docEd = 'http://doc-micorr-test-yzjuar4kajhkoii2hgziiq5vxy.us-east-1.cloudsearch.amazonaws.com'
            updateFile = "updateTopic_" + str(doc) + ".json"
            run([
                "aws", "cloudsearchdomain", "--endpoint-url", docEd,
                "upload-documents", "--content-type", "application/json",
                "--documents", updateFile
            ])
    else:
        print("Nothing to index.")