def setup(session):
    """Register the search, mask, file-search and file-mask contexts for this test.

    Args:
        session: authenticated API session handed through to utils.create_context.
    """
    search_context = {
        "name": search_context_name,
        "matchers": [
            {
                "name": "SsnMatcher",
                "type": "pattern",
                "pattern": r"\b(\d{3}[-]?\d{2}[-]?\d{4})\b"
            },
            {
                "name": "NameMatcher",
                "type": "set",
                # The set file must be addressed by URI, not a bare filesystem path.
                "url": pathlib.Path('names.set').absolute().as_uri()
            }
        ]
    }
    mask_context = {
        "name": mask_context_name,
        "rules": [
            {
                "name": "TestRule",
                "type": "cosort",
                # Fixed: was "enc_fp_aes256_alphanum($\{NAME\})". "\{" / "\}" are
                # not recognized Python escapes, so literal backslashes were sent
                # and the engine never saw the ${NAME} variable. Sibling test
                # modules use the raw-string form below.
                "expression": r"enc_fp_aes256_alphanum(${NAME})"
            }
        ],
        "ruleMatchers": [
            {
                "name": "TestNameRuleMatcher",
                "type": "name",
                "rule": "TestRule",
                # ".*" applies TestRule to every matcher in the search context.
                "pattern": ".*"
            }
        ]
    }
    # File-level contexts simply delegate to the contexts registered above.
    file_search_context = {
        "name": file_search_context_name,
        "matchers": [
            {
                "name": search_context_name,
                "type": "searchContext"
            }
        ]
    }
    file_mask_context = {
        "name": file_mask_context_name,
        "rules": [
            {
                "name": mask_context_name,
                "type": "maskContext"
            }
        ]
    }
    utils.create_context(session, "searchContext", search_context)
    utils.create_context(session, "maskContext", mask_context)
    utils.create_context(session, "files/fileSearchContext", file_search_context)
    utils.create_context(session, "files/fileMaskContext", file_mask_context)
def build_iterator(self, data, for_train):
    """Turn a (time, trajectory) array into a DataLoader of sliding windows.

    Each sample is the last `receptive_field` steps of one (or all)
    trajectories; the target is the value at the following step.

    Args:
        data: 2-D array-like, axis 0 = time, axis 1 = trajectory.
        for_train: use the training batch size with shuffling when True,
            otherwise the prediction batch size without shuffling.

    Returns:
        A gluon DataLoader over (window, next-value) pairs.
    """
    ctx = create_context(self._options.num_gpu)
    window = self.receptive_field
    n_samples = data.shape[0] - window
    n_traj = data.shape[1]

    # Pre-allocate on the target device, then fill window by window.
    X3 = nd.zeros((n_samples, n_traj, window), ctx=ctx)
    y = nd.zeros((n_samples, n_traj), ctx=ctx)
    for start in range(n_samples):
        for traj in range(n_traj):
            X3[start, traj, :] = data[start:start + window, traj]
            y[start, traj] = data[start + window, traj]

    # 'cw' (conditioned-on-all) keeps every trajectory as input channels;
    # otherwise only the selected trajectory is used as input.
    if self._options.model == 'cw':
        dataset = gluon.data.ArrayDataset(X3, y[:, self._options.trajectory])
    else:
        dataset = gluon.data.ArrayDataset(
            X3[:, self._options.trajectory, :],
            y[:, self._options.trajectory])

    if for_train:
        batch = self._options.batch_size
        shuffle = True
    else:
        batch = self._options.batch_size_predict
        shuffle = False
    # 'discard' drops the ragged tail batch so every batch has a fixed shape.
    return gluon.data.DataLoader(dataset, batch, shuffle=shuffle,
                                 last_batch='discard')
def setup(session):
    """Register a minimal literal-pattern search context plus its mask and
    file-level contexts.

    Args:
        session: authenticated API session handed through to utils.create_context.
    """
    search_context = {
        "name": search_context_name,
        "matchers": [
            {
                "name": "TestMatcher",
                "type": "pattern",
                # Matches the literal substring "test".
                "pattern": "test"
            }
        ]
    }
    mask_context = {
        "name": mask_context_name,
        "rules": [
            {
                "name": "TestRule",
                "type": "cosort",
                # Fixed: was "enc_fp_aes256_alphanum($\{NAME\})". "\{" / "\}" are
                # not recognized Python escapes, so literal backslashes were sent
                # and the engine never saw the ${NAME} variable. Sibling test
                # modules use the raw-string form below.
                "expression": r"enc_fp_aes256_alphanum(${NAME})"
            }
        ],
        "ruleMatchers": [
            {
                "name": "TestNameRuleMatcher",
                "type": "name",
                "rule": "TestRule",
                # Apply TestRule only to hits produced by TestMatcher.
                "pattern": "TestMatcher"
            }
        ]
    }
    # File-level contexts delegate to the contexts registered above.
    file_search_context = {
        "name": file_search_context_name,
        "matchers": [
            {
                "name": search_context_name,
                "type": "searchContext"
            }
        ]
    }
    file_mask_context = {
        "name": file_mask_context_name,
        "rules": [
            {
                "name": mask_context_name,
                "type": "maskContext"
            }
        ]
    }
    utils.create_context(session, "searchContext", search_context)
    utils.create_context(session, "maskContext", mask_context)
    utils.create_context(session, "files/fileSearchContext", file_search_context)
    utils.create_context(session, "files/fileMaskContext", file_mask_context)
def context_from_nextflow(self, nf_project_dir):
    """Fetch a Nextflow pipeline's config settings.

    Reads the config from the pipeline's master branch, then returns to the
    configured template branch.

    Args:
        nf_project_dir: path to the Nextflow pipeline working directory.

    Returns:
        A cookiecutter-readable context (Python dictionary)
    """
    # Check if we are on "master" (main pipeline code).
    # BUG FIX: the original used `is not "master"`, which compares object
    # identity against a string literal and is effectively always True for a
    # git Head object, so the checkout ran unconditionally. Compare the
    # branch name by value instead. (str() of a GitPython Head yields its
    # name — confirm against the repo library in use.)
    if str(self.repo.active_branch) != "master":
        self.repo.git.checkout("origin/master", b="master")

    # Fetch the config variables from the Nextflow pipeline
    config = utils.fetch_wf_config(wf_path=nf_project_dir)

    # Checkout again to configured template branch
    self.repo.git.checkout("origin/{branch}".format(branch=self.branch),
                           b=self.branch)

    return utils.create_context(config)
def train(self, train_iter):
    """Train the Lorenz network with Adam + L1 loss, keeping the best model.

    Iterates for self._options.epochs epochs over train_iter, tracks the mean
    per-sample L1 loss each epoch, saves the model whenever the epoch loss
    improves, and writes the per-epoch loss history to assets_dir/losses.txt.

    Args:
        train_iter: iterable of (x, y) batches; x is reshaped to
            (batch_size, in_channels, -1) before the forward pass.
    """
    ctx = create_context(self._options.num_gpu)
    net = LorenzBuilder(self._options, ctx=ctx, for_train=True).build()
    trainer = gluon.Trainer(
        net.collect_params(), 'adam', {
            'learning_rate': self._options.learning_rate,
            'wd': self._options.l2_regularization
        })
    loss = gluon.loss.L1Loss()
    loss_save = []
    # Any real loss beats sys.maxsize, so the first epoch always saves.
    best_loss = sys.maxsize
    start = time.time()
    for epoch in trange(self._options.epochs):
        # Accumulate the loss on-device; nb counts samples seen this epoch.
        total_epoch_loss, nb = mx.nd.zeros(1, ctx), 0
        for x, y in train_iter:
            # x shape: (batch_sizeXin_channelsXwidth)
            x = x.reshape(
                (self._options.batch_size, self._options.in_channels,
                 -1)).as_in_context(ctx)
            y = y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(x)
                l = loss(y_hat, y)
            l.backward()
            # ignore_stale_grad: skip params untouched by this forward pass.
            trainer.step(self._options.batch_size, ignore_stale_grad=True)
            total_epoch_loss += l.sum()
            nb += x.shape[0]
            # print('nb', nb)
        # asscalar() syncs with the device once per epoch, not per batch.
        current_loss = total_epoch_loss.asscalar() / nb
        loss_save.append(current_loss)
        print('Epoch {}, loss {}'.format(epoch, current_loss))
        if current_loss < best_loss:
            best_loss = current_loss
            self.save_model(net)
        print('best epoch loss: ', best_loss)
    end = time.time()
    np.savetxt(os.path.join(self._options.assets_dir, 'losses.txt'),\
        np.array(loss_save))
    print("Training took ", end - start, " seconds.")
def setup(session):
    """Register pattern + NER search contexts and hash/FPE mask contexts.

    Downloads the OpenNLP person-NER and sentence-detector models first, then
    registers the four contexts via utils.create_context.

    Args:
        session: authenticated API session used for downloads and registration.
    """
    model_url = utils.download_model('en-ner-person.bin', session)
    sent_url = utils.download_model('en-sent.bin', session)
    search_context = {
        "name": search_context_name,
        "matchers": [{
            "name": "EmailMatcher",
            "type": "pattern",
            "pattern": r"\b[\w._%+-]+@[\w.-]+\.[A-Za-z]{2,4}\b"
        }, {
            "name": "PhoneMatcher",
            "type": "pattern",
            "pattern": r"\b(\+?1?([ .-]?)?)?(\(?([2-9]\d{2})\)?([ .-]?)?)([2-9]\d{2})([ .-]?)(\d{4})(?: #?[eE][xX][tT]\.? \d{2,6})?\b"
        }, {
            "name": "NameMatcher",
            "type": "ner",
            "modelUrl": model_url,
            "sentenceDetectorUrl": sent_url
        }]
    }
    mask_context = {
        "name": mask_context_name,
        # Fixed: both expressions used "$\{INPUT\}". "\{" / "\}" are not
        # recognized Python escapes, so literal backslashes were sent and the
        # engine never saw the ${INPUT} variable. Raw strings match the
        # sibling test modules.
        "rules": [{
            "name": "HashRule",
            "type": "cosort",
            "expression": r"hash_sha2(${INPUT})"
        }, {
            "name": "FpeRule",
            "type": "cosort",
            "expression": r"enc_fp_aes256_alphanum(${INPUT})"
        }],
        "ruleMatchers": [{
            "name": "FpeRuleMatcher",
            "type": "name",
            "rule": "FpeRule",
            "pattern": "NameMatcher|PhoneMatcher"
        }, {
            "name": "HashRuleMatcher",
            "type": "name",
            "rule": "HashRule",
            "pattern": "EmailMatcher"
        }]
    }
    file_search_context = {
        "name": file_search_context_name,
        # NOTE(review): the jsonPath and xmlPath matchers below share the
        # name "NameMatcher" so both route through FpeRuleMatcher. Confirm
        # duplicate matcher names are intended by the API.
        "matchers": [{
            "name": search_context_name,
            "type": "searchContext"
        }, {
            "name": "NameMatcher",
            "type": "jsonPath",
            "jsonPath": "$..name"
        }, {
            "name": "NameMatcher",
            "type": "xmlPath",
            "xmlPath": "//name"
        }]
    }
    file_mask_context = {
        "name": file_mask_context_name,
        "rules": [{
            "name": mask_context_name,
            "type": "maskContext"
        }]
    }
    utils.create_context("searchContext", search_context, session)
    utils.create_context("maskContext", mask_context, session)
    utils.create_context("files/fileSearchContext", file_search_context, session)
    utils.create_context("files/fileMaskContext", file_mask_context, session)
def setup(session):
    """Register SSN/name search, FPE/redact mask, and file-level contexts.

    Args:
        session: authenticated API session handed through to utils.create_context.
    """
    # Matchers: SSNs by regex, person names by lookup set (addressed by URI).
    ssn_matcher = {
        "name": "SsnMatcher",
        "type": "pattern",
        "pattern": r"\b(\d{3}[-]?\d{2}[-]?\d{4})\b",
    }
    name_matcher = {
        "name": "NameMatcher",
        "type": "set",
        "url": pathlib.Path('names.set').absolute().as_uri(),
    }
    search_context = {
        "name": search_context_name,
        "matchers": [ssn_matcher, name_matcher],
    }

    # Rules: format-preserving encryption for names, partial redaction for SSNs.
    fpe_rule = {
        "name": "FpeRule",
        "type": "cosort",
        "expression": r"enc_fp_aes256_alphanum(${NAME})",
    }
    redact_ssn_rule = {
        "name": "RedactSsnRule",
        "type": "cosort",
        "expression": r"replace_chars(${SSN},'*',1,3,'*',5,2)",
    }
    mask_context = {
        "name": mask_context_name,
        "rules": [fpe_rule, redact_ssn_rule],
        "ruleMatchers": [
            {
                "name": "FpeRuleMatcher",
                "type": "name",
                "rule": "FpeRule",
                "pattern": "NameMatcher",
            },
            {
                "name": "SsnRuleMatcher",
                "type": "name",
                "rule": "RedactSsnRule",
                "pattern": "SsnMatcher",
            },
        ],
    }

    # File-level contexts delegate to the contexts defined above.
    file_search_context = {
        "name": file_search_context_name,
        "matchers": [{"name": search_context_name, "type": "searchContext"}],
    }
    file_mask_context = {
        "name": file_mask_context_name,
        "rules": [{"name": mask_context_name, "type": "maskContext"}],
    }

    utils.create_context("searchContext", search_context, session)
    utils.create_context("maskContext", mask_context, session)
    utils.create_context("files/fileSearchContext", file_search_context, session)
    utils.create_context("files/fileMaskContext", file_mask_context, session)
def __init__(self, protocol, cipher_suite):
    """Build the crypto context for the given protocol/cipher suite and open
    a fresh (unconnected) IPv4 TCP socket.

    Args:
        protocol: protocol identifier forwarded to utils.create_context.
        cipher_suite: cipher-suite identifier forwarded to utils.create_context.
    """
    # Create the context first so a failure leaves no socket behind.
    self.context = utils.create_context(protocol, cipher_suite)
    self.socket = socket.socket(family=socket.AF_INET, type=socket.SOCK_STREAM)
def loading():
    """Print a small loading indicator: five dots, one every 0.2 s."""
    for _ in range(5):
        sleep(0.2)
        print('.')


loading()

final_path = None
# `type` selects the generator kind; the extra truthiness guard
# (`type and type == ...`) was redundant — equality already fails for
# None/empty values.
if type == 'p':
    dir_path = create_dir(c_name, 'pages')
    final_path = create_components('%s.page' % c_name, dir_path, is_native)
elif type == 'c':
    dir_path = create_dir(c_name, 'components')
    final_path = create_components(c_name, dir_path, is_native)
elif type == 'ctx':
    # NOTE(review): unlike the other branches, contexts are created under a
    # shared 'contexts' dir without c_name — confirm this is intended.
    dir_path = create_dir('contexts')
    final_path = create_context(c_name, dir_path)
else:
    dir_path = create_dir(c_name)
    final_path = create_components(c_name, dir_path)

if final_path:
    # Grammar fix: "was successful created" -> "was successfully created".
    print('%s was successfully created!' % final_path)
else:
    # Grammar fix: "a unhandled" -> "an unhandled".
    print(
        'Sorry :(, an unhandled error occurred while creating your files.'
    )
def setup(session):
    """Register pattern and NER search contexts, mask rules, and the
    file-level contexts that tie them together.

    Args:
        session: authenticated API session used for model downloads and
            context registration.
    """
    # OpenNLP models needed by the NER matcher.
    model_url = utils.download_model('en-ner-person.bin', session)
    sent_url = utils.download_model('en-sent.bin', session)
    token_url = utils.download_model('en-token.bin', session)

    # Plain regex matching for email addresses.
    search_context = {
        "name": search_context_name,
        "matchers": [
            {
                "name": "EmailMatcher",
                "type": "pattern",
                "pattern": r"\b[\w._%+-]+@[\w.-]+\.[A-Za-z]{2,4}\b",
            },
        ],
    }

    # A separate context for (more expensive) NER-based name matching.
    search_context_ner = {
        "name": search_context_ner_name,
        "matchers": [
            {
                "name": "NameMatcher",
                "type": "ner",
                "modelUrl": model_url,
                "sentenceDetectorUrl": sent_url,
                "tokenizerUrl": token_url,
            },
        ],
    }

    # Hash emails; format-preserving-encrypt names with a passphrase.
    mask_context = {
        "name": mask_context_name,
        "rules": [
            {
                "name": "HashEmailRule",
                "type": "cosort",
                "expression": r"hash_sha2(${EMAIL})",
            },
            {
                "name": "FpeNameRule",
                "type": "cosort",
                "expression": r"enc_fp_aes256_alphanum(${NAME},'passphrase')",
            },
        ],
        "ruleMatchers": [
            {
                "name": "HashRuleMatcher",
                "type": "name",
                "rule": "HashEmailRule",
                "pattern": "EmailMatcher",
            },
            {
                "name": "NameRuleMatcher",
                "type": "name",
                "rule": "FpeNameRule",
                "pattern": "NameMatcher",
            },
        ],
    }

    # File-level search: emails everywhere, NER restricted to the "comment"
    # column, plus a header-driven column matcher for *name columns.
    ner_with_filter = {
        "name": search_context_ner_name,
        "type": "searchContext",
        "contentFilters": {
            "columns": [
                {
                    "ignoreHeader": True,
                    "pattern": "comment",
                },
            ],
        },
    }
    file_search_context = {
        "name": file_search_context_name,
        "matchers": [
            {
                "name": search_context_name,
                "type": "searchContext",
            },
            ner_with_filter,
            {
                "name": "NameMatcher",
                "type": "column",
                "ignoreHeader": True,
                "pattern": ".*name",
            },
        ],
    }
    file_mask_context = {
        "name": file_mask_context_name,
        "rules": [
            {
                "name": mask_context_name,
                "type": "maskContext",
            },
        ],
    }

    utils.create_context("searchContext", search_context, session)
    utils.create_context("searchContext", search_context_ner, session)
    utils.create_context("maskContext", mask_context, session)
    utils.create_context("files/fileSearchContext", file_search_context, session)
    utils.create_context("files/fileMaskContext", file_mask_context, session)
}

# In Memory
# context = StreamingContext(spark.sparkContext, batch_duration)
# lines = [[1, 2, 3], [4, 5, 6]]
# stream = context.queueStream(lines)

# Google storage
# context = StreamingContext.getOrCreate(checkpointDirectory, create_context)
# stream = context.textFileStream("gs://test1-sha456-logs-sink/data/")
# spark._jsc.hadoopConfiguration().set('fs.gs.impl', 'com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem')
# spark._jsc.hadoopConfiguration().set('fs.AbstractFileSystem.gs.impl',
#                                      'com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS')
# spark._jsc.hadoopConfiguration().set('fs.gs.project.id', project_id)
# spark._jsc.hadoopConfiguration().set('google.cloud.auth.service.account.enable', 'true')
# spark._jsc.hadoopConfiguration().set('google.cloud.auth.service.account.json.keyfile', cred_location)

# Pubsub
# Recover the StreamingContext from the checkpoint directory if one exists;
# otherwise build a fresh one via create_context (the lambda defers
# construction until getOrCreate decides it is needed).
context = StreamingContext.getOrCreate(
    checkpoint_directory,
    lambda: create_context(spark, checkpoint_directory, batch_duration))
# Subscribe to Pub/Sub; the final True presumably enables message decoding —
# TODO confirm against pubsub.PubsubUtils.createStream's signature.
stream = pubsub.PubsubUtils.createStream(context, subscription_name,
                                         batch_size, True)

# Pipeline: parse each request, fold it into per-key state, publish a drift
# metric per state to the Pushgateway, and persist states to BigQuery.
stream.flatMap(parse_request) \
    .updateStateByKey(lambda new_values, state: update_state(new_values, state, job_conf)) \
    .map(lambda state: publish_state_metric(state, pushgateway_url, myelin_ns, port, input_drift_probability_metric)) \
    .foreachRDD(lambda rdd: write_state_to_bq(rdd, state_table))

# Start the streaming job and block until it terminates.
context.start()
context.awaitTermination()
def setup(session):
    """Register SSN/email search, hash/redact mask, and file-level contexts.

    Args:
        session: authenticated API session handed through to utils.create_context.
    """
    # Regex matchers for the two sensitive field types in this test.
    search_context = {
        "name": search_context_name,
        "matchers": [
            {
                "name": "SsnMatcher",
                "type": "pattern",
                "pattern": r"\b(\d{3}[-]?\d{2}[-]?\d{4})\b",
            },
            {
                "name": "EmailMatcher",
                "type": "pattern",
                "pattern": r"\b[\w._%+-]+@[\w.-]+\.[A-Za-z]{2,4}\b",
            },
        ],
    }

    # Hash emails entirely; redact SSNs except for a few positions.
    hash_email_rule = {
        "name": "HashEmailRule",
        "type": "cosort",
        "expression": r"hash_sha2(${EMAIL})",
    }
    redact_ssn_rule = {
        "name": "RedactSsnRule",
        "type": "cosort",
        "expression": r"replace_chars(${SSN},'*',1,3,'*',5,2)",
    }
    mask_context = {
        "name": mask_context_name,
        "rules": [hash_email_rule, redact_ssn_rule],
        "ruleMatchers": [
            {
                "name": "EmailRuleMatcher",
                "type": "name",
                "rule": "HashEmailRule",
                "pattern": "EmailMatcher",
            },
            {
                "name": "SsnRuleMatcher",
                "type": "name",
                "rule": "RedactSsnRule",
                "pattern": "SsnMatcher",
            },
        ],
    }

    # File-level contexts delegate to the contexts defined above.
    file_search_context = {
        "name": file_search_context_name,
        "matchers": [{"name": search_context_name, "type": "searchContext"}],
    }
    file_mask_context = {
        "name": file_mask_context_name,
        "rules": [{"name": mask_context_name, "type": "maskContext"}],
    }

    utils.create_context("searchContext", search_context, session)
    utils.create_context("maskContext", mask_context, session)
    utils.create_context("files/fileSearchContext", file_search_context, session)
    utils.create_context("files/fileMaskContext", file_mask_context, session)
def setup(session):
    """Register the full demo stack: SSN/email/phone regex matchers plus an
    OpenNLP NER name matcher, their masking rules, and file-level contexts.

    Args:
        session: authenticated API session used for model downloads and
            context registration.
    """
    # OpenNLP models backing the NER matcher.
    model_url = utils.download_model('en-ner-person.bin', session)
    sent_url = utils.download_model('en-sent.bin', session)
    token_url = utils.download_model('en-token.bin', session)

    matchers = [
        {
            "name": "SsnMatcher",
            "type": "pattern",
            "pattern": r"\b(\d{3}[-]?\d{2}[-]?\d{4})\b",
        },
        {
            "name": "EmailMatcher",
            "type": "pattern",
            "pattern": r"\b[\w._%+-]+@[\w.-]+\.[A-Za-z]{2,4}\b",
        },
        {
            "name": "PhoneMatcher",
            "type": "pattern",
            "pattern": r"\b(\+?1?([ .-]?)?)?(\(?([2-9]\d{2})\)?([ .-]?)?)([2-9]\d{2})([ .-]?)(\d{4})(?: #?[eE][xX][tT]\.? \d{2,6})?\b",
        },
        {
            "name": "NameMatcher",
            "type": "ner",
            "modelUrl": model_url,
            "sentenceDetectorUrl": sent_url,
            "tokenizerUrl": token_url,
        },
    ]
    search_context = {"name": search_context_name, "matchers": matchers}

    # Emails are hashed, names/phones format-preserving-encrypted, SSNs
    # partially redacted.
    rules = [
        {
            "name": "HashEmailRule",
            "type": "cosort",
            "expression": r"hash_sha2(${EMAIL})",
        },
        {
            "name": "FpeRule",
            "type": "cosort",
            "expression": r"enc_fp_aes256_alphanum(${NAME})",
        },
        {
            "name": "RedactSsnRule",
            "type": "cosort",
            "expression": r"replace_chars(${SSN},'*',1,3,'*',5,2)",
        },
    ]
    rule_matchers = [
        {
            "name": "FpeRuleMatcher",
            "type": "name",
            "rule": "FpeRule",
            "pattern": "NameMatcher|PhoneMatcher",
        },
        {
            "name": "EmailRuleMatcher",
            "type": "name",
            "rule": "HashEmailRule",
            "pattern": "EmailMatcher",
        },
        {
            "name": "SsnRuleMatcher",
            "type": "name",
            "rule": "RedactSsnRule",
            "pattern": "SsnMatcher",
        },
    ]
    mask_context = {
        "name": mask_context_name,
        "rules": rules,
        "ruleMatchers": rule_matchers,
    }

    # File-level contexts delegate to the contexts defined above.
    file_search_context = {
        "name": file_search_context_name,
        "matchers": [{"name": search_context_name, "type": "searchContext"}],
    }
    file_mask_context = {
        "name": file_mask_context_name,
        "rules": [{"name": mask_context_name, "type": "maskContext"}],
    }

    utils.create_context("searchContext", search_context, session)
    utils.create_context("maskContext", mask_context, session)
    utils.create_context("files/fileSearchContext", file_search_context, session)
    utils.create_context("files/fileMaskContext", file_mask_context, session)