def build(self):
    """
    Build the form resource mapping package and write it to disk.

    $ python -m sagas.ofbiz.resource_mappings build

    Every location returned by ``collect_forms`` is turned into a
    ``<uri>;<form key>;zh_CN`` string; the resulting list is handed to
    ``ResourceMappings.build_package`` and the package is written to
    ``./data/resources/form_res.data``.

    :return: None
    """
    from sagas.ofbiz.forms import get_form_list, collect_forms
    from protobuf_utils import write_proto_to, read_proto
    from forms_pb2 import (
        MetaForm,
        MetaMappingPackage,
        MetaFieldMapping,
        MetaFieldMappings,
        SUBMIT,
        RESET,
    )

    form_index = collect_forms(get_form_list())
    print("total forms:", len(form_index.items()))

    # Each location object exposes name, location and uri; only the uri is
    # used here, combined with the form key and a fixed locale suffix.
    form_locations = [
        location.uri + ';' + form_key + ';zh_CN'
        for form_key, locations in form_index.items()
        for location in locations
    ]

    target_path = './data/resources/form_res.data'
    package = ResourceMappings().build_package(form_locations)
    write_proto_to(package, target_path)
    print('done.')
def write_samples(self, only_samples=True, out_file=f'{cf.conf_dir}/data/langs/samples_100.data'):
    """
    Parse language pairs and write the collected results as an ``RsLangs`` message.

    :param only_samples: when true, only the entries of ``self.pairs`` at
        indexes 2000 through 2099 are parsed; otherwise every pair is parsed
    :param out_file: destination handed to ``protobuf_utils.write_proto_to``
    :return: None
    """
    if only_samples:
        # Lazy on purpose: each pair is looked up right before it is parsed.
        selected = (self.pairs[idx] for idx in range(2000, 2100))
    else:
        selected = self.pairs

    parsed = []
    for pair in selected:
        # self.parse receives the pair together with the accumulator list.
        self.parse(pair, parsed)

    protobuf_utils.write_proto_to(res.RsLangs(langs=parsed), out_file)
def procs(self, out_file):
    """
    Parse every distinct source sentence and persist the parsed documents.

    $ python -m sagas.graph.dgraph_spacy procs '~/pi/data/langs/jpn_eng_spacy.data'

    The first element of each entry in ``self.pairs`` is collected and
    de-duplicated with ``numpy.unique``. Each distinct sentence is run through
    ``self.nlp``; for every token whose dependency is ``nsubj`` and whose head
    is a ``VERB``, the head's text and lemma are recorded. One ``RsLang``
    message per sentence (sentence, serialized doc, verbs, verb lemmas) is
    collected into an ``RsLangs`` message and written to ``out_file``.

    :param out_file: destination handed to ``protobuf_utils.write_proto_to``
    :return: None
    """
    import numpy as np

    sentences = np.unique(np.array([pair[0] for pair in self.pairs]))

    verb_maps = {}
    rs = []
    for sentence in tqdm(sentences):
        doc = self.nlp(str(sentence))

        # Find verbs that have a subject by walking up from the subject token.
        verbs = []
        lemmas = []
        for possible_subject in doc:
            if possible_subject.dep == nsubj and possible_subject.head.pos == VERB:
                verbs.append(possible_subject.head.text)
                lemmas.append(possible_subject.head.lemma_)

        # Only sentences with at least one such verb get an entry here.
        if verbs:
            verb_maps[sentence] = verbs

        rs.append(res.RsLang(entries=[sentence], store=doc.to_bytes(),
                             verbs=verbs, verbLemmas=lemmas))

    print(len(verb_maps))

    # Print a couple of samples for eyeballing. verb_maps has no entry for
    # sentences without a subject-bearing verb and self.pairs may be shorter
    # than the sample indexes, so look both up defensively: a missing sample
    # must not abort the run before the results are written.
    for sample_idx in (2000, 3000):
        if sample_idx < len(self.pairs):
            sample = self.pairs[sample_idx]
            print(sample, verb_maps.get(sample[0], []))

    print('.. write to file')
    langs = res.RsLangs(langs=rs)
    protobuf_utils.write_proto_to(langs, out_file)
    print('done.')
def build(self):
    """
    Build the entity field index and write it to disk.

    $ python -m sagas.ofbiz.entity_meta_indexer build

    The index returned by ``build_field_index(all_entities())`` is converted
    into a mapping of key -> ``RsEntityReference``, wrapped in an
    ``RsEntities`` message and written to
    ``data/resources/entities_index.data``.

    :return: None
    """
    field_index = build_field_index(all_entities())
    print('total fields:', len(field_index))

    field_refs = {
        field: res.RsEntityReference(entities=entities)
        for field, entities in field_index.items()
    }
    index_msg = res.RsEntities(fieldRefs=field_refs)
    protobuf_utils.write_proto_to(index_msg, 'data/resources/entities_index.data')
    print('total field-refs:', len(index_msg.fieldRefs))
def write_dataset(self, target_file, name_fld, desc_fld):
    """
    Write the records from ``self.fill_records`` as an ``ExternalLinks`` message.

    Example target: 'data/resources/rs_product_type.data'

    Each record is read positionally: element 0 becomes ``gid``, element 1
    ``name`` and element 2 ``description`` of an ``ExternalLink``.

    :param target_file: destination handed to ``protobuf_utils.write_proto_to``
    :param name_fld: forwarded to ``self.fill_records``
    :param desc_fld: forwarded to ``self.fill_records``
    :return: tuple of (dict with key ``links_count``, first five records)
    """
    import protobuf_utils as pu
    from values_pb2 import ExternalLinks, ExternalLink

    records = self.fill_records(name_fld, desc_fld)
    links = [
        ExternalLink(gid=record[0], name=record[1], description=record[2])
        for record in records
    ]
    message = ExternalLinks(links=links)
    summary = {'links_count': len(links)}
    pu.write_proto_to(message, target_file)
    return summary, records[:5]
def build_form_services_index(self):
    """
    Extract form services and form requests and write them to disk.

    $ python -m sagas.ofbiz.form_services build_form_services_index

    The two sequences returned by ``extract_services(get_form_locs())`` are
    each wrapped item-by-item in ``MetaTuple`` messages, combined into a
    ``MetaFormServices`` message and written to
    ``data/resources/form_services.data``.

    :return: None
    """
    # A single form location looks like:
    # 'component://webtools/widget/ServiceForms.xml;AddJobManagerLock;zh_CN'
    form_services, form_reqs = extract_services(get_form_locs())

    print('form_services', len(form_services))
    print('form_reqs', len(form_reqs))
    print(form_reqs[:5])
    print(form_services[:5])

    service_tuples = [fo.MetaTuple(values=entry) for entry in form_services]
    request_tuples = [fo.MetaTuple(values=entry) for entry in form_reqs]
    index_msg = fo.MetaFormServices(formServices=service_tuples,
                                    formRequests=request_tuples)
    pu.write_proto_to(index_msg, 'data/resources/form_services.data')
    print('done')