def create_consumer_acl(self, username, topic):
    # Kafka ACL principals take the form "User:<name>".
    user = 'User:{username}'.format(username=username)
    cmd = ('--authorizer-properties zookeeper.connect=localhost:2181 '
           '--add --allow-principal {user} --consumer --group=* '
           '--topic {topic}').format(user=user, topic=topic)
    _, stdout = self._exec_cmd(cmd)
    LOG.info('[create_consumer_acl] {stdout}'.format(stdout=stdout))
    return stdout
def create_user(self, username, password):
    pwd = ("'SCRAM-SHA-256=[password={password}],"
           "SCRAM-SHA-512=[password={password}]'").format(password=password)
    cmd = ('--zookeeper localhost:2181 --alter --add-config {pwd} '
           '--entity-type users --entity-name {username}').format(
               pwd=pwd, username=username)
    _, stdout = self._exec_cmd(cmd)
    LOG.info('[create_user] {stdout}'.format(stdout=stdout))
    return stdout
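# For reference, a sketch of the shell command create_user("alice", "s3cret")
# is intended to compose. This assumes the class's _exec_cmd wraps
# kafka-configs.sh (only the kafka-topics.sh wrapper appears in this file):
#
#   kafka-configs.sh --zookeeper localhost:2181 --alter \
#       --add-config 'SCRAM-SHA-256=[password=s3cret],SCRAM-SHA-512=[password=s3cret]' \
#       --entity-type users --entity-name alice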
def create_topic(self, topic_name, replication_factor, partitions):
    cmd = ('--create --zookeeper localhost:2181 '
           '--replication-factor {replication_factor} '
           '--partitions {partitions} --topic {topic_name}').format(
               topic_name=topic_name,
               replication_factor=replication_factor,
               partitions=partitions)
    _, stdout = self._exec_cmd(cmd)
    LOG.info('[create_topic] {stdout}'.format(stdout=stdout))
    return stdout
def get_verse_alignment_mp(self, verse_nums, edition_pairs):
    res = []
    # If consecutive edition pairs share the same language pair, reuse the
    # previously loaded alignment and index files.
    ps_lang, pt_lang, index_t, alignments = None, None, None, None
    for edition_1, edition_2 in edition_pairs:
        aligns = {}
        if self.get_lang_from_edition(edition_1) == self.get_lang_from_edition(edition_2):
            res.append((edition_1, edition_2, aligns))
            continue
        if edition_1 in self.bert_files and edition_2 in self.bert_files:
            LOG.info("going to super alignment for: {}, {}".format(
                edition_1, edition_2))
            res.append((edition_1, edition_2,
                        super().get_verse_alignment(
                            verse_nums,
                            self.lang_prf_map[edition_1],
                            self.lang_prf_map[edition_2])))
            continue
        LOG.info("getting eflomal alignment for: {}, {}".format(
            edition_1, edition_2))
        s_lang, t_lang, s_edition, t_edition = self.get_ordered_editions(
            edition_1, edition_2)
        s_lang_file = self.edition_file_mapping[s_edition]
        t_lang_file = self.edition_file_mapping[t_edition]
        # If the stored order is the reverse of the requested order, the
        # alignment pairs have to be flipped.
        revert = s_edition == edition_2
        if s_lang != ps_lang or t_lang != pt_lang:
            alignments = self.get_alignment(s_lang, t_lang)
            index_t = self.get_index(s_lang, t_lang)
            ps_lang, pt_lang = s_lang, t_lang

        index = None
        if s_lang_file in index_t and t_lang_file in index_t[s_lang_file]:
            index = index_t[s_lang_file][t_lang_file]

        if index is not None:
            LOG.info("getting verses, {}, {}, {}, {}, {}, {}, {}, {}".format(
                edition_1, edition_2, s_lang, t_lang, ps_lang, pt_lang,
                len(index_t), len(index)))
            for verse in verse_nums:
                if verse in index:
                    aligns[verse] = self.create_ordered_alignment(
                        alignments, index[verse], revert)
            LOG.info("verses got")
        else:
            LOG.warning("couldn't find index for: " + s_edition + ", " + t_edition)
        res.append((edition_1, edition_2, aligns))
    return res
def read_langs_order_file(self):
    res = []
    try:
        with open(self.lang_order_file_path, 'r') as inf:
            for l in inf:
                res.append(l.strip())
    except FileNotFoundError:
        LOG.warning("Langs order file not found")
    return res
def post(self):
    args = parser.parse_args()
    LOG.info('[request] username={username}'.format(username=args['username']))
    # Note: this logs the plaintext password to the service log.
    LOG.info('[request] password={password}'.format(password=args['password']))
    response = kafka_configs.create_user(args['username'], args['password'])
    return response
def create_index_binary_file_if_not_exists(self, lang1, lang2):
    # Use the lock as a context manager so it is released even if reading or
    # pickling raises.
    with index_lock:
        if not os.path.exists(self.get_index_binary_file_path(lang1, lang2)):
            LOG.info("creating binary index file for {}, {}".format(
                lang1, lang2))
            ind = self.read_index_file(self.get_index_file_path(lang1, lang2))
            with open(self.get_index_binary_file_path(lang1, lang2), 'wb') as of:
                pickle.dump(ind, of)
def create_align_binary_file_if_not_exists(self, lang1, lang2):
    # Same locking pattern as the index helper above.
    with alignments_lock:
        if not os.path.exists(self.get_align_binary_file_path(lang1, lang2)):
            LOG.info("creating binary alignments file for {}, {}".format(
                lang1, lang2))
            aln = self.read_alignment_file(
                self.get_align_file_path(lang1, lang2))
            with open(self.get_align_binary_file_path(lang1, lang2), 'wb') as of:
                pickle.dump(aln, of)
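# A minimal loader sketch showing how the binary helpers above are typically
# used: ensure the pickle exists, then load it instead of re-parsing the text
# file each time. The method names match the helpers in this class; treating
# this as the actual call site is an assumption.
def load_index(self, lang1, lang2):
    self.create_index_binary_file_if_not_exists(lang1, lang2)
    with open(self.get_index_binary_file_path(lang1, lang2), 'rb') as inf:
        return pickle.load(inf)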
def read_alignment_file(self, file_path):
    LOG.info("reading alignment file ({})".format(file_path))
    res = []
    with open(file_path, 'r') as f:
        for line in f:
            # Handle an optional index column at the beginning of the line.
            s_l = line.split('\t')
            if len(s_l) > 1:
                res.append(s_l[1])
            else:
                res.append(s_l[0])
    return res
def read_index_file(self, file_path):
    LOG.info("reading index file ({})".format(file_path))
    res = {}
    with open(file_path, 'r') as f:
        for i, line in enumerate(f):
            verse, s_file, t_file = tuple(line.strip().split('\t'))
            self.setup_dict_entry(res, s_file, {})
            self.setup_dict_entry(res[s_file], t_file, {})
            # Map verse id -> line number of the corresponding alignment.
            res[s_file][t_file][verse] = i
    return res
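# For illustration, a sketch of the index file format read_index_file expects:
# one tab-separated record per alignment line (the verse id and file names
# below are made up for the example):
#
#   40001001<TAB>eng-x-bible-kjv.txt<TAB>deu-x-bible-luther.txt
#
# If that record is the first line of the file, the resulting structure is:
#   {"eng-x-bible-kjv.txt": {"deu-x-bible-luther.txt": {"40001001": 0}}}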
def add_to_index(self, index, key, val, to_send):
    # Separate-chaining insert: hash the key into a bucket, then append a node
    # at the end of that bucket's linked list. Returns the number of hops
    # taken, i.e. the chain length encountered.
    loc = self.get_hash(key) % self.index_size
    next_count = 0
    if index[loc] is None:
        index[loc] = {"key": key, "val": val, "next": None}
    else:
        last = index[loc]
        if to_send is not None:
            LOG.info("verse {}, hash {}, loc {}, index {}".format(
                key, self.get_hash(key), loc, to_send))
        while last["next"] is not None:
            next_count += 1
            last = last["next"]
        last["next"] = {"key": key, "val": val, "next": None}
    return next_count
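# A minimal lookup counterpart for the chained index built by add_to_index,
# assuming the same get_hash and index_size. This helper is a sketch, not part
# of the original code.
def find_in_index(self, index, key):
    node = index[self.get_hash(key) % self.index_size]
    while node is not None:
        if node["key"] == key:
            return node["val"]
        node = node["next"]
    return None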
def read_dict_file(self, file_path, do_lower=False):
    res = {}
    try:
        with open(file_path, "r") as mapping_list:
            for l in mapping_list:
                # Skip comment lines.
                if l.startswith('#'):
                    continue
                if do_lower:
                    # str.lower() returns a new string, so reassign.
                    l = l.lower()
                pair = l.strip().split('\t')
                res[pair[0].strip()] = pair[1].strip()
    except FileNotFoundError:
        LOG.warning(f"file {file_path} not found")
    return res
def _exec_cmd(self, cmd):
    cmd = '/root/kafka/bin/kafka-topics.sh {cmd}'.format(cmd=cmd)
    LOG.info('[Command] {cmd}'.format(cmd=cmd))
    p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        LOG.error('[kafka-topics] failed')
        LOG.error('[kafka-topics] {stdout}'.format(stdout=stdout))
        LOG.error('[kafka-topics] {stderr}'.format(stderr=stderr))
    return (p.returncode, stdout)
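# Only the kafka-topics.sh wrapper is shown here, while the ACL and user
# methods above would need kafka-acls.sh and kafka-configs.sh respectively
# (presumably via their own classes' _exec_cmd). A sketch of a shared variant
# parameterized by script name; the script directory is an assumption carried
# over from the hard-coded path above:
def _exec_kafka_script(self, script, cmd):
    full_cmd = '/root/kafka/bin/{script} {cmd}'.format(script=script, cmd=cmd)
    LOG.info('[Command] {cmd}'.format(cmd=full_cmd))
    p = Popen(full_cmd, stdout=PIPE, stderr=PIPE, shell=True)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        LOG.error('[{script}] failed: {stderr}'.format(
            script=script, stderr=stderr))
    return (p.returncode, stdout)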
def get_verse_alignment(self, verse_nums, edition_1, edition_2,
                        alignments_loc=None, index_loc=None):
    aligns = {}
    if edition_1 == edition_2:
        return aligns
    if edition_1 in self.bert_files and edition_2 in self.bert_files:
        LOG.info("going to super alignment for: {}, {}".format(
            edition_1, edition_2))
        return super().get_verse_alignment(verse_nums,
                                           self.lang_prf_map[edition_1],
                                           self.lang_prf_map[edition_2])
    LOG.info("getting eflomal alignment for: {}, {}".format(
        edition_1, edition_2))
    s_lang, t_lang, s_edition, t_edition = self.get_ordered_editions(
        edition_1, edition_2)
    s_lang_file = self.edition_file_mapping[s_edition]
    t_lang_file = self.edition_file_mapping[t_edition]
    # If the stored order is the reverse of the requested order, the alignment
    # pairs have to be flipped.
    revert = s_edition == edition_2
    LOG.info("copying")
    alignments = self.content_cache.get(
        self.get_align_file_path(s_lang, t_lang))
    index = self.indexes_cache.get(
        self.get_index_file_path(s_lang, t_lang))
    if s_lang_file in index and t_lang_file in index[s_lang_file]:
        index = index[s_lang_file][t_lang_file]
    LOG.info("getting verses")
    for verse in verse_nums:
        if verse in index:
            aligns[verse] = self.create_ordered_alignment(
                alignments, index[verse], revert)
    LOG.info("verses got")
    return aligns
def post(self):
    args = parser.parse_args()
    LOG.info('[request] topic={topic}'.format(topic=args['topic']))
    LOG.info('[request] replication={replication}'.format(
        replication=args['replication']))
    LOG.info('[request] partitions={partitions}'.format(
        partitions=args['partitions']))
    response = kafka_topics.create_topic(args['topic'], args['replication'],
                                         args['partitions'])
    return response
def post(self):
    args = parser.parse_args()
    LOG.info('[request] username={username}'.format(username=args['username']))
    LOG.info('[request] topic={topic}'.format(topic=args['topic']))
    LOG.info('[request] role={role}'.format(role=args['role']))
    if args['role'] == 'producer':
        response = kafka_acls.create_producer_acl(args['username'],
                                                  args['topic'])
    elif args['role'] == 'consumer':
        response = kafka_acls.create_consumer_acl(args['username'],
                                                  args['topic'])
    else:
        response = 'invalid role'
    return response
def search_documents(self, q, verse=None, all_docs=False, doc_count=10,
                     prefixed_search=True, language=None):
    """
    Since Elasticsearch doesn't support more than 10,000 hits per request,
    we currently cap retrieval at 10,000 docs; retrieving all matched docs
    could be implemented later (e.g. with scroll or search_after).
    """
    if q.strip() != "":
        query = {
            "query": {
                "bool": {
                    "must": {
                        "multi_match": {
                            "fields": ["content", "language"],
                            "query": q,
                            "type": "cross_fields"
                        }
                    }
                }
            }
        }
        # Collect filters in a list so verse and language can be combined
        # instead of overwriting each other.
        filters = []
        if verse is not None:
            filters.append({"match": {"verse_id": verse}})
        if language is not None:
            filters.append({"match": {"language": language}})
        if filters:
            query["query"]["bool"]["filter"] = filters
    else:
        query = {
            "query": {
                "bool": {
                    "must": {"match": {"verse_id": verse}}
                }
            }
        }
    query["size"] = 10000 if all_docs or doc_count > 10000 else doc_count
    LOG.info(query)
    resp = requests.get(
        self.ealstic_search_autocomplete_url if prefixed_search
        else self.ealstic_search_normal_url,
        data=json.dumps(query),
        headers={'Content-Type': 'application/json'})
    return resp.json()
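# A hedged usage sketch for search_documents. The owning class and its
# construction are not shown in this file, so the instance below is
# hypothetical; the method returns the raw Elasticsearch response body:
#
#   result = searcher.search_documents("in the beginning", language="eng")
#   for hit in result.get("hits", {}).get("hits", []):
#       print(hit["_source"]["verse_id"], hit["_score"])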
def delete_topic(self, topic):
    cmd = '--delete --zookeeper localhost:2181 --topic {topic}'.format(
        topic=topic)
    _, stdout = self._exec_cmd(cmd)
    LOG.info('[delete_topic] {stdout}'.format(stdout=stdout))
    return stdout
def list_topic(self):
    cmd = '--list --zookeeper localhost:2181'
    _, stdout = self._exec_cmd(cmd)
    LOG.info('[list_topic] {stdout}'.format(stdout=stdout))
    return stdout
def get_user(self, username):
    cmd = ('--zookeeper localhost:2181 --describe --entity-type users '
           '--entity-name {username}').format(username=username)
    _, stdout = self._exec_cmd(cmd)
    LOG.info('[get_user] {stdout}'.format(stdout=stdout))
    return stdout
def list_user(self):
    cmd = '--zookeeper localhost:2181 --describe --entity-type users'
    _, stdout = self._exec_cmd(cmd)
    LOG.info('[list_user] {stdout}'.format(stdout=stdout))
    return stdout