Example #1
 def create_consumer_acl(self, username, topic):
     user = '******'.format(username=username)
     cmd = '--authorizer-properties zookeeper.connect=localhost:2181 --add --allow-principal {user} --consumer --group=* --topic {topic}'.format(
         user=user, topic=topic)
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[create_consumer_acl] {stdout}'.format(stdout=stdout))
     return stdout
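
Example #16 further down also calls a create_producer_acl method that is not included in this listing. A minimal sketch of what it could look like, assuming the same _exec_cmd wrapper, the standard kafka-acls.sh --producer flag, and the usual User:<name> principal form (the principal string is masked in Example #1):

 def create_producer_acl(self, username, topic):
     # hypothetical mirror of create_consumer_acl above; assumes kafka-acls.sh
     # accepts --producer the same way --consumer is used in Example #1
     user = 'User:{username}'.format(username=username)
     cmd = ('--authorizer-properties zookeeper.connect=localhost:2181 '
            '--add --allow-principal {user} --producer --topic {topic}').format(
                user=user, topic=topic)
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[create_producer_acl] {stdout}'.format(stdout=stdout))
     return stdout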
Example #2
 def create_user(self, username, password):
     pwd = "'SCRAM-SHA-256=[password={password}],SCRAM-SHA-512=[password={password}]'".format(
         password=password)
     cmd = "--zookeeper localhost:2181 --alter --add-config {pwd} --entity-type users --entity-name {username}".format(
         pwd=pwd, username=username)
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[create_user] {stdout}'.format(stdout=stdout))
     return stdout
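
The reverse operation is not shown in this listing; a hypothetical delete_user sketch, assuming the same _exec_cmd wrapper and the standard kafka-configs.sh --delete-config flag:

 def delete_user(self, username):
     # hypothetical counterpart to create_user; removes both SCRAM credentials
     cmd = ("--zookeeper localhost:2181 --alter "
            "--delete-config 'SCRAM-SHA-256,SCRAM-SHA-512' "
            "--entity-type users --entity-name {username}").format(username=username)
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[delete_user] {stdout}'.format(stdout=stdout))
     return stdout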
Example #3
 def create_topic(self, topic_name, replication_factor, partitions):
     cmd = '--create --zookeeper localhost:2181 --replication-factor {replication_factor} --partitions {partitions} --topic {topic_name}'.format(
         topic_name=topic_name,
         replication_factor=replication_factor,
         partitions=partitions)
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[create_topic] {stdout}'.format(stdout=stdout))
     return stdout
Example #4
    def get_verse_alignment_mp(self, verse_nums, edition_pairs):
        res = []
        # if multiple edition pairs share the same languages, reuse the previously loaded files
        ps_lang, pt_lang, index_t, alignments = None, None, None, None
        for edition_1, edition_2 in edition_pairs:
            aligns = {}

            if self.get_lang_from_edition(
                    edition_1) == self.get_lang_from_edition(edition_2):
                res.append((edition_1, edition_2, aligns))
                continue

            if edition_1 in self.bert_files and edition_2 in self.bert_files:
                LOG.info("going to super aglingment for: {}, {}".format(
                    edition_1, edition_2))
                res.append((edition_1, edition_2, super().get_verse_alignment(
                    verse_nums, self.lang_prf_map[edition_1],
                    self.lang_prf_map[edition_2])))
                continue

            LOG.info("getting eflomal aglingment for: {} , {}".format(
                edition_1, edition_2))
            s_lang, t_lang, s_edition, t_edition = self.get_ordered_editions(
                edition_1, edition_2)
            s_lang_file = self.edition_file_mapping[s_edition]
            t_lang_file = self.edition_file_mapping[t_edition]

            revert = False
            if s_edition == edition_2:
                revert = True

            if s_lang != ps_lang or t_lang != pt_lang:
                alignments = self.get_alignment(s_lang, t_lang)
                index_t = self.get_index(s_lang, t_lang)
                ps_lang, pt_lang = s_lang, t_lang

            index = None
            if s_lang_file in index_t:
                if t_lang_file in index_t[s_lang_file]:
                    index = index_t[s_lang_file][t_lang_file]

            if index is not None:

                LOG.info(
                    "getting verse, {}, {}, {}, {}, {}, {}, {}, {}".format(
                        edition_1, edition_2, s_lang, t_lang, ps_lang, pt_lang,
                        len(index_t), len(index)))
                for verse in verse_nums:
                    if verse in index:
                        aligns[verse] = self.create_ordered_alignment(
                            alignments, index[verse], revert)
                LOG.info("verses got")

            else:
                LOG.warning("couldn't find index for: " + s_edition + ", " +
                            t_edition)

            res.append((edition_1, edition_2, aligns))
        return res
Example #5
 def read_langs_order_file(self):
     res = []
     try:
         with open(self.lang_order_file_path, 'r') as inf:
             for l in inf:
                 res.append(l.strip())
     except FileNotFoundError:
         LOG.warning("Langs order file not found")
     return res
Example #6
 def post(self):
     args = parser.parse_args()
     LOG.info(
         '[request] username={username}'.format(username=args['username']))
     LOG.info(
         '[request] password={password}'.format(password=args['password']))
     response = kafka_configs.create_user(args['username'],
                                          args['password'])
     return response
Example #7
 def create_index_binary_file_if_not_exists(self, lang1, lang2):
     index_lock.acquire()
     if not os.path.exists(self.get_index_binary_file_path(lang1, lang2)):
         LOG.info("creating binary index file for {}, {}".format(
             lang1, lang2))
         ind = self.read_index_file(self.get_index_file_path(lang1, lang2))
         with (open(self.get_index_binary_file_path(lang1, lang2),
                    'wb')) as of:
             pickle.dump(ind, of)
     index_lock.release()
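
Examples #7 and #8 only write the pickled cache; the matching read side does not appear in this listing. A minimal loader sketch, assuming the binary file has already been produced by the method above:

 def load_index_binary_file(self, lang1, lang2):
     # hypothetical loader for the cache written by
     # create_index_binary_file_if_not_exists()
     with open(self.get_index_binary_file_path(lang1, lang2), 'rb') as inf:
         return pickle.load(inf)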
Example #8
 def create_align_binary_file_if_not_exists(self, lang1, lang2):
     alignments_lock.acquire()
     if not os.path.exists(self.get_align_binary_file_path(lang1, lang2)):
         LOG.info("creating binary alignments file for {}, {}".format(
             lang1, lang2))
         aln = self.read_alignment_file(
             self.get_align_file_path(lang1, lang2))
         with (open(self.get_align_binary_file_path(lang1, lang2),
                    'wb')) as of:
             pickle.dump(aln, of)
     alignments_lock.release()
Example #9
 def read_alignment_file(self, file_path):
     LOG.info("reading alignment file ({})".format(file_path))
     res = []
     with open(file_path, 'r') as f:
         for line in f:
             # handle an index at the beginning of the line
             s_l = line.split('\t')
             if len(s_l) > 1:
                 res.append(s_l[1])
             else:
                 res.append(s_l[0])
     return res
Example #10
    def read_index_file(self, file_path):
        LOG.info("reading index file ({})".format(file_path))
        res = {}
        with open(file_path, 'r') as f:
            for i, line in enumerate(f):
                verse, s_file, t_file = tuple(line.strip().split('\t'))
                self.setup_dict_entry(res, s_file, {})

                self.setup_dict_entry(res[s_file], t_file, {})
                res[s_file][t_file][verse] = i

        return res
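
The setup_dict_entry helper used here is not among the listed examples. A plausible minimal implementation, equivalent to dict.setdefault (an assumption, not necessarily the author's code):

    def setup_dict_entry(self, dictionary, key, default):
        # hypothetical helper: insert the default only when the key is missing
        if key not in dictionary:
            dictionary[key] = default
        return dictionary[key]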
Example #11
 def add_to_index(self, index, key, val, to_send):
     loc = self.get_hash(key) % self.index_size
     next_count = 0
     if index[loc] is None:
         index[loc] = {"key": key, "val": val, "next": None}
     else:
         last = index[loc]
         if to_send is not None:
             LOG.info("verse {}, hash {}, loc {}, index {}".format(
                 key, self.get_hash(key), loc, to_send))
         while last["next"] != None:
             next_count += 1
             last = last["next"]
         last["next"] = {"key": key, "val": val, "next": None}
     return next_count
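
Example #11 inserts into a hash table with separate chaining, where each bucket holds a linked chain of {"key", "val", "next"} dicts. A matching lookup sketch (hypothetical, not part of the source) that walks the same chain:

 def find_in_index(self, index, key):
     # hypothetical lookup for the chained hash index built by add_to_index
     node = index[self.get_hash(key) % self.index_size]
     while node is not None:
         if node["key"] == key:
             return node["val"]
         node = node["next"]
     return None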
Example #12
    def read_dict_file(self, file_path, do_lower=False):
        res = {}
        try:
            with open(file_path, "r") as mapping_list:
                for l in mapping_list:
                    if l.startswith('#'):
                        continue

                    if do_lower:
                        l = l.lower()

                    pair = l.strip().split('\t')

                    res[pair[0].strip()] = pair[1].strip()
        except FileNotFoundError:
            LOG.warning(f"file {file_path} not found")
        return res
Example #13
 def _exec_cmd(self, cmd):
     cmd = '/root/kafka/bin/kafka-topics.sh {cmd}'.format(cmd=cmd)
     LOG.info('[Command] {cmd}'.format(cmd=cmd))
     p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
     stdout, stderr = p.communicate()
     if p.returncode != 0:
         LOG.error('[kafka-topics] failed')
         LOG.error('[kafka-topics] {stdout}'.format(stdout=stdout))
         LOG.error('[kafka-topics] {stderr}'.format(stderr=stderr))
     return (p.returncode, stdout)
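
The wrapper shells out through Popen and returns the raw bytes from communicate(). For comparison, a standalone sketch of the same pattern using subprocess.run; the script path is taken from the example, while the logger setup and the 60-second timeout are assumptions:

import logging
import subprocess

LOG = logging.getLogger(__name__)

def run_kafka_topics(args, timeout=60):
    # standalone sketch, not from the source: same shell-out pattern with
    # subprocess.run and a hypothetical timeout
    proc = subprocess.run('/root/kafka/bin/kafka-topics.sh ' + args,
                          shell=True, capture_output=True, timeout=timeout)
    if proc.returncode != 0:
        LOG.error('[kafka-topics] {err}'.format(err=proc.stderr))
    return proc.returncode, proc.stdout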
Example #14
    def get_verse_alignment(self,
                            verse_nums,
                            edition_1,
                            edition_2,
                            alignments_loc=None,
                            index_loc=None):
        aligns = {}

        if edition_1 == edition_2:
            return aligns

        if edition_1 in self.bert_files and edition_2 in self.bert_files:
            LOG.info("going to super aglingment for: {}, {}".format(
                edition_1, edition_2))
            return super().get_verse_alignment(verse_nums,
                                               self.lang_prf_map[edition_1],
                                               self.lang_prf_map[edition_2])

        LOG.info("getting eflomal aglingment for: {} , {}".format(
            edition_1, edition_2))
        s_lang, t_lang, s_edition, t_edition = self.get_ordered_editions(
            edition_1, edition_2)
        s_lang_file = self.edition_file_mapping[s_edition]
        t_lang_file = self.edition_file_mapping[t_edition]
        revert = False
        if s_edition == edition_2:
            revert = True

        LOG.info("copying")
        alignments = self.content_cache.get(
            self.get_align_file_path(s_lang, t_lang))
        index = self.indexes_cache.get(self.get_index_file_path(
            s_lang, t_lang))

        if s_lang_file in index:
            if t_lang_file in index[s_lang_file]:
                index = index[s_lang_file][t_lang_file]

        LOG.info("getting verses")
        for verse in verse_nums:
            if verse in index:
                aligns[verse] = self.create_ordered_alignment(
                    alignments, index[verse], revert)
        LOG.info("verses got")
        return aligns
Example #15
 def post(self):
     args = parser.parse_args()
     LOG.info('[request] topic={topic}'.format(topic=args['topic']))
     LOG.info('[request] replication={replication}'.format(
         replication=args['replication']))
     LOG.info('[request] partitions={partitions}'.format(
         partitions=args['partitions']))
     response = kafka_topics.create_topic(args['topic'],
                                          args['replication'],
                                          args['partitions'])
     return response
Example #16
    def post(self):
        args = parser.parse_args()
        LOG.info(
            '[request] username={username}'.format(username=args['username']))
        LOG.info('[request] topic={topic}'.format(topic=args['topic']))
        LOG.info('[request] role={role}'.format(role=args['role']))

        if args['role'] == 'producer':
            response = kafka_acls.create_producer_acl(args['username'],
                                                      args['topic'])
        elif args['role'] == 'consumer':
            response = kafka_acls.create_consumer_acl(args['username'],
                                                      args['topic'])
        else:
            response = 'invalid role'

        return response
Example #17
    def search_documents(self,
                         q,
                         verse=None,
                         all_docs=False,
                         doc_count=10,
                         prefixed_search=True,
                         language=None):
        """
        Since Elasticsearch doesn't support more than 10,000 hits per request,
        we currently stick to at most 10,000 retrieved docs; retrieval of all
        matched docs can be implemented later.
        """
        if q.strip() != "":
            query = {
                "query": {
                    "bool": {
                        "must": {
                            "multi_match": {
                                "fields": ["content", "language"],
                                "query": q,
                                "type": "cross_fields"  #,
                                # "use_dis_max": False
                                # , "analyzer":"autocomplete"
                            }
                        }
                    }
                }
            }

            if verse is not None:
                query["query"]["bool"]["filter"] = {
                    "match": {
                        "verse_id": verse
                    }
                }
            # note: if both verse and language are given, the language filter
            # below replaces the verse filter set above
            if language is not None:
                query["query"]["bool"]["filter"] = {
                    "match": {
                        "language": language
                    }
                }
        else:
            query = {
                "query": {
                    "bool": {
                        "must": {
                            "match": {
                                "verse_id": verse
                            }
                        }
                    }
                }
            }

        query["size"] = 10000 if all_docs or doc_count > 10000 else doc_count

        LOG.info(query)
        resp = requests.get(
            self.ealstic_search_autocomplete_url
            if prefixed_search else self.ealstic_search_normal_url,
            data=json.dumps(query),
            headers={'Content-Type': 'application/json'})
        return resp.json()
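
As a usage illustration, a hypothetical call such as search_documents("in the beginning", verse=1001) with the default doc_count=10 would send a request body equivalent to:

# assumed example values; mirrors the dict built by search_documents above
example_query = {
    "query": {
        "bool": {
            "must": {
                "multi_match": {
                    "fields": ["content", "language"],
                    "query": "in the beginning",
                    "type": "cross_fields"
                }
            },
            "filter": {"match": {"verse_id": 1001}}
        }
    },
    "size": 10
}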
Example #18
 def delete_topic(self, topic):
     cmd = '--delete --zookeeper localhost:2181 --topic {topic}'.format(
         topic=topic)
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[delete_topic] {stdout}'.format(stdout=stdout))
     return stdout
Example #19
 def list_topic(self):
     cmd = '--list --zookeeper localhost:2181'
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[list_topic] {stdout}'.format(stdout=stdout))
     return stdout
Example #20
 def get_user(self, username):
     cmd = '--zookeeper localhost:2181 --describe --entity-type users --entity-name {username}'.format(
         username=username)
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[get_user] {stdout}'.format(stdout=stdout))
     return stdout
Example #21
 def list_user(self):
     cmd = '--zookeeper localhost:2181 --describe --entity-type users'
     _, stdout = self._exec_cmd(cmd)
     LOG.info('[list_user] {stdout}'.format(stdout=stdout))
     return stdout