Пример #1
0
 def get(self, source_id, source_lang, target_lang):
     """Look up the target id mapped to ``source_id`` for a language pair.

     The MongoDB collection name is derived from both languages through
     ``TMUtils.lang2es_index`` and ``TMUtils.es_index2mapdb``.

     Returns the ``target_id`` field of a document whose ``source_id``
     matches, or ``None`` when there is no such document.
     """
     src_index = TMUtils.lang2es_index(source_lang)
     tgt_index = TMUtils.lang2es_index(target_lang)
     m_index = TMUtils.es_index2mapdb(src_index, tgt_index)
     # find_one() fetches at most one document in a single round trip and
     # avoids Cursor.count(), which issues a second query and no longer
     # exists in PyMongo 4.
     doc = self.mongo_db[m_index].find_one({'source_id': source_id})
     if doc is None:
         return None
     return doc['target_id']
Пример #2
0
 def add_segment(self, segment):
     """Upsert the mapping document of ``segment`` into MongoDB.

     The collection is selected from the segment's source and target
     languages; the document is matched on ``source_id`` and its fields
     are set from ``self._segment2doc(segment)``.

     Returns the result object of ``update_one``.
     """
     src_index = TMUtils.lang2es_index(segment.source_lang)
     tgt_index = TMUtils.lang2es_index(segment.target_lang)
     collection = self.mongo_db[TMUtils.es_index2mapdb(src_index, tgt_index)]
     # TODO: do not update if creation date is older than existing one
     selector = {'source_id': segment.source_id}
     changes = {'$set': self._segment2doc(segment)}
     # upsert=True: insert the document when nothing matches the selector
     return collection.update_one(selector, changes, upsert=True)
Пример #3
0
    def add_segments(self, segments):
        """Store the mapping documents of a batch of segments.

        The database name is derived from the language pair of the FIRST
        segment only; every segment of the batch is written to that one
        database.

        Returns ``None`` for an empty batch, otherwise the value returned
        by the database's ``update`` call.
        """
        if not segments:
            return None
        first = segments[0]
        m_index = TMUtils.es_index2mapdb(
            TMUtils.lang2es_index(first.source_lang),
            TMUtils.lang2es_index(first.target_lang))
        try:
            db = self.server[m_index]
        except Exception:
            # Lookup failed -- presumably the database does not exist yet
            # (TODO confirm the exact exception type of the server client).
            # ``except Exception`` instead of a bare ``except:`` lets
            # KeyboardInterrupt / SystemExit propagate.
            db = self.server.create(m_index)

        return db.update([self._segment2doc(s) for s in segments])
Пример #4
0
 def get(self, source_id, source_lang, target_lang):
     """Return the target id mapped to ``source_id`` for a language pair.

     Raises ``Exception`` when no table is registered in ``self.tables``
     for the language pair.

     Returns the first column of the first result row, or ``None`` when
     the query yields no row.
     """
     tname = TMUtils.es_index2mapdb(TMUtils.lang2es_index(source_lang),
                                    TMUtils.lang2es_index(target_lang))
     if tname not in self.tables:
         raise Exception("Language pair : {} - {} doesn't exist".format(
             source_lang, target_lang))
     # TODO: implement bidirectional query
     t = self.tables[tname]
     # NOTE(review): if self.tables holds SQLAlchemy Table objects, columns
     # are normally reached through ``t.c.<name>`` -- confirm that
     # ``t.target_id`` / ``t.source_id`` resolve as intended.
     res = self.conn.execute(
         t.select(t.target_id).where(t.source_id == source_id))
     if not res:
         return None
     # fetchone() gives None for an empty result set; indexing it directly
     # would raise TypeError instead of reporting "not found".
     row = res.fetchone()
     if row is None:
         return None
     return row[0]
Пример #5
0
  def get(self, lang, id):
    """Fetch one document by id from the index that belongs to ``lang``.

    Returns the ``_source`` of the hit, or ``None`` when the index does
    not exist or the lookup gives back a falsy result.
    """
    es_index = TMUtils.lang2es_index(lang)
    if self.index_exists(es_index):
      doc = self.es.get(index=es_index, id=id)
      if doc:
        return doc['_source']
    return None
Пример #6
0
  def scan(self, lang, filter = None):
    """Generator over the hits of a ``TMDbQuery`` scan on the index of ``lang``.

    ``filter`` is handed to ``TMDbQuery`` unchanged. Nothing is yielded
    when the index does not exist.
    """
    es_index = TMUtils.lang2es_index(lang)
    if self.index_exists(es_index):
      scanner = TMDbQuery(es=self.es, index=es_index, filter=filter)
      for doc in scanner.scan():
        # Build segment by querying map and target index
        yield doc
Пример #7
0
    def add_segments(self, segments):
        """Queue one update per segment in an unordered bulk operation and run it.

        The target collection is derived from the language pair of the
        FIRST segment; every segment is matched on ``source_id`` and its
        fields are set from ``self._segment2doc(segment)``.

        Returns ``None`` when ``segments`` is empty (there is nothing to
        execute), the result of ``bulk.execute()`` on success, or the
        ``details`` of the ``BulkWriteError`` (also logged) on failure.
        """
        bulk = None
        for segment in segments:
            # Explicit None test: creation must happen exactly once and must
            # not depend on the truthiness of the bulk object.
            if bulk is None:
                m_index = TMUtils.es_index2mapdb(
                    TMUtils.lang2es_index(segment.source_lang),
                    TMUtils.lang2es_index(segment.target_lang))
                bulk = self.mongo_db[m_index].initialize_unordered_bulk_op()
            bulk.find({'source_id': segment.source_id}) \
              .update_one({'$set': self._segment2doc(segment) })

        if bulk is None:
            # Empty batch: no bulk operation was ever created, so calling
            # execute() would fail on None.
            return None

        try:
            return bulk.execute()
        except BulkWriteError as bwe:
            logging.error(bwe.details)
            return bwe.details
Пример #8
0
 def mget(self, ids_lang):
   """Fetch several documents with one ``mget`` request.

   ``ids_lang`` is an iterable of ``(lang, id)`` pairs; each language is
   translated to its index name with ``TMUtils.lang2es_index``.

   Returns ``[]`` for empty input, ``None`` when the response is falsy,
   otherwise a list with the ``_source`` of every returned doc (``None``
   for entries that carry no ``_source``).
   """
   if not ids_lang:
     return []
   docs = []
   for lang, doc_id in ids_lang:
     docs.append({'_index': TMUtils.lang2es_index(lang), '_id': doc_id})
   response = self.es.mget(body={'docs': docs})
   if not response:
     return None
   return [entry.get('_source', None) for entry in response['docs']]
Пример #9
0
  def scan_pivot(self, pivot_lang, langs):
    """Generator over the ids of documents in the index of ``pivot_lang``.

    One ``match`` query on ``target_language`` is chained onto the search
    for every entry of ``langs``. Nothing is yielded when the index does
    not exist.
    """
    pivot_index = TMUtils.lang2es_index(pivot_lang)
    if self.index_exists(pivot_index):
      s = Search(using=self.es, index=pivot_index)
      for target in langs:
        s = s.query('match', target_language=target)
      for hit in s.scan():
        yield hit.meta.id
Пример #10
0
    def _get_index(self, source_lang, target_lang, create_missing=False):
        """Resolve the map index of a language pair, in either direction.

        Returns a tuple ``(index_name, is_reversed)``:

        * the direct (source -> target) index exists: ``(m_index, False)``
        * only the reverse (target -> source) index exists: ``(r_index, True)``
        * neither exists and ``create_missing`` is false: ``(None, None)``
        * neither exists and ``create_missing`` is true: creation of the
          direct index is attempted, ``self.refresh_lang_graph()`` is
          called and ``(m_index, False)`` is returned.
        """
        m_index = TMUtils.es_index2mapdb(TMUtils.lang2es_index(source_lang),
                                         TMUtils.lang2es_index(target_lang))

        if self.es.indices.exists(index=m_index):
            return m_index, False
        # Try reverse index
        r_index = TMUtils.es_index2mapdb(TMUtils.lang2es_index(target_lang),
                                         TMUtils.lang2es_index(source_lang))
        # Found reverse index - use it
        if self.es.indices.exists(index=r_index):
            return r_index, True
        if not create_missing:
            return None, None
        # Neither direct, nor reverse index exist - create a direct one
        try:
            self.es.indices.create(index=m_index)
        except Exception:
            # Best effort: a failed creation is deliberately ignored and the
            # direct index name is still returned. ``except Exception``
            # (not a bare ``except:``) keeps KeyboardInterrupt / SystemExit
            # propagating.
            pass
        self.refresh_lang_graph()
        return m_index, False
Пример #11
0
 def add_segment(self, segment, ftype):
   """Index one side of ``segment`` in ElasticSearch.

   ``ftype`` is the attribute prefix: the document id is read from
   ``<ftype>_id`` and the language from ``<ftype>_language``.

   Returns the document id.
   """
   # Add segment source and target texts to the correspondent index of ElasticSearch
   doc_id = getattr(segment, ftype + '_id')
   lang = getattr(segment, ftype + '_language')
   es_index = TMUtils.lang2es_index(lang)
   self.es.index(index=es_index,
                 doc_type=self.DOC_TYPE,
                 id=doc_id,
                 body=self._segment2doc(segment, ftype))
   return doc_id
Пример #12
0
 def _segment2table(self, segment, suffix=None):
     """Return the mapping table for the segment's language pair.

     The table name comes from the segment's source/target languages, with
     ``suffix`` appended when given. A table not yet present in
     ``self.tables`` is defined, cached there, bound to ``self.conn`` and
     created with ``checkfirst=True``.
     """
     src_index = TMUtils.lang2es_index(segment.source_lang)
     tgt_index = TMUtils.lang2es_index(segment.target_lang)
     tname = TMUtils.es_index2mapdb(src_index, tgt_index)
     if suffix:
         tname += suffix
     if tname in self.tables:
         return self.tables[tname]

     md = MetaData()
     table = Table(
         tname,
         md,
         Column('id', Integer, primary_key=True),
         Column('source_id', GUID, index=True),
         Column('target_id', GUID, index=True),
         Column('creation_date', TIMESTAMP),
         Column('change_date', TIMESTAMP),
         mysql_engine='InnoDB',
         mysql_charset='utf8')
     # Cache before creating, so the entry stays registered even if the
     # create call below raises.
     self.tables[tname] = table
     md.bind = self.conn
     table.create(checkfirst=True)
     return table
Пример #13
0
  def mquery(self, lang, limit, q_list, filter=None):
    """Generator over the responses of a ``TMDbQuery`` built from ``q_list``.

    ``q_list``, ``filter`` and ``limit`` are handed to ``TMDbQuery``
    unchanged. Each iteration of the query gives a ``(response, q)`` pair;
    only the response is yielded. Nothing is yielded when the index of
    ``lang`` does not exist.
    """
    es_index = TMUtils.lang2es_index(lang)
    if self.index_exists(es_index):
      # Query source ES for the text
      batch = TMDbQuery(es=self.es,
                        index=es_index,
                        q=q_list,
                        filter=filter,
                        limit=limit)
      for response, _ in batch():
        yield response
Пример #14
0
 def query(self, lang, qstring, filter = None):
   """Generator over ``(hit, q)`` pairs for a query on the index of ``lang``.

   ``qstring`` and ``filter`` are handed to ``TMDbQuery`` unchanged. Every
   hit of every response is yielded together with the ``q`` the query
   paired with that response. Nothing is yielded when the index does not
   exist.
   """
   es_index = TMUtils.lang2es_index(lang)
   if self.index_exists(es_index):
     # Query source ES for the text
     runner = TMDbQuery(es=self.es, index=es_index, q=qstring, filter=filter)
     for response, q in runner():
       for hit in response:
         yield hit, q
Пример #15
0
  def delete(self, lang, ids):
    """Bulk-delete the documents with the given ids from the index of ``lang``.

    Returns the status from ``helpers.bulk``; if that call raises, the
    exception is logged as a warning and its string form is returned
    instead.
    """
    es_index = TMUtils.lang2es_index(lang)

    actions = []
    for doc_id in ids:
      actions.append({'_op_type': 'delete',
                      '_id': doc_id,
                      '_index': es_index,
                      '_type': self.DOC_TYPE})
    # Bulk delete
    try:
      return helpers.bulk(self.es, actions)
    except Exception as e:
      logging.warning(e)
      return str(e)
Пример #16
0
 def _segment2es_bulk(self, segments, ftype, op_type, f_action):
   """Send one side of many segments to ElasticSearch in a single bulk call.

   ``ftype`` is the attribute prefix (``<ftype>_id``, ``<ftype>_language``),
   ``op_type`` becomes the ``_op_type`` of every action and
   ``f_action(segment, ftype)`` supplies its ``_source``. Segments whose id
   was already seen in this batch are skipped.

   Returns the result of ``helpers.bulk``.
   """
   # Add segment source and target texts to the correspondent index of ElasticSearch in a batch
   id_attr = ftype + '_id'
   lang_attr = ftype + '_language'
   seen_ids = set()
   actions = []
   for segment in segments:
     doc_id = getattr(segment, id_attr)
     if doc_id in seen_ids:
       continue  # avoid duplicates in the same batch
     seen_ids.add(doc_id)
     actions.append({
       '_id': doc_id,
       '_index': TMUtils.lang2es_index(getattr(segment, lang_attr)),
       '_type': self.DOC_TYPE,
       '_op_type': op_type,
       '_source': f_action(segment, ftype),
     })
   # Bulk insert
   logging.info("Bulk upsert: {}".format(actions))
   result = helpers.bulk(self.es, actions)
   # refresh list of indexes (could have been created during insert)
   self.refresh()
   return result
Пример #17
0
 def get(self, source_id, source_lang, target_lang):
     """Return the target UUID mapped to ``source_id`` for a language pair.

     The document is looked up by ``source_id.hex`` in the database named
     after the language pair. Returns ``uuid.UUID`` built from the
     document's ``target_id``, or ``None`` when the lookup is falsy.
     """
     src_index = TMUtils.lang2es_index(source_lang)
     tgt_index = TMUtils.lang2es_index(target_lang)
     db = self.server[TMUtils.es_index2mapdb(src_index, tgt_index)]
     doc = db.get(source_id.hex)
     if not doc:
         return None
     return uuid.UUID(doc['target_id'])
Пример #18
0
 def add_segment(self, segment):
     """Store the mapping document of one segment.

     The database is selected from the segment's source and target
     languages; the document from ``self._segment2doc(segment)`` is passed
     to the database's ``update`` as a one-element list.

     Returns whatever ``update`` returns.
     """
     src_index = TMUtils.lang2es_index(segment.source_lang)
     tgt_index = TMUtils.lang2es_index(segment.target_lang)
     db = self.server[TMUtils.es_index2mapdb(src_index, tgt_index)]
     docs = [self._segment2doc(segment)]
     return db.update(docs)