Пример #1
0
def retreiveEntityIds(queries, limitByDistribution):
    """Run every search query and collect the returned entity IDs.

    Each item of ``queries`` is indexed with ``'label'`` and ``'query'``.
    Labels not yet present in the module-level ``labelMapping`` list are
    appended to it, so the list is mutated as a side effect.

    When ``limitByDistribution`` is truthy, the per-query result limit is
    ``limitEntityQuery`` scaled by the ``labelDistributionAdjusted`` entry
    found at the label's position in ``labelMapping`` (truncated to an int);
    otherwise ``limitEntityQuery`` is used unchanged.

    Returns a two-element list: the collected ``[entityId, label]`` pairs
    and the (possibly extended) ``labelMapping``.
    """
    global limitEntityQuery
    global labelMapping

    logger.info('Retreiving entity IDs...')
    collected = []

    for currentQuery in queries:
        logger.debug('Query:')
        logger.debug(currentQuery)

        queryLabel = currentQuery['label']
        if queryLabel not in labelMapping:
            labelMapping.append(queryLabel)

        if not limitByDistribution:
            maxResults = limitEntityQuery
        else:
            # Scale the global limit by the share configured for this label.
            maxResults = int(
                limitEntityQuery *
                labelDistributionAdjusted[labelMapping.index(queryLabel)])

        searchPath = ('/search?' + currentQuery['query'] +
                      '&limit=' + str(maxResults))
        result = elastic_query.sendQuery(searchPath, True)

        for hit in result['entities']:
            logger.debug('Received entity ' + str(hit['entityId']) +
                         ', label: ' + queryLabel + '.')
            collected.append([hit['entityId'], queryLabel])

    return [collected, labelMapping]
Пример #2
0
def retreiveEntityIds(queries, limitByDistribution):
   """Query the search endpoint once per entry and gather labelled entity IDs.

   Every query item supplies a ``'label'`` and a ``'query'`` string. A label
   seen for the first time is appended to the module-level ``labelMapping``
   list (side effect).

   The ``limit`` sent with each request is ``limitEntityQuery``; if
   ``limitByDistribution`` is truthy it is first multiplied by the
   ``labelDistributionAdjusted`` value at the label's index in
   ``labelMapping`` and truncated to an int.

   Returns ``[pairs, labelMapping]`` where ``pairs`` is a list of
   ``[entityId, label]`` lists.
   """
   global limitEntityQuery
   global labelMapping

   logger.info('Retreiving entity IDs...')
   pairs = []

   for item in queries:
      logger.debug('Query:')
      logger.debug(item)

      itemLabel = item['label']
      if itemLabel not in labelMapping:
         labelMapping.append(itemLabel)

      resultLimit = limitEntityQuery
      if limitByDistribution:
         # Shrink/grow the limit according to this label's configured share.
         resultLimit = int(resultLimit * labelDistributionAdjusted[labelMapping.index(itemLabel)])

      requestPath = '/search?' + item['query'] + '&limit=' + str(resultLimit)
      reply = elastic_query.sendQuery(requestPath, True)

      for found in reply['entities']:
         logger.debug('Received entity ' + str(found['entityId']) + ', label: ' + itemLabel + '.')
         pairs.append([found['entityId'], itemLabel])

   return [pairs, labelMapping]
Пример #3
0
def retreiveImageIds(entityIds):
   """Look up the images attached to each entity and label them.

   ``entityIds`` is a sequence of items whose element 0 is the entity ID and
   element 1 is its label. For every entity, ``/entity/<id>`` is requested
   and at most ``imagePerEntity`` (module-level) images are taken from the
   ``'images'`` list of the response. A value of 0 for ``imagePerEntity``
   yields no images at all.

   Returns a list of ``[imageId, label]`` lists.
   """
   logger.info('Retreiving image IDs linked to entities...')
   collected = []

   for pair in entityIds:
      details = elastic_query.sendQuery('/entity/' + str(pair[0]), True)

      for taken, picture in enumerate(details['images']):
         # Stop as soon as the per-entity quota is used up (or is zero).
         if imagePerEntity == 0 or taken >= imagePerEntity:
            break

         logger.debug('Received image ' + str(picture['imageId']) + ', label: ' + str(pair[1]) + '.')
         collected.append([picture['imageId'], pair[1]])

   return collected
Пример #4
0
def retreiveImageIds(entityIds):
    """Fetch the image IDs linked to every entity, keeping the entity label.

    Each item of ``entityIds`` carries the entity ID at index 0 and the label
    at index 1. The entity is requested via ``/entity/<id>`` and the first
    ``imagePerEntity`` (module-level) entries of the response's ``'images'``
    list are kept; when ``imagePerEntity`` is 0 nothing is kept.

    Returns a list of ``[imageId, label]`` lists.
    """
    logger.info('Retreiving image IDs linked to entities...')
    result = []

    for item in entityIds:
        entityResponse = elastic_query.sendQuery(
            '/entity/' + str(item[0]), True)

        for position, img in enumerate(entityResponse['images']):
            quotaReached = imagePerEntity == 0 or position >= imagePerEntity
            if quotaReached:
                break

            logger.debug('Received image ' + str(img['imageId']) +
                         ', label: ' + str(item[1]) + '.')
            result.append([img['imageId'], item[1]])

    return result
Пример #5
0
def streamFiles(exportFolder, dictionary, labelMapping):
    """Download images into train/test folders and write label index files.

    For every label in ``labelMapping`` a sub-folder is created below
    ``<exportFolder>/train/`` and ``<exportFolder>/test/`` if missing.

    Then, for each ``imageId -> label`` entry of ``dictionary``:

    * unless the module-level ``harvestingTest`` flag is set, the image is
      requested via ``/image/<id>``; if the request yields ``None`` the ID
      is appended to ``<exportFolder>/invalid_image_ids.txt`` and the entry
      is skipped (it does not count towards the train/test split);
    * every ``nthAsTestImage``-th processed image (starting with the first)
      is assigned to the test set, all others to the training set;
    * a line ``"<target path> <label index>"`` is appended to
      ``label_info_test.txt`` or ``label_info_training.txt``.

    Raises ``ValueError`` if a label from ``dictionary`` is not contained in
    ``labelMapping``.
    """
    logger.info('Downloading image files...')

    nthAsTestImage = 5
    # When the harvesting-test flag is set only the index files are written.
    downloadImages = not harvestingTest

    invalidLogPath = exportFolder + '/invalid_image_ids.txt'
    trainingFolderPath = exportFolder + '/train/'
    testFolderPath = exportFolder + '/test/'
    trainingInfoPath = exportFolder + '/label_info_training.txt'
    testInfoPath = exportFolder + '/label_info_test.txt'

    # Make sure every label has a folder in both the train and test trees.
    for label in labelMapping:
        for basePath in (trainingFolderPath, testFolderPath):
            labelFolder = basePath + label + '/'
            if not os.path.exists(os.path.dirname(labelFolder)):
                os.makedirs(os.path.dirname(labelFolder))
                logger.debug('Created folder: ' + labelFolder)

    if downloadImages:
        logger.info('Downloading images, every ' + str(nthAsTestImage) +
                    'th is beeing picked as a test image.')
    else:
        logger.info('Skipping image downloads. Just writing index info files.')

    counter = 0  # number of images processed so far (invalid ones excluded)

    for imageId, label in dictionary.items():
        imageData = None

        if downloadImages:
            imageData = elastic_query.sendQuery('/image/' + str(imageId),
                                                False)
            if imageData is None:
                # Remember the failed ID and move on without counting it.
                with open(invalidLogPath, 'a') as log:
                    log.write(str(imageId) + '\n')
                continue

        imageFileName = str(imageId) + '.jpg'

        if counter % nthAsTestImage == 0:
            targetPath = testFolderPath + label + '/' + imageFileName
            infoPath = testInfoPath
        else:
            targetPath = trainingFolderPath + label + '/' + imageFileName
            infoPath = trainingInfoPath

        labelInfoString = targetPath + ' ' + str(
            labelMapping.index(label)) + '\n'

        if downloadImages:
            # Binary mode: a JPEG payload must not pass through newline or
            # encoding translation.
            # NOTE(review): assumes sendQuery(..., False) returns raw bytes
            # - confirm against elastic_query.
            with open(targetPath, 'wb') as out:
                out.write(imageData)

        with open(infoPath, 'a') as info:
            info.write(labelInfoString)

        counter += 1

    logger.info('\nDone.')
Пример #6
0
def streamFiles(exportFolder, dictionary, labelMapping):
   """Download images into train/test folders and write label index files.

   For every label in ``labelMapping`` a sub-folder is created below
   ``<exportFolder>/train/`` and ``<exportFolder>/test/`` if missing.

   Then, for each ``imageId -> label`` entry of ``dictionary``:

   * unless the module-level ``harvestingTest`` flag is set, the image is
     requested via ``/image/<id>``; if the request yields ``None`` the ID
     is appended to ``<exportFolder>/invalid_image_ids.txt`` and the entry
     is skipped (it does not count towards the train/test split);
   * every ``nthAsTestImage``-th processed image (starting with the first)
     is assigned to the test set, all others to the training set;
   * a line ``"<target path> <label index>"`` is appended to
     ``label_info_test.txt`` or ``label_info_training.txt``.

   Raises ``ValueError`` if a label from ``dictionary`` is not contained in
   ``labelMapping``.
   """
   logger.info('Downloading image files...')

   nthAsTestImage = 5
   # When the harvesting-test flag is set only the index files are written.
   downloadImages = not harvestingTest

   invalidLogPath = exportFolder + '/invalid_image_ids.txt'
   trainingFolderPath = exportFolder + '/train/'
   testFolderPath = exportFolder + '/test/'
   trainingInfoPath = exportFolder + '/label_info_training.txt'
   testInfoPath = exportFolder + '/label_info_test.txt'

   # Make sure every label has a folder in both the train and test trees.
   for label in labelMapping:
      for basePath in (trainingFolderPath, testFolderPath):
         labelFolder = basePath + label + '/'
         if not os.path.exists(os.path.dirname(labelFolder)):
            os.makedirs(os.path.dirname(labelFolder))
            logger.debug('Created folder: ' + labelFolder)

   if downloadImages:
      logger.info('Downloading images, every '+ str(nthAsTestImage) + 'th is beeing picked as a test image.')
   else:
      logger.info('Skipping image downloads. Just writing index info files.')

   counter = 0  # number of images processed so far (invalid ones excluded)

   for imageId, label in dictionary.items():
      imageData = None

      if downloadImages:
         imageData = elastic_query.sendQuery('/image/' + str(imageId), False)
         if imageData is None:
            # Remember the failed ID and move on without counting it.
            with open(invalidLogPath, 'a') as log:
               log.write(str(imageId) + '\n')
            continue

      imageFileName = str(imageId) + '.jpg'

      if counter % nthAsTestImage == 0:
         targetPath = testFolderPath + label + '/' + imageFileName
         infoPath = testInfoPath
      else:
         targetPath = trainingFolderPath + label + '/' + imageFileName
         infoPath = trainingInfoPath

      labelInfoString = targetPath + ' ' + str(labelMapping.index(label)) + '\n'

      if downloadImages:
         # Binary mode: a JPEG payload must not pass through newline or
         # encoding translation.
         # NOTE(review): assumes sendQuery(..., False) returns raw bytes
         # - confirm against elastic_query.
         with open(targetPath, 'wb') as out:
            out.write(imageData)

      with open(infoPath, 'a') as info:
         info.write(labelInfoString)

      counter += 1

   logger.info('\nDone.')