Example #1
import sys
import time
import json
import argparse
from collections import deque

# NOTE: this excerpt also relies on module-level names from the surrounding
# pipeline code (MONGO, BIBCODE_FILES, BIBCODES_PER_JOB, logger,
# readBibcodesFromFile, ReadRecords, MongoConnection, UpdateRecords,
# SolrUpdater, RabbitMQWorker, psettings, publish).
def main(MONGO=MONGO, *args):
  if args:
    # Any forwarded argument list is appended to sys.argv before parsing
    sys.argv.extend(*args)

  parser = argparse.ArgumentParser()

  parser.add_argument(
    '--target-bibcodes',
    nargs='*',
    default=[],
    dest='targetBibcodes',
    help='Only analyze the specified bibcodes, and ignore their JSON fingerprints. Only works when --async is not set. Use the syntax @filename.txt to read these from file (1 bibcode per line)'
    )

  parser.add_argument(
    '--async',
    default=False,
    action='store_true',
    dest='use_async',  # 'async' is a keyword in Python 3, so args.async would be a syntax error
    help='start in async mode'
    )

  parser.add_argument(
    '--dont-init-lookers-cache',
    default=False,
    action='store_true',
    dest='dont_init_lookers_cache',
    help="don't call ADSExports2.init_lookers_cache()"
    )

  parser.add_argument(
    '--load-records-from-pickle',
    nargs='*',
    default=None,
    dest='load_records_from_pickle',
    help='Load XML records from a pickle instead of ADSExports',
    )

  parser.add_argument(
    '--dump-output-to-file',
    nargs=1,
    type=str,
    default=None,
    dest='outfile',
    help='Output records to a file'
    )

  parser.add_argument(
    '--ignore-json-fingerprints',
    default=False,
    action='store_true',
    dest='ignore_json_fingerprints',
    help='ignore JSON fingerprints when finding new records to update (i.e., force update)',
    )

  parser.add_argument(
    '--process-deletions',
    default=False,
    action='store_true',
    dest='process_deletions',
    help='Find orphaned bibcodes in the mongodb, then send these bibcodes to delete via rabbitMQ. No updates will be processed when this flag is set.',
    )

  parser.add_argument(
    '--max-deletions',
    default=2000,
    type=int,
    dest='max_deletions',
    help='Maximum number of deletions to attempt; if over this limit, exit and log an error',
    )

  args = parser.parse_args()

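  # Warm the ADSExports lookers cache unless explicitly disabled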
  if not args.dont_init_lookers_cache:
    start = time.time()
    logger.info("Calling init_lookers_cache()")
    ReadRecords.INIT_LOOKERS_CACHE()
    logger.info("init_lookers_cache() returned in %0.1f sec" % (time.time()-start))

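  # Read the full canonical bibcode list, then optionally narrow it to the
  # explicitly requested target bibcodes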
  records = readBibcodesFromFile(BIBCODE_FILES)
  targets = None
  if args.targetBibcodes:
    if args.targetBibcodes[0].startswith('@'):
      with open(args.targetBibcodes[0].replace('@','')) as fp:
        targetBibcodes = deque([L.strip() for L in fp.readlines() if L and not L.startswith('#')])
    else:
      targetBibcodes = args.targetBibcodes
    targets = {bibcode: records[bibcode] for bibcode in targetBibcodes}

  # Canonicalize; each entry becomes a (bibcode, JSON fingerprint) tuple
  records = deque(ReadRecords.canonicalize_records(records, targets))
  total = float(len(records))  # Saved to report progress later

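  # Overwriting each stored fingerprint with 'ignore' makes every record look
  # changed downstream, forcing a full update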
  if args.ignore_json_fingerprints:
    records = deque([(r[0],'ignore') for r in records])

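  # Deletion mode: diff mongo's contents against the canonical record list,
  # publish the orphaned bibcodes to the deletion queue, and exit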
  if args.process_deletions:
    start = time.time()
    logger.info("Processing deletions. This will block for several hours until the database is compared, then exit.")
    logger.warning("No updates will be processed when --process-deletions is set")
    mongo = MongoConnection.PipelineMongoConnection(**MONGO)
    results = mongo.getAllBibcodes()
    count = mongo.db[mongo.collection].count()
    mongo.close()
    if len(results) != count:
      logger.warning("len getAllBibcodes (%s) != len count (%s). Continue anyways." % (len(results), count))
    records = [i[0] for i in records]
    payload = list(set(results).difference(set(records)))
    if len(payload) > args.max_deletions:
      logger.critical("|".join(payload))
      logger.critical("Too many deletions: {} > {}".format(len(payload), args.max_deletions))
      sys.exit(1)
    w = RabbitMQWorker()   
    w.connect(psettings.RABBITMQ_URL)
    publish(w,payload,routing_key='DeletionRoute')
    logger.info("Found %s orphaned bibcodes in %0.1f seconds." % (len(payload),time.time()-start))
    sys.exit(0)


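  # Synchronous path: find, read, merge, and upsert the records in-process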
  if not args.use_async:
    mongo = MongoConnection.PipelineMongoConnection(**MONGO)
    records = mongo.findNewRecords(records)
    if args.load_records_from_pickle:
      records = ReadRecords.readRecordsFromPickles(records,args.load_records_from_pickle)
    else:
      records = ReadRecords.readRecordsFromADSExports(records)
    merged = UpdateRecords.mergeRecords(records)
    if args.outfile:
      with open(args.outfile[0],'w') as fp:
        r = {'merged': merged, 'nonmerged': records}
        json.dump(r,fp,indent=1)
    else:
      bibcodes = mongo.upsertRecords(merged)
      #SolrUpdater.solrUpdate(bibcodes)
  # Asynchronous path: batch the remaining records and publish them to RabbitMQ
  else:
    w = RabbitMQWorker()   
    w.connect(psettings.RABBITMQ_URL)
    lastLogged = None
    while records:
      payload = []
      while len(payload) < BIBCODES_PER_JOB:
        try:
          payload.append( records.popleft() )
        except IndexError:
          break
      percent = round((1 - len(records)/total) * 100.0)
      # Log progress at 5% milestones, once each
      if not percent % 5 and percent != lastLogged:
        lastLogged = percent
        logger.info("There are %s records left (%0.1f%% completed)" % (len(records), percent))
      publish(w, payload)
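
A minimal sketch of how this entry point might be driven, assuming the module above is importable and that MONGO is its settings dict; the flag values here are illustrative, not from the source:

if __name__ == '__main__':
  # main() appends the forwarded argument list to sys.argv before parsing,
  # so flags can be supplied programmatically as well as on the command line
  main(MONGO, ['--ignore-json-fingerprints', '--dump-output-to-file', 'out.json'])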