Example #1
def getIndexLocked(self, encQueueName):
    """Get contents of index file. Caller takes care of synchronization."""
    found = False
    queues = self.listCrawlQueues()
    queueName = urllib.unquote(encQueueName)
    for queue in queues:
      if queueName == queue.queueName:
        found = True
        break
    if not found:
      logging.error('Queue %s not found' % encQueueName)
      return (C.CRAWLQUEUE_NAME_NOT_FOUND, 0, 0, None)

    if queue.completeState == C.CRAWLQUEUE_STATUS_PENDING:
      logging.error('Queue %s is incomplete.' % encQueueName)
      return (C.CRAWLQUEUE_INCOMPLETE, 0, 0, None)

    index_file = self.getCrawlQueueIndexFileName(encQueueName)
    try:
      fileContents = gfile.GFile(index_file, 'r').readlines()
      captionTime = int(fileContents[0][:-1])
      numUrls = int(fileContents[1][:-1])
      return (C.CRAWLQUEUE_OK, captionTime, numUrls, fileContents[2:])
    except IOError, e:
      logging.error('Failed to get queue index file %s. IOError: %s' % \
                    (index_file, e))
      return (C.CRAWLQUEUE_INTERNAL_ERROR, 0, 0, None)
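# A runnable sketch of the index-file layout getIndexLocked expects:
# line 1 holds the capture time, line 2 the url count, and the remaining
# lines are per-host index entries (produced by WriteResult, Example #14
# below). The path and values are illustrative, not from the original code.
def _demo_parse_index(path):
  f = open(path, 'w')
  f.write('1300000000\n')               # captionTime
  f.write('42\n')                       # numUrls
  f.write('example.com\t42\t0\t512\n')  # one per-host index line
  f.close()
  lines = open(path, 'r').readlines()
  captionTime = int(lines[0][:-1])      # same parse as getIndexLocked
  numUrls = int(lines[1][:-1])
  return captionTime, numUrls, lines[2:]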
Example #2
def main(argv):
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')
  xm.setup_work_unit()
  if not gfile.Exists(FLAGS.workdir):
    gfile.MakeDirs(FLAGS.workdir)
  utils.dump_flags_to_file(os.path.join(FLAGS.workdir, 'flags.txt'))
  gin.bind_parameter('CuriosityEnvWrapper.scale_task_reward', 0.)
  gin.bind_parameter('CuriosityEnvWrapper.scale_surrogate_reward', 1.)
  gin.bind_parameter('AntWrapper.enable_die_condition',
                     FLAGS.ant_env_enable_die_condition)
  gin.parse_config_files_and_bindings(None,
                                      FLAGS.gin_bindings)
  # Hardware crashes with:
  # Failed to open library!
  # dlopen: cannot load any more object with static TLS
  FLAGS.renderer = 'software'

  work_unit = None
  if FLAGS.xm_xid != -1:
    work_unit = xmanager_api.XManagerApi().get_current_work_unit()

  visualize_curiosity_reward(work_unit)
  with gfile.GFile(os.path.join(FLAGS.workdir, 'gin_config.txt'), 'w') as f:
    f.write(gin.operative_config_str())
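# Example invocation (hypothetical module path and flag values; the flag
# names are taken from the snippet above):
#
#   python -m curiosity.visualize \
#     --workdir=/tmp/curiosity_viz \
#     --ant_env_enable_die_condition=false \
#     --gin_bindings='CuriosityEnvWrapper.exploration_reward = "none"'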
Example #3
  def getReport(self, collection, reportName):
    """Return body of a summary report."""
    self.logreplock.acquire()
    try:
      reports = self.getLogReports(collection, liblog.SUMMARY_REPORT)
      found = False
      incomplete = False
      for report in reports:
        if report.reportName == reportName:
          found = True
          if (report.completeState != COMPLETE and
              report.completeState != COMPLETE_REGENERATE):
            incomplete = True
          break

      if not found:
        logging.error('Report %s not found' % reportName)
        return (C.REPORT_NAME_NOT_FOUND, None, None)
      elif incomplete:
        logging.error('Report %s is incomplete' % reportName)
        return (C.REPORT_INCOMPLETE, report.toString(), None)

      (html_file, _) = liblog.get_report_filenames(self.entConfig,
                       liblog.SUMMARY_REPORT, reportName, collection)
      try:
        reportContents = gfile.GFile(html_file, 'r').read()
      except IOError:
        return (C.REPORT_INTERNAL_ERROR, report.toString(), None)

    finally:
      self.logreplock.release()
    return (C.REPORT_OK, report.toString(), reportContents)
Example #4
    def __init__(self,
                 encoder,
                 decoder,
                 mixer,
                 embed_path,
                 config,
                 model="baseline"):
        """
        Initializes your System

        :param encoder: an encoder that you constructed in train.py
        :param decoder: a decoder that you constructed in train.py
        :param mixer: a mixer that you constructed in train.py
        :param embed_path: path to the pretrained embeddings (an .npz file
            containing a "glove" array)
        :param config: configuration object (e.g. output_size, max_word_length)
        :param model: name of the model variant to build (default "baseline")
        """
        self.encoder = encoder
        self.mixer = mixer
        self.decoder = decoder
        self.config = config
        if GOOGLE3:
            self.pretrained_embeddings = np.load(
                gfile.GFile(embed_path))["glove"]
        else:
            self.pretrained_embeddings = np.load(embed_path)["glove"]
        self.pretrained_embeddings_special_tokens = self.pretrained_embeddings[
            0:3]
        self.pretrained_embeddings_words = self.pretrained_embeddings[3:]
        self.model = model

        # ==== set up placeholder tokens ========

        self.question_placeholder = tf.placeholder(tf.int32,
                                                   shape=(None, None))
        self.questions_lengths_placeholder = tf.placeholder(tf.int32,
                                                            shape=(None,))
        self.context_placeholder = tf.placeholder(
            tf.int32, shape=(None, self.config.output_size))
        self.context_lengths_placeholder = tf.placeholder(tf.int32,
                                                          shape=(None,))
        self.answers_numeric_list = tf.placeholder(tf.int32, shape=(None, 2))
        self.dropout_placeholder = tf.placeholder(tf.float32, shape=())

        # context character embedding: batch, max context size in words, max_word_length
        self.context_tokens_placeholder = tf.placeholder(
            tf.int32, shape=[None, None, self.config.max_word_length])
        # question character embedding: batch, max question size in words, max_word_length
        self.question_tokens_placeholder = tf.placeholder(
            tf.int32, shape=[None, None, self.config.max_word_length])

        # ==== assemble pieces ====
        with tf.variable_scope(
                "qa", initializer=tf.uniform_unit_scaling_initializer(1.0)):
            self.setup_embeddings()
            self.setup_system()
            self.setup_loss()
            self.setup_train_op()

        # ==== set up training/updating procedure ====
        self.saver = tf.train.Saver()
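# A runnable sketch (TF1 graph mode, matching the snippet) of the
# placeholder/feed_dict pattern used above. The names and shapes here are
# illustrative, not part of the class's API.
import numpy as np
import tensorflow as tf

question_ph = tf.placeholder(tf.int32, shape=(None, None))  # batch, time
lengths_ph = tf.placeholder(tf.int32, shape=(None,))        # batch
total_len = tf.reduce_sum(lengths_ph)
with tf.Session() as sess:
    total = sess.run(total_len,
                     feed_dict={question_ph: np.zeros((2, 5), np.int32),
                                lengths_ph: np.array([5, 3], np.int32)})
    assert total == 8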
Example #5
  def setCrawlQueuesLocked(self, queues):
    """Set the file content for the list of queues of given queueType
    on the given collection."""
    try:
      gfile.GFile(self.getCrawlQueueListFileName(), 'w').write(
        string.join(map(lambda x: x.toString(), queues), '\n'))
      return True
    except Exception, e:
      logging.error('Cannot write CrawlQueue list. %s' % e)
      return False
Example #6
  def getstatus(self, filename):
    '''Return the first line of a feed status file.'''
    try:
      dirname = self.cfg.getGlobalParam('FEED_STATUS_DIR')
      filename = os.path.join(dirname, filename)
      # Read only the first line; the rest of the file may be too big.
      # See bug 76929.
      out = gfile.GFile(filename).readline()
    except IOError, e:
      logging.error(str(e))
      return "1"
    return out
Example #7
def makeValid(valid_file, logs):
    """Make a validate file out of a list of Log objects."""
    try:
        out = gfile.GFile(valid_file, 'w')
        for log in logs:
            out.write('%s %d\n' % (log.file, log.size))
        out.close()
    except:
        logging.error('Error writing validation file %s' % valid_file)
        return 0

    return 1
Example #8
def _LoadConfigFile(config_path):
    """Converts a JSON config file into a dict.

    Args:
      config_path: Path to the config file.

    Returns:
      Dict representation of the config file.
    """
    with gfile.GFile(config_path, 'r') as input_file:
        config_data = json.load(input_file)
        return config_data
Example #9
def ReadFile(filename, startFpos, endFpos):
  """Read lines from file @filename, from position @startFpos to @endFpos."""
  try:
    fp = gfile.GFile(filename, 'r')
    fp.seek(startFpos)
    buffer = []
    while fp.tell() < endFpos:
      buffer.append(fp.readline())
    fp.close()
    return (C.CRAWLQUEUE_OK, buffer)
  except IOError, e:
    logging.error('Failed to read file %s. IOError: %s' % (filename, e))
    return (C.CRAWLQUEUE_INTERNAL_ERROR, None)
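# A runnable sketch of the byte-offset protocol ReadFile depends on: write
# three lines, record the byte span of the middle one, then read back just
# that span. Plain binary file objects are used here; gfile.GFile exposes
# the same read/seek/tell interface for local paths.
import tempfile

def _demo_read_span():
  tmp = tempfile.NamedTemporaryFile(delete=False)
  tmp.write(b'alpha\n')
  start = tmp.tell()          # offset where the middle line begins
  tmp.write(b'beta\n')
  end = tmp.tell()            # offset just past the middle line
  tmp.write(b'gamma\n')
  tmp.close()
  fp = open(tmp.name, 'rb')
  fp.seek(start)
  lines = []
  while fp.tell() < end:      # same loop shape as ReadFile above
    lines.append(fp.readline())
  fp.close()
  assert lines == [b'beta\n']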
Example #10
  def setLogReports(self, reportType, collection, reports):
    """Set the file content for list of reports of given reportType
    on given collection."""
    try:
      listfile = liblog.get_report_list_filename(self.entConfig, reportType,
                                                 collection)
      gfile.GFile(listfile, 'w').write(
        string.join(map(ReportToString, reports), '\n'))
    except IOError:
      logging.error('Cannot write new LogReport')
      return False

    return True
Example #11
  def listCrawlQueues(self):
    """Return a list of crawl queues."""
    filename = self.getCrawlQueueListFileName()
    self.cqueuelock.acquire()
    try:
      try:
        lines = gfile.GFile(filename, 'r').readlines()
      except IOError, e:
        logging.error('Failed to read crawlqueue list. IOError: %s.' % e)
        return []

      queues = []
      for line in lines:
        try:
          queues.append(StringToCrawlQueueForm(line))
        except ValueError, e:
          logging.error('Failed to parse one line: [%s]' % line)
      return queues
    finally:
      self.cqueuelock.release()
Example #12
def readValidFile(valid_file):
    """Read the file's valid file to get a list of file checkpoints."""
    checkpoints = {}

    try:
        lines = gfile.GFile(valid_file, 'r').readlines()
    except:
        logging.error('Can\'t open %s' % valid_file)
        return None

    for line in lines:
        try:
            file, size_s = string.split(line)
            size = int(size_s)
        except:
            logging.error('Invalid line in validation file %s: %s' %
                          (valid_file, line))
            continue

        checkpoints[file] = size
    return checkpoints
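# A runnable round-trip sketch of the validation-file format shared by
# makeValid (Example #7) and readValidFile above: one "<file> <size>" pair
# per line. The Log stand-in is a hypothetical namedtuple for illustration.
import collections

FakeLog = collections.namedtuple('FakeLog', ['file', 'size'])

def _demo_valid_roundtrip(path):
    out = open(path, 'w')
    for log in [FakeLog('log.0', 1024), FakeLog('log.1', 2048)]:
        out.write('%s %d\n' % (log.file, log.size))  # same format as makeValid
    out.close()
    # readValidFile(path) would return this same mapping.
    checkpoints = {}
    for line in open(path, 'r').readlines():
        name, size_s = line.split()                  # same parse as readValidFile
        checkpoints[name] = int(size_s)
    assert checkpoints == {'log.0': 1024, 'log.1': 2048}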
Example #13
  def getLogReports(self, collection, reportType):
    """Return a list of reports of given reportType on given collection."""
    listFile = liblog.get_report_list_filename(self.entConfig, reportType,
                                               collection)
    reports = []
    try:
      lines = gfile.GFile(listFile, 'r').readlines()
      for line in lines:
        if line[-1] == '\n':
          line = line[:-1]
        (reportName, collection, creationDate, isFinal,
         reportType, reportDate, completeState,
         withResults, topCount,
         diagnosticTerms) = string.split(line, '\t', 9)
        reports.append(LogReport(urllib.unquote(reportName),
                                 collection, creationDate, isFinal,
                                 reportType, reportDate, completeState,
                                 withResults, topCount, diagnosticTerms))

    except IOError:
      return []
    except ValueError:
      return []
    return reports
Example #14
def WriteResult(cq_mixer):
    """Post process the CrawlQueueResponse buffer and write to file
  for adminrunner to use."""
    try:
        rfile = gfile.GFile(cq_mixer.result_file, 'w')
        ifile = gfile.GFile(cq_mixer.index_file, 'w')

        index_buf = []
        result_buf = []

        queues = {}

        # write to data file and index file.
        hosts = cq_mixer.perHostUrlSorters.keys()
        hosts.sort()
        for host in hosts:
            urlSorter = cq_mixer.perHostUrlSorters[host]
            count = 0
            fpos = rfile.tell()
            index_line = '%s\t%d\t%d' % (host, urlSorter.size(), fpos)
            for url in urlSorter.getUrls():
                if url.has_path():
                    path = url.path()
                else:
                    path = ''
                if url.has_pagerank():
                    pagerank = url.pagerank()
                else:
                    pagerank = -1
                if url.has_lastcrawledtime():
                    lastcrawledtime = url.lastcrawledtime()
                else:
                    lastcrawledtime = 0
                if url.has_nextcrawltime():
                    nextcrawltime = url.nextcrawltime()
                else:
                    nextcrawltime = 0
                if url.has_changeinterval():
                    changeinterval = url.changeinterval()
                else:
                    changeinterval = 0
                # Line format should be consistent with that in CrawlingUrl.java
                line = '%d\t%d\t%d\t%d\t%s\n' % (pagerank, lastcrawledtime,
                                                 nextcrawltime, changeinterval,
                                                 path)
                fpos += len(line)
                result_buf.append(line)
                count = count + 1
                if count % crawlqueue_manager.PAGESIZE == 0:
                    index_line = '%s\t%d' % (index_line, fpos)

                if count % 1000 == 0:  # flush the buffer
                    rfile.writelines(result_buf)
                    result_buf = []

            # post-processing one per-host urlSorter.
            if len(result_buf) != 0:
                rfile.writelines(result_buf)
                result_buf = []

            index_line = '%s\t%d\n' % (index_line, fpos)
            index_buf.append(index_line)

        # write index file.
        index_buf.insert(0, '%d\n' % cq_mixer.getCaptionTime())
        index_buf.insert(1, '%d\n' % cq_mixer.size())
        ifile.writelines(index_buf)

        ifile.close()
        rfile.close()
    except Exception, e:
        logging.error('Exception: %s' % e)
        return False

    return True
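# A small parsing sketch for the index-line format WriteResult emits per
# host: 'host \t url_count \t page_0_fpos [\t page_i_fpos ...] \t end_fpos'.
# parse_index_line is a hypothetical helper, not part of the original code.
def parse_index_line(line):
    fields = line.rstrip('\n').split('\t')
    host = fields[0]
    url_count = int(fields[1])
    offsets = [int(x) for x in fields[2:]]  # page start offsets, then end offset
    return host, url_count, offsets

assert parse_index_line('example.com\t42\t0\t512\n') == \
    ('example.com', 42, [0, 512])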