示例#1
0
 def update_processed_count(self, n):
     """Increment the count of files processed in the state directory.

     Reads the current count from state/processed.txt under self.path, adds
     n to it, stores the result in self.files_processed and writes it back
     to the same file.

     Raises ValueError if the file content is not an integer, and the usual
     IOError/OSError if the file cannot be read.
     """
     processed_filename = os.path.join(self.path, 'state', 'processed.txt')
     # Use a context manager so the handle is closed right after reading,
     # before the same file gets rewritten below.
     with open(processed_filename) as fh:
         files_processed = int(fh.read().strip())
     new_count = files_processed + n
     self.files_processed = new_count
     create_file(processed_filename, str(new_count))
示例#2
0
 def update_state(self, limit, t1):
     """Update the content of state/processed.txt and state/processing-history.txt.

     Overwrites state/processed.txt with the current self.files_processed
     and appends one tab-separated record to state/processing-history.txt
     holding the stage name, the limit, a timestamp, the git commit as
     returned by get_git_commit() and the elapsed time.

     limit -- the limit value recorded in the history line
     t1    -- start time as returned by time.time(); the elapsed time is
              measured from this moment
     """
     # TODO: should not just print the files processed in the history, but also the
     # range of files.
     time_elapsed = time.time() - t1
     processed = "%d\n" % self.files_processed
     create_file(os.path.join(self.path, 'state', 'processed.txt'),
                 processed)
     history_file = os.path.join(self.path, 'state',
                                 'processing-history.txt')
     # Append inside a context manager so the record is flushed and the
     # handle released even if formatting or writing raises.
     with open(history_file, 'a') as fh:
         fh.write("%s\t%d\t%s\t%s\t%s\n" %
                  (self.stage_name, limit, time.strftime("%Y:%m:%d-%H:%M:%S"),
                   get_git_commit(), time_elapsed))
示例#3
0
 def initialize_on_disk(self):
     """Create the on-disk skeleton for this data set.

     Only the top-level directory (something like data/patents/en/d1_txt) is
     guaranteed to exist; none of its substructure is there yet. This makes
     sure the config, state and files subdirectories exist, writes the
     initial state files (a zero count in processed.txt and a fresh
     processing-history.txt), stores the head and trace of the pipeline in
     the config directory and resets self.files_processed to 0.
     """
     for name in ('config', 'state', 'files'):
         ensure_path(os.path.join(self.path, name))

     # Initial state: nothing processed yet, no history records.
     state_dir = os.path.join(self.path, 'state')
     create_file(os.path.join(state_dir, 'processed.txt'), "0\n")
     create_file(os.path.join(state_dir, 'processing-history.txt'))

     # Pipeline configuration, split into the earlier steps (trace) and the
     # final step (head), each serialized as a string.
     pipeline_trace, pipeline_head = self.split_pipeline()
     trace_text = pipeline_component_as_string(pipeline_trace)
     head_text = pipeline_component_as_string([pipeline_head])
     config_dir = os.path.join(self.path, 'config')
     create_file(os.path.join(config_dir, 'pipeline-head.txt'), head_text)
     create_file(os.path.join(config_dir, 'pipeline-trace.txt'), trace_text)

     self.files_processed = 0