Example #1
    def _openForReadWrite(self, fname, header=None):
        '''
        @summary: Read and populate lookup table if file exists, open and return
                  file and csvwriter for writing or appending. If lookup file
                  is new, write header if provided.
        '''
        lookupDict = {}
        doAppend = False

        if os.path.exists(fname):
            doAppend = True
            try:
                csvRdr, infile = getCSVReader(fname, DELIMITER)
                # get header
                line, recno = self.getLine(csvRdr, 0)
                # read lookup vals into dictionary
                while line is not None:
                    line, recno = self.getLine(csvRdr, recno)
                    if line and len(line) > 0:
                        try:
                            # First item is dict key, rest are vals
                            lookupDict[line[0]] = line[1:]
                        except Exception as e:
                            self._log.warn(
                                'Failed to read line {} from {} ({})'.format(
                                    recno, fname, e))
                self._log.info('Read lookup file {}'.format(fname))
            finally:
                # Close the reader's file handle whether or not the read succeeded
                infile.close()
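All of these examples depend on getCSVReader and getCSVWriter helpers that are not shown. Judging only from the call sites (each returns the reader or writer together with the open file handle, and getCSVWriter also accepts a doAppend flag, as in Example #5), a minimal sketch of such helpers could look like the following; the real module's signatures and encoding handling may differ.

import csv

def getCSVReader(datafile, delimiter):
   # Open the file and return (csv.reader, open file handle); the caller closes the handle
   f = open(datafile, 'r')
   return csv.reader(f, delimiter=delimiter), f

def getCSVWriter(datafile, delimiter, doAppend=True):
   # Open for append or overwrite and return (csv.writer, open file handle)
   f = open(datafile, 'a' if doAppend else 'w')
   return csv.writer(f, delimiter=delimiter), f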
Example #2
def concatenateLookups(filepath, outfname, pattern=None, fnames=None):
   '''
   @summary: Concatenate named files or files matching pattern into a single file. 
   @param filepath: Pathname to input files
   @param outfname: Basename of output file
   @param pattern: Pattern to match for input files
   @param fnames: Basenames of one or more input files
   '''
   outfname = os.path.join(filepath, outfname)
   infnames = []
   try:
      csvwriter, outf = getCSVWriter(outfname, DELIMITER)

      if pattern is not None:
         infnames = glob.glob(os.path.join(filepath, pattern))
      if fnames is not None:
         for fn in fnames:
            infnames.append(os.path.join(filepath, fn))            

      for fname in infnames:
         csvreader, inf = getCSVReader(fname, DELIMITER)
         while csvreader is not None:
            try:
               line = next(csvreader)
            except OverflowError as e:
               print('Overflow on line {} ({})'.format(csvreader.line_num, str(e)))
            except StopIteration:
               print('EOF after line {}'.format(csvreader.line_num))
               csvreader = None
               inf.close()
            except Exception as e:
               print('Bad record on line {} ({})'.format(csvreader.line_num, e))
            else:
               # Assumed completion: copy each successfully read record into
               # the combined output file
               csvwriter.writerow(line)
   finally:
      outf.close()
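A hypothetical call of concatenateLookups, with made-up paths and filenames purely for illustration:

# Combine every lookup file matching a pattern into one CSV
concatenateLookups('/tmp/lookups', 'all_lookups.csv', pattern='lookup_*.csv')

# Or name the input files explicitly
concatenateLookups('/tmp/lookups', 'all_lookups.csv',
                   fnames=['name_lookup.csv', 'id_lookup.csv'])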
Example #3
   def _writeGBIFParserInput(self, inNameIdFname, outScinameFname):
      '''
      @summary: Read scientificName, taxonKey(s) from input CSV file, write 
                a JSON file with scientificName list for input to the 
                GBIF parser.
      @param inNameIdFname: Input CSV file with scientificName, and one or more
                            taxonKeys (identifier for GBIF taxonomic record).
      @param outScinameFname: Output JSON file with list of ScientificNames 
                            for parsing by the GBIF parser.
      '''
      scif = inf = None
      try:
         scif = open(outScinameFname, 'w')
         scif.write('[{}'.format(NEWLINE))
         csvreader, inf = getCSVReader(inNameIdFname, DELIMITER)
         # discard header
         _, csvreader = self._readData(csvreader)
         # then get/write first line
         encSciname, csvreader = self._readData(csvreader)
         self._writeData(scif, encSciname)
         
         while csvreader is not None:
            encSciname, csvreader = self._readData(csvreader)

            if encSciname is not None:
               scif.write(',{}'.format(NEWLINE))
               self._writeData(scif, encSciname)

         scif.write('{}]{}'.format(NEWLINE, NEWLINE))
      finally:
         # Close whichever files were successfully opened
         if scif is not None:
            scif.close()
         if inf is not None:
            inf.close()
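If _writeData emits each scientificName as a quoted JSON string (an assumption, since that helper is not shown), the output file is a plain JSON array and can be sanity-checked before handing it to the GBIF parser:

import json

with open(outScinameFname) as f:
   scinames = json.load(f)
print('{} names queued for the GBIF parser'.format(len(scinames)))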
Example #4
def splitFile(bigFname, limit=50000):
   currFnum = 1
   stopLine = limit
   csvreader, inf = getCSVReader(bigFname, DELIMITER)
   csvwriter, outf = _getNextWriter(bigFname, currFnum)
   while csvreader is not None and csvreader.line_num < stopLine:
      try:
         line = next(csvreader)
      except OverflowError as e:
         print('Overflow on line {} ({})'.format(csvreader.line_num, str(e)))
      except StopIteration:
         print('EOF after line {}'.format(csvreader.line_num))
         csvreader = None
         inf.close()
      else:
         # Assumed completion: write the record into the current chunk file
         csvwriter.writerow(line)
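splitFile relies on a _getNextWriter helper that is not included in the snippet. From the call _getNextWriter(bigFname, currFnum) and its (csvwriter, outf) return value, a plausible sketch is the following; the chunk-naming scheme is an assumption:

import os

def _getNextWriter(bigFname, currFnum):
   # Derive a numbered chunk filename, e.g. occurrences.csv -> occurrences_1.csv (assumed scheme)
   basename, ext = os.path.splitext(bigFname)
   chunkFname = '{}_{}{}'.format(basename, currFnum, ext)
   return getCSVWriter(chunkFname, DELIMITER, doAppend=False)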
Example #5
    def openInputOutput(self):
        '''
        @summary: Read GBIF metadata, open GBIF interpreted data for reading,
                  and the output file for writing
        '''
        self.fldMeta = self.getFieldMeta()

        (self._iCsvrdr, self._if) = getCSVReader(self.interpFname, DELIMITER)

        (self._outWriter, self._outf) = getCSVWriter(self.outFname,
                                                     DELIMITER,
                                                     doAppend=False)
        # Write the header row
        self._outWriter.writerow(ORDERED_OUT_FIELDS)
        self._log.info('Opened input/output files')
Example #6
 def _openReader(self, fname):
    if not os.path.exists(fname):
       raise Exception('Input file {} does not exist'.format(fname))
    reader, inf = getCSVReader(fname, self.delimiter)
    self._openfiles[fname] = inf
    return reader
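Because _openReader registers every handle in self._openfiles keyed by filename, a companion cleanup method is easy to add; _closeFiles below is hypothetical, not part of the original class:

 def _closeFiles(self):
    # Close every handle registered by _openReader and forget it
    for f in self._openfiles.values():
       f.close()
    self._openfiles = {}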
Example #7
 def _getHeader(self):
    reader, inf = getCSVReader(self.messyfile, self.delimiter)
    header = next(reader)
    inf.close()
    return header
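Typical use of _getHeader, called from another method of the same class, is just to capture the column names before any heavier processing; the snippet below is illustrative only:

 colnames = self._getHeader()
 print('Columns in {}: {}'.format(self.messyfile, ', '.join(colnames)))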