def testlistfullpathdirectoryfalse(self):
    """Check that a subdirectory is not reported by ``listfullpathfiles()``.

    The check targets the full-path listing (the method this test is named
    after) and asserts absence of the subdirectory path directly. Comparing
    the listing for inequality with one specific list would also pass for
    listings that do contain the subdirectory, e.g. in a different order.
    """
    self.p = info('test')
    sub = join(self.path, 'subdirectory')
    listing = self.p.listfullpathfiles()
    self.assertFalse(
        sub in listing,
        "subdirectory %r unexpectedly listed in %r" % (sub, listing))
def main():
    """Command-line entry point: merge the files of a directory into one file.

    Options:
        -d / --inputdirectory   directory whose files are merged (required)
        -f / --mergedfilename   name of the merged output file (required)
        -o / --outputdirectory  directory receiving the merged file
                                (defaults to the user's home directory)

    ``parser.error`` is called, and the program exits, when a required
    option is missing. On success the path of the merged file is printed.
    """
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option("-d", "--inputdirectory",
                      action="store", dest="indir",
                      help="Directory containing the files to merge")
    parser.add_option("-f", "--mergedfilename",
                      action="store", dest="file",
                      help="Filename for merged file")
    parser.add_option("-o", "--outputdirectory",
                      action="store", dest="outdir", default="~/",
                      help="Default directory: " + abspath(expanduser('~/')))
    options, _args = parser.parse_args()

    if not options.indir:
        parser.error('Input directory not provided')
    if not options.file:
        parser.error('Output merged filename not provided')

    mergeddirectory = abspath(expanduser(options.outdir))
    # Build the destination once so the path reported to the user is exactly
    # the path handed to mergeinto(), rather than a second "/"-joined copy.
    mergedpath = join(mergeddirectory, options.file)

    filegroup = files(info(options.indir).listfullpathfiles())
    filegroup.mergeinto(mergedpath)

    # Single-argument call form parses on both Python 2 and Python 3.
    print(mergedpath)
def testlistfullpathfiles(self):
    """Check that ``listfullpathfiles()`` returns the two fixture file paths.

    NOTE(review): the comparison is order-sensitive; this presumably relies
    on the listing coming back in a deterministic order -- confirm.
    """
    self.p = info('test')
    expected = [join(self.path, name) for name in ('File1.txt', 'File2.txt')]
    # assertEqual shows both lists on failure; assertTrue(a == b) only
    # reports "False is not true".
    self.assertEqual(self.p.listfullpathfiles(), expected)
def testlisthiddendirectoryfalse(self):
    """Check that a dot-prefixed subdirectory is not reported by ``listfiles()``.

    Absence is asserted directly. Inequality with one exact list would also
    hold for listings that include the hidden directory in another position.
    """
    self.p = info('test')
    hidden = '.hiddensubdirectory'
    listing = self.p.listfiles()
    self.assertFalse(
        hidden in listing,
        "hidden directory %r unexpectedly listed in %r" % (hidden, listing))
def testlisthiddenfilefalse(self):
    """Check that files starting with a '.' are not reported by ``listfiles()``.

    Absence is asserted directly. Inequality with one exact list would also
    hold for listings that include the hidden file in another position.
    """
    self.p = info('test')
    hidden = '.hidden.file'
    listing = self.p.listfiles()
    self.assertFalse(
        hidden in listing,
        "hidden file %r unexpectedly listed in %r" % (hidden, listing))
def testlistfiles(self):
    """Check that ``listfiles()`` returns exactly the two fixture file names.

    NOTE(review): the comparison is order-sensitive; this presumably relies
    on the listing coming back in a deterministic order -- confirm.
    """
    self.p = info('test')
    expected = ['File1.txt', 'File2.txt']
    # assertEqual shows both lists on failure; assertTrue(a == b) only
    # reports "False is not true".
    self.assertEqual(self.p.listfiles(), expected)
def testlistfullpathdirectoryfalse(self):
    """The 'subdirectory' path must be absent from ``listfullpathfiles()``."""
    self.p = info('~')
    subdirectory_path = join(self.path, 'subdirectory')
    full_paths = self.p.listfullpathfiles()
    self.assertFalse(subdirectory_path in full_paths)
def testlistfullpathfiles(self):
    """Both fixture files must appear, by full path, in ``listfullpathfiles()``."""
    self.p = info('~')
    wanted = (
        join(self.path, 'File1.txt'),
        join(self.path, 'File2.txt'),
    )
    # The listing is requested afresh for each path, and the second path is
    # only checked once the first one has been found.
    for full_path in wanted:
        self.assertTrue(full_path in self.p.listfullpathfiles())
def testlisthiddendirectoryfalse(self):
    """Folders starting with a '.' must be absent from ``listfiles()``."""
    self.p = info('~')
    hidden_folder = '.hiddensubdirectory'
    names = self.p.listfiles()
    self.assertFalse(hidden_folder in names)
def testlisthiddenfilefalse(self):
    """Files starting with a '.' must be absent from ``listfiles()``."""
    self.p = info('~')
    hidden_file = '.hidden.file'
    names = self.p.listfiles()
    self.assertFalse(hidden_file in names)
def testlistdirectoryfalse(self):
    """The 'subdirectory' entry must be absent from ``listfiles()``."""
    self.p = info('~')
    folder_name = 'subdirectory'
    names = self.p.listfiles()
    self.assertFalse(folder_name in names)
def testlistfiles(self):
    """Both fixture file names must appear in ``listfiles()``."""
    self.p = info('~')
    # The listing is requested afresh for each name, and the second name is
    # only checked once the first one has been found.
    for file_name in ('File1.txt', 'File2.txt'):
        self.assertTrue(file_name in self.p.listfiles())
def testlisthiddendirectoryfalse(self):
    """The '.hiddensubdirectory' entry must be absent from ``listfiles()`` of '.'."""
    self.p = info('.')
    hidden_folder = '.hiddensubdirectory'
    names = self.p.listfiles()
    self.assertFalse(hidden_folder in names)
def main():
    """Command-line entry point: transform flight CSV files and load them.

    Options:
        -d / --inputdirectory    directory containing the CSV files (required)
        -c / --connectionstring  passed through to ``pdb.load_wrapper``
        -k / --freebaseapikey    accepted, but not used by this function

    For every file returned by ``info(indir).listfullpathfiles()`` the file
    is loaded, transformed by the routine selected from its path ('cork',
    'shan' or 'dubl'), given a ``statusDateTime`` column and, if missing, an
    ``airline`` column, reduced to the columns of interest and appended via
    ``pdb.load_wrapper('timekeeping', ...)``. Files whose path matches none
    of the three markers are reported and skipped.

    NOTE(review): the markers are matched against the full path, so a
    directory name containing e.g. 'cork' selects that transform for every
    file below it -- confirm this is intended.
    NOTE(review): the connection string is not validated; ``None`` is passed
    to ``pdb.load_wrapper`` when -c is omitted -- confirm that is acceptable.

    Returns:
        An empty tuple.
    """
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option("-d", "--inputdirectory", action="store", dest="indir")
    parser.add_option("-c", "--connectionstring", action="store", dest="conn")
    parser.add_option("-k", "--freebaseapikey", action="store", dest="key")
    options, _args = parser.parse_args()
    if not options.indir:
        parser.error('Input directory not provided')
    connStr = options.conn

    colsToKeep = ['airport', 'airline', 'flight', 'location', 'statusDateTime']
    airline_mapping = iatamapping()

    for f in info(options.indir).listfullpathfiles():
        # Single-argument call form parses on both Python 2 and Python 3.
        print(f)
        df = loadcsvtodf(f)
        if 'cork' in f:
            df = corktransf(df)
        elif 'shan' in f:
            df = shantransf(df)
        elif 'dubl' in f:
            df = dubltransf(df)
        else:
            print("Boink. Unrecognized file found")
            print(f)
            continue

        # Nothing to load when the transform produced no rows.
        if df.empty:
            continue

        # Identify the actual departure date and time:
        # set to type datetime
        df['saveDateTime'] = pd.to_datetime(df['saveDateTime'])
        # status time : today's date + the time extracted from statusMessage
        df['statusTime'] = df['statusMessage'].map(extracttime)
        # create a dateTime from statusTime(time) and saveDateTime(date)
        # and store it in statusDateTime
        df = buildDateTime(df, 'statusTime', 'saveDateTime', 'statusDateTime')

        # Map flight numbers to airlines if no airline column provided
        if 'airline' not in list(df.columns.values):
            df['airline'] = df['flight']
            # strip the digits from the flight number
            df['airline'] = df['airline'].map(stripdigits)
            # map to an airline
            df['airline'] = df['airline'].map(airline_mapping)

        # Drop unwanted columns
        unwanted = [col for col in list(df.columns.values)
                    if col not in colsToKeep]
        df = df.drop(unwanted, axis=1)
        print(df.head())
        pdb.load_wrapper('timekeeping', df, connStr, if_exists='append')

    # Preserve the original return value (an empty tuple) for callers.
    return ()