Example #1
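# Note: this snippet is a fragment of a larger module. It presumably relies on
# module-level imports (argparse, sys, functools, multiprocessing as mp) and on
# names defined elsewhere: getOSVersion(hostname, debug=..., username=..., password=...)
# returning a row dict, FIXATTRS (the output column names), and UnicodeDictWriter
# from the unicodewriter module.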
def main():
    global DEBUG
    parser = argparse.ArgumentParser(description='Retrieve OS Versions using WMI based on hostnames')
    parser.add_argument('--processes', '-n', type=int, default=5,
                        help='Number of processes to use')
    parser.add_argument('--username', '-u', type=str,
                        help='Username')
    parser.add_argument('--password', '-p', type=str,
                        help='Password')
    parser.add_argument('computerlist', metavar='FILE', type=str, default='-', nargs='?',
                        help='File with one hostname per line, or - to read from stdin')
    parser.add_argument('--debug', '-d', action='store_true', default=False,
                        help='Enable debug output')
    parser.add_argument('--output', '-o', type=str,
                        help='Output file (defaults to stdout)')

    args = parser.parse_args()
    DEBUG=args.debug
    if args.debug and args.username:
        print "[DD] using username: {} and pasword {}".format(args.username,args.password)
    computerlist=None
    if args.output is None:
        outfile=sys.stdout
    else:
        outfile=open(args.output,"wb")
    if args.computerlist == '-':
        computerlist = [x.strip() for x in sys.stdin]
    else:
        with open(args.computerlist, 'rb') as f:
            computerlist = [x.strip() for x in f]
    p=mp.Pool(args.processes)
    f=functools.partial(getOSVersion,debug=args.debug,username=args.username,password=args.password)
    # f("localhost")
    res=p.map(f,computerlist)
    # for e in res:
    #     computername=e[0]
    #     for ip in e[1]:
    #         outfile.write("{}\t{}\n".format(computername,ip))

    csvout=UnicodeDictWriter(outfile,FIXATTRS,dialect='excel-tab')
    # csvout.writeheader()
    # for computer in res:
    print FIXATTRS
    for computer in res:
        try:
            csvout.writerow(computer)
        except Exception as e:
            print "[EE] Bummed out on row write: {} | {}".format(computer, e)
Example #2
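# Note: like the snippet above, this one presumably relies on module-level imports
# not shown here (os, sys, csv, copy, glob, shlex, subprocess, time, logging,
# simplejson) and on the Config and UnicodeDictWriter helper classes.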
def source2csv(source_dir, options):
    ''' Loads the collections in the given source directory and converts each one to CSV.

        Arguments:
            source_dir - the relative path to the directory in which the config.yaml file is located.
            options - parsed command line options (dry_run, config_file, localhost, url).
    '''
    config = Config(os.path.join(source_dir, 'config.yaml'))        
    logging.info('Collections in %s: %s' % (source_dir, config.collection_names()))
    
    for collection in config.collections(): # For each collection dir in the source dir       
        coll_dir = collection.getdir()

        original_dir = os.getcwd() # We'll need this to restore us to this dir at the end of processing this collection.
        os.chdir(os.path.join(source_dir, coll_dir))
        
        # Create collection.csv writer
        coll_file = open('collection.csv.txt', 'w')
        coll_cols = collection.get_columns()
        coll_cols.sort()
        coll_csv = UnicodeDictWriter(coll_file, coll_cols)
        # coll_csv = csv.DictWriter(coll_file, coll_cols)
        coll_csv.writer.writerow(coll_csv.fieldnames)
        coll_row = collection.get_row()
        coll_row['layer_source'] = source_dir
        coll_row['layer_collection'] = coll_dir            
        
        # Create polygons.csv writer
        poly_file = open('collection.polygons.csv.txt', 'w')
        poly_dw = UnicodeDictWriter(poly_file, ['shapefilename', 'json'])
        # poly_dw = csv.DictWriter(poly_file, ['shapefilename', 'json'])
        poly_dw.writer.writerow(poly_dw.fieldnames)
    
        # Convert DBF to CSV and add to collection.csv
        shpfiles = glob.glob('*.shp')
        logging.info('Processing %d layers in the %s/%s' % (len(shpfiles), source_dir, coll_dir))
        for sf in shpfiles:
            logging.info('Extracting DBF fields from %s' % sf)
            csvfile = '%s.csv' % sf
            if os.path.exists(csvfile): # ogr2ogr barfs if there are *any* csv files in the dir
                os.remove(csvfile)

            # For Macs which have GDAL.framework, we can autodetect it
            # and use it automatically.
            ogr2ogr_path = '/Library/Frameworks/GDAL.framework/Programs/ogr2ogr'
            if not os.path.exists(ogr2ogr_path):
                # We don't have a path to use; let subprocess.call
                # find it.
                ogr2ogr_path = 'ogr2ogr'

            # TODO: optional command line option for ogr2ogr command

            command = ogr2ogr_path + ' -f CSV "%s" "%s"' % (csvfile, sf)
            args = shlex.split(command)
            try:
                subprocess.call(args)
            except OSError as errmsg:
                logging.error("""Error occurred while executing command line '%s': %s
    Please ensure that %s is executable and available on your path.
                """, command, args[0], errmsg)
                raise # Re-raise the OSError exception.
            
            # Copy and update coll_row with DBF fields
            row = copy.copy(coll_row)                
            row['layer_filename'] = os.path.splitext(sf)[0]
            dr = csv.DictReader(open(csvfile, 'r'), skipinitialspace=True)

            # Lowercase all field names.
            dr.fieldnames = map(lambda fn: fn.lower(), dr.fieldnames)
           
            layer_polygons = []
            
            for dbf in dr: # For each row in the DBF CSV file (1 row per polygon)
    
                polygon = {}
    
                for source, mols in collection.get_mapping().iteritems(): # Required DBF fields

                    # A blank source for a required field is an error.
                    if source is None or source == '':
                        logging.error('Required field(s) %s are not mapped to any value. Please check %s/config.yaml!' % (", ".join(mols), source_dir))
                        sys.exit(1)        

                    for mol in mols:
                        if unicode(source)[0] == '=':
                            # Map a DBF column to a field.
                            # For case-insensitivity, we lowercase all field names.
                            source_name = source[1:].lower()

                            sourceval = dbf.get(source_name)
                            if source_name not in dbf:
                                logging.error('Unable to map required DBF field %s to %s. Valid fieldnames include: %s.' % (source_name, mol,  ", ".join(dr.fieldnames)))
                                sys.exit(1)        
                            row[mol] = sourceval
                            polygon[mol] = sourceval

                        else:
                            # Sets the value of the field based on 'source'
                            row[mol] = source
                            polygon[mol] = source
    
                for source, mols in collection.get_mapping(required=False).iteritems(): #Optional DBF fields

                    for mol in mols:
                        # Source can be blank for optional fields, which is fine.
                        if source is None or source == '':
                            row[mol] = ''
                            polygon[mol] = ''
                            
                        elif unicode(source)[0] == '=':
                            # Map a DBF column to a field.
                            # For case-insensitivity, we lowercase all field names.
                            source_name = source[1:].lower()

                            sourceval = dbf.get(source_name)
                            if source_name not in dbf:
                                logging.error('Unable to map optional DBF field %s to %s. Valid fieldnames include: %s.' % (source_name, mol, ", ".join(dr.fieldnames)))
                                sys.exit(1) 
                            row[mol] = sourceval
                            polygon[mol] = sourceval

                        else:
                            # Sets the value of the field based on 'source'
                            row[mol] = source
                            polygon[mol] = source

                # MOL-calculated fields (see issue #120) will eventually be calculated here.
                # For now, that's just 'provider', 'contributor' and 'filename'.
                row['filename'] = row['layer_filename']
    
                # Write coll_row to collection.csv
                coll_csv.writerow(row)
                layer_polygons.append(polygon)
    
            # Create JSON representation of dbfjson
            polygons_json = simplejson.dumps(layer_polygons) # TODO: Showing up as string instead of JSON in API
            d = dict(shapefilename=row['layer_filename'], json=polygons_json)
            poly_dw.writerow(d)
        poly_file.flush()
        poly_file.close()
    
        # Important: Close the DictWriter file before trying to bulkload it
        logging.info('All collection metadata saved to %s' % coll_file.name)
        logging.info('All collection polygons saved to %s' % poly_file.name)
        coll_file.flush()
        coll_file.close()

        # Bulkload...

        # os.chdir(current_dir)
        if not options.dry_run:
            os.chdir('../../')
            filename = os.path.abspath('%s/%s/collection.csv.txt' % (source_dir, coll_dir))

            if options.config_file is None:
                logging.error("No bulkloader configuration file specified: please specify one with the --config_file option.")
                sys.exit(2) # Exit code 2 conventionally signals a command line usage error.

            config_file = os.path.abspath(options.config_file)

            if options.localhost:
                options.url = 'http://localhost:8080/_ah/remote_api'

            # *nixes can run appcfg.py as a program without any problem. Windows, however,
            # can only run appcfg.py if run through the shell. Therefore, we set the flag_run_in_shell
            # depending on which operating system we're in.
            flag_run_in_shell = (os.name == 'nt') # True if we're running in Windows; false otherwise.

            # Bulkload Layer entities to App Engine for entire collection
            cmd = [
                'appcfg.py', 'upload_data', 
                '--config_file=%s' % config_file, 
                '--filename=%s' % filename, 
                '--kind=Layer', 
                '--url=%s' % options.url,
                '--log_file=logs/bulkloader-log-%s' % time.strftime('%Y%m%d.%H%M%S'),
                '--db_filename=progress/bulkloader-progress-%s.sql3' % time.strftime('%Y%m%d.%H%M%S')
            ] 
            subprocess.call(cmd, shell=flag_run_in_shell)

            # Bulkload LayerIndex entities to App Engine for entire collection
            cmd = [
                'appcfg.py', 'upload_data', 
                '--config_file=%s' % config_file, 
                '--filename=%s' % filename, 
                '--kind=LayerIndex', 
                '--url=%s' % options.url,
                '--log_file=logs/bulkloader-log-%s' % time.strftime('%Y%m%d.%H%M%S'),
                '--db_filename=progress/bulkloader-progress-%s.sql3' % time.strftime('%Y%m%d.%H%M%S')
            ] 
            subprocess.call(cmd, shell=flag_run_in_shell)

        # Go back to the original directory for the next collection.
        os.chdir(original_dir)
Example #3
import csv, codecs, cStringIO
from lxml import etree
from lxml.cssselect import CSSSelector
from unicodewriter import UnicodeDictWriter

if __name__ == '__main__':
    # Start our list
    votings = []
    # Get the XML data
    root = etree.fromstring(open('camara.xml', 'r').read())
    # Optionally, get the XML data from a URL instead
    # root = etree.parse('http://domain.com/source.xml').getroot()

    for voting in CSSSelector('Votacao')(root):
        thisvoting = {
            'description': voting.get('ObjVotacao'),
            'date': voting.get('Data'),
        }
        for deputado in CSSSelector('Deputado')(voting):
            row = {'name': deputado.get('Nome'),
                    'party': deputado.get('Partido'),
                    'state': deputado.get('UF'),
                    'stance': deputado.get('Voto')}
            row.update(thisvoting)
            votings.append(row)

    with open('pitacosdeputados.csv', 'wb') as csvfile:
        writer = UnicodeDictWriter(csvfile, ['description', 'date', 'name', 'party', 'state', 'stance'])
        for row in votings:
            writer.writerow(row)
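
All three examples import UnicodeDictWriter from a local unicodewriter module that is not shown on this page. As a rough sketch of what that helper might look like (assuming the UnicodeWriter recipe from the Python 2 csv documentation; the real module may differ):

# A hedged sketch only: the actual unicodewriter module is not included here.
import csv, codecs, cStringIO

class UnicodeWriter(object):
    """csv.writer wrapper that writes rows of unicode values as UTF-8 (Python 2 docs recipe)."""
    def __init__(self, f, dialect=csv.excel, encoding='utf-8', **kwds):
        self.queue = cStringIO.StringIO()             # rows are serialised here first
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        self.writer.writerow([s.encode('utf-8') for s in row])
        data = self.queue.getvalue().decode('utf-8')  # fetch the serialised row
        self.stream.write(self.encoder.encode(data))  # re-encode and write to the target stream
        self.queue.truncate(0)                        # empty the queue for the next row

    def writerows(self, rows):
        for row in rows:
            self.writerow(row)

class UnicodeDictWriter(object):
    """csv.DictWriter-style wrapper around UnicodeWriter."""
    def __init__(self, f, fieldnames, dialect=csv.excel, encoding='utf-8', **kwds):
        self.fieldnames = fieldnames
        self.writer = UnicodeWriter(f, dialect=dialect, encoding=encoding, **kwds)

    def writeheader(self):
        self.writer.writerow(self.fieldnames)

    def writerow(self, rowdict):
        # Missing/None values become empty strings; everything else is coerced to unicode.
        values = [rowdict.get(k) for k in self.fieldnames]
        self.writer.writerow([u'' if v is None else unicode(v) for v in values])

    def writerows(self, rowdicts):
        for rowdict in rowdicts:
            self.writerow(rowdict)

This shape is consistent with how the examples use the class: keyword arguments such as dialect='excel-tab' are passed through to csv.writer, writerow() takes a dict keyed by the fieldnames, and the .writer attribute can be used directly to emit a header row as a plain list.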