Exemplo n.º 1
0
def choose_engine(opts, choice=True):
    """Pick the database engine named in ``opts`` or ask the user for one.

    The engine name comes from ``opts["engine"]`` when that key is present,
    is forced to ``"download"`` when ``opts["command"]`` is ``"download"``,
    and is otherwise read from standard input after listing the entries of
    ``engine_list``.

    Returns ``None`` when a prompt would be needed but ``choice`` is false.
    An empty name selects ``engine_list[0]``; a name that matches no engine
    leaves a freshly constructed ``Engine``. ``opts`` is stored on the
    returned engine as ``engine.opts``.
    """
    if "engine" in opts:
        requested = opts["engine"]
    elif opts["command"] == "download":
        requested = "download"
    elif not choice:
        return None
    else:
        print("Choose a database engine:")
        for available in engine_list:
            prefix = (("(" + available.abbreviation + ") ")
                      if available.abbreviation else "")
            print("    " + prefix + available.name)
        requested = input(": ")
    requested = requested.lower()

    # Built up front so an unmatched name still yields an engine object.
    selected = Engine()
    if requested:
        # No early exit: when several engines match, the last one is kept.
        for candidate in engine_list:
            if requested == candidate.name.lower():
                selected = candidate
            elif (candidate.abbreviation
                    and requested == candidate.abbreviation):
                selected = candidate
    else:
        selected = engine_list[0]

    selected.opts = opts
    return selected
Exemplo n.º 2
0
def choose_engine(opts, choice=True):
    """Select a database engine from ``opts`` or by prompting the user.

    The engine name is taken from ``opts["engine"]`` when that key exists,
    is forced to ``"download"`` when ``opts["command"] == "download"``, and
    is otherwise read from standard input after listing ``engine_list``.
    The name is lower-cased and compared with each engine's lower-cased
    ``name`` and with its ``abbreviation``; when several engines match, the
    last one in ``engine_list`` is used.

    Args:
        opts: mapping of options; it must contain ``"command"`` whenever
            ``"engine"`` is absent. It is attached to the returned engine
            as ``engine.opts``.
        choice: when false, return ``None`` instead of prompting.

    Returns:
        The matching engine, ``engine_list[0]`` for an empty name, a new
        ``Engine()`` when nothing matches, or ``None`` when a prompt would
        be required but ``choice`` is false.

    Raises:
        Exception: when ``opts["table_name"]`` is non-empty, lacks the
            ``{table}`` or ``{db}`` placeholder, and the selected engine
            declares a ``table_name`` entry in ``required_opts``.
    """
    if "engine" in opts:
        enginename = opts["engine"]
    elif opts["command"] == "download":
        enginename = "download"
    else:
        if not choice:
            return None
        print("Choose a database engine:")
        for listed in engine_list:
            if listed.abbreviation:
                abbreviation = "(" + listed.abbreviation + ") "
            else:
                abbreviation = ""
            print("    " + abbreviation + listed.name)
        enginename = input(": ")
    enginename = enginename.lower()

    engine = Engine()
    if not enginename:
        engine = engine_list[0]
    else:
        for thisengine in engine_list:
            if (enginename == thisengine.name.lower()
                    or (thisengine.abbreviation
                        and enginename == thisengine.abbreviation)):
                engine = thisengine

    # Validate only a table name that was actually supplied.  The check was
    # previously evaluated as ``(name and A) or B``, so an empty name was
    # rejected and ``None`` raised TypeError on the membership test.
    table_name = opts['table_name'] if 'table_name' in opts else None
    if table_name and ("{table}" not in table_name
                       or "{db}" not in table_name):
        for opt in engine.required_opts:
            if opt[0] == 'table_name':
                # opt[2] is reported to the user as the accepted format.
                raise Exception(
                    'Accepted Table format {fom}'.format(fom=opt[2]))

    engine.opts = opts
    return engine
Exemplo n.º 3
0
def create_resources(file, skip_lines):
    """Describe ``file`` as a resource dictionary.

    A ``Table`` named after ``file`` (with ``skip_lines`` header rows) is
    handed to ``Engine.auto_create_table`` with ``make=False``; the name and
    columns of the table it returns are then copied into the result.

    Returns a dict with the keys ``name`` (base file name without its
    extension), ``schema`` (holding one ``fields`` entry per column with its
    ``name`` and ``type``), an empty ``dialect`` and the placeholder
    ``url`` value ``"FILL"``.
    """
    builder = Engine()
    source_table = Table(str(file), header_rows=skip_lines)
    detected = builder.auto_create_table(source_table,
                                         filename=file,
                                         make=False)
    attributes = vars(detected)

    file_name = os.path.basename(attributes["name"])
    resource_name = os.path.splitext(file_name)[0]
    fields = [
        {"name": column_name, "type": column_spec[0]}
        for column_name, column_spec in attributes["columns"]
    ]
    return {
        "name": resource_name,
        "schema": {"fields": fields},
        "dialect": {},
        "url": "FILL",
    }
Exemplo n.º 4
0
 def create_tabular_resources(self, file, skip_lines, encoding):
     """Build the resource dictionary for one tabular data file.

     ``encoding`` is stored on ``self`` and on the ``Engine`` used to
     auto-detect the table. The returned dict carries the cleaned,
     lower-cased resource ``name``, the file's base name as ``path``, a
     comma ``dialect`` delimiter, one ``schema`` field per detected column
     and the placeholder ``url`` value ``"fill"``.
     """
     engine = Engine()
     self.encoding = encoding
     engine.encoding = encoding
     detected = engine.auto_create_table(
         Table(str(file), header_rows=skip_lines), filename=file, make=False)
     attributes = vars(detected)

     file_name = os.path.basename(attributes["name"])
     print("Processing... {file_name}".format(file_name=file_name))
     stem = os.path.splitext(file_name)[0].lower()
     resource_name = clean_table_name(stem)

     fields = []
     for column_name, column_spec in attributes["columns"]:
         field = {"name": column_name, "type": column_spec[0]}
         if len(column_spec) >= 2:
             size = column_spec[1]
             if column_spec[0] == "char":
                 # char sizes need quotes, so emit them as strings
                 size = "{a}".format(a=size)
             field["size"] = size
         fields.append(field)

     return {
         "name": resource_name,
         "path": file_name,
         "schema": {"fields": fields},
         "dialect": {"delimiter": ","},
         "url": "fill",
     }
Exemplo n.º 5
0
def choose_engine(opts, choice=True):
    """Return the database engine requested in ``opts`` or chosen at a prompt.

    Name resolution order: ``opts["engine"]`` if the key exists, then the
    literal ``"download"`` when ``opts["command"] == "download"``, and
    finally an interactive prompt that lists every entry of ``engine_list``.
    The lower-cased name is matched against each engine's lower-cased
    ``name`` or its ``abbreviation``; the last matching engine wins.

    Args:
        opts: mapping of options; ``"command"`` is required whenever
            ``"engine"`` is missing. Stored on the result as ``engine.opts``.
        choice: set to false to get ``None`` back instead of a prompt.

    Returns:
        The selected engine; ``engine_list[0]`` if the name is empty; a new
        ``Engine()`` if no engine matches; ``None`` if prompting is needed
        but ``choice`` is false.

    Raises:
        Exception: if a non-empty ``opts["table_name"]`` is missing the
            ``{table}`` or ``{db}`` placeholder and the selected engine has
            a ``table_name`` entry in ``required_opts``.
    """
    if "engine" in opts:
        enginename = opts["engine"]
    elif opts["command"] == "download":
        enginename = "download"
    else:
        if not choice:
            return None
        print("Choose a database engine:")
        for listed in engine_list:
            if listed.abbreviation:
                abbreviation = "(" + listed.abbreviation + ") "
            else:
                abbreviation = ""
            print("    " + abbreviation + listed.name)
        enginename = input(": ")
    enginename = enginename.lower()

    engine = Engine()
    if not enginename:
        engine = engine_list[0]
    else:
        for thisengine in engine_list:
            if (enginename == thisengine.name.lower()
                    or (thisengine.abbreviation
                        and enginename == thisengine.abbreviation)):
                engine = thisengine

    # ``and`` binds tighter than ``or``: the old unparenthesised test ran the
    # "{db}" membership check even for an empty or None table name, raising
    # for "" and failing with TypeError for None.  Group both placeholder
    # checks behind the "name was supplied" guard instead.
    table_name = opts['table_name'] if 'table_name' in opts else None
    if table_name and ("{table}" not in table_name
                       or "{db}" not in table_name):
        for opt in engine.required_opts:
            if opt[0] == 'table_name':
                # opt[2] is reported to the user as the accepted format.
                raise Exception('Accepted Table format '
                                '{fom}'.format(fom=opt[2]))

    engine.opts = opts
    return engine
Exemplo n.º 6
0
def create_resources(file, skip_lines):
    """Describe ``file`` as a resource dictionary.

    A ``Table`` named after ``file`` (with ``skip_lines`` header rows) is
    passed to ``Engine.auto_create_table`` with ``make=False``. The result
    holds the lower-cased base name without extension as ``name``, the base
    file name as ``path``, a comma ``dialect`` delimiter, one ``schema``
    field per detected column (with a ``size`` when the column tuple has at
    least two items) and the placeholder ``url`` value ``"FILL"``.
    """
    builder = Engine()
    source_table = Table(str(file), header_rows=skip_lines)
    detected = builder.auto_create_table(source_table,
                                         filename=file,
                                         make=False)
    attributes = vars(detected)

    file_name = os.path.basename(attributes["name"])
    print("Processing... {file_name}".format(file_name=file_name))
    resource_name = os.path.splitext(file_name)[0].lower()

    fields = []
    for column_name, column_spec in attributes["columns"]:
        entry = {"name": column_name, "type": column_spec[0]}
        if len(column_spec) >= 2:
            if column_spec[0] == 'char':
                # char sizes need quotes, so emit them as strings
                entry["size"] = "{a}".format(a=column_spec[1])
            else:
                entry["size"] = column_spec[1]
        fields.append(entry)

    return {
        "name": resource_name,
        "path": file_name,
        "schema": {"fields": fields},
        "dialect": {"delimiter": ","},
        "url": "FILL",
    }
Exemplo n.º 7
0
"""Tests for the EcoData Retriever"""

import os
from StringIO import StringIO
from retriever.lib.engine import Engine
from retriever.lib.table import Table
from retriever.lib.templates import BasicTextTemplate
from retriever.lib.tools import getmd5
from retriever import DATA_WRITE_PATH
from nose.tools import with_setup

# Create a simple engine fixture shared by the tests in this module.
# The statements are order-dependent: the table must be attached before the
# script template is built, because the template is given that same table.
test_engine = Engine()
test_engine.table = Table("test")
test_engine.script = BasicTextTemplate(tables={'test':test_engine.table},
                                       shortname='test')
# Database name pattern containing the '{db}' placeholder.
test_engine.opts = {'database_name': '{db}_abc'}
# Home directory of the user running the tests.
HOMEDIR = os.path.expanduser('~')

def test_auto_get_columns():
    """Header labels split on the delimiter come back as [name, None] pairs"""
    test_engine.table.delimiter = ","
    expected = [[label, None] for label in "abcd"]
    columns, _ = test_engine.table.auto_get_columns("a,b,c,d")
    assert columns == expected


def test_auto_get_columns_cleanup():
    """Test of automatically cleaning up column labels from header"""
    test_engine.table.delimiter = ","
    columns, column_values = test_engine.table.auto_get_columns("a),b.b,c/c,d___d,group")
    assert columns == [['a', None], ['b_b', None], ['c_c', None], ['d_d', None],
Exemplo n.º 8
0
import retriever as rt
from retriever.lib.engine import Engine
from retriever.lib.table import TabularDataset
from retriever.lib.templates import BasicTextTemplate
from retriever.lib.cleanup import correct_invalid_value
from retriever.lib.engine_tools import getmd5
from retriever.lib.engine_tools import xml2csv
from retriever.lib.engine_tools import json2csv
from retriever.lib.engine_tools import sort_file
from retriever.lib.engine_tools import sort_csv
from retriever.lib.engine_tools import create_file
from retriever.lib.engine_tools import file_2list
from retriever.lib.datapackage import clean_input, is_empty

# Create a simple engine fixture shared by the tests in this module.
# The table is attached first because the script template below is built
# from that same table object.
test_engine = Engine()
test_engine.table = TabularDataset(**{"name": "test"})
test_engine.script = BasicTextTemplate(
    **{"tables": test_engine.table, "name": "test"})
# Database name pattern containing the '{db}' placeholder.
test_engine.opts = {'database_name': '{db}_abc'}

# Main paths
# HOMEDIR: home directory of the user running the tests.
# file_location: directory containing this file (symlinks resolved).
# retriever_root_dir: parent directory of file_location.
HOMEDIR = os.path.expanduser('~')
file_location = os.path.dirname(os.path.realpath(__file__))
retriever_root_dir = os.path.abspath(os.path.join(file_location, os.pardir))

# Setup paths for the raw data files used.
# raw_dir_files is a path template under the root directory; fill in the
# '{file_name}' placeholder with str.format to get a concrete path.
raw_dir_files = os.path.normpath(os.path.join(retriever_root_dir,
                                              'raw_data/{file_name}'))
# Path to the archive sample_zip.zip (presumably holding sample_zip.csv --
# TODO confirm). The variable name is spelled "achive" (sic).
achive_zip = raw_dir_files.format(file_name='sample_zip.zip')
Exemplo n.º 9
0
import retriever as rt
from retriever.lib.engine import Engine
from retriever.lib.table import TabularDataset
from retriever.lib.templates import BasicTextTemplate
from retriever.lib.cleanup import correct_invalid_value
from retriever.lib.engine_tools import getmd5
from retriever.lib.engine_tools import xml2csv
from retriever.lib.engine_tools import json2csv
from retriever.lib.engine_tools import sort_file
from retriever.lib.engine_tools import sort_csv
from retriever.lib.engine_tools import create_file
from retriever.lib.engine_tools import file_2list
from retriever.lib.datapackage import clean_input, is_empty

# Create a simple engine fixture shared by the tests in this module.
# The table is attached first because the script template below is built
# from that same table object.
test_engine = Engine()
test_engine.table = TabularDataset(**{"name": "test"})
test_engine.script = BasicTextTemplate(**{"tables": test_engine.table, "name": "test"})
# Database name pattern containing the '{db}' placeholder.
test_engine.opts = {'database_name': '{db}_abc'}

# Main paths
# HOMEDIR: home directory of the user running the tests.
# file_location: directory containing this file (symlinks resolved).
# retriever_root_dir: parent directory of file_location.
HOMEDIR = os.path.expanduser('~')
file_location = os.path.dirname(os.path.realpath(__file__))
retriever_root_dir = os.path.abspath(os.path.join(file_location, os.pardir))

# Setup paths for the raw data files used.
# raw_dir_files is a path template under the root directory; fill in the
# '{file_name}' placeholder with str.format to get a concrete path.
raw_dir_files = os.path.normpath(os.path.join(retriever_root_dir,
                                              'raw_data/{file_name}'))
# Path to the archive sample_zip.zip (presumably holding sample_zip.csv --
# TODO confirm). The variable name is spelled "achive" (sic).
achive_zip = raw_dir_files.format(file_name='sample_zip.zip')