Example No. 1
def import_to_datastore(directory, provider_code, batch_size):
    """
    goes through all the files in a given directory, parses and commits them
    """
    try:
        files_from_lms = dict(get_files(directory))
        if len(files_from_lms) == 0:
            return 0
        logging.info("Importing '{}'".format(directory))
        started = datetime.now()

        accidents = list(import_accidents(provider_code=provider_code, **files_from_lms))
        db.session.execute(Marker.__table__.insert(), accidents)
        db.session.commit()
        involved = list(import_involved(provider_code=provider_code, **files_from_lms))
        db.session.execute(Involved.__table__.insert(), involved)
        db.session.commit()
        vehicles = list(import_vehicles(provider_code=provider_code, **files_from_lms))
        db.session.execute(Vehicle.__table__.insert(), vehicles)
        db.session.commit()

        total = len(accidents) + len(involved) + len(vehicles)
        logging.info("\t{0} items in {1}".format(total, time_delta(started)))
        return total
    except ValueError as e:
        failed_dirs[directory] = e.message
        return 0
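
Note that batch_size is accepted but never used in this example: each table is written with a single bulk execute(). Below is a minimal sketch of how the marker insert could be chunked to honor batch_size, assuming the same db session and Marker table as above; the chunks() helper is hypothetical and not part of the project.

from itertools import islice

def chunks(iterable, size):
    """Yield successive lists of at most `size` items from `iterable`."""
    it = iter(iterable)
    while True:
        chunk = list(islice(it, size))
        if not chunk:
            break
        yield chunk

# Hypothetical usage inside import_to_datastore:
# for chunk in chunks(accidents, batch_size):
#     db.session.execute(Marker.__table__.insert(), chunk)
#     db.session.commit()
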
Example No. 2
def import_to_datastore(directory, provider_code, batch_size):
    """
    goes through all the files in a given directory, parses and commits them
    """
    try:
        files_from_lms = dict(get_files(directory))
        if len(files_from_lms) == 0:
            return 0
        logging.info("Importing '{}'".format(directory))
        started = datetime.now()

        accidents = list(import_accidents(provider_code=provider_code, **files_from_lms))

        new_ids = [m["id"] for m in accidents
                   if 0 == Marker.query.filter(and_(Marker.id == m["id"],
                                                    Marker.provider_code == m["provider_code"])).count()]
        if not new_ids:
            logging.info("\t\tNothing loaded, all accidents already in DB")
            return 0

        db.session.execute(Marker.__table__.insert(), [m for m in accidents if m["id"] in new_ids])
        db.session.commit()
        involved = list(import_involved(provider_code=provider_code, **files_from_lms))
        db.session.execute(Involved.__table__.insert(), [i for i in involved if i["accident_id"] in new_ids])
        db.session.commit()
        vehicles = list(import_vehicles(provider_code=provider_code, **files_from_lms))
        db.session.execute(Vehicle.__table__.insert(), [v for v in vehicles if v["accident_id"] in new_ids])
        db.session.commit()

        total = len(accidents) + len(involved) + len(vehicles)
        logging.info("\t{0} items in {1}".format(total, time_delta(started)))
        return total
    except ValueError as e:
        failed_dirs[directory] = e.message
        return 0
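
This variant filters out accidents whose (id, provider_code) pair already exists, but it issues one count() query per accident. A rough alternative sketch that fetches the existing ids in a single query first, assuming the Marker model, db session, provider_code and accidents list from the example above, and an id set small enough to hold in memory:

existing_ids = set(
    row[0] for row in db.session.query(Marker.id).filter(
        Marker.provider_code == provider_code))
new_ids = [m["id"] for m in accidents if m["id"] not in existing_ids]
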
Example No. 3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--specific_folder',
                        dest='specific_folder',
                        action='store_true',
                        default=False)
    parser.add_argument('--delete_all',
                        dest='delete_all',
                        action='store_true',
                        default=True)
    parser.add_argument('--path', type=str, default="static/data/lms")
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--provider_code', type=int)
    args = parser.parse_args()

    if args.specific_folder:
        if fileDialog:
            dir_name = tkFileDialog.askdirectory(
                initialdir=os.path.abspath(args.path),
                title='Please select a directory')
        else:
            dir_name = raw_input('Please provide the directory path: ')

        dir_list = [dir_name]
        if args.delete_all:
            confirm_delete_all = raw_input(
                "Are you sure you want to delete all the current data? (y/n)\n"
            )
            if confirm_delete_all.lower() == 'n':
                args.delete_all = False
    else:
        dir_list = glob.glob("{0}/*/*".format(args.path))

    # wipe all the Markers and Involved data first
    if args.delete_all:
        tables = (Vehicle, Involved, Marker)
        logging.info("Deleting tables: " + ", ".join(table.__name__
                                                     for table in tables))
        for table in tables:
            db.session.query(table).delete()
            db.session.commit()

    started = datetime.now()
    total = 0L
    for directory in dir_list:
        parent_directory = os.path.basename(
            os.path.dirname(os.path.join(os.pardir, directory)))
        provider_code = args.provider_code if args.provider_code else get_provider_code(
            parent_directory)
        total += import_to_datastore(directory, provider_code, args.batch_size)

    delete_invalid_entries()

    failed = [
        "\t'{0}' ({1})".format(directory, fail_reason)
        for directory, fail_reason in failed_dirs.iteritems()
    ]
    logging.info("Finished processing all directories{0}{1}".format(
        ", except:\n" if failed else "", "\n".join(failed)))
    logging.info("Total: {0} items in {1}".format(total, time_delta(started)))
Example No. 4
def import_to_datastore(directory, provider_code, batch_size):
    """
    goes through all the files in a given directory, parses and commits them
    """
    try:
        files_from_lms = dict(get_files(directory))
        if len(files_from_lms) == 0:
            return 0
        logging.info("Importing '{}'".format(directory))
        started = datetime.now()

        accidents = list(
            import_accidents(provider_code=provider_code, **files_from_lms))
        db.session.execute(Marker.__table__.insert(), accidents)
        db.session.commit()
        involved = list(
            import_involved(provider_code=provider_code, **files_from_lms))
        db.session.execute(Involved.__table__.insert(), involved)
        db.session.commit()
        vehicles = list(
            import_vehicles(provider_code=provider_code, **files_from_lms))
        db.session.execute(Vehicle.__table__.insert(), vehicles)
        db.session.commit()

        total = len(accidents) + len(involved) + len(vehicles)
        logging.info("\t{0} items in {1}".format(total, time_delta(started)))
        return total
    except ValueError as e:
        failed_dirs[directory] = e.message
        return 0
Example No. 5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--specific_folder', dest='specific_folder', action='store_true', default=False)
    parser.add_argument('--delete_all', dest='delete_all', action='store_true', default=True)
    parser.add_argument('--path', type=str, default="static/data/lms")
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--provider_code', type=int)
    args = parser.parse_args()

    if args.specific_folder:
        if fileDialog:
            dir_name = tkFileDialog.askdirectory(initialdir=os.path.abspath(args.path),
                                                 title='Please select a directory')
        else:
            dir_name = raw_input('Please provide the directory path: ')

        dir_list = [dir_name]
        if args.delete_all:
            confirm_delete_all = raw_input("Are you sure you want to delete all the current data? (y/n)\n")
            if confirm_delete_all.lower() == 'n':
                args.delete_all = False
    else:
        dir_list = glob.glob("{0}/*/*".format(args.path))

    # wipe all the Markers and Involved data first
    if args.delete_all:
        tables = (Vehicle, Involved, Marker)
        logging.info("Deleting tables: " + ", ".join(table.__name__ for table in tables))
        for table in tables:
            db.session.query(table).delete()
            db.session.commit()

    started = datetime.now()
    total = 0L
    for directory in dir_list:
        parent_directory = os.path.basename(os.path.dirname(os.path.join(os.pardir, directory)))
        provider_code = args.provider_code if args.provider_code else get_provider_code(parent_directory)
        total += import_to_datastore(directory, provider_code, args.batch_size)

    delete_invalid_entries()

    failed = ["\t'{0}' ({1})".format(directory, fail_reason) for directory, fail_reason in
              failed_dirs.iteritems()]
    logging.info("Finished processing all directories{0}{1}".format(", except:\n" if failed else "",
                                                             "\n".join(failed)))
    logging.info("Total: {0} items in {1}".format(total, time_delta(started)))
Example No. 6
def import_to_datastore(directory, provider_code, batch_size):
    """
    goes through all the files in a given directory, parses and commits them
    """
    try:
        files_from_lms = dict(get_files(directory))
        if len(files_from_lms) == 0:
            return 0
        logging.info("Importing '{}'".format(directory))
        started = datetime.now()

        accidents = list(
            import_accidents(provider_code=provider_code, **files_from_lms))

        new_ids = [
            m["id"] for m in accidents if 0 == Marker.query.filter(
                and_(Marker.id == m["id"], Marker.provider_code ==
                     m["provider_code"])).count()
        ]
        if not new_ids:
            logging.info("\t\tNothing loaded, all accidents already in DB")
            return 0

        db.session.execute(Marker.__table__.insert(),
                           [m for m in accidents if m["id"] in new_ids])
        db.session.commit()
        involved = list(
            import_involved(provider_code=provider_code, **files_from_lms))
        db.session.execute(
            Involved.__table__.insert(),
            [i for i in involved if i["accident_id"] in new_ids])
        db.session.commit()
        vehicles = list(
            import_vehicles(provider_code=provider_code, **files_from_lms))
        db.session.execute(
            Vehicle.__table__.insert(),
            [v for v in vehicles if v["accident_id"] in new_ids])
        db.session.commit()

        total = len(accidents) + len(involved) + len(vehicles)
        logging.info("\t{0} items in {1}".format(total, time_delta(started)))
        return total
    except ValueError as e:
        failed_dirs[directory] = e.message
        return 0
Example No. 7
def main(username=None, password=None, lastmail=False):

    username = username or os.environ.get("MAILUSER")
    password = password or os.environ.get("MAILPASS")
    if not username:
        logging.error("Username not set. Please set env var MAILUSER or use the --username argument")
    if not password:
        logging.error("Password not set. Please set env var MAILPASS or use the --password argument")
    if not username or not password:
        exit()

    imapsession = imaplib.IMAP4_SSL("imap.gmail.com")
    try:
        imapsession.login(username, password)
    except imaplib.IMAP4.error:
        logging.error("Bad credentials, unable to sign in!")
        exit()

    try:
        imapsession.select(mail_dir)
        typ, data = imapsession.search(None, "ALL")
    except imaplib.IMAP4.error:
        logging.error("Error searching given mailbox: %s" % mail_dir)
        exit()

    file_found = False
    listdir = os.listdir(detach_dir)

    is_empty = len(listdir) <= 1 or not lastmail
    total = 0

    # Iterating over all emails
    started = datetime.now()
    logging.info("Login successful! Importing files, please hold...")
    for msgId in data[0].split():
        typ, message_parts = imapsession.fetch(msgId, "(RFC822)")
        if typ != "OK":
            logging.error("Error fetching mail.")
            raise RuntimeError("Error fetching mail.")

        email_body = message_parts[0][1]
        mail = email.message_from_string(email_body)
        mtime = datetime.strptime(mail["Date"][:-6], "%a, %d %b %Y %H:%M:%S")

        if not is_empty:
            # Accident folder is not empty, we only need the latest
            if datetime.now() - mtime < timedelta(hours=4):
                file_found = True
            else:
                continue

        # Handles Gmail bug which hasn't physically removed some of the deleted files
        mail_date = datetime(2015, 10, 6, 10)
        if mtime < mail_date:
            continue

        for part in mail.walk():
            if part.get_content_maintype() == "multipart" or part.get("Content-Disposition") is None:
                continue
            filename = part.get_filename()

            if bool(filename) and filename.endswith(".csv"):
                filename = "UH-{0}_{1}-{2}.csv".format(mtime.date(), mtime.hour, mtime.minute)
                filepath = os.path.join(detach_dir, filename)
                if os.path.isfile(filepath):
                    break
                total += 1
                print "Currently loading: " + filename + "       "
                sys.stdout.write("\033[F")
                time.sleep(0.1)
                with open(filepath, "wb") as fp:
                    fp.write(part.get_payload(decode=True))

        if file_found:
            break

    logging.info("Imported {0} file(s) in {1}".format(total, time_delta(started)))
    imapsession.close()
    imapsession.logout()
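
The Date header is parsed by slicing off its last six characters (mail["Date"][:-6]) and assuming a fixed "%a, %d %b %Y %H:%M:%S" layout, which can fail on headers that do not end with a numeric timezone offset. A more tolerant sketch using the standard library's email.utils; parse_mail_date is a hypothetical helper, not part of the example:

from datetime import datetime
from email.utils import mktime_tz, parsedate_tz

def parse_mail_date(date_header):
    """Parse an RFC 2822 Date header into a naive UTC datetime, or None."""
    parsed = parsedate_tz(date_header)
    if parsed is None:
        return None
    return datetime.utcfromtimestamp(mktime_tz(parsed))
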
Example No. 8
def main(username=None, password=None, lastmail=False):

    username = username or os.environ.get('MAILUSER')
    password = password or os.environ.get('MAILPASS')
    if not username:
        logging.error(
            "Username not set. Please set env var MAILUSER or use the --username argument"
        )
    if not password:
        logging.error(
            "Password not set. Please set env var MAILPASS or use the --password argument"
        )
    if not username or not password:
        exit()

    imapsession = imaplib.IMAP4_SSL('imap.gmail.com')
    try:
        imapsession.login(username, password)
    except imaplib.IMAP4.error:
        logging.error('Bad credentials, unable to sign in!')
        exit()

    try:
        imapsession.select(mail_dir)
        typ, data = imapsession.search(None, 'ALL')
    except imaplib.IMAP4.error:
        logging.error('Error searching given mailbox: %s' % mail_dir)
        exit()

    file_found = False
    listdir = os.listdir(detach_dir)

    is_empty = len(listdir) <= 1 or not lastmail
    total = 0

    # Iterating over all emails
    started = datetime.now()
    logging.info("Login successful! Importing files, please hold...")
    for msgId in data[0].split():
        typ, message_parts = imapsession.fetch(msgId, '(RFC822)')
        if typ != 'OK':
            logging.error('Error fetching mail.')
            raise RuntimeError('Error fetching mail.')

        email_body = message_parts[0][1]
        mail = email.message_from_string(email_body)
        mtime = datetime.strptime(mail['Date'][:-6], '%a, %d %b %Y %H:%M:%S')

        if not is_empty:
            # Accident folder is not empty, we only need the latest
            if datetime.now() - mtime < timedelta(hours=4):
                file_found = True
            else:
                continue

        # Handles Gmail bug which hasn't physically removed some of the deleted files
        mail_date = datetime(2015, 10, 6, 10)
        if mtime < mail_date:
            continue

        for part in mail.walk():
            if (part.get_content_maintype() == 'multipart'
                    or part.get('Content-Disposition') is None):
                continue
            filename = part.get_filename()

            if bool(filename) and filename.endswith(".csv"):
                filename = 'UH-{0}_{1}-{2}.csv'.format(mtime.date(),
                                                       mtime.hour,
                                                       mtime.minute)
                filepath = os.path.join(detach_dir, filename)
                if os.path.isfile(filepath):
                    break
                total += 1
                print 'Currently loading: ' + filename + '       '
                sys.stdout.write("\033[F")
                time.sleep(0.1)
                with open(filepath, 'wb') as fp:
                    fp.write(part.get_payload(decode=True))

        if file_found:
            break

    logging.info("Imported {0} file(s) in {1}".format(total,
                                                      time_delta(started)))
    imapsession.close()
    imapsession.logout()
Example No. 9
                continue
            self.cache[prop] = [response.get('daily').get('data')[0].get(prop)]


if __name__ == '__main__':
    temperature_client = TemperatureClient('https://api.darksky.net/forecast')
    properties = [
        'time', 'sunriseTime', 'sunsetTime', 'temperatureHigh', 'dewPoint',
        'humidity', 'windSpeed', 'cloudCover'
    ]
    start_date = starting_date()
    end_date = start_date + year()
    # locations = ['USA_AK_FAIRBANKS', 'USA_CA_LOS_ANGELES', 'USA_IL_CHICAGO-OHARE', 'USA_MN_MINNEAPOLIS', 'USA_TX_HOUSTON', 'USA_WA_SEATTLE']
    locations = [
        'USA_NV_LAS_VEGAS', 'USA_CA_SAN_FRANCISCO', 'USA_AZ_PHOENIX',
        'USA_GA_ATLANTA', 'USA_MD_BALTIMORE', 'USA_CO_BOULDER'
    ]
    for location in locations:
        temperature_client.cache = {}
        coordinates = get_lat_lng(location)
        current_date = start_date
        while current_date <= end_date:
            response = temperature_client.get_data(coordinates + ',' +
                                                   str(current_date))
            temperature_client.parse_data(response, properties)
            current_date = current_date + time_delta()
        data = temperature_client.get_cache()
        temperature_data = pd.DataFrame.from_dict(data)
        temperature_data.to_csv('data/' + location + '_temperature_usage.csv')
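
The string passed to get_data (coordinates + ',' + str(current_date)) matches the Dark Sky "Time Machine" path segment of latitude,longitude,time. The TemperatureClient implementation is not shown here; a rough sketch of what such a request could look like with requests, where the key and helper name are assumptions:

import requests

DARKSKY_KEY = "YOUR_API_KEY"  # placeholder, not a real key

def fetch_forecast(base_url, lat_lng_time):
    # Time Machine requests take the form {base_url}/{key}/{lat},{lng},{time}
    url = "{0}/{1}/{2}".format(base_url, DARKSKY_KEY, lat_lng_time)
    response = requests.get(url)
    response.raise_for_status()
    return response.json()
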