Пример #1
0
def _aggregate_by_target(geoentities):
    """Collect per-target aggregates in a single pass over *geoentities*.

    Returns a ``(lowest_admin_levels, highest_populations)`` pair of dicts,
    both keyed by ``geoentity.target``:

    * ``lowest_admin_levels[target]`` is the smallest truthy ``admin_level``
      among the entities sharing that target; targets without any truthy
      level are absent from the dict.
    * ``highest_populations[target]`` is the largest non-None ``population``
      among the entities sharing that target; targets without any known
      population are absent from the dict.

    NOTE(review): targets are used as dict keys, so they must be hashable
    (presumably strings) -- confirm against the caller.
    """
    lowest_admin_levels = {}
    highest_populations = {}
    for candidate in geoentities:
        target = candidate.target

        level = candidate.admin_level
        # Falsy levels (None and 0) are skipped, matching the filter the
        # per-row scan historically applied.
        if level and (target not in lowest_admin_levels
                      or level < lowest_admin_levels[target]):
            lowest_admin_levels[target] = level

        population = candidate.population
        # None is ignored so values are never compared against None.
        if population is not None and (target not in highest_populations
                                       or population > highest_populations[target]):
            highest_populations[target] = population

    return lowest_admin_levels, highest_populations


def _populate_feature_columns(df, geoentities):
    """Append one value per geoentity to each feature column of *df*.

    *df* is indexed by column name and each column supports ``.append``
    (presumably a dict of lists -- confirm against ``get_fake_df``).
    """
    lowest_admin_levels, highest_populations = _aggregate_by_target(geoentities)

    for geoentity in geoentities:
        name = geoentity.target
        admin_level = geoentity.admin_level
        population = geoentity.population

        # -99 is the sentinel for "no entity with this target has an admin level".
        lowest_admin_level = lowest_admin_levels.get(name, -99)

        # Both flags are derived from the *current* geoentity. Earlier code read
        # a variable leaked from a list comprehension, i.e. always the last
        # element of geoentities.
        is_highest_population = bool(population) and population == highest_populations.get(name)

        df['admin_level'].append(str(admin_level or "None"))
        df['cluster_frequency'].append(geoentity.cluster_frequency or 0)
        df['country_code'].append(geoentity.country_code or "UNKNOWN")
        df['country_rank'].append(geoentity.country_rank or 999)
        df['edit_distance'].append(str(geoentity.edit_distance))
        df['has_mpoly'].append(str(geoentity.has_mpoly or False))
        df['has_pcode'].append(str(geoentity.has_pcode or False))
        df['is_country'].append(str(admin_level == 0))
        df['is_lowest_admin_level'].append(str(lowest_admin_level == admin_level))
        df['is_highest_population'].append(str(is_highest_population))
        df['median_distance'].append(geoentity.median_distance_from_all_other_points)
        df['matches_topic'].append(str(geoentity.matches_topic or "False"))
        df['population'].append(population)
        df['popularity'].append(geoentity.popularity)


def run(geoentities):
    """Score every geoentity with the wide-and-deep classifier.

    Builds a ``DNNLinearCombinedClassifier`` from ``MODEL_DIR``, fills the
    frame returned by ``get_fake_df()`` with one row of features per
    geoentity, and stores the second element of each ``predict_proba`` row
    on the matching geoentity as ``probability``.

    Args:
        geoentities: indexable, re-iterable collection of objects exposing
            the attributes read below (``target``, ``admin_level``,
            ``population``, ``country_code``, ...). Modified in place.

    Returns:
        None. Any exception raised along the way is caught and reported
        through ``fail`` instead of propagating.
    """
    try:
        # Single-argument print(...) with %-formatting emits the same text as
        # the Python 2 print statement and also parses on Python 3.
        print("starting ai.predict")
        # NOTE(review): presumably drops a stale database connection before
        # the long-running prediction -- confirm what `connection` is.
        connection.close()

        start = datetime.now()

        classifier = DNNLinearCombinedClassifier(
            model_dir=MODEL_DIR,
            linear_feature_columns=wide_columns,
            dnn_feature_columns=deep_columns,
            dnn_hidden_units=[100, 50]
        )
        print("classifier: %s" % (classifier,))

        print("creating the classifier took %s seconds"
              % ((datetime.now() - start).total_seconds(),))

        df = get_fake_df()
        print("about to populate data frame for prediction")
        start_df = datetime.now()

        _populate_feature_columns(df, geoentities)

        print("populating df took %s minutes"
              % (((datetime.now() - start_df).total_seconds() / 60),))

        # Rows come back in input order; row[1] is kept as the probability.
        for index, row in enumerate(classifier.predict_proba(input_fn=lambda: input_fn(df))):
            geoentities[index].probability = row[1]

    except Exception as e:
        fail("EXCPETION in scripts.ai.predict.run: " + str(e))