Exemplo n.º 1
0
def load_incidence_data(db):
    url = 'http://www.who.int/entity/immunization/monitoring_surveillance/data/incidence_series.xls?ua=1'
    output_file = 'incidences.xls'
    download_xls.get_file(url, output_file)
    contents = download_xls.read_file(output_file)

    print 'Inserting incidence data to db'

    c = db.cursor()

    for sheet in contents:

        if sheet in ['Readme Incidence', 'Reg_&_Global_Incidence']:
            continue

        rows = contents.get(sheet)

        headers = []
        for i in range(len(rows)):
            if i == 0:
                headers = rows[i]
            else:
                data = rows[i]
                region = data[0]
                iso_code = data[1]
                country = data[2]
                disease = data[3]

                for j in range(4, len(data)):
                    year = headers[j]
                    value = data[j]
                    if value:
                        SQLite3.insert(c, 'incidents', (region, iso_code, country, disease, year, value))
        db.commit()
Exemplo n.º 2
0
def save_complete_map():
    db = SQLite3('data.db')
    c = db.cursor()
    df = pd.read_csv('country_map_complete.csv')

    for i in df.index:
        who_country = df.ix[i]['who_country']
        p_country = df.ix[i]['p_country']

        SQLite3.insert(c, 'countries', [who_country, p_country])

    db.commit()
Exemplo n.º 3
0
def get_populations():

    populations = []

    db = SQLite3('data.db')
    c = db.cursor().execute("""
    SELECT DISTINCT year,
                    population_country_name as country
      FROM (
               SELECT year,
                      country_name
                 FROM coverage
               UNION ALL
               SELECT year,
                      country_name
                 FROM incidents
           )
           LEFT OUTER JOIN
           countries ON who_country_name = country_name
     WHERE population_country_name IS NOT NULL
     ORDER BY population_country_name,
              year;
    """)

    rows = c.fetchall()

    bar = pyprind.ProgBar(len(rows), title='Downloading population data')
    i = 0
    for row in rows:

        country = row['country']
        year = row['year']

        url = 'http://api.population.io:80/1.0/population/%s/%s/' % (year, country)
        r = requests.get(url)
        r.raise_for_status()
        entries = r.json()
        total = 0
        for entry in entries:
            total += entry['total']

        data = (country, year, total)
        populations.append(data)
        bar.update()

    print 'Inserting population data into db'
    SQLite3.insertmany(c, 'populations', populations)
    db.commit()
Exemplo n.º 4
0
def cluster(similarity=pearson):
    db = SQLite3().cursor()
    pins = Pins()
    words = Words()
    maxcount = pins.size()
    labels = [r[0] for r in words.set()]
    offset = 0
    n = 1
    sum_num = sum([i for i in range(1, maxcount)])
    while True:
        pin_a = pins.find_by_offset(offset)
        pin_a_words = [w[0] for w in words.find_by_pinid(pin_a[0])]
        pin_a_wordcount = [pin_a_words.count(w) for w in list(set(labels))]
        # calculate distance of two pins.
        for i in range(offset + 1, maxcount):
            pin_b = pins.find_by_offset(i)
            pin_b_words = [w[0] for w in words.find_by_pinid(pin_b[0])]
            pin_b_wordcount = [pin_b_words.count(w) for w in list(set(labels))]
            # calculate distance of two pins
            print '[%s] %s / %s calculate score of %s and %s' % (
                datetime.today().strftime('%Y-%m-%d %H:%M:%S'), n, sum_num,
                pin_a[0], pin_b[0])
            sim = 1.0 - similarity(pin_a_wordcount, pin_b_wordcount)
            # save distance to database for cache
            clusters = Clusters()
            clusters.data['pin_id_a'] = pin_a[0]
            clusters.data['pin_id_b'] = pin_b[0]
            clusters.data['score'] = sim
            clusters.save()
            n += 1
        if offset >= maxcount - 1:
            break
        offset += 1
Exemplo n.º 5
0
 def get_candidates(self, offset=0, n=5, threshold=0.8):
     db = SQLite3().cursor()
     m = [r for r in db.execute('''
     select
         pins_a.pin_id,
         pins_a.image_url
     from
         clusters
     inner join
         pins pins_a on clusters.pin_id_a = pins_a.pin_id
     where
         clusters.score > ?
     order by clusters.score desc
     ''', (threshold,))]
     m = list(set(m))
     m.sort()
     m.reverse()
     return m[offset:offset+n]
Exemplo n.º 6
0
 def get_top_matches(self, pin_id, n=5, threshold=0.8):
     q1 = '''
     select
         clusters.score,
         pins_a.image_url,
         pins_b.image_url,
         pins_b.pin_id
     from
         clusters
     inner join
         pins pins_a on clusters.pin_id_a = pins_a.pin_id
     inner join
         pins pins_b on clusters.pin_id_b = pins_b.pin_id
     where
         clusters.pin_id_a = ?
     AND clusters.score > ?
     order by clusters.score desc
     limit ?
     '''
     q2 = '''
     select
         clusters.score,
         pins_b.image_url,
         pins_a.image_url,
         pins_a.pin_id
     from
         clusters
     inner join
         pins pins_a on clusters.pin_id_a = pins_a.pin_id
     inner join
         pins pins_b on clusters.pin_id_b = pins_b.pin_id
     where
         clusters.pin_id_b = ?
     AND clusters.score > ?
     order by clusters.score desc
     limit ?
     '''
     db = SQLite3().cursor()
     m = [r for r in db.execute(q1, (pin_id, threshold, n))] +\
         [r for r in db.execute(q2, (pin_id, threshold, n))]
     m = list(set(m))
     m.sort()
     m.reverse()
     return m[:n]
Exemplo n.º 7
0
 def exists(self):
     data = (self.data['pin_id'],)
     db = SQLite3().cursor()
     db.execute("select count(*) from pins where pin_id=?", data)
     return db.fetchone()[0]
Exemplo n.º 8
0
 def __init__(self):
     self.db = SQLite3()
Exemplo n.º 9
0
 def exists(self):
     data = (self.data['pin_id_a'], self.data['pin_id_b'])
     db = SQLite3().cursor()
     db.execute("select count(*) from clusters where pin_id_a=? and pin_id_b=?", data)
     return db.fetchone()[0]
Exemplo n.º 10
0
 def exists(self):
     data = (self.data['pin_id'], self.data['user'], self.data['word'])
     db = SQLite3().cursor()
     db.execute("select count(*) from words where pin_id=? and user=? and word=?", data)
     return db.fetchone()[0]