def export_db_to_log(my_date):

    oem_name='Shabik_360'
    stat_category='friend_relation'

    current_date=helper_regex.translate_date(sg_timestamp=my_date,timezone_offset_to_sg=config.timezone_offset_shabik_360)

    start_time=helper_regex.time_add_by_hour(current_date+' 00:00:00',-3)
    end_time=helper_regex.time_add_by_hour(current_date+' 00:00:00',-3+24)

    source_db=config.conn_stc_1
    source_table='friendship.friendship'
    source_condition_name='adding_friend'

    target_log_name='E:\\RoutineScripts\\log\\%s_%s_%s_record_from_%s.log.%s' \
                    % (oem_name,stat_category,source_condition_name,source_table,current_date)

    start_id=helper_mysql.guess_pk_id_by_time(table_name=source_table, \
                                          target_time=start_time,pk_column_name='id', \
                                          sequence_time_column_name='created_on',db_conn=config.conn_stc_1)

    end_id=helper_mysql.guess_pk_id_by_time(table_name=source_table, \
                                          target_time=end_time,pk_column_name='id', \
                                          sequence_time_column_name='created_on',db_conn=config.conn_stc_1)

    sql=r'''
    
    select id,created_on,user_id,friend_id
    from friendship.friendship
    where id>=%s and id<%s
    and following=1
    order by id asc

    ''' % (start_id,end_id)

    existing_adding=set([])

    target_log=codecs.open(target_log_name,'w','utf-8',None,1024*1024*4)

    rows=helper_mysql.fetch_rows(sql=sql,db_conn=config.conn_stc_1)

    for r in rows:
        if (r['friend_id'],r['user_id']) in existing_adding:
            existing_adding.remove((r['friend_id'],r['user_id']))
            continue
        
        t=helper_regex.time_add_by_hour(r['created_on'],8)
        
        target_log.write('%s [[friend add]] monetid=%s to=%s\n' % \
                            (t,r['user_id'],r['friend_id']))

        existing_adding.add((r['friend_id'],r['user_id']))

    target_log.close()

    print "start_id,end_id:",start_id,end_id
示例#2
0
def duplicate_record(item,target_date):

    step=10000
    current_idx=0
    helper_mysql.quick_insert=True
    helper_mysql.print_log=False

    sql=r"select * from %s where `oem_name`='%s' and `category`='%s'" % (item[0],item[1],item[2])
    if item[3]:
        sql+=r" and `key`='%s'" % (item[3],)
    sql+=r" and `date`='%s'" % (target_date,)

    for i in range(1000):
        counter=1

        sql_limit=r" limit %s,%s" % (current_idx,step)

        #print sql+sql_limit

        rows=helper_mysql.fetch_rows(sql+sql_limit,db_conn=item[4])
        if not rows:
            print 'end.'
            break
        
        print target_date,current_idx,len(rows),item
        
        #do copy
        
        for row in rows:
            helper_mysql.put_raw_data(
                oem_name=row['oem_name'],
                category=row['category'],
                key=row['key'],
                sub_key=row['sub_key'],
                value=row['value'],
                table_name=item[0],
                date=row['date'],
                created_on=row['created_on'],
                db_conn=item[5])

            counter+=1
            #print counter
            
        current_idx+=step
def calculate_collection(oem_name,category,db_name='raw_data',date_units_nday_unique=None,date_units_retain_rate=None):
    """Run ``calculate_ndays_unique`` for every key space returned by a fixed query.

    Enables ``config.collection_cache_enabled``, selects the distinct
    (`oem_name`, `category`, `key`, `sub_key`) combinations from
    `raw_data_shabik_360` (with any ``_average`` removed from `key`), prints
    them, and calls ``calculate_ndays_unique`` once per combination.

    NOTE(review): the query hard-codes oem_name "Shabik_360", category
    "moagent" and the table name, so the ``oem_name`` and ``category``
    arguments do not influence which rows are selected -- confirm intent.

    Args:
        oem_name: accepted but not used by the visible code.
        category: accepted but not used by the visible code.
        db_name: forwarded to ``calculate_ndays_unique``.
        date_units_nday_unique: forwarded to ``calculate_ndays_unique``;
            defaults to a fresh ``[600]`` on every call.
        date_units_retain_rate: accepted but not used by the visible code;
            the historical default was ``[1,2,3]``.
    """
    # Build the default per call: a list literal in the signature would be a
    # single object shared by all calls and by the callee it is handed to.
    if date_units_nday_unique is None:
        date_units_nday_unique=[600]

    config.collection_cache_enabled=True

    #fetch target keys

    target_collection_name_sql=r'''

    select 

    distinct `oem_name`
    ,`category`
    ,replace(`key`,'_average','') as `key`
    ,`sub_key`

    from `raw_data_shabik_360`

    where `oem_name`="Shabik_360" and `category`="moagent" and `key`="app_page_daily_visitor_unique"

    '''

    keys=helper_mysql.fetch_rows(target_collection_name_sql)

    print(keys)

    #do action

    for key_space in keys:
        print(key_space)
        calculate_ndays_unique(key_space,db_name,date_units_nday_unique)
for table_name in target_tables:
    
    #current_max_id=helper_mysql.get_raw_data(oem_name='Stat_Portal',category='data_migrate',key='max_transfered_id',sub_key=table_name,default_value=0,table_name='raw_data_debug',date='',db_conn=None)
    
    current_start_id=helper_mysql.get_raw_data(oem_name='Stat_Portal',category='data_migrate',key='max_transfered_id',sub_key=table_name,default_value=0,table_name='raw_data_debug',date='',db_conn=None)

    source_step=10000
    target_step=10000

    for i in range(0,100000000):
        current_start_id+=1
        print '===now at:',table_name,current_start_id

        source_rows=helper_mysql.fetch_rows(sql=r'''
        select * from %s where id>=%s order by id limit %s
        ''' % (table_name,current_start_id,source_step),db_conn=source_conn)
        if not source_rows:
            print '===finished at:',table_name,current_start_id
            print helper_regex.get_time_str_now()
            break        

        print 'length of source:',len(source_rows)
        print helper_regex.get_time_str_now()

        sql_temp=[]

        for row in source_rows:
            sql_temp.append("('%s','%s','%s','%s','%s')" % \
            (row['id'],row['created_on'],row['element_count'],row['element_string_md5'],helper_mysql.escape_string(row['element_string']),))
            current_start_id=max(current_start_id,int(row['id']))
示例#5
0
@author: development
'''
import helper_mysql
import urllib
import os.path
_conn_sticker={
    'host':'m1-mysql-master-03.mozat.com',
    'port':'3333',
    'account':'mozone',
    'pwd':'morangerunmozone', 
    'db':'sticker',
    'db_type':'mysql',
    }

sql = 'select * from sticker.sticker'
sticker= helper_mysql.fetch_rows(sql, _conn_sticker)
print len(sticker)
for row in sticker:
    print row['id']
    print row['url']
    f_name='./%s.png'%(row['id'])
    if os.path.isfile(f_name):
        continue
    f = open(f_name,'wb')
    f.write(urllib.urlopen(row['url']).read())
    f.close()
#    break    
print 'finish'


if __name__ == '__main__':
    # Script entry point made of two consecutive passes.
    # NOTE(review): table_name, source_conn, current_start_id and the time /
    # helper_regex modules are used below but are not defined or imported in
    # the visible part of the file -- confirm they are set up elsewhere.
    # NOTE(review): this block may continue past the last visible line.
    source_step=50000
    #target_step=50000
    #current_start_id=20000000

    # Pass 1: read the table in primary-key windows of source_step ids,
    # pausing 3 seconds before each query.
    for i in xrange(0,100000001):
        time.sleep(3)
        # NOTE(review): in the visible code current_start_id only grows by 1
        # per iteration while each query covers source_step ids, so
        # consecutive windows overlap -- confirm it is advanced elsewhere.
        current_start_id+=1
        print '===now at:',table_name,current_start_id

        # "0+column" is applied to following/followed/blocking/blocked/flags
        # in the select list (presumably to get numeric values -- confirm).
        sql=r'''
        select id,user_id,friend_id,0+following as following,0+followed as followed,0+blocking as blocking,0+blocked as blocked,0+flags as flags,created_on,modified_on
        from %s 
        where id>=%s and id<%s+%s
        ''' % (table_name,current_start_id,current_start_id,source_step)

        source_rows=helper_mysql.fetch_rows(sql,db_conn=source_conn)
        print sql
        # An empty window ends the pass.
        if not source_rows:
            print '===finished at:',table_name,current_start_id
            print helper_regex.get_time_str_now()
            break

        print 'length of source:',len(source_rows)
        print helper_regex.get_time_str_now()

        # One quoted value tuple per fetched row.
        # NOTE(review): sql_temp is not consumed in the visible code.
        sql_temp=[]

        for row in source_rows:
            sql_temp.append("('%s','%s','%s','%s','%s','%s','%s','%s','%s','%s')" % \
                (row['id'],row['user_id'],row['friend_id'],row['following'],row['followed'],row['blocking'], \
                row['blocked'],row['flags'],row['created_on'],row['modified_on'],))
    # Pass 2: walk backwards one day at a time from begin_date (1000 days)
    # and read that day's Vodafone/website rows from table_name.
    begin_date='2011-07-11'
    step=1000

    for i in range(0,1000):
        current_date=helper_regex.date_add(begin_date,-i)
        print table_name,current_date

        # The next expression formats the query text but the result is not
        # assigned or used; the same query is issued right after it.
        r'''
        select * 
        from %s 
        where date='%s' and `oem_name`="Vodafone" and `category`="website"
        ''' % (table_name,current_date)

        source_rows=helper_mysql.fetch_rows(sql=r'''
        select * 
        from %s 
        where date='%s' and `oem_name`="Vodafone" and `category`="website"
        ''' % (table_name,current_date),db_conn=source_conn)
        

        #exit()
        print 'length of source:',len(source_rows)
        print helper_regex.get_time_str_now()

        # One quoted value tuple per row; only sub_key goes through
        # escape_string.
        sql_temp=[]
        for row in source_rows:
            sql_temp.append("('%s','%s','%s','%s','%s','%s','%s')" % \
            (row['oem_name'],row['category'],row['key'],helper_mysql.escape_string(row['sub_key']),row['date'],row['value'],row['created_on'],))

        # Steps through offsets 0, step, 2*step, ...; note that it reuses the
        # name `i` of the enclosing loop. Only a timestamp is printed in the
        # visible part of the body.
        for i in range(0,100000001,step):
            print helper_regex.get_time_str_now()