示例#1
0
文件: repost.py 项目: CCPLab/miner
    def setattr(self, attr, v):
        """Assign ``v`` to attribute ``attr``, normalising ``created_at``.

        For every attribute other than ``created_at`` the value is stored
        unchanged.  For ``created_at`` the value is stored as whatever
        ``util.time2epoch`` returns (or the int itself when an int is given),
        and the textual form is additionally stored under ``c_at_or``.
        """
        if attr != 'created_at':
            self.__setattr__(attr, v)
            return

        if isinstance(v, int):
            # Already an epoch integer: only the textual form must be derived.
            readable = util.time2str(v)
        elif isinstance(v, datetime):
            v = util.time2epoch(v)
            readable = util.time2str(v)
        else:
            # Anything else is treated as the original string representation.
            readable = v
            # Creation date; per the original author's note this becomes an
            # epoch integer based on UTC+8.
            v = util.time2epoch(v)

        self.__setattr__('c_at_or', readable)
        self.__setattr__(attr, v)
示例#2
0
def dig_user(fpath):
    """Group the statuses stored in ``fpath`` by ISO week and analyse each week.

    ``fpath`` is a UTF-8 (optionally BOM-prefixed) JSON file holding a
    sequence of statuses; each status is read for ``['user']['id']``,
    ``['text']`` and ``['created_at']``.  Texts that fall in the same ISO
    week are joined with newlines, passed through
    ``wenxin.TextMind().process_paragraph`` and the ``repr`` of each result
    is written, one ``<YYYYWW>\\t<repr>`` line per week in ascending week
    order, to ``<output_dir>/<uid>.txt``.

    The user id used for the file name is the one of the last status read.
    When the file contains no statuses nothing is written and the function
    simply returns.
    """
    with codecs.open(fpath, 'r', encoding='utf-8-sig') as src:
        # The stream is already decoded by codecs, so json.load needs no
        # encoding hint (the keyword is rejected by newer Python versions).
        statuses = json.load(src)

    if not statuses:
        # No status means no user id to name the output file after.
        return

    texts_by_week = {}
    uid = None
    for s in statuses:
        uid = s['user']['id']

        created = datetime.fromtimestamp(util.time2epoch(s['created_at']))
        year, week = created.isocalendar()[:2]
        key = "%04d%02d" % (year, week)

        # Collect the pieces and join once later instead of repeatedly
        # concatenating an ever-growing string.
        texts_by_week.setdefault(key, []).append(s['text'])

    output_file = '%s/%s.txt' % (output_dir, uid)

    with codecs.open(output_file, 'w', encoding='ascii') as out:
        for key in sorted(texts_by_week):
            paragraph = '\n'.join(texts_by_week[key]).encode('utf-8')
            # A fresh analyser per week, as in the original code; it is not
            # visible here whether TextMind instances keep state between calls.
            mind = wenxin.TextMind()
            r = mind.process_paragraph(paragraph, encoding='utf-8')
            out.write("%s\t%s\n" % (key, repr(r)))
示例#3
0
    def setattr(self, attr, v):
        """Store ``v`` under ``attr``; ``created_at`` gets special handling.

        When ``attr`` is ``created_at`` two attributes are written:
        ``c_at_or`` receives the textual form of the timestamp and
        ``created_at`` itself receives the epoch form.  Any other attribute
        is stored as given.
        """
        if attr == 'created_at':
            if isinstance(v, int):
                # Epoch integer supplied directly.
                epoch = v
                as_text = util.time2str(v)
            elif isinstance(v, datetime):
                epoch = util.time2epoch(v)
                as_text = util.time2str(epoch)
            else:
                # Treated as a string: keep it verbatim as the original form.
                as_text = v
                # Creation date; the original author notes this is converted
                # to an epoch integer based on UTC+8.
                epoch = util.time2epoch(v)
            self.__setattr__('c_at_or', as_text)
            v = epoch

        self.__setattr__(attr, v)
示例#4
0
文件: profile.py 项目: CCPLab/miner
    def setattr(self, attr, v):
        """Assign ``v`` to ``attr`` after normalising a few known attributes.

        * ``province``, ``city`` and ``verified_type`` are stored as ``str``.
        * ``idstr`` given as ``None`` falls back to the string form of this
          object's ``id`` attribute; it stays ``None`` when no ``id`` is set.
        * ``created_at`` is stored in epoch form, and its textual form is
          additionally stored under ``created_at_or``.

        Every other attribute is stored unchanged.
        """
        if attr in ('province', 'city', 'verified_type') and not isinstance(v, str):
            # Verified type, province and city IDs are stored as str.
            v = str(v)
        elif attr == 'idstr' and v is None:
            # Use the instance's own id; the bare name ``id`` would be the
            # builtin function and yield a useless string.
            own_id = getattr(self, 'id', None)
            if own_id is not None:
                v = str(own_id)
        elif attr == 'created_at':
            if isinstance(v, int):   # epoch time given as int
                str_original = util.time2str(v)
            elif isinstance(v, datetime):
                v = util.time2epoch(v)
                str_original = util.time2str(v)
            else:   # str
                str_original = v
                # Creation date; per the original author's note, converted to
                # an epoch integer based on UTC+8.
                v = util.time2epoch(v)
            self.__setattr__('created_at_or', str_original)

        self.__setattr__(attr, v)
示例#5
0
    def setattr(self, attr, v):
        """Store ``v`` under ``attr``, normalising selected profile fields.

        ``province``, ``city`` and ``verified_type`` are coerced to ``str``.
        A ``None`` ``idstr`` is replaced by ``str(self.id)`` when the object
        has an ``id``; otherwise it is left as ``None``.  ``created_at`` is
        stored in epoch form while its textual form goes to
        ``created_at_or``.  All other attributes are stored as given.
        """
        needs_str = attr in ('province', 'city', 'verified_type')
        if needs_str and not isinstance(v, str):
            # Verified type, province and city IDs are stored as str.
            v = str(v)
        elif attr == 'idstr' and v is None:
            # ``id`` on its own is the builtin function, not this object's
            # id, so the instance attribute has to be looked up explicitly.
            instance_id = getattr(self, 'id', None)
            if instance_id is not None:
                v = str(instance_id)
        elif attr == 'created_at':
            if isinstance(v, int):  # epoch time given as int
                str_original = util.time2str(v)
            elif isinstance(v, datetime):
                v = util.time2epoch(v)
                str_original = util.time2str(v)
            else:  # str
                str_original = v
                # Creation date; the original author notes this is converted
                # to an epoch integer based on UTC+8.
                v = util.time2epoch(v)
            self.__setattr__('created_at_or', str_original)

        self.__setattr__(attr, v)
示例#6
0
def get_statuses_groupby_week(uid):
    """Return the texts of a user's statuses grouped by ISO week.

    Statuses are obtained from ``dSource.get_statuses(uid, fields)`` with the
    fields ``created_at`` and ``text``.  The result is a ``defaultdict(str)``
    keyed by ``"YYYYWW"`` (ISO year and ISO week, zero padded); texts that
    share a week are joined with newlines in the order they were received.
    """
    fields = ['created_at', 'text']
    weekly = defaultdict(str)
    for status in dSource.get_statuses(uid, fields):
        posted = datetime.fromtimestamp(util.time2epoch(status.get('created_at')))
        bucket = "%04d%02d" % posted.isocalendar()[:2]

        body = status.get('text')
        # .get() does not trigger the defaultdict factory, so a week that
        # has not been seen yet yields None here.
        previous = weekly.get(bucket, None)
        weekly[bucket] = body if previous is None else previous + '\n' + body
    return weekly
示例#7
0
def get_statuses_groupby_week(uid):
    """Collect a user's status texts into one newline-joined string per week.

    Reads ``created_at`` and ``text`` from every item yielded by
    ``dSource.get_statuses`` and returns a ``defaultdict(str)`` that maps a
    ``"YYYYWW"`` key (ISO year followed by ISO week) to the texts of that week.
    """
    wanted = ['created_at', 'text']
    result = defaultdict(str)
    for entry in dSource.get_statuses(uid, wanted):
        epoch = util.time2epoch(entry.get('created_at'))
        iso = datetime.fromtimestamp(epoch).isocalendar()
        week_key = "%04d%02d" % (iso[0], iso[1])

        text = entry.get('text')
        so_far = result.get(week_key, None)
        if so_far is None:
            # First text seen for this week.
            result[week_key] = text
            continue
        result[week_key] = so_far + '\n' + text
    return result
示例#8
0
def get_status(uid):
    """Return the status texts of user ``uid`` grouped by ISO week.

    Rows are read from the ``sina_statuses`` table through the module-level
    cursor ``cur``; each row is accessed with ``.get('created_at')`` and
    ``.get('text')``.  The result is a ``defaultdict(str)`` keyed by
    ``"YYYYWW"`` (ISO year and week) whose values are the texts of that week
    joined by newlines in row order.
    """
    # Bind uid as a query parameter instead of formatting it into the SQL
    # text, so the value can never be interpreted as SQL.
    # NOTE(review): assumes the driver uses the "format" paramstyle (%s), as
    # MySQLdb/pymysql do -- confirm against the connection setup.
    cur.execute(
        'select created_at, text from sina_statuses where user_id=%s', (uid,))

    result = defaultdict(str)
    for row in cur:
        created = datetime.fromtimestamp(util.time2epoch(row.get('created_at')))
        year, week = created.isocalendar()[:2]
        key = "%04d%02d" % (year, week)

        text = row.get('text')
        # .get() bypasses the defaultdict factory, so None marks a new week.
        tval = result.get(key, None)
        if tval is not None:
            result[key] = tval + '\n' + text
        else:
            result[key] = text
    return result
示例#9
0

if __name__ == '__main__':
    # For every user returned by load_user(), write a "GROUP<n>" line for the
    # user followed by one line per comparison user returned by pick_user()
    # for the same gender and a creation date within two days of the user's
    # own, then a blank line.  Output goes to compare_user_path.
    users = load_user()

    # Upper bound on the number of groups written; infinite means no limit.
    max_users = float('inf')
    date_format = '%Y/%m/%d'
    row_format = 'GROUP%s\t%s\t%s\t%s\t%s\n'

    # The context manager guarantees the output file is flushed and closed
    # even if a lookup fails half-way through.
    with codecs.open(compare_user_path, mode='w', encoding='utf-8') as fp:
        for index, (u, (name, gender, created_at)) in enumerate(
                users.iteritems(), 1):
            if index > max_users:
                break

            t0 = datetime.strptime(created_at, date_format)
            t_m = t0 - timedelta(days=2)  # window start (minus two days)
            t_p = t0 + timedelta(days=2)  # window end (plus two days)

            fp.write(row_format % (index, u, name, gender, created_at))
            results = pick_user(gender, t_m.strftime(date_format),
                                t_p.strftime(date_format))
            for r in results:
                u_created_at = util.time2epoch(r.created_at_or)
                u_created_at = time.strftime(date_format,
                                             time.localtime(u_created_at))

                fp.write(row_format %
                         (index, r.key, r.name, r.gender, u_created_at))

            fp.write('\n')