def progbar(ip):
    """Draw a console progress bar until the Redis list 'blogs' is empty.

    The length of the list when the function starts is taken as the total
    amount of work; progress is the number of items that have left the list
    since then.

    ip -- passed to redis.Redis() as its first argument (the server host).

    Returns None once the list length reaches zero.
    """
    import time  # local import: only needed for the poll delay below

    poll_interval = 0.1  # seconds between polls of the list length

    r = redis.Redis(ip)
    initial = r.llen('blogs')
    prog = ProgressBar(0, initial, 77, mode='fixed', char='#')
    while True:
        # Query the length once per iteration so the value that is drawn
        # and the value that decides termination are the same.
        remaining = r.llen('blogs')
        prog.update_amount(initial - remaining)
        # A trailing carriage return redraws the bar on the same line.
        sys.stdout.write(str(prog) + ' \r')
        sys.stdout.flush()
        if remaining == 0:
            break
        # Pause instead of spinning on the Redis server.
        time.sleep(poll_interval)
    return
def read_callstack_dictionary(fname):
    """Load the callstack dictionary stored in the binary file `fname`.

    Layout as consumed by this reader (every field is a native unsigned
    int, struct format 'I', read four bytes at a time):

        version
        repeated until end of file:
            hash, num_frames, then num_frames addresses

    A Callstack is built for every record, its addresses are appended to
    `calls`, and `prepare()` is called once all frames are read.

    Returns a dict mapping each stack hash to its Callstack.

    Raises struct.error when the file ends in the middle of a record.
    """
    stack_dict = {}
    s_int = struct.Struct('I')

    # The context manager closes the file even if a record fails to unpack.
    with open(fname, "rb") as f_dict:
        version, = s_int.unpack(f_dict.read(4))
        print("version: %d" % version)

        s = f_dict.read(4)
        prog = ProgressBar(0, os.path.getsize(fname), 77, mode='fixed', char='#', autoprint=True)

        # A short read (fewer than four bytes) marks the end of the records.
        while len(s) == 4:
            stack_hash, = s_int.unpack(s)
            num_frames, = s_int.unpack(f_dict.read(4))

            # use to make more human readable callstack names in the report
            stack_id = len(stack_dict) + 1

            stack = Callstack(stack_hash, stack_id)
            for _ in range(num_frames):
                address, = s_int.unpack(f_dict.read(4))
                stack.calls.append(address)
            stack.prepare()
            stack_dict[stack_hash] = stack

            s = f_dict.read(4)
            prog.update_amount(f_dict.tell())

    print("\n")
    print("callstacks: %d" % len(stack_dict))
    return stack_dict
def get_social_media(self, company_list, db_filename):
    """
    Call scraper to get company social media data

    Twitter users are looked up first, in batches of at most 100 ids per
    API call. Each company is then scraped for Facebook and YouTube data,
    its health metrics are calculated, and a progress bar is drawn on the
    console.

    @type company_list: list
    @param company_list: the CompanyURL object list
    @param db_filename: handed unchanged to getMaxCheckins, getMaxLikes and
        getMaxTalkingAboutCount to look up previously recorded values

    @rtype: list
    @return: the CompanySocialMedia object list
    """
    batch_size = 100  # ids sent per UsersLookup call

    # Define a progress bar on console
    limit = len(company_list)
    prog = ProgressBar(0, limit, 70, mode='fixed')
    oldprog = str(prog)
    i = 0

    # Collect the distinct twitter ids of all companies.
    twitter_ids = []
    seen_ids = set()
    for company in company_list:
        if scraper.check_url(company.tw_url, 'twitter.com'):
            twitter_id = scraper.get_twitter_id(company.tw_url)
            if twitter_id and twitter_id not in seen_ids:
                seen_ids.add(twitter_id)
                twitter_ids.append(twitter_id)

    # Call twitter api in batch mode. Joining each slice yields a clean
    # comma-separated string for every batch, including the last partial
    # one, and no call is made when there is nothing to look up.
    twitter_user_list = []
    for start in range(0, len(twitter_ids), batch_size):
        batch = ','.join(twitter_ids[start:start + batch_size])
        twitter_user_list.extend(simple_twitter_api.UsersLookup(batch))
    twitter_user_dict = simple_twitter_api.build_dict(twitter_user_list)

    result = []
    # Keep same time_taken for this batch operation
    current_datetime = datetime.now()
    for company in company_list:
        company_sm_data = CompanySocialMedia(company.company_name)

        fb_data = scraper.scrap_facebook_raw_data(company.fb_url)
        # If can not get fb data from html, just try to get it from graph api
        if fb_data['likes'] == 0 and fb_data['talking_about_count'] == 0 and fb_data['checkins'] == 0:
            fb_data = scraper.fb_scrape(company.fb_url)
            # NOTE(review): the back-fill from previously recorded maxima
            # is scoped to this fallback path; confirm that this is the
            # intended behaviour when the HTML scrape returns only some
            # zero counters.
            # Get max checkins from previous records
            fb_data['checkins'] = getMaxCheckins(company.company_name, db_filename)
            if fb_data['likes'] == 0:
                fb_data['likes'] = getMaxLikes(company.company_name, db_filename)
            if fb_data['talking_about_count'] == 0:
                fb_data['talking_about_count'] = getMaxTalkingAboutCount(company.company_name, db_filename)
        fb_data = handleFBData(fb_data)

        # Default twitter record, used when the company has no usable
        # twitter id or the batch lookup returned nothing for it.
        data = {'twitter_id': '', 'followers_count': 0, 'tweets': 0}
        tw_data = data
        if scraper.check_url(company.tw_url, 'twitter.com'):
            tw_id = scraper.get_twitter_id(company.tw_url)
            # Same guard as in the collection pass: an id that could not
            # be extracted must not be dereferenced.
            if tw_id:
                data['twitter_id'] = tw_id
                tw_data = twitter_user_dict.get(tw_id.lower(), data)

        yt_data = scraper.yt_scrape(company.yt_url)

        company_sm_data.fb_likes = fb_data['likes']
        company_sm_data.fb_talking_about_count = fb_data['talking_about_count']
        company_sm_data.fb_checkins = fb_data['checkins']
        company_sm_data.tw_followers_count = tw_data['followers_count']
        company_sm_data.tw_tweets = tw_data['tweets']
        company_sm_data.yt_subscriber_count = yt_data['subscriber_count']
        company_sm_data.yt_view_count = yt_data['view_count']

        fb_metrics = calculator.cal_fb_hm(company_sm_data.fb_likes,
                                          company_sm_data.fb_talking_about_count,
                                          company_sm_data.fb_checkins)
        tw_metrics = calculator.cal_tw_hm(tw_data['twitter_id'],
                                          company_sm_data.tw_followers_count,
                                          company_sm_data.tw_tweets)
        yt_metrics = calculator.cal_yt_hm(company_sm_data.yt_subscriber_count,
                                          company_sm_data.yt_view_count)
        micro_metrics = calculator.cal_macro_metrics(fb_metrics['fb_health'],
                                                     tw_metrics['tw_health'],
                                                     yt_metrics['yt_health'])
        company_sm_data.fb_metrics = fb_metrics
        company_sm_data.tw_metrics = tw_metrics
        company_sm_data.yt_metrics = yt_metrics
        company_sm_data.micro_metrics = micro_metrics
        company_sm_data.time_taken = current_datetime
        result.append(company_sm_data)

        # Print a progress bar on console, redrawing only when its text
        # actually changed.
        i += 1
        prog.update_amount(i)
        newprog = str(prog)
        if oldprog != newprog:
            sys.stdout.write(newprog + ' \r')
            sys.stdout.flush()
            oldprog = newprog

    return result
def read_event_log(fname):
    """Parse the binary memory-tracking event log `fname`.

    The file starts with an unsigned int version, followed by chunks that
    each begin with a five-field header (see MemTrackEvent_Header below)
    and continue with a payload selected by the header's event type.
    Alloc, realloc, free and marker events are collected; hook events and
    unknown chunk types are counted and skipped using the chunk size from
    the header.

    Reading stops at the first incomplete header or as soon as
    MemTrack.event_log_read_limit events have been collected. Summary
    counts are printed at the end.

    Returns the list of collected Event objects in file order.

    Raises struct.error when a fixed-size payload field is truncated.
    """
    event_list = []
    s_int = struct.Struct('I')

    #~ typedef struct
    #~ {
    #~     unsigned int event_size;   // size of complete chunk (header + payload)
    #~     unsigned int event_type;   // type of payload
    #~     unsigned int request_id;   // sequence number of the event or id
    #~     unsigned int stack_id;     // event callstack hash
    #~     unsigned int thread_id;    // id of thread causing the event
    #~ } MemTrackEvent_Header;
    s_header = struct.Struct('IIIII')

    hook_event_count = 0
    unknown_event_count = 0
    count = 0

    # The context manager closes the file even if a chunk fails to unpack.
    with open(fname, "rb") as fin:
        version, = s_int.unpack(fin.read(4))
        print("version: %d" % version)

        prog = ProgressBar(0, os.path.getsize(fname), 77, mode='fixed', char='#', autoprint=True)

        s = fin.read(s_header.size)
        while len(s) == s_header.size and len(event_list) < MemTrack.event_log_read_limit:
            count += 1
            header = s_header.unpack(s)

            event = Event()
            event.event_size = header[0]
            event.event_type = header[1]
            event.request_id = header[2]
            event.stack_id = header[3]
            event.thread_id = header[4]

            if event.event_type == Event.eAlloc:
                #~ MemTrackEvent_Alloc
                #~     void* pData;
                #~     size_t allocSize;
                event.pData, = s_int.unpack(fin.read(4))
                event.allocSize, = s_int.unpack(fin.read(4))
                event_list.append(event)
            elif event.event_type == Event.eRealloc:
                #~ MemTrackEvent_Realloc
                #~     void* pData;
                #~     size_t allocSize;
                #~     void* pDataPrev;  // used by realloc to link to original allocation
                event.pData, = s_int.unpack(fin.read(4))
                event.allocSize, = s_int.unpack(fin.read(4))
                event.pDataPrev, = s_int.unpack(fin.read(4))
                event_list.append(event)
            elif event.event_type == Event.eFree:
                #~ MemTrackEvent_Free
                #~     void* pData;
                event.pData, = s_int.unpack(fin.read(4))
                # todo: it is useful for 'free' blocks to also have an
                # 'allocSize' field that represents how much memory it was
                # freeing (this could be part of the capture stream for CRT
                # debug captures). In place of that we annotate this event
                # with the amount freed during the simulation phase.
                event.allocSize = 0
                event_list.append(event)
            elif event.event_type == Event.eMarker:
                #~ MemTrackEvent_Marker
                #~     int marker_type
                #~     char * marker_string
                event.marker_type, = s_int.unpack(fin.read(4))
                # The marker text is NUL-terminated. An empty read means the
                # file ended before the terminator; stop there instead of
                # looping forever on a truncated log.
                marker_chars = []
                while True:
                    char = fin.read(1)
                    if not char or char == b"\0":
                        break
                    marker_chars.append(char)
                event.marker_string = b"".join(marker_chars)
                event_list.append(event)
            elif event.event_type == Event.eHook:
                # @todo hook events are used for debugging and validation;
                # they should be removed from the stream for normal use.
                # Do we add these to a separate list?
                hook_event_count += 1
                # Skip the payload: chunk size minus the header already read.
                fin.read(event.event_size - s_header.size)
            else:
                # unknown chunk, skip, don't add to event list
                unknown_event_count += 1
                fin.read(event.event_size - s_header.size)

            s = fin.read(s_header.size)
            prog.update_amount(fin.tell())

    print("\n")
    print("events that wil be tracked: %d" % len(event_list))
    print("events total in log: %d" % count)
    print("events unknown: %d" % unknown_event_count)
    if hook_event_count:
        print("events hook: %d" % hook_event_count)
        print("WARNING: event log contains 'hook' events used for debugging and validation. These should not be output by the runtime under normal usage\n")

    return event_list