age_to_plug = 0 else: age_to_plug = int(formatted_res_age) #print ">", age_to_plug age_list_.append(age_to_plug) age_to_plug=0 formatted_res_age='' tupToRet =(age_list_, country_list_) return tupToRet print "Started at: ", utility.giveTimeStamp() reputationList = preProcessQuestionInfo(getReputations(), 'REP') print "******************************" ### Reputation print "Count of users with reputation" print len(reputationList) print "Reputation stats:" st_ = utility.giveStat(reputationList) print st_ lh_threhold=st_[3] # high reputation high_users = preProcessQuestionInfo(getHighUsers(lh_threhold), 'Id') high_users.sort() print "Count of highly reputed users"
convert fitted matrix to pandas dataframe: not using CSV, dumping list of strings as pickle in line#80 ''' # df_ = utility.dumpTransformedTokenMatrixToCSV(transformed_features, feature_names, reproc_dump_output_file) # print df_.shape # print 'Dumping completed ...' ''' and then call prediction module ''' tokenization_predictor.performPrediction(iterDumpDir, all_features, labels, feature_names, count_vec_flag_param) print "=" * 100 if __name__ == '__main__': print "Started at", utility.giveTimeStamp() print "-" * 125 dir2save = '/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/output/' # dataset_file="/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/dataset/MIRANTIS_FULL_DATASET.csv" # reproc_dump_output_file= '/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/reproc/TFIDF_MIR.dump' # theCompleteCategFile='/Users/akond/Documents/AkondOneDrive/OneDrive/IaC-Defect-Categ-Project/output/Mirantis_Categ_For_DB.csv' # reproc_dump_output_file= '/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/reproc/TFIDF_MIR.csv' # dataset_file="/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/dataset/SYNTHETIC_MOZ_FULL_DATASET.csv" # reproc_dump_output_file= '/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/reproc/TFIDF_MOZILLA.dump' # theCompleteCategFile='/Users/akond/Documents/AkondOneDrive/OneDrive/IaC-Defect-Categ-Project/output/Mozilla.Final.Categ.csv' # reproc_dump_output_file= '/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/reproc/TFIDF_MOZILLA.csv' # dataset_file="/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/dataset/SYNTHETIC_OPENSTACK_FULL_DATASET.csv" # reproc_dump_output_file='/Users/akond/Documents/AkondOneDrive/OneDrive/stvr/reproc/TFIDF_OPENSTACK.dump'
if val_=='': countto_ret = 0.0 else: countto_ret= float(val_) return countto_ret def getIssueReportsList(author_id_param_list): resultantIssueReportList = [] for author_id_ in author_id_param_list: issue_report_count = preProcessCount(getIssueReportFromDB(author_id_)) resultantIssueReportList.append(issue_report_count) return resultantIssueReportList print "Starting at:", utility.giveTimeStamp() allTheFemales = preProcessauthorIDs(getAllFemaleAuthors()) print "Identified females:",len(allTheFemales) allTheMales = preProcessauthorIDs(getAllMaleAuthors()) print "Identified males:",len(allTheMales) issue_reports_for_females = getIssueReportsList(allTheFemales) status=utility.dumpContent(issue_reports_for_females, 'F_ALL') print "Dumped a file of {} bytes".format(status) print "Total Female reports",sum(issue_reports_for_females) issue_reports_for_males = getIssueReportsList(allTheMales) status=utility.dumpContent(issue_reports_for_males, 'M_ALL') print "Dumped a file of {} bytes".format(status) print "Total Male reports",sum(issue_reports_for_males) print "Ending at:", utility.giveTimeStamp()