def cleanKeyWords():
    """Insert the distinct keywords and their occurrence counts.

    Reads every ``word`` from ``keywords1``, counts how often each distinct
    value occurs, and writes one row per non-empty word into
    ``clean_keywords1`` as ``(running id starting at 1, word, count, 0)``.
    Each inserted row is committed immediately.
    """
    # NOTE(review): cleanKeyWords is defined a second time further down in
    # this file; Python keeps the later definition.
    conn = db.getDBConnection()
    cursor = conn.cursor()
    rows = db.executeSQL(conn, "select word from keywords1")

    # Tally occurrences of each distinct word.
    wordMap = {}
    for row in rows:
        word = row[0]
        wordMap[word] = wordMap.get(word, 0) + 1

    # Values are bound as parameters instead of being spliced into the SQL
    # text, so words containing quotes or backslashes cannot break (or
    # inject into) the statement.
    # NOTE(review): '%s' placeholders assume a DB-API driver with the
    # 'format' paramstyle (e.g. MySQLdb) -- confirm against db.getDBConnection.
    insertSql = "insert into clean_keywords1 values (%s, %s, %s, %s)"

    counter = 1
    for word, occurrences in wordMap.items():
        if util.emptyString(word) == 0:
            params = (counter, word, occurrences, 0)
            print("%s %r" % (insertSql, params))
            cursor.execute(insertSql, params)
            conn.commit()
            counter = counter + 1
def cleanKeyWords():
    """Insert the distinct keywords and their occurrence counts.

    Reads every ``word`` from ``keywords1``, counts how often each distinct
    value occurs, and writes one row per non-empty word into
    ``clean_keywords1`` as ``(running id starting at 1, word, count, 0)``.
    Each inserted row is committed immediately.
    """
    # NOTE(review): an earlier definition of cleanKeyWords precedes this one
    # in the file; this later definition is the one Python keeps.
    conn = db.getDBConnection()
    cursor = conn.cursor()
    rows = db.executeSQL(conn, "select word from keywords1")

    # Tally occurrences of each distinct word.
    wordMap = {}
    for row in rows:
        word = row[0]
        wordMap[word] = wordMap.get(word, 0) + 1

    # Values are bound as parameters instead of being spliced into the SQL
    # text, so words containing quotes or backslashes cannot break (or
    # inject into) the statement.
    # NOTE(review): '%s' placeholders assume a DB-API driver with the
    # 'format' paramstyle (e.g. MySQLdb) -- confirm against db.getDBConnection.
    insertSql = "insert into clean_keywords1 values (%s, %s, %s, %s)"

    counter = 1
    for word, occurrences in wordMap.items():
        if util.emptyString(word) == 0:
            params = (counter, word, occurrences, 0)
            print("%s %r" % (insertSql, params))
            cursor.execute(insertSql, params)
            conn.commit()
            counter = counter + 1
def _firstValue(obj, paths, default):
    """Return the first non-None value reachable in *obj* via one of *paths*.

    Each path is a sequence of keys applied in order (``obj[k1][k2]...``).
    A path that hits a missing key or a non-subscriptable value is skipped;
    *default* is returned when no path yields a value.
    """
    for path in paths:
        node = obj
        try:
            for key in path:
                node = node[key]
        except (KeyError, IndexError, TypeError):
            continue
        if node is not None:
            return node
    return default


def populateRepoTable(obj, conn):
    """Insert one row into the ``repository`` table from a JSON object.

    input:
        obj  - decoded JSON object (dict). Fields are looked up under the
               'repository' / 'actor_attributes' keys first and under the
               'repo' / 'actor' / 'payload' keys as fallbacks.
        conn - database connection

    The row is (repoIdCounter, repo name, repo url, description, login,
    language, util.getFloatTime(created_at)); ``language`` is always the
    empty string. Nothing is inserted when the description is empty or no
    creation time is present. The module-level ``repoIdCounter`` is
    incremented only after a successful insert and commit.

    This is a best-effort routine: any error is printed and swallowed so a
    single malformed object cannot stop the caller.
    """
    global repoIdCounter
    try:
        cursor = conn.cursor()

        # An object without 'actor' (or whose dict-valued actor lacks
        # 'login') is rejected here; the KeyError is reported below.
        actor = obj['actor']
        actorLogin = actor['login'] if isinstance(actor, dict) else ''
        loginName = _firstValue(
            obj, (('actor_attributes', 'login'),), actorLogin)

        repoUrl = _firstValue(obj, (('url',),), '')
        repoName = _firstValue(
            obj, (('repository', 'name'), ('repo', 'name')), '')
        if '/' in repoName:
            # "owner/name" form: the full name yields the URL, and the
            # second path segment is kept as the bare repository name.
            repoUrl = 'https://github.com/' + repoName
            repoName = repoName.split('/')[1]
        if repoUrl == '' and loginName != '' and repoName != '':
            repoUrl = 'https://github.com/' + loginName + '/' + repoName

        repoDesc = _firstValue(
            obj,
            (('repository', 'description'),
             ('repo', 'description'),
             ('payload', 'description')),
            '')
        createdAt = _firstValue(
            obj, (('created_at',), ('repo', 'created_at')), None)
        language = ''

        print('desc::' + repoDesc)
        if util.emptyString(repoDesc):
            return
        if createdAt is None:
            # Previously this case surfaced as a NameError on an unbound
            # local; skip the row explicitly instead.
            print("no 'created_at' found; repository row skipped")
            return

        # Values are bound as parameters rather than concatenated into the
        # statement, so quotes in names or descriptions cannot break (or
        # inject into) the SQL.
        # NOTE(review): '%s' placeholders assume a DB-API driver with the
        # 'format' paramstyle (e.g. MySQLdb) -- confirm for this connection.
        sql = "INSERT INTO repository VALUES (%s, %s, %s, %s, %s, %s, %s)"
        params = (repoIdCounter, repoName, repoUrl, repoDesc, loginName,
                  language, util.getFloatTime(createdAt))
        print("%s %r" % (sql, params))
        try:
            cursor.execute(sql, params)
            conn.commit()
        except Exception as e:
            print(e)
        else:
            repoIdCounter = repoIdCounter + 1
    except Exception as e:
        print('Error in line:')
        print('%s %s' % (e, sys.exc_info()[2].tb_lineno))