Exemplo n.º 1
0
def cleanKeyWords():
    """
    Insert each distinct word and its occurrence count into clean_keywords1.

    Reads every `word` value from the `keywords1` table, tallies how many
    times each distinct word occurs, and inserts one row per word for which
    util.emptyString(word) == 0. Each row is
    (running counter starting at 1, word, occurrence count, 0) and is
    committed individually, as before.
    """
    conn = db.getDBConnection()
    cursor = conn.cursor()
    rows = db.executeSQL(conn, "select word from keywords1")

    # Tally occurrences of every distinct word.
    wordMap = {}
    for row in rows:
        word = row[0]
        wordMap[word] = wordMap.get(word, 0) + 1

    # Values are bound by the driver instead of being concatenated into the
    # statement, so a word containing a quote can no longer break (or inject
    # into) the SQL.
    # NOTE(review): '%s' is the placeholder style of MySQLdb-like drivers;
    # confirm db.getDBConnection() returns such a connection.
    insertSql = "insert into clean_keywords1 values (%s, %s, %s, %s)"
    counter = 1
    for key, occurrences in wordMap.items():
        if util.emptyString(key) != 0:
            continue
        # NOTE(review): the meaning of the trailing 0 column is not visible
        # from this function.
        params = (counter, key, occurrences, 0)
        print("%s %r" % (insertSql, params))
        cursor.execute(insertSql, params)
        conn.commit()
        counter += 1
Exemplo n.º 2
0
def cleanKeyWords():
    """
    Insert each distinct word and its occurrence count into clean_keywords1.

    Reads every `word` value from the `keywords1` table, tallies how many
    times each distinct word occurs, and inserts one row per word for which
    util.emptyString(word) == 0. Each row is
    (running counter starting at 1, word, occurrence count, 0) and is
    committed individually, as before.
    """
    conn = db.getDBConnection()
    cursor = conn.cursor()
    rows = db.executeSQL(conn, "select word from keywords1")

    # Tally occurrences of every distinct word.
    wordMap = {}
    for row in rows:
        word = row[0]
        wordMap[word] = wordMap.get(word, 0) + 1

    # Values are bound by the driver instead of being concatenated into the
    # statement, so a word containing a quote can no longer break (or inject
    # into) the SQL.
    # NOTE(review): '%s' is the placeholder style of MySQLdb-like drivers;
    # confirm db.getDBConnection() returns such a connection.
    insertSql = "insert into clean_keywords1 values (%s, %s, %s, %s)"
    counter = 1
    for key, occurrences in wordMap.items():
        if util.emptyString(key) != 0:
            continue
        # NOTE(review): the meaning of the trailing 0 column is not visible
        # from this function.
        params = (counter, key, occurrences, 0)
        print("%s %r" % (insertSql, params))
        cursor.execute(insertSql, params)
        conn.commit()
        counter += 1
Exemplo n.º 3
0
def _firstPresent(obj, paths, default):
    """Return the first non-None value found by following one of *paths*.

    Each path is a sequence of keys applied in order to *obj*. A path that
    hits a missing key, a value that cannot be subscripted that way, or a
    None value is skipped. *default* is returned when no path yields a value.
    """
    for path in paths:
        node = obj
        try:
            for key in path:
                node = node[key]
        except (KeyError, IndexError, TypeError):
            continue
        if node is not None:
            return node
    return default


def populateRepoTable(obj, conn):
    """
    Inserts the data into the table
    input: obj- json object
    conn- database connection

    Extracts the owner login, repository name, URL, description and creation
    time from *obj*, trying several alternative key layouts for each field,
    and inserts one row into the `repository` table when the description is
    not empty (per util.emptyString). The module-level `repoIdCounter` is
    used as the row id and is incremented only after a successful commit.

    The function is best-effort: any error is printed and swallowed, so it
    never raises to the caller and returns None.
    """
    global repoIdCounter
    try:
        cursor = conn.cursor()

        # The original resolved the actor's login up front and never used the
        # result. The lookups are kept because an event without 'actor' (or
        # with a dict actor lacking 'login') raises here and is therefore
        # skipped by the handler at the bottom.
        # NOTE(review): confirm whether that filtering is intentional.
        actor = obj['actor']
        if isinstance(actor, dict):
            actor['login']

        loginName = _firstPresent(
            obj, (('actor_attributes', 'login'), ('actor', 'login')), '')
        repoUrl = _firstPresent(obj, (('url',),), '')
        repoName = _firstPresent(
            obj, (('repository', 'name'), ('repo', 'name')), '')
        repoDesc = _firstPresent(
            obj,
            (('repository', 'description'),
             ('repo', 'description'),
             ('payload', 'description')),
            '')
        createdAt = _firstPresent(
            obj, (('created_at',), ('repo', 'created_at')), None)
        # No language is read from the event; the column is always written
        # as an empty string.
        language = ''

        # A name containing '/' is used verbatim as the GitHub path for the
        # URL, and only the segment after the first '/' is kept as the name.
        if '/' in repoName:
            repoUrl = 'https://github.com/' + repoName
            repoName = repoName.split("/")[1]

        if repoUrl == '' and loginName != '' and repoName != '':
            repoUrl = 'https://github.com/' + loginName + "/" + repoName

        print('desc::' + repoDesc)
        if util.emptyString(repoDesc):
            return

        # Previously a missing timestamp surfaced as a NameError on an
        # unbound local; the row is still skipped, but explicitly.
        if createdAt is None:
            print('created_at missing, row not inserted')
            return

        # Values are bound by the driver rather than concatenated, so quotes
        # in names or descriptions can no longer break (or inject into) the
        # statement. repoUrl must not be pre-escaped any more.
        # NOTE(review): '%s' placeholders assume a MySQLdb-style driver (the
        # original escaped with mdb.escape_string) - confirm.
        sql = "INSERT INTO repository VALUES (%s, %s, %s, %s, %s, %s, %s)"
        params = (repoIdCounter, repoName, repoUrl, repoDesc, loginName,
                  language, util.getFloatTime(createdAt))
        print("%s %r" % (sql, params))
        try:
            cursor.execute(sql, params)
            conn.commit()
            repoIdCounter = repoIdCounter + 1
        except Exception as e:
            # A failed insert is reported and the counter is left untouched.
            print(e)

    except Exception as e:
        print('Error in line:')
        print(e)