Exemplo n.º 1
0
def test():
    """Exercise the followers crawler on a throwaway test task.

    Initialises a task through ``DMTask.init_test`` under "github", runs
    ``GithubFollowers`` on it, then calls the task's ``remove()``.
    """
    spec = {
        "name": "fake-followers",
        "action_type": "loop",
        "start": 6001000,
        "end": 6005000,
    }
    scratch_task = DMTask()
    scratch_task.init_test("github", spec)
    GithubFollowers(scratch_task).runTask()
    scratch_task.remove()
Exemplo n.º 2
0
def init_commit_task(db, repo):
    """Register a "get_commit" loop task for the repository *repo*.

    *repo* is looked up by ``full_name`` in ``db["repositories"]``. When no
    document is found, nothing is registered. The task stores *repo* as
    ``"start"`` and the document's ``"id"`` as ``"end"``.
    """
    task = DMTask()
    record = db["repositories"].find_one({"full_name": repo})
    if not record:
        return
    repo_id = record["id"]
    task.init("github", {
        "name": "get_commit",
        "action_type": "loop",
        "start": repo,
        "end": repo_id,
    })
Exemplo n.º 3
0
def event(start, end):
    """Run the "get_events" crawler for each window index in ``range(start, end)``.

    Window ``k`` covers the ids from ``k * 1000`` to ``(k + 1) * 1000``, stored
    as the task's ``"start"`` and ``"end"``.
    """
    span = 1000
    for index in range(start, end):
        lower = index * span
        task = DMTask()
        task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": lower,
            "end": lower + span,
        })
        GithubEvent(task).runTask()
Exemplo n.º 4
0
def gen_event(start, end):
    """Call ``generateToFile`` for each "get_events" window in ``range(start, end)``.

    Window ``k`` spans ``k * 1000`` to ``(k + 1) * 1000``. The target path is
    ``./TaskFiles/get_events_start_<k * 1000>``.
    """
    span = 1000
    for index in range(start, end):
        lower = index * span
        task = DMTask()
        task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": lower,
            "end": lower + span,
        })
        target_path = "./TaskFiles/get_events_start_" + str(lower)
        GithubEvent(task).generateToFile(target_path)
Exemplo n.º 5
0
def import_event(start, end):
    """Call ``runTaskFromFile`` for each "get_events" window in ``range(start, end)``.

    Window ``k`` spans ``k * 1000`` to ``(k + 1) * 1000``. The file read is
    ``./TaskFinishedFiles/get_events_start_<k * 1000>.output``.
    """
    span = 1000
    for index in range(start, end):
        lower = index * span
        task = DMTask()
        task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": lower,
            "end": lower + span,
        })
        source_path = "./TaskFinishedFiles/get_events_start_" + str(lower) + ".output"
        GithubEvent(task).runTaskFromFile(source_path)
Exemplo n.º 6
0
def init_repo_single_task(login):
    """Register a single "get_repos" task for the user with the given *login*.

    The user is looked up by ``login`` in the ``user`` collection. When it is
    missing, "user not found" is printed and no task is registered.
    """
    database = DMDatabase().getDB()
    task = DMTask()
    user = database["user"].find_one({"login": login})
    if not user:
        print("user not found")
        return
    task.init("github", {
        "name": "get_repos",
        "action_type": "single",
        "start": login,
        "end": user["id"],
    })
Exemplo n.º 7
0
def updated_repositories_task():
    """Register a "get_repositories" update task.

    The task's ``"start"`` is the value returned by ``get_last_saved_id()``
    and its ``"end"`` is 0.
    """
    resume_from = get_last_saved_id()
    task = DMTask()
    task.init("github", {"name": "get_repositories", "action_type": "update", "start": resume_from, "end": 0})
Exemplo n.º 8
0
def init_event_task():
    """Register one "get_events" loop task per 1000-id window.

    Windows 0 through 10299 are registered; window ``k`` stores ``k * 1000``
    as ``"start"`` and ``(k + 1) * 1000`` as ``"end"``.
    """
    # TODO: 1000 is system defined, maybe add to DMTask? or config file?
    span = 1000
    # end id is now set to 10300000 (10300 windows of `span` ids)
    first, last = 0, 10300
    # The handle is not used below; the call is kept in case it has side effects.
    DMDatabase().getDB()
    for index in range(first, last):
        lower = index * span
        task = DMTask()
        task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": lower,
            "end": lower + span,
        })
Exemplo n.º 9
0
def init_followers_task():
    """Register one "get_followers" loop task per 1000-id window.

    Windows 0 through 10299 are registered; window ``k`` stores ``k * 1000``
    as ``"start"`` and ``(k + 1) * 1000`` as ``"end"``.
    """
    # TODO: 1000 is system defined, maybe add to DMTask? or config file?
    span = 1000
    # end id is now set to 10300000 (10300 windows of `span` ids)
    first, last = 0, 10300
    # The handle is not used below; the call is kept in case it has side effects.
    DMDatabase().getDB()
    for index in range(first, last):
        lower = index * span
        task = DMTask()
        task.init("github", {
            "name": "get_followers",
            "action_type": "loop",
            "start": lower,
            "end": lower + span,
        })
Exemplo n.º 10
0
def single_download_demo():
    """Call ``upload_user_followers`` once with hard-coded demo arguments.

    A test task is initialised via ``DMTask.init_test`` and wrapped in
    ``GithubFollowers`` before the call.
    """
    spec = {"name": "fake-followers", "action_type": "loop", "start": 6001000, "end": 6005000}
    demo_task = DMTask()
    demo_task.init_test("github", spec)
    # presumably (login, id, count) — verify against upload_user_followers
    GithubFollowers(demo_task).upload_user_followers("002", 3269460, 2)
Exemplo n.º 11
0
def init_commit_task(db, repo):
    """Register a "get_commit" loop task for the repository *repo*.

    *repo* is looked up by ``full_name`` in ``db["repositories"]``. When no
    document is found, nothing is registered. The task stores *repo* as
    ``"start"`` and the document's ``"id"`` as ``"end"``.
    """
    task = DMTask()
    record = db["repositories"].find_one({"full_name": repo})
    if not record:
        return
    repo_id = record["id"]
    spec = {"name": "get_commit", "action_type": "loop", "start": repo, "end": repo_id}
    task.init("github", spec)
Exemplo n.º 12
0
def resolve_event_errors():
    """Run ``error_check`` on "get_repositories" tasks with ``error_count`` >= 10.

    Note that, despite the function name, the tasks selected and re-initialised
    here are "get_repositories" tasks. The values returned by ``error_check``
    are summed and the total is printed.
    """
    github_tasks = DMDatabase().getClient()["task"]["github"]
    failing = github_tasks.find({"name": "get_repositories", "error_count": {"$gte": 10}})
    solved = 0
    for entry in failing:
        task = DMTask()
        task.init("github", {
            "name": "get_repositories",
            "action_type": "loop",
            "start": entry["start"],
            "end": entry["end"],
        })
        solved += GithubRepositories(task).error_check()
    print(str(solved) + " errors solved")
Exemplo n.º 13
0
def event(start, end):
    """Run the "get_events" crawler for each window index in ``range(start, end)``.

    Window ``k`` covers the ids from ``k * 1000`` to ``(k + 1) * 1000``, stored
    as the task's ``"start"`` and ``"end"``.
    """
    span = 1000
    for index in range(start, end):
        lower = index * span
        spec = {"name": "get_events", "action_type": "loop", "start": lower, "end": lower + span}
        task = DMTask()
        task.init("github", spec)
        GithubEvent(task).runTask()
Exemplo n.º 14
0
def resolve_event_errors():
    """Run ``error_check`` over every "get_events" window and print the total.

    Windows 0 through 10299 (1000 ids each) are initialised under "github".
    The values returned by ``GithubEvent.error_check`` are summed.
    """
    span = 1000
    # end id is now set to 10300000 (10300 windows of `span` ids)
    first, last = 0, 10300
    solved = 0
    for index in range(first, last):
        lower = index * span
        task = DMTask()
        task.init("github", {
            "name": "get_events",
            "action_type": "loop",
            "start": lower,
            "end": lower + span,
        })
        solved += GithubEvent(task).error_check()
    print(str(solved) + " errors solved")
Exemplo n.º 15
0
def init_users_thread_by_query(db, query):
    """Split the users matching *query* into "get_commit_check" tasks.

    Logins are read from the ``user_contributor_result2`` collection and
    grouped into batches of ``pieces`` (100). One task is registered per
    batch, carrying the batch's logins under ``"users"``, ``str(query)``
    under ``"query"``, and a synthetic range under ``"start"``/``"end"``:
    800000 plus the running count before and after the batch.

    Args:
        db: Not used. The handle is re-fetched from ``DMDatabase`` exactly as
            the original code did; the parameter stays for caller
            compatibility.
        query: Filter passed to ``find``.
    """
    pieces = 100
    base = 800000
    # NOTE(review): this overrides the ``db`` argument, as the original did.
    db = DMDatabase().getDB()

    def register(users, first, last):
        # Register one batch as a task under "github".
        task = DMTask()
        task.init("github", {
            "name": "get_commit_check",
            "action_type": "loop",
            "query": str(query),
            "users": users,
            "start": first,
            "end": last,
        })

    count = 0
    user_list = []
    for item in db["user_contributor_result2"].find(query):
        count += 1
        user_list.append(item["login"])
        if count % pieces == 0:
            register(user_list, base + count - pieces, base + count)
            user_list = []

    if count % pieces != 0:
        # Fix: the trailing partial batch used ``800000 + 1`` as its end,
        # a typo for the running count, which put "end" below "start".
        register(user_list, base + count - count % pieces, base + count)
    return
Exemplo n.º 16
0
def gen_event(start, end):
    """Call ``generateToFile`` for each "get_events" window in ``range(start, end)``.

    Window ``k`` spans ``k * 1000`` to ``(k + 1) * 1000``. The target path is
    ``./TaskFiles/get_events_start_<k * 1000>``.
    """
    span = 1000
    for index in range(start, end):
        lower = index * span
        spec = {"name": "get_events", "action_type": "loop", "start": lower, "end": lower + span}
        task = DMTask()
        task.init("github", spec)
        target_path = "./TaskFiles/get_events_start_" + str(lower)
        GithubEvent(task).generateToFile(target_path)
Exemplo n.º 17
0
def resolve_contributors_loop_errors():
    """Run ``error_check`` over every "get_contributors" window and print the total.

    Windows 0 through 28999 (1000 ids each) are initialised under "github".
    The values returned by ``GithubContributors.error_check`` are summed.
    """
    print("resolve contributors errors")
    span = 1000
    first, last = 0, 29000
    solved = 0
    for index in range(first, last):
        lower = index * span
        task = DMTask()
        task.init("github", {
            "name": "get_contributors",
            "action_type": "loop",
            "start": lower,
            "end": lower + span,
        })
        solved += GithubContributors(task).error_check()
    print(str(solved) + " errors solved")
Exemplo n.º 18
0
def import_event(start, end):
    """Call ``runTaskFromFile`` for each "get_events" window in ``range(start, end)``.

    Window ``k`` spans ``k * 1000`` to ``(k + 1) * 1000``. The file read is
    ``./TaskFinishedFiles/get_events_start_<k * 1000>.output``.
    """
    span = 1000
    for index in range(start, end):
        lower = index * span
        spec = {"name": "get_events", "action_type": "loop", "start": lower, "end": lower + span}
        task = DMTask()
        task.init("github", spec)
        source_path = "./TaskFinishedFiles/get_events_start_" + str(lower) + ".output"
        GithubEvent(task).runTaskFromFile(source_path)
Exemplo n.º 19
0
def resolve_contributors_30():
    """Re-fetch contributors for repos whose stored ``count`` is a multiple of 30.

    For each multiple 30, 60, ..., 299 * 30 the matching documents in the
    ``contributors`` collection are collected, then
    ``get_repo_contributors(full_name, id)`` is called for each of them.
    """
    database = DMDatabase().getDB()

    probe_task = DMTask()
    probe_task.init_test("github", {"name": "fake-contributors", "action_type": "loop", "start": 0, "end": 1000})
    crawler = GithubContributors(probe_task)

    for multiple in range(1, 300):
        matches = database["contributors"].find({"count": multiple * 30})
        # Read the whole result set before making any calls, as the original does.
        pending = [(doc["full_name"], doc["id"]) for doc in matches]
        for full_name, repo_id in pending:
            crawler.get_repo_contributors(full_name, repo_id)
Exemplo n.º 20
0
def run_free_task(num, endless):
    """Fetch free "get_repositories" tasks and queue one worker thread per task.

    Each task returned by ``DMTask().getFreeTasks`` becomes a ``myThread``
    appended to the module-level ``user_thread`` list. The number queued is
    printed and ``run_task()`` is called.

    Args:
        num: Forwarded as the ``"num"`` field of the task request.
        endless: Forwarded to every ``myThread``.
    """
    # The original built a "get_repos" request first and overwrote it
    # immediately; only this request was ever sent. Edit the inner query to
    # target another task kind.
    query = {
        "col": "github",
        "num": num,
        "query": {
            "name": "get_repositories",
            "status": "init"
        }
    }
    #    query = {"col": "github", "num": num, "query": {"status": "fixed", "name": "get_contributors"}}
    received = 0
    for item in DMTask().getFreeTasks(query):
        user_thread.append(
            myThread(item["action_type"], item["name"], item["start"],
                     item["end"], endless))
        received += 1
    print(str(received) + " task received, start to run them!")
    run_task()
Exemplo n.º 21
0
    def run(self):
        """Run the bound task; when ``self.endless == 1`` keep pulling more.

        After the first task, endless mode repeatedly asks
        ``DMTask().getFreeTasks`` for one task with status "init", rebinds
        this thread to it via ``set`` and runs it. The method returns when
        ``set`` does not return 1.
        """
        print("Starting " + str(self.val))
        if self.r:
            #            self.r.runFix2Task()
            self.r.runTask()
        print("Exiting " + str(self.val))

        if self.endless != 1:
            return

        # NOTE(review): when no task is available this loop polls again
        # immediately, with no delay.
        while 1:
            query = {
                "col": "github",
                "num": 1,
                "query": {
                    "status": "init"
                }
            }
            #                query = {"col": "github", "num": 1, "query": {"status": "fixed", "name": "get_contributors"}}
            # Release the lock even if the lookup raises.
            threadLock.acquire()
            try:
                res = DMTask().getFreeTasks(query)
            finally:
                threadLock.release()
            if not res:
                continue
            for item in res:
                print(item)
                # Fix: set() takes (action_type, cmd, start, end); the
                # original call left out action_type and raised TypeError.
                if self.set(item["action_type"], item["name"],
                            item["start"], item["end"]) != 1:
                    return
                #                            self.r.runFix2Task()
                self.r.runTask()
                print("\n Start another task in the finished thread\n")
                # Only the first returned task is handled per poll.
                break
Exemplo n.º 22
0
    def set(self, action_type, cmd, start, end):
        """Bind this thread to a fresh task of kind *cmd*.

        A new ``DMTask`` and the task description ``self.val`` are always
        created. When *cmd* is a known task name, the task is initialised
        under "github", the matching worker is stored in ``self.r`` and 1 is
        returned. Otherwise a failure message is printed, ``self.r`` stays
        None and 0 is returned.
        """
        self.task = DMTask()
        self.r = None
        self.val = {"action_type": action_type, "start": start, "end": end}

        # The factories are lambdas so a worker class is only looked up when
        # its task name is actually selected.
        workers = (
            ("get_repos", lambda task: GithubRepo(task)),
            ("get_followers", lambda task: GithubFollowers(task)),
            ("get_events", lambda task: GithubEvent(task)),
            # TODO: do not support now
            ("get_users", lambda task: GithubUser(task)),
            ("get_repositories", lambda task: GithubRepositories(task)),
            ("get_contributors", lambda task: GithubContributors(task)),
            ("get_commit", lambda task: GithubCommit(task)),
            ("get_commit_p1", lambda task: GithubCommit(task)),
        )
        for name, build in workers:
            if cmd == name:
                self.val["name"] = name
                self.task.init("github", self.val)
                self.r = build(self.task)
                return 1

        print("Failed to init the task")
        return 0
Exemplo n.º 23
0
def resolve_event_errors():
    """Run ``error_check`` on "get_repositories" tasks with ``error_count`` >= 10.

    Note that, despite the function name, the tasks selected and re-initialised
    here are "get_repositories" tasks. The values returned by ``error_check``
    are summed and the total is printed.
    """
    github_tasks = DMDatabase().getClient()["task"]["github"]
    selector = {"name": "get_repositories", "error_count": {"$gte": 10}}
    solved = 0
    for entry in github_tasks.find(selector):
        spec = {"name": "get_repositories", "action_type": "loop", "start": entry["start"], "end": entry["end"]}
        task = DMTask()
        task.init("github", spec)
        solved += GithubRepositories(task).error_check()
    print(str(solved) + " errors solved")
Exemplo n.º 24
0
def test():
    """Exercise the repo crawler on a throwaway test task.

    Initialises a task through ``DMTask.init_test`` under "github", runs
    ``GithubRepo`` on it, then calls the task's ``remove()``.
    """
    spec = {
        "name": "fake-repo",
        "action_type": "loop",
        "start": 6001000,
        "end": 6005000,
    }
    scratch_task = DMTask()
    scratch_task.init_test("github", spec)
    GithubRepo(scratch_task).runTask()
    scratch_task.remove()
Exemplo n.º 25
0
def run_free_task(db, num):
    """Queue one worker thread per "get_commit_check" task in status "running".

    Each task returned by ``DMTask().getFreeTasks`` becomes a
    ``myThread(db, task)`` appended to the module-level ``user_thread`` list.
    The number queued is printed and ``run_task()`` is called.
    """
    request = {"col": "github", "num": num, "query": {"status": "running", "name": "get_commit_check"}}
    received = 0
    for record in DMTask().getFreeTasks(request):
        user_thread.append(myThread(db, record))
        received += 1
    print(str(received) + " task received, start to run them!\n\n\n\n")
    run_task()
Exemplo n.º 26
0
class myThread(threading.Thread):
    """Thread that runs ``get_commit_repos_by_users`` for one task record."""

    def __init__(self, db, val):
        """Store *db* and *val* and initialise a ``DMTask`` from *val* under "github"."""
        threading.Thread.__init__(self)
        self.db, self.val = db, val
        self.task = DMTask()
        self.task.init("github", val)

    def _update_progress(self, status, percent):
        # Write status, progress and the current UTC time to the task.
        self.task.update({"status": status, "percent": percent, "update_date": datetime.datetime.utcnow()})

    def run(self):
        """Mark the task running, process ``self.val["users"]``, mark it finished."""
        print("Start the thread" + str(self.val))
        self._update_progress("running", 0.0)
        get_commit_repos_by_users(self.db, self.val["users"])
        self._update_progress("finish", 1.0)

        print("Exist the thread")
Exemplo n.º 27
0
 def set(self, action_type, cmd, start, end):
     """Bind this thread to a fresh task of kind *cmd*.

     A new ``DMTask`` and the task description ``self.val`` are always
     created. When *cmd* is a known task name, the task is initialised
     under "github", the matching worker is stored in ``self.r`` and 1 is
     returned. Otherwise a failure message is printed, ``self.r`` stays
     None and 0 is returned.
     """
     self.task = DMTask()
     self.r = None
     self.val = {"action_type": action_type, "start": start, "end": end}

     # The factories are lambdas so a worker class is only looked up when
     # its task name is actually selected.
     workers = (
         ("get_repos", lambda task: GithubRepo(task)),
         ("get_followers", lambda task: GithubFollowers(task)),
         ("get_events", lambda task: GithubEvent(task)),
         # TODO: do not support now
         ("get_users", lambda task: GithubUser(task)),
         ("get_repositories", lambda task: GithubRepositories(task)),
         ("get_contributors", lambda task: GithubContributors(task)),
         ("get_commit", lambda task: GithubCommit(task)),
         ("get_commit_p1", lambda task: GithubCommit(task)),
     )
     for name, build in workers:
         if cmd == name:
             self.val["name"] = name
             self.task.init("github", self.val)
             self.r = build(self.task)
             return 1

     print("Failed to init the task")
     return 0
Exemplo n.º 28
0
def test():
    """Debug helper: print how many ``event`` documents match an id filter.

    Only the query at the top runs. The function returns right after
    printing, so the crawler smoke test further down is never reached.
    """
    db = DMDatabase().getDB()
    # NOTE(review): for a scalar "id", ">= 1000" and "< 200" cannot both hold,
    # so this filter looks like it can never match — confirm the intended bounds.
    res = db["event"].find({"id": {"$gte": 1000, "$lt": 200}}).limit(20)
    # NOTE(review): presumably find() returns a cursor rather than None, which
    # would make the first branch dead — verify.
    if res is None:
        print 'res is none'
    else:
        print res.count()
    # Early return: everything below is unreachable as written.
    return

    task1 = DMTask()
    val = {"name": "fake-event", "action_type": "loop", "start": 6001000, "end": 6005000}

    task1.init_test("github", val)
    e1 = GithubEvent(task1)
    e1.runTask()
    task1.remove()
Exemplo n.º 29
0
def init_users_thread_by_query(db, query):
    """Split the users matching *query* into "get_commit_check" tasks.

    Logins are read from the ``user_contributor_result2`` collection and
    grouped into batches of ``pieces`` (100). One task is registered per
    batch, carrying the batch's logins under ``"users"``, ``str(query)``
    under ``"query"``, and a synthetic range under ``"start"``/``"end"``:
    800000 plus the running count before and after the batch.

    Args:
        db: Not used. The handle is re-fetched from ``DMDatabase`` exactly as
            the original code did; the parameter stays for caller
            compatibility.
        query: Filter passed to ``find``.
    """
    pieces = 100
    base = 800000
    # NOTE(review): this overrides the ``db`` argument, as the original did.
    db = DMDatabase().getDB()

    def register(users, first, last):
        # Register one batch as a task under "github".
        task = DMTask()
        task.init("github", {"name": "get_commit_check", "action_type": "loop", "query": str(query), "users": users, "start": first, "end": last})

    count = 0
    user_list = []
    for item in db["user_contributor_result2"].find(query):
        count += 1
        user_list.append(item["login"])
        if count % pieces == 0:
            register(user_list, base + count - pieces, base + count)
            user_list = []

    if count % pieces != 0:
        # Fix: the trailing partial batch used ``800000 + 1`` as its end,
        # a typo for the running count, which put "end" below "start".
        register(user_list, base + count - count % pieces, base + count)
    return
Exemplo n.º 30
0
class myThread(threading.Thread):
    """Thread that runs ``get_commit_repos_by_users`` for one task record."""

    def __init__(self, db, val):
        """Store *db* and *val* and initialise a ``DMTask`` from *val* under "github"."""
        threading.Thread.__init__(self)
        self.db, self.val = db, val
        self.task = DMTask()
        self.task.init("github", val)

    def _update_progress(self, status, percent):
        # Write status, progress and the current UTC time to the task.
        self.task.update({"status": status, "percent": percent, "update_date": datetime.datetime.utcnow()})

    def run(self):
        """Mark the task running, process ``self.val["users"]``, mark it finished."""
        print("Start the thread" + str(self.val))
        self._update_progress("running", 0.0)
        get_commit_repos_by_users(self.db, self.val["users"])
        self._update_progress("finish", 1.0)

        print("Exist the thread")
Exemplo n.º 31
0
class myThread(threading.Thread):
    """Worker thread that runs one crawler task and can keep pulling more.

    ``set`` binds the thread to a task and a matching worker (``self.r``).
    ``run`` executes it and, when ``endless == 1``, keeps fetching tasks with
    status "init" until ``set`` rejects one.
    """

    def __init__(self, action_type, cmd, start, end, endless):
        """Create the thread and bind it to its first task via ``set``.

        Args:
            action_type, cmd, start, end: Forwarded to ``set``.
            endless: When equal to 1, ``run`` keeps fetching further tasks.
        """
        threading.Thread.__init__(self)
        self.endless = endless
        self.set(action_type, cmd, start, end)

    def set(self, action_type, cmd, start, end):
        """Bind this thread to a fresh task of kind *cmd*.

        A new ``DMTask`` and the task description ``self.val`` are always
        created. When *cmd* is a known task name, the task is initialised
        under "github", the matching worker is stored in ``self.r`` and 1 is
        returned. Otherwise a failure message is printed, ``self.r`` stays
        None and 0 is returned.
        """
        self.task = DMTask()
        self.r = None
        self.val = {"action_type": action_type, "start": start, "end": end}

        # The factories are lambdas so a worker class is only looked up when
        # its task name is actually selected.
        workers = (
            ("get_repos", lambda task: GithubRepo(task)),
            ("get_followers", lambda task: GithubFollowers(task)),
            ("get_events", lambda task: GithubEvent(task)),
            # TODO: do not support now
            ("get_users", lambda task: GithubUser(task)),
            ("get_repositories", lambda task: GithubRepositories(task)),
            ("get_contributors", lambda task: GithubContributors(task)),
            ("get_commit", lambda task: GithubCommit(task)),
            ("get_commit_p1", lambda task: GithubCommit(task)),
        )
        for name, build in workers:
            if cmd == name:
                self.val["name"] = name
                self.task.init("github", self.val)
                self.r = build(self.task)
                return 1

        print("Failed to init the task")
        return 0

    def run(self):
        """Run the bound task; when ``self.endless == 1`` keep pulling more.

        The method returns when ``set`` does not return 1 for a fetched task.
        """
        print("Starting " + str(self.val))
        if self.r:
            #            self.r.runFix2Task()
            self.r.runTask()
        print("Exiting " + str(self.val))

        if self.endless != 1:
            return

        # NOTE(review): when no task is available this loop polls again
        # immediately, with no delay.
        while 1:
            query = {"col": "github", "num": 1, "query": {"status": "init"}}
            #                query = {"col": "github", "num": 1, "query": {"status": "fixed", "name": "get_contributors"}}
            # Release the lock even if the lookup raises.
            threadLock.acquire()
            try:
                res = DMTask().getFreeTasks(query)
            finally:
                threadLock.release()
            if not res:
                continue
            for item in res:
                print(item)
                # Fix: set() takes (action_type, cmd, start, end); the
                # original call left out action_type and raised TypeError.
                if self.set(item["action_type"], item["name"],
                            item["start"], item["end"]) != 1:
                    return
                #                            self.r.runFix2Task()
                self.r.runTask()
                print("\n Start another task in the finished thread\n")
                # Only the first returned task is handled per poll.
                break
Exemplo n.º 32
0
def updated_contributors_task():
    """Register a "get_contributors" update task.

    The task's ``"start"`` is the value returned by ``get_last_saved_id()``
    and its ``"end"`` is 0.
    """
    resume_from = get_last_saved_id()
    task = DMTask()
    task.init("github", {
        "name": "get_contributors",
        "action_type": "update",
        "start": resume_from,
        "end": 0,
    })
Exemplo n.º 33
0
 def __init__(self, db, val):
     """Store *db* and *val* and initialise a ``DMTask`` from *val* under "github"."""
     threading.Thread.__init__(self)
     self.db, self.val = db, val
     self.task = DMTask()
     self.task.init("github", val)
Exemplo n.º 34
0
def single_download_demo(login, id, count):
    """Call ``upload_user_followers(login, id, count)`` through a test task.

    A test task is initialised via ``DMTask.init_test`` and wrapped in
    ``GithubFollowers`` before the call.
    """
    spec = {
        "name": "fake-followers",
        "action_type": "loop",
        "start": 6001000,
        "end": 6005000,
    }
    demo_task = DMTask()
    demo_task.init_test("github", spec)
    GithubFollowers(demo_task).upload_user_followers(login, id, count)
Exemplo n.º 35
0
class myThread(threading.Thread):
    """Worker thread that runs one crawler task and can keep pulling more.

    ``set`` binds the thread to a task and a matching worker (``self.r``).
    ``run`` executes it and, when ``endless == 1``, keeps fetching tasks with
    status "init" until ``set`` rejects one.
    """

    def __init__(self, action_type, cmd, start, end, endless):
        """Create the thread and bind it to its first task via ``set``.

        Args:
            action_type, cmd, start, end: Forwarded to ``set``.
            endless: When equal to 1, ``run`` keeps fetching further tasks.
        """
        threading.Thread.__init__(self)
        self.endless = endless
        self.set(action_type, cmd, start, end)

    def set(self, action_type, cmd, start, end):
        """Bind this thread to a fresh task of kind *cmd*.

        A new ``DMTask`` and the task description ``self.val`` are always
        created. When *cmd* is a known task name, the task is initialised
        under "github", the matching worker is stored in ``self.r`` and 1 is
        returned. Otherwise a failure message is printed, ``self.r`` stays
        None and 0 is returned.
        """
        self.task = DMTask()
        self.r = None
        self.val = {"action_type": action_type, "start": start, "end": end}

        # The factories are lambdas so a worker class is only looked up when
        # its task name is actually selected.
        workers = (
            ("get_repos", lambda task: GithubRepo(task)),
            ("get_followers", lambda task: GithubFollowers(task)),
            ("get_events", lambda task: GithubEvent(task)),
            # TODO: do not support now
            ("get_users", lambda task: GithubUser(task)),
            ("get_repositories", lambda task: GithubRepositories(task)),
            ("get_contributors", lambda task: GithubContributors(task)),
            ("get_commit", lambda task: GithubCommit(task)),
            ("get_commit_p1", lambda task: GithubCommit(task)),
        )
        for name, build in workers:
            if cmd == name:
                self.val["name"] = name
                self.task.init("github", self.val)
                self.r = build(self.task)
                return 1

        print("Failed to init the task")
        return 0

    def run(self):
        """Run the bound task; when ``self.endless == 1`` keep pulling more.

        The method returns when ``set`` does not return 1 for a fetched task.
        """
        print("Starting " + str(self.val))
        if self.r:
            #            self.r.runFix2Task()
            self.r.runTask()
        print("Exiting " + str(self.val))

        if self.endless != 1:
            return

        # NOTE(review): when no task is available this loop polls again
        # immediately, with no delay.
        while 1:
            query = {
                "col": "github",
                "num": 1,
                "query": {
                    "status": "init"
                }
            }
            #                query = {"col": "github", "num": 1, "query": {"status": "fixed", "name": "get_contributors"}}
            # Release the lock even if the lookup raises.
            threadLock.acquire()
            try:
                res = DMTask().getFreeTasks(query)
            finally:
                threadLock.release()
            if not res:
                continue
            for item in res:
                print(item)
                # Fix: set() takes (action_type, cmd, start, end); the
                # original call left out action_type and raised TypeError.
                if self.set(item["action_type"], item["name"],
                            item["start"], item["end"]) != 1:
                    return
                #                            self.r.runFix2Task()
                self.r.runTask()
                print("\n Start another task in the finished thread\n")
                # Only the first returned task is handled per poll.
                break
Exemplo n.º 36
0
 def __init__(self, db, val):
     """Store *db* and *val* and initialise a ``DMTask`` from *val* under "github"."""
     threading.Thread.__init__(self)
     self.db, self.val = db, val
     self.task = DMTask()
     self.task.init("github", val)