Пример #1
0
    def start(self):
        """Queue a virus-scan task for every repo whose head commit moved
        since its last recorded scan, then wait for the pool to drain."""
        if not self.db_oper.is_enabled():
            return

        rows = self.db_oper.get_repo_list()
        if rows is None:
            # Repo list unavailable — nothing to scan this round.
            self.db_oper.close_db()
            return

        pool = ThreadPool(self.scan_virus, self.settings.threads)
        pool.start()

        for repo_id, head_commit_id, scan_commit_id in rows:
            if head_commit_id == scan_commit_id:
                # Repo unchanged since the previous scan — skip it.
                logger.debug('No change occur for repo %.8s, skip virus scan.',
                             repo_id)
                continue
            pool.put_task(ScanTask(repo_id, head_commit_id, scan_commit_id))

        pool.join()
        self.db_oper.close_db()
Пример #2
0
def download():
    """Download every enabled topic page concurrently and save the gathered
    lines to a timestamped .txt file.

    Returns:
        True always (the caller's menu loop expects a truthy result).
    """
    lines = ["Topics"]
    thread_pool = ThreadPool()
    any_chosen = False

    if enable_proxie[0]:
        refresh_proxie()

    filename = datetime.datetime.now().strftime("%d-%m-%Y %H-%M-%S") + '.txt'
    for i, enable in enumerate(download_enables):
        if enable:
            # Workers append downloaded lines into the shared `lines` list.
            thread_pool.give_task(download_concrete_page,
                                  args=(download_hrefs[i], lines))
            any_chosen = True

    thread_pool.join()
    if any_chosen:
        # Context manager guarantees the file is closed even if write() fails.
        with open(filename, 'w') as file:
            file.write('\n'.join(lines))

        # Bug fix: the message previously omitted the actual output filename.
        print(f'All chosen topics are saved to {filename}')
        to_main_menu()
    else:
        print("Nothing is chosen")
    input("Press <Enter> to continue")

    return True
Пример #3
0
 def run(self):
     """Queue a crawl job for every submission id not yet stored in the DB.

     Reads the full id list from FILE_SUBMISSION_ID, subtracts the ids
     already present in the `code` table, and feeds the remainder to the
     worker pool with a small delay between enqueues (rate limiting).
     """
     # cx is kept so the DB connection stays alive while cu is used.
     cx, cu = self.db_connection()
     pool = ThreadPool(size=20)
     pool.start()
     finished_submissions = [int(item[0]) for item in cu.execute("select submission_id from code")]
     # Bug fix: the id file was opened but never closed; `with` closes it
     # deterministically instead of leaking the handle.
     with open(FILE_SUBMISSION_ID) as file_submission_id:
         all_submissions = [int(item) for item in file_submission_id.readlines()]
     for line in list(set(all_submissions).difference(set(finished_submissions))):
         sleep(0.2)
         pool.append_job(s.job, line)
     pool.join()
     pool.stop()
Пример #4
0
def start_tasks():
    """Create one FetcherTask per store, run them on a 20-thread pool,
    and block until every task has finished."""
    stores = load_stores()
    total = len(stores)
    pool = ThreadPool(size=20)
    for num, store in enumerate(stores, start=1):
        pool.push_task(FetcherTask(store=store, num=num, total=total))
    pool.init_pool()
    pool.start()
    print('Waiting Task Finished......')
    pool.join()
def main():
    """Queue a SlotStateFetchTask for every store and wait for completion."""
    stores = load_stores()
    total = len(stores)
    pool = ThreadPool(size=20)
    for pos, store in enumerate(stores, start=1):
        pool.push_task(SlotStateFetchTask(store, pos=pos, total=total))
    pool.init_pool()
    pool.start()
    print('Waiting for tasks exit!!!')
    pool.join()
Пример #6
0
def start_tasks():
    """Push an UnderLoadSlotZeroTask per store onto a 20-thread pool and
    block until all tasks are done."""
    store_list = load_stores()
    pool = ThreadPool(size=20)
    task_count = len(store_list)
    for position, store in enumerate(store_list, start=1):
        task = UnderLoadSlotZeroTask(store=store, total=task_count, pos=position)
        pool.push_task(task)
    pool.init_pool()
    pool.start()
    print('Waiting for task exit!')
    pool.join()
def main():
    """Queue a CompensationDisableTask for each store, start the pool,
    and wait for every task to exit."""
    stores = load_stores()
    pool = ThreadPool(size=20)
    total = len(stores)
    for index, store in enumerate(stores, start=1):
        pool.push_task(CompensationDisableTask(store=store, index=index, total=total))
    pool.init_pool()
    print('Starting tasks...')
    pool.start()
    print('Waiting for task exit!')
    pool.join()
Пример #8
0
    def compile(self, jobs=0):
        """Compile every target's sources in parallel.

        Args:
            jobs: number of parallel compile jobs; 0 means one per CPU core.
        """
        if 0 == jobs:
            jobs = cpu_count()

        self.print_msg('BS', 'Using %s parallel job(s)' % colored(str(jobs), 'yellow'))
        for target in self.targets:
            self.print_msg('BS', 'Building target %s' % colored(target.name, 'yellow'))
            pool = ThreadPool(jobs)
            for source in target.sources:
                args = (target, source, None)
                pool.apply_async(self.compile_object, args=args, callback=self.compile_object_done)
            # Removed the redundant `except BuildError as e: raise` clause:
            # try/finally alone re-raises any error unchanged while still
            # shutting the pool down cleanly.
            try:
                self._wait_for_compilation(target)
            finally:
                pool.close()
                pool.join()
            self.run_prefinal(target)
            target.final(self)
# Fetch online-judge statistics for every registered user in parallel.
d = DBHandler()
s = Spider()

user_list = d.get_user_list()

# Worker pool of 10 threads; jobs are appended below and run concurrently.
pool = ThreadPool(size=10)
pool.start()


def add_username(func, username, oj_username):
    """Fetch judge stats via *func* for *oj_username* and tag the resulting
    dict with the local *username* before returning it."""
    record = func(oj_username)
    record["username"] = username
    return record


# Queue one fetch job per judge account for every user.
# Row layout appears to be (username, bestcoder_id, codeforces_id, hduoj_id)
# — TODO confirm against DBHandler.get_user_list().
for user in user_list:
    pool.append_job(add_username, s.bestcoder, user[0], user[1])
    # NOTE(review): 'codefoces' looks like a typo for 'codeforces', but it must
    # match the actual Spider attribute name — verify before renaming.
    pool.append_job(add_username, s.codefoces, user[0], user[2])
    pool.append_job(add_username, s.hduoj, user[0], user[3])
pool.join()
pool.stop()


# Drain worker results and persist them per user; only the website the
# result came from carries real data, the other keys stay empty.
while not pool.results.empty():
    result = pool.results.get()
    username = result["username"]
    kwargs = {"bestcoder": {}, "codeforces": {}, "hduoj": {}}
    kwargs[result["website"]] = result

    d.save_user_info(username, **kwargs)
Пример #10
0
                c = item[i].decode("gb2312")
                if i == 0:
                    l.append(c)
                else:
                    if c[0] == "&":
                        l.append(0)
                    else:
                        l.append(1)
            rooms.append(l)
        with open(
                "data/" + campus + "." + building + "." + week + "." +
                week_day + ".json", "w") as f:
            f.write(json.dumps(rooms))
        print "finish: week:" + week + " week_day:" + week_day
        return "success"


if __name__ == "__main__":
    s = Spider()
    # Hard-coded session cookie; must be refreshed once it expires.
    s.cookies = {"JSESSIONID": "8B7DA565F71772D37B04170241A757A8.TAB2;"}
    pool = ThreadPool(size=20)
    pool.start()

    # Crawl every (week, week_day) combination: weeks 1-20, days 1-7.
    for week in range(1, 21):
        for week_day in range(1, 8):
            print "start week:" + str(week) + " week_day:" + str(week_day)
            # Make sure the campus id and building id in info.py are correct,
            # then adjust the campus/building ids below per info.py's data.
            pool.append_job(s.craw, "1709", "1783", str(week), str(week_day))
    pool.join()
Пример #11
0
class ContentScan(object):
    """Scan repository file contents via a third-party scan API.

    Each pass diffs every non-virtual repo against the commit recorded at
    its previous scan, sends added/modified files to the scan service, and
    persists findings in ContentScanResult / ContentScanRecord.
    """

    def __init__(self):
        # Worker pool: each thread runs diff_and_scan_content on the
        # ScanTasks queued via put_task().
        self.thread_pool = ThreadPool(self.diff_and_scan_content, appconfig.thread_num)
        self.thread_pool.start()

    def start(self):
        """Run one full scan pass, logging (not raising) any failure."""
        try:
            self.do_scan_task()
        except Exception as e:
            logging.warning('Error: %s', e)

    def do_scan_task(self):
        """Queue a scan task per non-virtual repo, then prune stale records."""
        logging.info("Start scan task..")
        time_start = time.time()

        # Timestamp for this pass, truncated to whole seconds via a
        # format round-trip so it compares equal to DB-stored values.
        dt = datetime.utcnow()
        dt_str = dt.strftime('%Y-%m-%d %H:%M:%S')
        self.dt = datetime.strptime(dt_str,'%Y-%m-%d %H:%M:%S')

        edb_session = appconfig.session_cls()
        seafdb_session = appconfig.seaf_session_cls()

        # Get repo list from seafile-db
        Branch = SeafBase.classes.Branch
        VirtualRepo= SeafBase.classes.VirtualRepo
        q = seafdb_session.query(Branch.repo_id, Branch.commit_id)
        q = q.outerjoin(VirtualRepo, Branch.repo_id==VirtualRepo.repo_id)
        # Keep only real repos: rows with no matching VirtualRepo entry.
        q = q.filter(VirtualRepo.repo_id == None)
        results = q.all()
        for row in results:
            repo_id = row.repo_id
            new_commit_id = row.commit_id
            last_commit_id = None
            # Commit id recorded at the previous scan of this repo, if any.
            q = edb_session.query(ContentScanRecord.commit_id)
            q = q.filter(ContentScanRecord.repo_id==repo_id)
            result = q.first()
            if result:
                last_commit_id = result[0]

            self.put_task(repo_id, last_commit_id, new_commit_id)

        # Remove deleted repo's record after all threads finished
        self.thread_pool.join()
        # Any record not touched this pass belongs to a deleted repo.
        q = edb_session.query(ContentScanRecord)
        q = q.filter(ContentScanRecord.timestamp != self.dt)
        q.delete()
        # Drop results whose repo no longer has a scan record.
        q = edb_session.query(ContentScanResult)
        subqurey = edb_session.query(ContentScanRecord.repo_id)
        q = q.filter(ContentScanResult.repo_id.notin_(subqurey))
        # 'fetch' strategy is required because the filter uses a subquery.
        q.delete(synchronize_session='fetch')
        edb_session.commit()

        edb_session.close()
        seafdb_session.close()
        logging.info('Finish scan task, total time: %s seconds\n', str(time.time() - time_start))

        self.thread_pool.join(stop=True)

    def diff_and_scan_content(self, task, client):
        """Worker entry point: diff one repo since its last scan and scan
        any new content.

        Args:
            task: ScanTask carrying repo_id, last_commit_id, new_commit_id.
            client: scan-service client exposing scan(content) -> dict.
        """
        repo_id = task.repo_id
        last_commit_id = task.last_commit_id
        new_commit_id = task.new_commit_id
        edb_session = appconfig.session_cls()

        # repo not changed, update timestamp
        if last_commit_id == new_commit_id:
            q = edb_session.query(ContentScanRecord)
            q = q.filter(ContentScanRecord.repo_id==repo_id,
                         ContentScanRecord.commit_id==last_commit_id)
            q.update({"timestamp": self.dt})
            edb_session.commit()
            edb_session.close()
            return

        # diff
        version = 1
        new_commit = commit_mgr.load_commit(repo_id, version, new_commit_id)
        if new_commit is None:
            # Fall back to the old (version 0) repo storage format.
            version = 0
            new_commit = commit_mgr.load_commit(repo_id, version, new_commit_id)
        if not new_commit:
            logging.warning('Failed to load commit %s/%s', repo_id, new_commit_id)
            edb_session.close()
            return
        last_commit = None
        if last_commit_id:
            last_commit = commit_mgr.load_commit(repo_id, version, last_commit_id)
            if not last_commit:
                logging.warning('Failed to load commit %s/%s', repo_id, last_commit_id)
                edb_session.close()
                return
        new_root_id = new_commit.root_id
        # ZERO_OBJ_ID diffs against an empty tree, i.e. a first-time scan.
        last_root_id = last_commit.root_id if last_commit else ZERO_OBJ_ID

        differ = CommitDiffer(repo_id, version, last_root_id, new_root_id,
                              True, False)
        added_files, deleted_files, added_dirs, deleted_dirs, modified_files,\
        renamed_files, moved_files, renamed_dirs, moved_dirs = differ.diff_to_unicode()

        # Handle renamed, moved and deleted files.
        q = edb_session.query(ContentScanResult).filter(ContentScanResult.repo_id==repo_id)
        results = q.all()
        if results:
            path_pairs_to_rename = []
            paths_to_delete = []
            # renamed dirs: rewrite stored paths that live under the old prefix
            for r_dir in renamed_dirs:
                r_path = r_dir.path + '/'
                l = len(r_path)
                for row in results:
                    if r_path == row.path[:l]:
                        new_path = r_dir.new_path + '/' + row.path[l:]
                        path_pairs_to_rename.append((row.path, new_path))
            # moved dirs
            for m_dir in moved_dirs:
                m_path = m_dir.path + '/'
                l = len(m_path)
                for row in results:
                    if m_path == row.path[:l]:
                        new_path = m_dir.new_path + '/' + row.path[l:]
                        path_pairs_to_rename.append((row.path, new_path))
            # renamed files
            for r_file in renamed_files:
                r_path = r_file.path
                for row in results:
                    if r_path == row.path:
                        new_path = r_file.new_path
                        path_pairs_to_rename.append((row.path, new_path))
            # moved files
            for m_file in moved_files:
                m_path = m_file.path
                for row in results:
                    if m_path == row.path:
                        new_path = m_file.new_path
                        path_pairs_to_rename.append((row.path, new_path))

            for old_path, new_path in path_pairs_to_rename:
                q = edb_session.query(ContentScanResult)
                q = q.filter(ContentScanResult.repo_id==repo_id, ContentScanResult.path==old_path)
                q = q.update({"path": new_path})

            # deleted files
            for d_file in deleted_files:
                d_path = d_file.path
                for row in results:
                    if d_path == row.path:
                        paths_to_delete.append(row.path)
            # We will scan modified_files and re-record later,
            # so delete previous records now
            for m_file in modified_files:
                m_path = m_file.path
                for row in results:
                    if m_path == row.path:
                        paths_to_delete.append(row.path)

            for path in paths_to_delete:
                q = edb_session.query(ContentScanResult)
                q = q.filter(ContentScanResult.repo_id==repo_id, ContentScanResult.path==path)
                q.delete()

            edb_session.commit()

        # scan added_files and modified_files by third-party API.
        files_to_scan = []
        files_to_scan.extend(added_files)
        files_to_scan.extend(modified_files)
        a_count = 0
        scan_results = []
        for f in files_to_scan:
            if not self.should_scan_file (f.path, f.size):
                continue
            seafile_obj = fs_mgr.load_seafile(repo_id, 1, f.obj_id)
            content = seafile_obj.get_content()
            if not content:
                continue
            result = client.scan(content)
            # Only a dict result counts as a positive finding; anything
            # else is treated as a scan failure for this file.
            if result and isinstance(result, dict):
                item = {"path": f.path, "detail": result}
                scan_results.append(item)
            else:
                logging.warning('Failed to scan %s:%s', repo_id, f.path)

        for item in scan_results:
            detail = json.dumps(item["detail"])
            new_record = ContentScanResult(repo_id, item["path"], appconfig.platform, detail)
            edb_session.add(new_record)
            a_count += 1
        if a_count >= 1:
            logging.info('Found %d new illegal files.', a_count)

        # Update ContentScanRecord: refresh the existing row, or insert one
        # on the repo's first scan.
        if last_commit_id:
            q = edb_session.query(ContentScanRecord).filter(ContentScanRecord.repo_id==repo_id)
            q.update({"commit_id": new_commit_id, "timestamp": self.dt})
        else:
            new_record = ContentScanRecord(repo_id, new_commit_id, self.dt)
            edb_session.add(new_record)

        edb_session.commit()
        edb_session.close()

    def put_task(self, repo_id, last_commit_id, new_commit_id):
        """Queue one repo for scanning on the worker pool."""
        task = ScanTask(repo_id, last_commit_id, new_commit_id)
        self.thread_pool.put_task(task)

    def should_scan_file(self, fpath, fsize):
        """Return True if the file is within the size limit and its suffix
        is on the configured whitelist."""
        if fsize > appconfig.size_limit:
            return False

        filename, suffix = splitext(fpath)
        # splitext's suffix keeps the leading dot; strip it for the check.
        if suffix[1:] not in appconfig.suffix_list:
            return False

        return True