Exemplo n.º 1
0
def recommendation():
    """Recommend articles based on the current user's search history.

    Reads ``by`` from the posted form: "0" recommends by the user's most
    frequently searched author, anything else by the most frequent field.

    Returns:
        dict with ``status`` "success" and an HTML table under ``data``,
        or ``{"status": "fail"}`` when user mode is off, the history is
        empty, or no matching articles exist.
    """
    by = request.form.get("by")
    if not USER_MODE:
        return {"status": "fail"}
    temp = mycol.find_one({"_id": USER_ID})
    db = DB()
    if by == "0":  # recommend by author
        key = "author"
        search = db.search_article_by_author
    else:  # recommend by field
        key = "field"
        search = db.search_by_field
    try:
        history = temp[key]
        # Most frequent entry in the user's history drives the search.
        table = search(max(history, key=history.count))
    # TypeError: no user document; KeyError: missing history key;
    # ValueError: empty history passed to max().  (Was a bare except.)
    except (TypeError, KeyError, ValueError):
        return {'status': 'fail'}
    if table.empty:
        return {'status': 'fail'}
    return {"status": "success", "data": table.to_html(classes='data')}
Exemplo n.º 2
0
def delete():
    """Delete the article(s) described by the posted JSON payload."""
    payload = json.loads(request.form.get('data'))
    print(payload)
    database = DB()
    status = database.delete_article(payload)
    database.close()
    return jsonify({"code": status})
Exemplo n.º 3
0
def rank_author():
    """Return the top three ranked authors as JSON.

    Returns:
        JSON with ``status`` "success" and ``data`` mapping rank 1-3 to
        an author entry, or ``status`` "fail" when fewer than three
        authors exist.
    """
    # (Removed leftover debug print "sdfdsafsdfdsa".)
    db = DB()
    authors = db.show_rank_author()
    if len(authors) < 3:
        return jsonify({"status": "fail"})
    # Map rank position (1-3) to the corresponding author entry.
    result = {rank: authors[rank - 1] for rank in (1, 2, 3)}
    return jsonify({"status": "success", "data": result})
Exemplo n.º 4
0
def delete():
    """Delete an article identified by the 'id' form field."""
    article_id = request.form.get('id')
    outcome = DB().delete_article_by_id(article_id)
    # Report failure only for the explicit False result, as before.
    status = "fail" if outcome == False else "success"
    return jsonify({"status": status})
Exemplo n.º 5
0
def query():
    """Run an article query built from the posted JSON payload.

    The payload's values are passed positionally to the DB query.
    """
    payload = json.loads(request.form.get('data'))
    database = DB()
    table = database.query_article(*payload.values())
    database.close()
    if table.empty:
        return jsonify({"code": -1})
    return jsonify({"code": 0, "data": table.to_html(classes='data')})
Exemplo n.º 6
0
def rank_journal():
    """Return the top three ranked journals as JSON.

    Returns:
        JSON with ``status`` "success" and ``data`` mapping rank 1-3 to
        a journal entry, or ``status`` "fail" when fewer than three
        journals exist.
    """
    # (Removed leftover debug prints "dsfasdf" and print(journal).)
    db = DB()
    journals = db.show_rank_journal()
    if len(journals) < 3:
        return jsonify({"status": "fail"})
    # Map rank position (1-3) to the corresponding journal entry.
    result = {rank: journals[rank - 1] for rank in (1, 2, 3)}
    return jsonify({"status": "success", "data": result})
Exemplo n.º 7
0
def search():
    """Search articles by the requested criterion.

    Form fields: ``value`` is the search term; ``by`` selects the
    criterion (0=id, 1=author, 2=university, 3=title, 4=year,
    5=journal, anything else=field).  Successful author and field
    searches are recorded in the user's history when USER_MODE is on.

    Returns:
        ``{"status": "fail"}`` when nothing matched, otherwise JSON with
        ``status`` "success" and the result table as HTML under ``data``.
    """
    value = request.form.get('value')
    by = int(request.form.get('by'))
    db = DB()
    if by == 0:
        table = db.search_article_by_id(value)
    elif by == 1:
        table = db.search_article_by_author(value)
        if USER_MODE and not table.empty:
            # Record the successful author search in the user's history.
            # update_one replaces Collection.update, removed in PyMongo 4.
            mycol.update_one({"_id": USER_ID}, {"$push": {"author": value}})
    elif by == 2:
        table = db.search_article_by_university(value)
    elif by == 3:
        table = db.search_article_by_title(value)
    elif by == 4:
        table = db.search_article_by_year(value)
    elif by == 5:
        table = db.search_article_by_journal(value)
    else:
        table = db.search_by_field(value)
        if USER_MODE and not table.empty:
            # Record the successful field search in the user's history.
            mycol.update_one({"_id": USER_ID}, {"$push": {"field": value}})
    if table.empty:
        return {"status": "fail"}
    return jsonify({"status": "success", "data": table.to_html(classes='data')})
Exemplo n.º 8
0
def insert():
    """Insert a new article from the submitted form fields."""
    form = request.form
    database = DB()
    # Argument order expected by insert_article:
    # author, affiliation, cited-by count, title, year, pub URL, journal.
    outcome = database.insert_article(form.get('author'),
                                      form.get('aff'),
                                      form.get('num_citations'),
                                      form.get('name'),
                                      form.get('pub_year'),
                                      form.get('pub_url'),
                                      form.get('j_name'))
    if outcome == False:
        return jsonify({"status": "fail"})
    return jsonify({"status": "success"})
Exemplo n.º 9
0
def update():
    """Update one article column chosen by the 'choice' form value."""
    choice = int(request.form.get('choice'))
    article_id = request.form.get('id')
    new_value = request.form.get('value')
    database = DB()
    # Dispatch table for the updatable columns; an unknown choice leaves
    # result at 0, which reports as "fail" below (0 == False).
    updaters = {
        0: database.update_article_on_citedby,
        1: database.update_article_on_puburl,
        2: database.update_article_on_journal,
    }
    result = 0
    if choice in updaters:
        result = updaters[choice](article_id, new_value)
    if result == False:
        return jsonify({"status": "fail"})
    return jsonify({"status": "success"})
Exemplo n.º 10
0
def processOne(delivery_tag, params, channel, connection):
    """Process one RabbitMQ message describing a dump file to analyze.

    Args:
        delivery_tag: RabbitMQ delivery tag, ACKed at the end.
        params: message payload; "v" is the file name, "s" the source
            identifier, "w" a hint passed to analyzeFile — TODO confirm
            exact semantics of "w" against the publisher.
        channel: pika channel the message arrived on.
        connection: pika connection used to schedule the threadsafe ACK.
    """

    mariadb_connection = DB(host='db_dict',
                            port=3306,
                            user=os.environ['MYSQL_USER'],
                            password=os.environ['MYSQL_PASSWORD'],
                            database='crack_it')
    #QUERY INIT
    mariadb_connection.connect()

    print(" ")  #For log clarity
    print("processing:")
    print(params["v"])

    # Mutable one-element counter shared with commitIfNecessary so commits
    # can be batched across both loops below.
    dbactioncount = [0]

    filename = os.path.join(tmpDirectory, params["v"])

    #Check if file exist to avoid errors
    if os.path.isfile(filename):
        #Latin 1 will work for utf-8 but may mangle character if we edit the stream (see the official doc)
        linecount = wccount(filename)
        try:
            with open(filename, 'r', encoding="latin-1") as file:
                print("Processing file : " + filename)
                hashes, passwords = analyzeFile(file, params["w"], linecount)

            # Persist everything found, committing in batches.
            for item_hash in hashes:
                dbactioncount[0] += sendHash(item_hash, params["s"],
                                             mariadb_connection)
                commitIfNecessary(mariadb_connection, dbactioncount)
            for item_password in passwords:
                dbactioncount[0] += registerPassword(*item_password,
                                                     params["s"],
                                                     mariadb_connection)
                commitIfNecessary(mariadb_connection, dbactioncount)
            print("LAST COMMIT")
            mariadb_connection.commit()
        except ValueError:
            # ValueError from the parsing step means nothing usable was found.
            print("Nothing was found in this file. Discarding it....")
        finally:
            # The source file is removed whether or not parsing succeeded.
            print("Deleting source file")
            os.remove(filename)
            print("ACK-ING the message")
    else:
        print("FILE DOES NOT EXIST...")
        mariadb_connection.commit()
        print("Discarding (ACK-ING) the message")
    # The ACK must run on the connection's own I/O thread, hence the
    # threadsafe callback instead of a direct basic_ack call.
    ack_callback = functools.partial(ack_message, channel, delivery_tag)
    connection.add_callback_threadsafe(ack_callback)
Exemplo n.º 11
0
def processOne(delivery_tag, rabbitMQ_data_array, channel, connection):
	"""Try to crack one hash received from RabbitMQ and record the result.

	Args:
		delivery_tag: RabbitMQ delivery tag, ACKed via a threadsafe callback.
		rabbitMQ_data_array: message payload; 'value' is the hash string and
			'possibleHashTypes' the hashcat mode numbers to try.
		channel: pika channel the message arrived on.
		connection: pika connection used to schedule the threadsafe ACK.
	"""

	db = DB(host='db_dict', port=3306, user=os.environ['MYSQL_USER'], password=os.environ['MYSQL_PASSWORD'], database='crack_it')
	db.connect()

	# Prepare requests
	print("------------------------------Start------------------------------")
	check_bdd = "SELECT str FROM hash WHERE str = %s AND clear IS NOT NULL"
	update_bdd_hash = "UPDATE hash SET clear = %s WHERE str = %s"
	insert_bdd_clear = "INSERT INTO dict (password) VALUES (%s) ON DUPLICATE KEY UPDATE seen=seen+1" # TODO: adjust the query so 'seen' is not incremented again for the same repo (see parser)
	insert_bdd_clear_notAhash = "INSERT INTO dict (password, seen) VALUES (%s, %s) ON DUPLICATE KEY UPDATE seen=seen+%s"
	get_origin_hash_seen = "SELECT count(*) FROM origin_hash WHERE item = %s"
	move_origin_data = "INSERT INTO origin_dict (srckey, item) SELECT srckey, (SELECT id FROM dict WHERE password = %s) FROM origin_hash WHERE item = %s"
	delete_old_origin_data = "DELETE FROM origin_hash WHERE item = %s"
	delete_old_hash = "DELETE FROM hash WHERE id = %s"
	get_hash_ID = "SELECT id FROM hash WHERE str = %s"

	# Does the hash exist in db ?
	# NOTE(review): 'hash' shadows the builtin of the same name.
	hash = rabbitMQ_data_array['value']
	cursor = db.query(check_bdd, (hash,))
	result = cursor.fetchone()
	print("Processing Hash : "+rabbitMQ_data_array['value'])

	hash_presence_in_bdd = True

	# NOTE(review): assumes the hash row already exists; fetchall()[0][0]
	# raises IndexError otherwise — confirm the producer always inserts first.
	hashId=db.query(get_hash_ID, (hash,)).fetchall()[0][0]

	os.system("touch cracked.txt")

	if result == None:
		hash_presence_in_bdd = False
		success_token = False
	else:
		print("Hash already exists !")
		success_token = True

	# Counts how many tried hashcat modes rejected the input as "not a hash".
	notAHash=0
	if hash_presence_in_bdd == False:

		# Get Hash types (numbers) for hash types in hashcat
		for hashTypesNumber in rabbitMQ_data_array['possibleHashTypes']:
			print("------------------------------NewTry------------------------------")
			hashType = str(hashTypesNumber)

			# hashs cracking

			# NOTE(review): the file handle opened inline here is never closed.
			print(hash,  file=open('hash.txt', 'w'))
			print(str(path.exists("hash.txt")))

			#crack = subprocess.check_output(["hashcat","-a","0", "--show", "-m", hashType, "-o", "cracked.txt", "--force", hash, "dict/dict.txt"], stderr=subprocess.STDOUT, shell=False)
			print("Hashtype: "+str(hashType))
			"""
			try:
				#hashcat_proc= subprocess.Popen("hate_crack/hate_crack.py hash.txt "+str(hashType), encoding="latin1", input="2\n".encode(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
				while hashcat_proc.poll() is None:
					try:
						for line in hashcat_proc.stdout:
							if "Hash-value exception" in line or "Separator unmatched" in line:
								notAHash+=1		
								print("[hate_crack] [ERROR] "+line)
							print("[hate_crack] "+line)
					except (BrokenPipeError, IOError):
						print ('Caught InnerBrokenPipeError')
			except (BrokenPipeError, IOError):
				print ('Caught OuterBrokenPipeError')"""
			try:
				# NOTE(review): check_output returns bytes, so iterating 'crack'
				# yields ints and the substring tests below can never match —
				# confirm whether the output was meant to be decoded/split.
				crack = subprocess.check_output(["hate_crack/hate_crack.py", "hash.txt", str(hashType)], input="2\n".encode(), stderr=subprocess.STDOUT, shell=False)	
				for line in crack:
					if "Hash-value exception" in line or "Separator unmatched" in line:
						notAHash+=1		
						print("[hate_crack] [ERROR] "+line)
					print("[hate_crack] "+line)
			except subprocess.CalledProcessError as e:
				print("Hashcat failed: ")
				print(e.output)
			# Success
			if path.isfile("hash.txt.out"):
				if (path.getsize("hash.txt.out") > 0):
					print("------------------------------Success------------------------------")
					success_token = True
					cracked = open("hash.txt.out", "r")
					password_data = cracked.readline().split(":")
					password_clear = password_data[1]
					cracked.close()

					# Clear password db insert
					cursor = db.query(insert_bdd_clear, (password_clear,))
					db.commit()
					print("Dictionnary has been updated ... Added: "+password_clear)

					cursor = db.query(update_bdd_hash, (cursor.lastrowid,hash))
					db.commit()
					print(cursor.rowcount, "Linked hash to dict value.")
					
					# Erase cracked.txt file
					#os.remove("hash.txt.out")
					# Create a new one
					#os.system("touch cracked.txt")
					#Path('cracked.txt').touch()
				os.remove("hash.txt.out")
			os.remove("hash.txt")

	# Every tried mode rejected the input: treat it as a plain password and
	# migrate its rows from the hash tables into the dict tables.
	if notAHash == len(rabbitMQ_data_array['possibleHashTypes']):
		print("Not a hash ! this is probably a password ! Saving in DB.")
		
		print("Old Hash ID: "+str(hashId))
		cursor = db.query(get_origin_hash_seen, (hashId,))
		count=cursor.fetchall()[0][0]
		cursor = db.query(insert_bdd_clear_notAhash, (hash, count, count))
		cursor = db.query(move_origin_data, (hash,hashId))
		cursor = db.query(delete_old_origin_data, (hashId,))
		cursor = db.query(delete_old_hash, (hashId, ))
		db.commit()
		print("Done")
	else:
		print("==============")
		print("Not going to save in DB.")
		print("Errors: "+str(notAHash))
		print("Hash Types : "+str(len(rabbitMQ_data_array['possibleHashTypes'])))
		print("Hash in question : "+hash)
	

	# Erase cracked.txt file
	os.system("rm cracked.txt 2> /dev/null")

	# Insert hash in db if the script hasn't cracked it
	if success_token == False:
		#possibleHashTypes = str(rabbitMQ_data_array['possibleHashTypes'])
		#val_hash = [hash, possibleHashTypes, None]
		#cursor.execute(insert_bdd_hash, val_hash)
		#mariadb_connection.commit()
		print("Hash not decrypted")
	# NOTE(review): 'ch' and 'method' are not defined in this function; this
	# line raises NameError unless they exist as globals — confirm/remove.
	ch.basic_ack(method.delivery_tag)
	print("------------------------------End------------------------------")
	ack_callback = functools.partial(ack_message, channel, delivery_tag)
	connection.add_callback_threadsafe(ack_callback)
Exemplo n.º 12
0
        links = soup.find_all('div', class_='pager_container')
        list = []
        list.append('https://www.lagou.com/zhaopin/Java/?labelWords=label')
        for link in links:
            href = link.find('a', {'class': 'page_no', 'data-index': '2'})
            url = href['href'].encode('utf-8')
            for n in range(2, 31):
                new_url = url[:35] + str(n) + '/'
                list.append(new_url)
        return list


if __name__ == "__main__":
    # Entry point (Python 2): crawl lagou.com Java job listings page by
    # page and store each posting in the database.
    root_url = "https://www.lagou.com/zhaopin/Java/?labelWords=label"
    spider = Spider()
    content = spider.deal_url(root_url)
    soup = BeautifulSoup(content, "html.parser", from_encoding="utf-8")
    # Collect the pagination URLs derived from the first page.
    lists = spider.get_more_url(soup)
    for list in lists:  # NOTE(review): 'list' shadows the builtin
        content = spider.deal_url(list)
        print '正在爬取的链接:' + list
        soup = BeautifulSoup(content, "html.parser", from_encoding="utf-8")
        #print soup
        datas = spider.parse(soup)
        #print datas
        db = DB()
        for data in datas:
            print data['address']
            db.importDb(data['salary'], data['job'], data['address'],
                        data['require'], data['company'])
Exemplo n.º 13
0
def main():
    db = DB(host='db_dict',
            port=3306,
            user=os.environ['MYSQL_USER'],
            password=os.environ['MYSQL_PASSWORD'],
            database='crack_it')
    db.connect()
    success = False
    while not success:
        try:
            connection = pika.BlockingConnection(
                pika.ConnectionParameters(host='rabbitmq'))
            channel = connection.channel()
            success = True
        except (pika.exceptions.AMQPConnectionError) as e:
            success = False
            print("Failed to connect to rabbitMQ ... Retrying in 5 seconds.")
            time.sleep(5)

    #create the exchange if it does not exist already
    channel.exchange_declare(exchange='files', exchange_type='fanout')

    result = channel.queue_declare(queue='parser_files_queue',
                                   exclusive=False,
                                   auto_delete=False)
    queue_name = result.method.queue

    #bind the queue to the url exchange
    channel.queue_bind(exchange='files', queue=queue_name)

    #========================================================================

    #QUERY INIT
    _SQL = ("""
            SELECT * FROM source
            """)
    #QUERY EXECUTE
    cursor = db.query(_SQL)
    result = cursor.fetchall()
    print("Before loop")
    for row in result:
        #We call the module the row is aksing for (value: row[2]) in the crawler dict, which is a registry of all modules. We then pass it the url from the DB row
        #the result is an array of urls
        result, newsourcehint = crawlers[row[2]](row[1], row[4])
        cursor = db.query(
            "UPDATE source SET sourceHint = %s WHERE idsource = %s;",
            (newsourcehint, row[0]))
        db.commit()  # we could batch commit, but is it really worth it here ?
        for itemToParse in result:
            #assemble a json message to easely combine the two values, m=> module to use, v => url
            if itemToParse != None:
                datafiles = pre_parsers[row[2]](itemToParse, cacheDirectory,
                                                tmpDirectory)
            else:
                continue
            for datafile in datafiles:
                print("Sending one file.")
                message = json.dumps({
                    "m": row[2],
                    "s": itemToParse,
                    "v": datafile,
                    "w": row[5]
                })
                #send the message through rabbbitMQ using the urls exchange
                channel.basic_publish(exchange='files',
                                      routing_key='',
                                      body=message)

    #closing connection to rabbitMQ
    connection.close()
    print("Crawler done !")
Exemplo n.º 14
0
            str_all = anl.deal_str(str_new)
            deal_all = anl.deal_all(str_all)
            for new in deal_all:
                if new != '':
                    dis.append(new)
        return dis
#处理元组中为空的
    def deal_str(self, str_new):
        """Return the entries of *str_new* with empty strings removed.

        Args:
            str_new: iterable of strings, possibly containing ''.

        Returns:
            list of the non-empty entries, original order preserved.
        """
        # Comprehension replaces the manual append loop; the original loop
        # variable also shadowed the builtin ``str``.
        return [item for item in str_new if item != '']
#生成要处理的数组
    def deal_all(self, str_all):
        """Expand a parsed token into the sequence of values it covers.

        A single-element sequence is returned unchanged; otherwise the
        first two elements are treated as endpoints of an inclusive
        integer range.
        """
        if len(str_all) != 1:
            # Two endpoints: expand to the inclusive integer range.
            low, high = int(str_all[0]), int(str_all[1])
            return range(low, high + 1, 1)
        # Single value: nothing to expand.
        return str_all


if __name__ == "__main__":
    # Entry point (Python 2): fetch all salaries from the DB, normalize
    # them with Analyse, and print a frequency count per salary value.
    db = DB()
    result = db.get_salary()
    anl = Analyse()
    salary = anl.deal_result(result)
    # Counter maps each salary value to its number of occurrences.
    d = collections.Counter(salary)
    for a in d:
        print str(a)+":"+str(d[a])

Exemplo n.º 15
0
def update():
    """Update article(s) described by the posted JSON payload."""
    payload = json.loads(request.form.get('data'))
    database = DB()
    status = database.update_article(payload)
    database.close()
    return jsonify({"code": status})
Exemplo n.º 16
0
def insert():
    """Insert the article described by the posted JSON payload."""
    payload = json.loads(request.form.get('data'))
    database = DB()
    status = database.insert_article(payload)
    database.close()
    return jsonify({"code": status})
Exemplo n.º 17
0
from mysqldb import DB

# Module-level database handle shared by this module's code.
db = DB()
Exemplo n.º 18
0
def index():
    """Render the search-engine landing page."""
    # NOTE(review): this handle is unused here — presumably kept for its
    # connection side effect; confirm before removing.
    db = DB()
    # Removed unused local: the url_for('static', filename='style.css')
    # result was computed but never used.
    return render_template('webpage.html', titles="search engine")
Exemplo n.º 19
0
 def __init__(self):
     """Initialize by delegating to the DB base-class constructor."""
     DB.__init__(self)
Exemplo n.º 20
0
#!/usr/bin/python3

from __future__ import generators
import sys
import os
from mysqldb import DB
import time
from pathlib import Path
from shutil import copyfile

# Db connect
db = DB(host='db_dict',
        port=3306,
        user=os.environ['MYSQL_USER'],
        password=os.environ['MYSQL_PASSWORD'],
        database='crack_it')
db.connect()

# Select all clear passwords in db, most frequently seen first
select_password_clear = "SELECT password FROM dict ORDER BY seen DESC"

cursor = db.query(select_password_clear)

# Create the new dictionary file in txt format
Path('/dict/new_dict.txt').touch()


# Select a list of 1000 passwords in db and put them in dict.txt until there's no passwords left
def PasswordIterator(cursor, arraysize=1000):
    while True:
        passwords = cursor.fetchmany(arraysize)