Exemplo n.º 1
0
def search_engine():
    """Interactive console loop: build an index over the chosen collection,
    then answer boolean/vector queries until the user enters "Stop"."""
    collection = input("On which collection do you want to make a query ? (cacm/stanford) : ")

    if collection == "cacm":
        builder = input("Which type of building do you want to use to make your index ? (bsbi/mapreduce/memory) : ")
        collection_path = input("What is the path of the CACM collection ? ")
        stopwords_path = "CACM/common_words"

        if builder == "bsbi":
            # BSBI needs a destination folder for its on-disk blocks.
            index_folder = input("In which folder do you want to create CACM index ? : ")
            index = constructbsbi_index_CACM(collection_path, stopwords_path, index_folder)
        elif builder == "mapreduce":
            index = constructmapred_index_CACM(collection_path, stopwords_path)
        elif builder == "memory":
            index = constructmemory_index_CACM(collection_path, stopwords_path)
        else:
            raise ValueError("Not a type of index building allowed")

        #### STATISTICS (disabled hooks) #####
        # index.half_collection()  # vocabulary-size estimate on half the collection
        # index.rang_freq()        # rank/frequency plot (memory index only)
        # print("There are {} tokens in the collection".format(str(index.nb_tokens)))
        # print("There are {} distinct words in the vocabulary".format(str(index.size_voc())))

    elif collection == "stanford":
        path = input("Path for stanford collection ?")
        index = constructmemory_index_Stanford(path)

        #### STATISTICS (disabled hooks) #####
        # index.rang_freq()
        # print("There are {} tokens in the collection".format(str(index.nb_tokens)))
        # print("There are {} distinct words in the vocabulary".format(str(index.size_voc())))

    else:
        raise ValueError("Not a collection allowed")

    # Query loop: re-prompt after every search; typing "Stop" ends the session.
    while True:
        type_search = input("Type of search ? (boolean/vector) : ")
        user_request = input("Search : ")
        if user_request == "Stop":
            break

        if type_search == "boolean":
            start_time = time.time()
            hits = SearchBoolean(user_request).do_search(index)
            Search.display_docs(hits, type_search)
            print("Results in %s seconds ---" % (time.time() - start_time))
        elif type_search == "vector":
            start_time = time.time()
            hits = SearchVector(user_request).do_search(index, 20)
            Search.display_docs(hits, type_search)
            print("Results in %s seconds ---" % (time.time() - start_time))
Exemplo n.º 2
0
def main():
    """CLI entry point: dispatch to lexicon/index generation or search."""
    parser = argparse.ArgumentParser()
    commands = parser.add_subparsers(dest='subparser')

    lex = commands.add_parser("generate_lexicon")
    lex.add_argument('--b_range', type=str, help="Batches numbers range start and end creating/updating lexicon from. For example 1,3")
    lex.add_argument('--d', type=int, default=0, help="Print demo results.")

    fwd = commands.add_parser("generate_forward_index")
    fwd.add_argument('--b_range', type=str, help="Batches numbers range start and end creating/updating forward index from. For example 1,3")
    fwd.add_argument('--d', type=int, default=0, help="Print demo results.")

    inv = commands.add_parser("generate_inverted_index")
    inv.add_argument('--b', type=str, help="Forward Index Batches to create inverted_index from. Comma Separated.")
    inv.add_argument('--d', type=int, default=0, help="Print demo results.")

    srch = commands.add_parser("search")
    srch.add_argument("--q", type=str, help="Search Query.")

    args = parser.parse_args()
    cmd = args.subparser

    if cmd == 'generate_lexicon':
        # --b_range "start,end" -> positional (start, end) ints.
        start_end = list(map(int, args.b_range.split(",")))
        generate_lexicon.main(*start_end, demo=args.d)
    elif cmd == 'generate_forward_index':
        start_end = list(map(int, args.b_range.split(",")))
        generate_forward_index.main(*start_end, demo=args.d)
    elif cmd == 'generate_inverted_index':
        generate_inverted_index.main(args.b.split(','), demo=args.d)
    elif cmd == 'search':
        # Load persisted indexes, then run a one-shot query.
        lexicon = Lexicon(config.LEXICON_PATH)
        inverted_index = InvertedIndex(config.INVERTED_INDEX_BARRELS_PATH, config.INVERTED_INDEX_BARRELS_TEMP_PATH, len(lexicon), config.INVERTED_INDEX_BARREL_SIZE)
        engine = Search(lexicon, inverted_index)
        print(engine.search(args.q))
Exemplo n.º 3
0
def search(request):
    """Django view: render results for the ``q`` GET parameter over Line objects."""
    query = request.GET.get('q')  # None when q is absent
    results = Search().get_results(query=query, model=Line)
    context = {'query': query, 'results': results}
    return render(request, template_name='search.html', context=context)
Exemplo n.º 4
0
    def list(request):
        """Render the search list page for the current user.

        ``anonymous=1`` in the query string skips owner resolution. A failed
        search (SearchException) is deliberately best-effort and renders with
        ``search`` set to None.
        """
        query = request.GET.get('q', None)
        anonymous = request.GET.get('anonymous', False)
        owner = None

        if not anonymous:
            owner = Account.get_by_user(request.user)

        # Bug fix: if Search(...) itself raised, ``search`` was never bound
        # and the render below crashed with UnboundLocalError.
        search = None
        try:
            search = Search(query, owner)
            search.process()
        except SearchException:
            pass

        notify_count = Notification.objects.filter(owner__user=request.user, viewed=False).count()
        return render(request, 'search/list.html', {'search': search, 'notify_count': notify_count})
Exemplo n.º 5
0
def api_getconnection():
    """api for creating connection

    Authenticates the submitted credentials against the GitHub API, then
    returns repositories matching the search keyword. The HTTP status is 201
    in both branches (kept for backward compatibility); the real outcome is
    carried in the JSON "status"/"authorization" fields.
    """
    if rq.method == 'POST':
        user_id = rq.form['user_id']
        password = rq.form['password']
        search_keyword = rq.form['search_keyword']
        response = requests.get('https://api.github.com',
                                auth=(user_id, password))
        # NOTE(review): GitHub's Status header reads like "401 Unauthorized" /
        # "403 Forbidden"; the bare 'Forbidden' comparison likely never
        # matches — confirm against the live API before relying on it.
        status = response.headers['Status']
        if status == '401 Unauthorized' or status == 'Forbidden':
            app.logger.info('failed to log in.... Please try again')
            msg = json.dumps({"status": 401, "authorization": "False"})
            # Fixed mimetype: 'application.json' is not a valid media type;
            # the sibling endpoints use 'application/json'.
            res = Response(msg, status=201, mimetype='application/json')
            return res
        else:
            owner_repositories = Search.search(search_keyword)
            # (Removed a dead ``owner_repositories_dict = {}`` assignment that
            # was immediately overwritten.)
            connection_response = json.dumps({
                "status": 200,
                "authorization": "True",
                "owner_repositories_list": owner_repositories
            })
            app.logger.info('logged in successfully')
            res = Response(connection_response,
                           status=201,
                           mimetype='application/json')
            return res
def conceal(tweet_file, config, endword_index=False):
    """Hide the words of *tweet_file* inside generated cover text (Python 2).

    Repeatedly calls conceal_step(), collecting (words, link, first_link_word)
    tuples until data_words is exhausted; returns the collected tuples.

    NOTE(review): endword_index doubles as a flag and as an index into
    dicts.keywords, so endword_index=0 is indistinguishable from False.
    """
    dicts = dictionaries.load_dictionaries(config)
    print 'keywords (x) = ', config.x
    print 'Essence len = ', config.essence_len
    distillery = Distillery(config.essence_len, dicts.keywords)
    search_engine = Search()
    # Read the whole tweet file into memory and split on whitespace.
    raw_data_words = open(tweets_path + tweet_file).read().split()
    # Lowercase and strip punctuation from each word, then expand it through
    # the english dictionary into the keyword stream to be concealed.
    data_words = [
        keyword for word in raw_data_words
        for keyword in dicts.english["".join(
            c for c in word.lower()
            if c not in ('!', '.', ':', ',', '?', '"', '-'))]
    ]

    # Seed the output with config.w copies of a starting keyword.
    if endword_index:
        words = [dicts.keywords[endword_index]] * config.w
    else:
        words = [dicts.keywords[config.x - 1]] * config.w

    collected_words = [(words, '', '')]
    stats = WordsStats(config, tweet_file, collected_words)

    try:
        while True:

            # Avoid inserting 3rd link word in data
            # iteration_type = len(collected_words) % 10
            # Hard-coded to 1: every iteration chooses a new link word.
            iteration_type = 1

            if iteration_type == 0:
                insert_link_word_in_d = True
                choose_new_link_word = False
            elif iteration_type == 1:
                first_link_word = 'This string is ignored'
                insert_link_word_in_d = False
                choose_new_link_word = True
            else:
                insert_link_word_in_d = False
                choose_new_link_word = False

            words, link, first_link_word = conceal_step(
                data_words, words, first_link_word, insert_link_word_in_d,
                choose_new_link_word, search_engine, distillery, dicts, stats)
            collected_words.append((
                words,
                link,
                first_link_word,
            ))
            if not data_words:
                break

    except Exception:
        # Print the traceback, then re-raise preserving the original
        # traceback object (Python 2 three-argument raise).
        print(traceback.format_exc())
        t, v, tb = sys.exc_info()
        # distillery.browser.close()
        raise t, v, tb

    print "collected words are: %s" % collected_words
    return collected_words
Exemplo n.º 7
0
def search_results(request):
    ''' Render substitute products for the product named in the user request. '''
    form = RequestForm(request.GET)

    if form.is_valid():
        query = form.cleaned_data['user_request']
    else:
        # Invalid form: render the "no product" page.
        return render(request, 'search/results.html', {'product': "None"})

    search = Search()
    result_infos = []
    search_product = search.find_product(query)

    if not search_product:
        return render(request, 'search/results.html', {'product': "None"})
    else:
        product = search.product_infos(search_product)

        # NOTE(review): ``categorie`` is never used — find_substitute() is
        # called once per category with the same argument, producing repeats
        # that the dedup below strips out. Confirm whether a single call would
        # suffice before simplifying.
        for categorie in product['categories']:
            result_info = search.find_substitute(search_product)
            result_infos.extend(search.result_infos(result_info))

        # Remove duplicates (keeps the LAST occurrence of each item, since an
        # item is kept only when no equal item appears later in the list).
        result_infos = [
            i for n, i in enumerate(result_infos)
            if i not in result_infos[n + 1:]
        ]

        return render(request, 'search/results.html', {
            'product': product,
            'results': result_infos
        })
Exemplo n.º 8
0
def fileresult():
    """api for file level features"""
    if request.method == 'POST':
        keyword = request.form['search_keyword']
        fetcher = Fetch_file()
        # Convert every repository matching the keyword to a CSV of features.
        for repo in Search.search(keyword):
            fetcher.json_to_csv(repo["owner_name"], repo["repository_name"])
        msg = json.dumps({"status": 200, "state": "files level Csv is being Constructed"})
        res = Response(msg, status=201, mimetype='application/json')
        return res
Exemplo n.º 9
0
# Parse the paging bounds from config once, instead of evaluating the same
# expression twice in the signature defaults.
# SECURITY: eval() executes arbitrary code from the config file; if pg_range
# is a plain dict literal like "{'ge': 1, 'le': 50}", ast.literal_eval is the
# safe drop-in replacement.
_pg_range = eval(cfg["search"]["pg_range"])


async def search(
    q: str = Query(None, max_length=280),
    page: Optional[int] = Query(None, ge=_pg_range["ge"], le=_pg_range["le"]),
):
    """Search endpoint: run query *q* against the index, optionally paged."""
    return Search()._query(q, page)


# Re-queries and populates database at scheduled time
# Use cron expression to set refresh rate
@aiocron.crontab(cfg["CRAWLER"]["refresh_rate"])
Exemplo n.º 10
0
    def test_json_to_csv_conversion(self, mock_search, mock_fetching_data):
        '''Check that json_to_csv_conversion returns a dataframe whose
        columns match the keys of the mocked repository-feature record.'''
        # Mocked Search.search result: one owner/repository pair.
        mock_res1 = [{'owner_name': 'd3', 'repository_name': 'd3'}]
        type(mock_search).return_value = mock.PropertyMock(
            return_value=mock_res1)

        # Mocked per-repository feature record produced by the fetch layer.
        mock_res2 = {
            'repository_name': 'd3',
            'pull_numbers': 17,
            'open_pr_time': 119576.0,
            'open_pull_request': 3,
            'forks_count': 371,
            'commits': 4,
            'changed_files': 4,
            'pushed_time': 54987.0,
            'watchers_count': 6525,
            'open_issue_count': 556,
            'pull_request_acceptance_rate': 66.66666,
            'contributor_acceptance_rate': 20.5,
            'size': 2,
            'changes': 2,
            'state': 'Accepted'
        }
        type(mock_fetching_data).return_value = mock.PropertyMock(
            return_value=mock_res2)

        # NOTE(review): identical to the mock_res1 wiring above — this second
        # PropertyMock assignment overwrites the first with the same data.
        mock_res3 = [{'owner_name': 'd3', 'repository_name': 'd3'}]
        type(mock_search).return_value = mock.PropertyMock(
            return_value=mock_res3)
        response_push = None
        config = Utils().get_config_file('config.ini')
        search_keyword = config.get('Search', 'search_keyword')
        owner_repositories = Search.search(search_keyword)
        for i in owner_repositories:
            owner_name = i["owner_name"]
            repository_name = i["repository_name"]
            response_push = Fetch().json_to_csv_conversion(
                owner_name, repository_name)
        # The dataframe's column names must equal the mocked record's keys.
        self.assertListEqual(list(response_push.columns), list(mock_res2))
Exemplo n.º 11
0
"""
    Starting point of program...
"""

__author__ = 'Acko'

import os

from search.search import Search
from utils.postfix_parser import InvalidInput, QuitRequest


if __name__ == '__main__':
    initial_path = raw_input("Unesite putanju do baze: ")
    Search.print_instruction()
    print 'Ucitavanje. Molim vas sacekajte...'
    s = Search(os.path.abspath(initial_path))

    while True:
        try:
            s.find_expression(raw_input("Unesite reci za pretragu (ili 'QUIT' za izlaz): "))
        except InvalidInput:
            print "Pogresno unet zahtev, pokusajte ponovo."
        except QuitRequest:
            break

    print 'Dovidjenja'
Exemplo n.º 12
0
'''Entry point for Search class'''
from search.search import Search
from utils.utils import Utils

if __name__ == '__main__':
    UTILS = Utils()
    CONFIG = UTILS.get_config_file('config.ini')
    # Keyword read from the [Search] section of config.ini.
    SEARCH_KEYWORD = CONFIG.get('Search', 'search_keyword')
    UTILS.user_path()  # NOTE(review): presumably prepares the working/output path — confirm in Utils
    SEARCH = Search()
    SEARCH.search(search_keyword=SEARCH_KEYWORD)
    
Exemplo n.º 13
0
Function views
    1. Add an import:  from my_app import views
    2. Add a URL to urlpatterns:  path('', views.home, name='home')
Class-based views
    1. Add an import:  from other_app.views import Home
    2. Add a URL to urlpatterns:  path('', Home.as_view(), name='home')
Including another URLconf
    1. Import the include() function: from django.urls import include, path
    2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
"""
from django.contrib.staticfiles.urls import static, staticfiles_urlpatterns
from django.urls import path, include
from search.search import Search
from sosial import settings

# URL routes: each app's URLconf is mounted under its /api/ prefix; search is
# a single class-based view.
urlpatterns = [
    path('api/account/', include('accounts.urls')),
    path('api/posts/', include('posts.urls')),
    path('api/chanel/', include('chanel.urls')),
    path('api/comment/', include('comment.urls')),
    path('api/like/', include('like.urls')),
    path('api/notify/', include('notify.urls')),
    path('api/upload/', include('files.urls')),
    path('api/search/', Search.as_view()),
]

urlpatterns += staticfiles_urlpatterns()
# static() returns [] unless settings.DEBUG is True, so this single
# unconditional call replaces the previous DEBUG-guarded block, which
# registered the same media route a second time in development.
urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
Exemplo n.º 14
0
# -*- coding: utf-8 -*-
# __author__ = "zok"
# Date: 2019/2/28  Python: 3.7
from search.search import Search
from config import KEY

if __name__ == '__main__':
    # Start the search run with the configured API key.
    Search(KEY).start()
Exemplo n.º 15
0
from fetching_data.fetching_data import Fetch
from utils.utils import Utils
from search.search import Search
import json

if __name__ == '__main__':
    # Load repository settings and prepare the working path.
    UTILS_OBJ = Utils()
    CONFIG = UTILS_OBJ.get_config_file('config.ini')
    OWNER = CONFIG.get('Repository', 'owner')
    REPOSITORY_NAME = CONFIG.get('Repository', 'repository_name')
    UTILS_OBJ.user_path()
    FETCH_OBJ = Fetch()
    SEARCH_KEYWORD = CONFIG.get('Search', 'search_keyword')
    # Convert every repository matching the keyword to CSV.
    for repo in Search.search(SEARCH_KEYWORD):
        data_frame = FETCH_OBJ.json_to_csv_conversion(repo["owner_name"],
                                                      repo["repository_name"])
from indexing.lexicon import Lexicon
from indexing.inverted_index import InvertedIndex
from search.search import Search

# flask app & Api (REST wrapper) with CORS enabled for all origins.
app = Flask(__name__)
api = Api(app)
cors = CORS(app)
app.config['CORS_HEADERS'] = 'Content-Type'

# Indexes: loaded once at startup and shared by all requests.
# len(lexicon) and the barrel size are forwarded to InvertedIndex —
# presumably to shard postings into barrels; confirm in InvertedIndex.
lexicon = Lexicon(config.LEXICON_PATH)
inverted_index = InvertedIndex(config.INVERTED_INDEX_BARRELS_PATH,
                               config.INVERTED_INDEX_BARRELS_TEMP_PATH,
                               len(lexicon), config.INVERTED_INDEX_BARREL_SIZE)
search = Search(lexicon, inverted_index)


# Resource serving the front-end entry page.
class Setup(Resource):
    @cross_origin()
    def get(self):
        # GET -> rendered index.html template (single-page app shell).
        return render_template('index.html')


class Document(Resource):
    def get(self, doc_id):

        doc_id = int(doc_id[-7:])
        batch = doc_id // 64 + 1