def search_engine():
    """Interactive console front end.

    Builds an index over the chosen collection (CACM or Stanford), then
    answers boolean/vector queries in a loop until the user types "Stop".
    """
    collection = input("On which collection do you want to make a query ? (cacm/stanford) : ")
    if collection == "cacm":
        building_mode = input("Which type of building do you want to use to make your index ? (bsbi/mapreduce/memory) : ")
        collection_path = input("What is the path of the CACM collection ? ")
        stopwords_path = "CACM/common_words"
        if building_mode == "bsbi":
            index_folder = input("In which folder do you want to create CACM index ? : ")
            index = constructbsbi_index_CACM(collection_path, stopwords_path, index_folder)
        elif building_mode == "mapreduce":
            index = constructmapred_index_CACM(collection_path, stopwords_path)
        elif building_mode == "memory":
            index = constructmemory_index_CACM(collection_path, stopwords_path)
        else:
            raise ValueError("Not a type of index building allowed")
        #### STATISTICS #####
        # half_collection estimates the vocabulary size on half the collection:
        # index.half_collection()
        # Rank/frequency plot, token and vocabulary counts (memory index only):
        # index.rang_freq()
        # print("There are {} tokens in the collection".format(str(index.nb_tokens)))
        # print("There are {} distinct words in the vocabulary".format(str(index.size_voc())))
    elif collection == "stanford":
        stanford_path = input("Path for stanford collection ?")
        index = constructmemory_index_Stanford(stanford_path)
        #### STATISTICS #####
        # index.rang_freq()
        # print("There are {} tokens in the collection".format(str(index.nb_tokens)))
        # print("There are {} distinct words in the vocabulary".format(str(index.size_voc())))
    else:
        raise ValueError("Not a collection allowed")
    type_search = input("Type of search ? \n(boolean/vector) : ")
    user_request = input("Search : ")
    # Query loop: the sentinel "Stop" exits; any other request is answered
    # with the currently selected search mode, then both are re-prompted.
    while True:
        if user_request == "Stop":
            break
        if type_search == "boolean":
            start_time = time.time()
            hits = SearchBoolean(user_request).do_search(index)
            Search.display_docs(hits, type_search)
            print("Results in %s seconds ---" % (time.time() - start_time))
        elif type_search == "vector":
            start_time = time.time()
            hits = SearchVector(user_request).do_search(index, 20)
            Search.display_docs(hits, type_search)
            print("Results in %s seconds ---" % (time.time() - start_time))
        type_search = input("Type of search ? (boolean/vector) : ")
        user_request = input("Search : ")
def main():
    """CLI dispatcher: generate lexicon / forward index / inverted index, or search."""
    parser = argparse.ArgumentParser()
    commands = parser.add_subparsers(dest='subparser')

    lexicon_cmd = commands.add_parser("generate_lexicon")
    lexicon_cmd.add_argument('--b_range', type=str, help="Batches numbers range start and end creating/updating lexicon from. For example 1,3")
    lexicon_cmd.add_argument('--d', type=int, default=0, help="Print demo results.")

    forward_cmd = commands.add_parser("generate_forward_index")
    forward_cmd.add_argument('--b_range', type=str, help="Batches numbers range start and end creating/updating forward index from. For example 1,3")
    forward_cmd.add_argument('--d', type=int, default=0, help="Print demo results.")

    inverted_cmd = commands.add_parser("generate_inverted_index")
    inverted_cmd.add_argument('--b', type=str, help="Forward Index Batches to create inverted_index from. Comma Separated.")
    inverted_cmd.add_argument('--d', type=int, default=0, help="Print demo results.")

    search_cmd = commands.add_parser("search")
    search_cmd.add_argument("--q", type=str, help="Search Query.")

    args = parser.parse_args()
    if args.subparser == 'generate_lexicon':
        # "--b_range 1,3" -> positional batch bounds for the generator.
        bounds = [int(part) for part in args.b_range.split(",")]
        generate_lexicon.main(*bounds, demo=args.d)
    elif args.subparser == 'generate_forward_index':
        bounds = [int(part) for part in args.b_range.split(",")]
        generate_forward_index.main(*bounds, demo=args.d)
    elif args.subparser == 'generate_inverted_index':
        generate_inverted_index.main(args.b.split(','), demo=args.d)
    elif args.subparser == 'search':
        # Load the on-disk indexes and answer a single query.
        lexicon = Lexicon(config.LEXICON_PATH)
        inverted_index = InvertedIndex(config.INVERTED_INDEX_BARRELS_PATH,
                                       config.INVERTED_INDEX_BARRELS_TEMP_PATH,
                                       len(lexicon),
                                       config.INVERTED_INDEX_BARREL_SIZE)
        print(Search(lexicon, inverted_index).search(args.q))
def search(request):
    """Run a line search for the ``q`` GET parameter and render the results.

    Note: ``request.GET.get('q')`` yields None (not '') when ``q`` is absent.
    """
    query = request.GET.get('q')
    results = Search().get_results(query=query, model=Line)
    context = {'query': query, 'results': results}
    return render(request, template_name='search.html', context=context)
def list(request):
    """Render the search list page for the ``q`` GET parameter.

    GET params:
        q: search query string (optional).
        anonymous: when truthy, the search is not scoped to the signed-in
            user's Account.
    """
    # NOTE: the view name shadows the ``list`` builtin, but renaming would
    # break URLconf references, so it is kept.
    query = request.GET.get('q', None)
    anonymous = request.GET.get('anonymous', False)
    owner = None
    if not anonymous:
        owner = Account.get_by_user(request.user)
    # Fix: bind ``search`` before the try block. Previously, if the Search
    # constructor itself raised SearchException, ``search`` was never
    # assigned and render() below crashed with UnboundLocalError.
    search = None
    try:
        search = Search(query, owner)
        search.process()
    except SearchException:
        # Best-effort: an invalid query still renders the page; the template
        # receives the unprocessed (or None) search object.
        pass
    notify_count = Notification.objects.filter(owner__user=request.user, viewed=False).count()
    return render(request, 'search/list.html', {'search': search, 'notify_count': notify_count})
def api_getconnection():
    """api for creating connection

    Authenticates the posted credentials against the GitHub API; on success,
    returns the repositories matching the posted search keyword.
    """
    if rq.method == 'POST':
        user_id = rq.form['user_id']
        password = rq.form['password']
        search_keyword = rq.form['search_keyword']
        response = requests.get('https://api.github.com', auth=(user_id, password))
        status = response.headers['Status']
        if status in ('401 Unauthorized', 'Forbidden'):
            app.logger.info('failed to log in.... Please try again')
            msg = json.dumps({"status": 401, "authorization": "False"})
            # NOTE(review): HTTP 201 on an auth failure looks wrong, but it is
            # kept to avoid breaking existing clients.
            # Fix: mimetype was 'application.json', which is not a valid
            # MIME type; the correct value is 'application/json'.
            res = Response(msg, status=201, mimetype='application/json')
            return res
        else:
            # Dead ``owner_repositories_dict = {}`` pre-assignment removed;
            # the search result is serialized directly.
            owner_repositories = Search.search(search_keyword)
            connection_response = json.dumps({
                "status": 200,
                "authorization": "True",
                "owner_repositories_list": owner_repositories
            })
            app.logger.info('logged in successfully')
            res = Response(connection_response, status=201, mimetype='application/json')
            return res
def conceal(tweet_file, config, endword_index=False):
    # Python 2 code. Conceals the words of ``tweet_file`` by repeatedly
    # calling conceal_step(), accumulating (words, link, first_link_word)
    # tuples until the data words are exhausted.
    #
    # tweet_file: file name under ``tweets_path`` holding the raw tweet text.
    # config: carries x, w, essence_len — presumably keyword index, repeat
    #     count and essence length; confirm against the config definition.
    # endword_index: optional keyword index seeding the first word batch
    #     instead of ``config.x - 1``.
    # Returns: the list of collected (words, link, first_link_word) tuples.
    dicts = dictionaries.load_dictionaries(config)
    print 'keywords (x) = ', config.x
    print 'Essence len = ', config.essence_len
    distillery = Distillery(config.essence_len, dicts.keywords)
    search_engine = Search()
    raw_data_words = open(tweets_path + tweet_file).read().split()
    # Lower-case each raw word, strip punctuation, then expand it through the
    # english dictionary into the keyword(s) to conceal.
    data_words = [
        keyword for word in raw_data_words
        for keyword in dicts.english["".join(
            c for c in word.lower()
            if c not in ('!', '.', ':', ',', '?', '"', '-'))]
    ]
    if endword_index:
        words = [dicts.keywords[endword_index]] * config.w
    else:
        words = [dicts.keywords[config.x - 1]] * config.w
    collected_words = [(words, '', '')]
    stats = WordsStats(config, tweet_file, collected_words)
    try:
        while True:
            # Avoid inserting 3rd link word in data
            # iteration_type = len(collected_words) % 10
            # NOTE(review): iteration type is hard-coded to 1 (always choose a
            # new link word); the modulo-based schedule above is disabled.
            iteration_type = 1
            if iteration_type == 0:
                insert_link_word_in_d = True
                choose_new_link_word = False
            elif iteration_type == 1:
                # Placeholder value; conceal_step() chooses a fresh link word
                # in this mode, so the incoming value is unused.
                first_link_word = 'This string is ignored'
                insert_link_word_in_d = False
                choose_new_link_word = True
            else:
                insert_link_word_in_d = False
                choose_new_link_word = False
            words, link, first_link_word = conceal_step(
                data_words, words, first_link_word, insert_link_word_in_d,
                choose_new_link_word, search_engine, distillery, dicts, stats)
            collected_words.append((
                words,
                link,
                first_link_word,
            ))
            if not data_words:
                break
    except Exception:
        print(traceback.format_exc())
        t, v, tb = sys.exc_info()
        # distillery.browser.close()
        # Python 2 three-expression raise: re-raise with the original traceback.
        raise t, v, tb
    print "collected words are: %s" % collected_words
    return collected_words
def search_results(request):
    '''
    I search for substitute to the product requested.
    '''
    form = RequestForm(request.GET)
    if form.is_valid():
        query = form.cleaned_data['user_request']
    else:
        return render(request, 'search/results.html', {'product': "None"})
    search = Search()
    result_infos = []
    search_product = search.find_product(query)
    if not search_product:
        return render(request, 'search/results.html', {'product': "None"})
    product = search.product_infos(search_product)
    # Fix: the old loop called find_substitute(search_product) once PER
    # category with identical arguments (the loop variable was unused); the
    # duplicate rows it produced were removed by the dedup step below anyway.
    # Calling it once when at least one category exists returns the same
    # final result with a single substitute lookup.
    if product['categories']:
        result_info = search.find_substitute(search_product)
        result_infos.extend(search.result_infos(result_info))
    # Deduplicate, keeping the last occurrence of each entry (unchanged
    # semantics from the original comprehension).
    result_infos = [
        info for pos, info in enumerate(result_infos)
        if info not in result_infos[pos + 1:]
    ]
    return render(request, 'search/results.html', {
        'product': product,
        'results': result_infos
    })
def fileresult():
    """api for file level features

    For every repository matching the posted keyword, kick off its
    JSON-to-CSV conversion, then acknowledge with a JSON status payload.
    """
    if request.method == 'POST':
        keyword = request.form['search_keyword']
        fetcher = Fetch_file()
        for repo in Search.search(keyword):
            fetcher.json_to_csv(repo["owner_name"], repo["repository_name"])
        payload = json.dumps({"status": 200, "state": "files level Csv is being Constructed"})
        return Response(payload, status=201, mimetype='application/json')
async def search( q: str = Query(None, max_length=280), page: Optional[int] = Query( None, ge=eval(cfg["search"]["pg_range"])["ge"], le=eval(cfg["search"]["pg_range"])["le"], ), ): return Search()._query(q, page) # Re-queries and populates database at scheduled time # Use cron expression to set refresh rate @aiocron.crontab(cfg["CRAWLER"]["refresh_rate"])
def test_json_to_csv_conversion(self, mock_search, mock_fetching_data):
    '''testing multiple_repository_to_dataframe'''
    # NOTE(review): ``mock_search`` / ``mock_fetching_data`` are presumably
    # injected by @mock.patch decorators outside this view — confirm; also
    # confirm that ``type(mock).return_value = PropertyMock(...)`` is the
    # intended wiring (it patches the mock's *class*, not the call result).
    mock_res1 = [{'owner_name': 'd3', 'repository_name': 'd3'}]
    type(mock_search).return_value = mock.PropertyMock(
        return_value=mock_res1)
    # Canned repository-level metrics the fetch step is expected to yield;
    # the produced dataframe's columns are checked against these keys below.
    mock_res2 = {
        'repository_name': 'd3',
        'pull_numbers': 17,
        'open_pr_time': 119576.0,
        'open_pull_request': 3,
        'forks_count': 371,
        'commits': 4,
        'changed_files': 4,
        'pushed_time': 54987.0,
        'watchers_count': 6525,
        'open_issue_count': 556,
        'pull_request_acceptance_rate': 66.66666,
        'contributor_acceptance_rate': 20.5,
        'size': 2,
        'changes': 2,
        'state': 'Accepted'
    }
    type(mock_fetching_data).return_value = mock.PropertyMock(
        return_value=mock_res2)
    # NOTE(review): this re-assignment repeats the mock_res1 setup above with
    # an identical value — one of the two blocks looks redundant.
    mock_res3 = [{'owner_name': 'd3', 'repository_name': 'd3'}]
    type(mock_search).return_value = mock.PropertyMock(
        return_value=mock_res3)
    response_push = None
    config = Utils().get_config_file('config.ini')
    search_keyword = config.get('Search', 'search_keyword')
    owner_repositories = Search.search(search_keyword)
    for i in owner_repositories:
        owner_name = i["owner_name"]
        repository_name = i["repository_name"]
        response_push = Fetch().json_to_csv_conversion(
            owner_name, repository_name)
    # Dataframe columns must match the mocked metric keys (dict iteration
    # order, insertion order in Python 3.7+).
    self.assertListEqual(list(response_push.columns), list(mock_res2))
""" Starting point of program... """ __author__ = 'Acko' import os from search.search import Search from utils.postfix_parser import InvalidInput, QuitRequest if __name__ == '__main__': initial_path = raw_input("Unesite putanju do baze: ") Search.print_instruction() print 'Ucitavanje. Molim vas sacekajte...' s = Search(os.path.abspath(initial_path)) while True: try: s.find_expression(raw_input("Unesite reci za pretragu (ili 'QUIT' za izlaz): ")) except InvalidInput: print "Pogresno unet zahtev, pokusajte ponovo." except QuitRequest: break print 'Dovidjenja'
'''Entry point for Search class'''
from search.search import Search
from utils.utils import Utils


def _run():
    # Read the configured keyword, prepare the user workspace, run the search.
    utils = Utils()
    config = utils.get_config_file('config.ini')
    keyword = config.get('Search', 'search_keyword')
    utils.user_path()
    Search().search(search_keyword=keyword)


if __name__ == '__main__':
    _run()
Function views
    1. Add an import: from my_app import views
    2. Add a URL to urlpatterns: path('', views.home, name='home')
Class-based views
    1. Add an import: from other_app.views import Home
    2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
Including another URLconf
    1. Import the include() function: from django.urls import include, path
    2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.contrib.staticfiles.urls import static, staticfiles_urlpatterns
from django.urls import path, include

from search.search import Search
from sosial import settings

# URL table: each API area delegates to its app's URLconf; search is served
# by a single class-based view.
urlpatterns = [
    path('api/account/', include('accounts.urls')),
    path('api/posts/', include('posts.urls')),
    path('api/chanel/', include('chanel.urls')),
    path('api/comment/', include('comment.urls')),
    path('api/like/', include('like.urls')),
    path('api/notify/', include('notify.urls')),
    path('api/upload/', include('files.urls')),
    path('api/search/', Search.as_view()),
]

if settings.DEBUG:
    # Serve media and collected static files directly in development.
    urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
    urlpatterns += staticfiles_urlpatterns()
    # NOTE(review): this repeats the static() mapping added two lines above —
    # the duplicate entry looks redundant; confirm and remove.
    urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
# -*- coding: utf-8 -*-
# __author__ = "zok"
# Date: 2019/2/28 Python: 3.7

from search.search import Search
from config import KEY

if __name__ == '__main__':
    # start: run a search session with the configured API key.
    Search(KEY).start()
from fetching_data.fetching_data import Fetch
from utils.utils import Utils
from search.search import Search
import json

if __name__ == '__main__':
    # Read repository settings, prepare the workspace, then convert the JSON
    # data of every repository matching the configured keyword to CSV.
    utils_obj = Utils()
    config = utils_obj.get_config_file('config.ini')
    owner = config.get('Repository', 'owner')
    repo_setting = config.get('Repository', 'repository_name')
    utils_obj.user_path()
    fetch_obj = Fetch()
    keyword = config.get('Search', 'search_keyword')
    for repo in Search.search(keyword):
        data_frame = fetch_obj.json_to_csv_conversion(
            repo["owner_name"], repo["repository_name"])
from indexing.lexicon import Lexicon
from indexing.inverted_index import InvertedIndex
from search.search import Search

# flask app & Api
app = Flask(__name__)
api = Api(app)
cors = CORS(app)
app.config['CORS_HEADERS'] = 'Content-Type'

# Indexes
# Built once at import time; a single Search instance is shared by requests.
lexicon = Lexicon(config.LEXICON_PATH)
inverted_index = InvertedIndex(config.INVERTED_INDEX_BARRELS_PATH,
                               config.INVERTED_INDEX_BARRELS_TEMP_PATH,
                               len(lexicon),
                               config.INVERTED_INDEX_BARREL_SIZE)
search = Search(lexicon, inverted_index)


# for handling searches
class Setup(Resource):
    # Serves the front-end entry page.
    @cross_origin()
    def get(self):
        return render_template('index.html')


class Document(Resource):
    def get(self, doc_id):
        # Last 7 characters of the id carry the numeric document id; batches
        # hold 64 documents and are numbered from 1.
        doc_id = int(doc_id[-7:])
        batch = doc_id // 64 + 1
        # NOTE(review): this method continues beyond the visible chunk of the
        # file — the remainder is not documented here.