-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.cpp
99 lines (80 loc) · 2.77 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <list>
#include "src/word_treatment.h"
#include "src/vocabulary.h"
#include "src/list_database.h"
#include "src/vector.h"
#include "src/ranking.h"
// Document name (an empty string) under which main() registers the words of a
// user query in the per-query Vocabulary, and which it later passes to tf()
// when building the query vector.
#define DOC_QUERY ""
// main() below refers to standard-library names (cout, vector, string, ...)
// without the std:: qualifier.
using namespace std;
int main() {
cout << "Iniciando maquina de busca..." << endl;
vector <string> file_list = requestArchievs("database");
Vocabulary vocabulary(file_list.size());
for(int i=0; i<file_list.size(); i++) {
cout << "\rLendo arquivos: " << i+1 << "/" << file_list.size();
ifstream file(file_list[i]);
if(file.is_open()) {
string word;
while(file >> word) {
treat(word);
vocabulary.insert(word, file_list[i]);
}
file.close();
} else {
cerr << "Erro ao tentar abrir arquivo: " << file_list[i] << endl;
}
}
cout << endl;
vector <string> words_list = vocabulary.get_words();
cout << "Configuracoes finais..." << endl;
map <string, Vector> docs_coord;
for(int i=0; i<file_list.size(); i++) {
cerr << "\rProcessando: " << (i+1)*100/file_list.size() << "%";
string doc = file_list[i];
Vector doc_vector;
for(auto &word: words_list) {
doc_vector.insert_coord(word, vocabulary.tf(word, doc) * vocabulary.idf(word));
}
docs_coord[doc] = doc_vector;
}
cout << endl;
while(1) {
setbuf(stdin, NULL);
string query;
cout << endl << "O que deseja pesquisar?\n> ";
getline(cin, query);
cout << endl << "Buscando...\n" << endl;
vector <string> query_words = split(query, " ");
Vocabulary query_vocabulary(query_words.size());
for(int i=0; i<query_words.size(); i++) {
treat(query_words[i]);
query_vocabulary.insert(query_words[i], DOC_QUERY);
}
map <string, Vector> query_coord;
Vector query_vector;
for(auto &word: words_list) {
query_vector.insert_coord(word, query_vocabulary.tf(word, DOC_QUERY) * vocabulary.idf(word));
}
list <ranking_cell> ranking;
for(auto &doc: file_list) {
float similarity = docs_coord[doc].cos(query_vector);
if(similarity > 0) {
ranking.push_back(ranking_cell(doc, similarity));
}
}
ranking.sort();
cout << "Encontramos " << ranking.size() << " resultados para a sua busca!" << endl;
if(ranking.size()) {
cout << "Arquivos relacionados a sua pesquisa:" << endl;
for(auto it=ranking.rbegin(); it!=ranking.rend(); it++) {
cout << '\t' << *it << endl;
}
}
cout << endl;
}
return 0;
}