-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
crawler.proto
27 lines (23 loc) · 991 Bytes
/
crawler.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
syntax = "proto3";
package crawler;
// Crawler is the web indexer: it starts from a base URL and discovers links.
service Crawler {
  // Scan starts a crawl and reports on its progress.
  // NOTE(review): the original comment says it "sends feedback async as
  // links found", but the return type is a unary ScanReply, not
  // `stream ScanReply` — confirm whether server streaming was intended.
  rpc Scan (ScanRequest) returns (ScanReply) {}

  // Crawl waits for all links to be found before returning.
  // NOTE(review): Scan and Crawl share ScanRequest/ScanReply; proto best
  // practice prefers per-method Request/Response messages so the two RPCs
  // can evolve independently — consider this for a future revision.
  rpc Crawl (ScanRequest) returns (ScanReply) {}
}
// ScanReply is the basic reply for crawl/scan RPCs.
message ScanReply {
  // Human-readable message describing the scan outcome.
  string message = 1;
}
// ScanRequest carries the parameters for a generic crawl/scan run.
message ScanRequest {
  // Base URL to start crawling/indexing pages from.
  string url = 1;

  // User id or identifier used to track crawl subscriptions.
  uint32 id = 2;

  // When true, ignore the site's robots.txt rules.
  bool norobots = 3;

  // User-Agent header value to send when crawling.
  string agent = 4;

  // Allow crawling into subdomains of the base URL.
  bool subdomains = 5;

  // Allow crawling across all top-level-domain extensions.
  bool tld = 6;

  // Proxy to connect through, if set.
  string proxy = 7;

  // Extend the crawl with links discovered via the sitemap.
  bool sitemap = 8;

  // Delay throttle applied between link requests.
  // NOTE(review): units are not stated — presumably milliseconds; confirm,
  // and consider renaming to `delay_ms` in a future revision (a field
  // rename is wire-safe but breaks generated code and JSON keys).
  uint64 delay = 9;
}