Skip to content

Commit

Permalink
chore(page): fix subdomain/tld domain handling
Browse files (browse the repository at this point in the history)
  • Loading branch information
j-mendez committed Jul 26, 2023
1 parent c3619fd commit c4cf271
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 13 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "website_crawler"
version = "0.8.8"
version = "0.8.9"
authors = ["Jeff Mendez <[email protected]>"]
edition = "2021"
description = "gRPC tokio based web crawler"
Expand All @@ -12,13 +12,13 @@ categories = ["accessibility", "asynchronous"]
include = ["src/*", "build.rs", "proto/*", "LICENSE", "README.md"]

[dependencies]
tokio = { version = "^1.28.2", features = [ "rt-multi-thread", "macros", "sync", "time", "parking_lot" ] }
tokio = { version = "1.29.1", features = [ "rt-multi-thread", "macros", "sync", "time", "parking_lot" ] }
tokio-stream = "0.1.14"
tonic = { version = "0.9.1" }
tonic = { version = "0.9.2" }
prost = "0.11.3"
prost-types = "0.11.2"
reqwest = { version = "0.11.18", features = ["brotli", "gzip", "native-tls-alpn", "socks", "stream"] }
url = "2.3.1"
reqwest = { version = "0.11.18", features = ["deflate", "brotli", "gzip", "native-tls-alpn", "socks", "stream" ] }
url = "2.4.0"
regex = { version = "^1.5.0", optional = true }
hashbrown = { version = "0.13.2" }
log = "0.4.16"
Expand All @@ -29,7 +29,7 @@ env_logger = "0.9.0"
string_concat = "0.0.1"
sitemap = "0.4.1"
xml-rs = "0.8.4"
compact_str = "0.7.0"
compact_str = "0.7.1"
selectors = "0.24.0"
tendril = "0.4.3"
ahash = "0.8.3"
Expand Down
11 changes: 5 additions & 6 deletions src/packages/spider/page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,12 @@ lazy_static! {
pub fn domain_name(domain: &Url) -> &str {
let b = unsafe { domain.host_str().unwrap_unchecked() };
let b = b.split('.').collect::<Vec<&str>>();
let bsize = b.len();

if b.len() > 2 {
b[1]
} else if b.len() == 2 {
b[0]
} else {
b[b.len() - 2]
if bsize > 0 {
b[bsize - 1]
} else {
""
}
}

Expand Down

0 comments on commit c4cf271

Please sign in to comment.