From 198798d28bec5f3a60232d7cca545102eb80caf5 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 6 May 2024 00:09:00 -0400 Subject: [PATCH 1/8] Add basic schema --- sherlock/resources/data.json | 11 ++---- sherlock/resources/data.schema.json | 60 +++++++++++++++++++++++++++++ sherlock/sites.py | 5 +++ site_list.py | 8 +++- 4 files changed, 75 insertions(+), 9 deletions(-) create mode 100644 sherlock/resources/data.schema.json diff --git a/sherlock/resources/data.json b/sherlock/resources/data.json index d698d5261..31c29a3e7 100644 --- a/sherlock/resources/data.json +++ b/sherlock/resources/data.json @@ -1,4 +1,5 @@ { + "$schema": "data.schema.json", "1337x": { "errorMsg": [ "Error something went wrong.", @@ -479,8 +480,7 @@ "url": "https://codeforces.com/profile/{}", "urlMain": "https://codeforces.com/", "urlProbe": "https://codeforces.com/api/user.info?handles={}", - "username_claimed": "tourist", - "username_unclaimed": "noonewouldeverusethis7" + "username_claimed": "tourist" }, "Codepen": { "errorType": "status_code", @@ -1072,8 +1072,7 @@ "errorUrl": "https://irc-galleria.net/users/search?username={}", "url": "https://irc-galleria.net/user/{}", "urlMain": "https://irc-galleria.net/", - "username_claimed": "appas", - "username_unclaimed": "noonewouldeverusethis77" + "username_claimed": "appas" }, "Icons8 Community": { "errorType": "status_code", @@ -1160,7 +1159,6 @@ }, "Jimdo": { "errorType": "status_code", - "noPeriod": "True", "regexCheck": "^[a-zA-Z0-9@_-]$", "url": "https://{}.jimdosite.com", "urlMain": "https://jimdosite.com/", @@ -1334,8 +1332,7 @@ "url": "https://monkeytype.com/profile/{}", "urlMain": "https://monkeytype.com/", "urlProbe": "https://api.monkeytype.com/users/{}/profile", - "username_claimed": "Lost_Arrow", - "username_unclaimed": "noonewouldeverusethis7" + "username_claimed": "Lost_Arrow" }, "Motherless": { "errorMsg": "no longer a member", diff --git a/sherlock/resources/data.schema.json b/sherlock/resources/data.schema.json new file mode 100644 index 000000000..5f0854106 --- /dev/null +++ b/sherlock/resources/data.schema.json @@ -0,0 +1,60 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Sherlock Targets", + "description": "Social media target to probe for existence of usernames", + "type": "object", + "patternProperties": { + "^(?!\\$).*?$": { + "type": "object", + "required": [ "url", "urlMain", "errorType", "username_claimed" ], + "properties": { + "url": { "type": "string" }, + "urlMain": { "type": "string" }, + "urlProbe": { "type": "string" }, + "username_claimed": { "type": "string" }, + "regexCheck": { "type": "string" }, + "isNSFW": { "type": "boolean" }, + "headers": { "type": "object" }, + "request_payload": { "type": "object" }, + "tags": { + "oneOf": [ + { + "type": "string", + "enum": [ "adult", "gaming" ] + }, + { + "type": "array", + "items": { + "type": "string", + "enum": [ "adult", "gaming" ] + } + } + ] + }, + "request_method": { + "type": "string", + "enum": [ "GET", "POST", "HEAD" ] + }, + "errorType": { + "type": "string", + "enum": [ "message", "response_url", "status_code" ] + }, + "errorMsg": { + "oneOf": [ + { "type": "string" }, + { "type": "array", "items": { "type": "string" } } + ] + }, + "errorCode": { + "oneOf": [ + { "type": "integer" }, + { "type": "array", "items": { "type": "integer" } } + ] + }, + "errorUrl": { "type": "string" }, + "response_url": { "type": "string" } + }, + "additionalProperties": false + } + } +} diff --git a/sherlock/sites.py b/sherlock/sites.py index 9bef100ce..009a05c12 100644 --- a/sherlock/sites.py +++ b/sherlock/sites.py @@ -152,6 +152,11 @@ def __init__(self, data_file_path=None): raise FileNotFoundError(f"Problem while attempting to access " f"data file '{data_file_path}'." ) + + try: + site_data.pop('$schema') + except: + pass self.sites = {} diff --git a/site_list.py b/site_list.py index 58a9d681c..b66010dca 100644 --- a/site_list.py +++ b/site_list.py @@ -5,10 +5,14 @@ # Read the data.json file with open("sherlock/resources/data.json", "r", encoding="utf-8") as data_file: - data = json.load(data_file) + data: dict = json.load(data_file) + +# Removes schema-specific keywords for proper processing +social_networks: dict = dict(data) +social_networks.pop('$schema') # Sort the social networks in alphanumeric order -social_networks = sorted(data.items()) +social_networks: list = sorted(social_networks.items()) # Write the list of supported sites to sites.md with open("sites.md", "w") as site_file: From 2da7195ba8e835b1e4e97455da4378668176f837 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 6 May 2024 01:13:20 -0400 Subject: [PATCH 2/8] Add PUT request_method --- sherlock/resources/data.schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock/resources/data.schema.json b/sherlock/resources/data.schema.json index 5f0854106..931255dee 100644 --- a/sherlock/resources/data.schema.json +++ b/sherlock/resources/data.schema.json @@ -33,7 +33,7 @@ }, "request_method": { "type": "string", - "enum": [ "GET", "POST", "HEAD" ] + "enum": [ "GET", "POST", "HEAD", "PUT" ] }, "errorType": { "type": "string", From 4b2f8ad2d074d9546108bf8aeddabc55ba61080f Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 6 May 2024 16:57:14 -0400 Subject: [PATCH 3/8] Bring comments into compliance with schema --- sherlock/resources/data.json | 4 ++-- sherlock/resources/data.schema.json | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/sherlock/resources/data.json b/sherlock/resources/data.json index 31c29a3e7..1c1c28095 100644 --- a/sherlock/resources/data.json +++ b/sherlock/resources/data.json @@ -977,7 +977,7 @@ "username_claimed": "naveennamani877" }, "HackerNews": { - "::::README::::": "First errMsg invalid, second errMsg rate limited. Not ideal. Adjust for better rate limit filtering.", + "__comment__": "First errMsg invalid, second errMsg rate limited. Not ideal. Adjust for better rate limit filtering.", "errorMsg": [ "No such user.", "Sorry." @@ -2265,7 +2265,7 @@ "username_claimed": "blue" }, "YandexMusic": { - "::::README::::": "The first and third errorMsg relate to geo-restrictions and bot detection/captchas.", + "__comment__": "The first and third errorMsg relate to geo-restrictions and bot detection/captchas.", "errorMsg": [ "\u041e\u0448\u0438\u0431\u043a\u0430 404", " Date: Mon, 6 May 2024 18:37:21 -0400 Subject: [PATCH 4/8] Prevent mixing of error values --- sherlock/resources/data.schema.json | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/sherlock/resources/data.schema.json b/sherlock/resources/data.schema.json index a68399a93..a3f270dde 100644 --- a/sherlock/resources/data.schema.json +++ b/sherlock/resources/data.schema.json @@ -18,7 +18,7 @@ "request_payload": { "type": "object" }, "__comment__": { "type": "string", - "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock.", + "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock." }, "tags": { "oneOf": [ @@ -58,6 +58,17 @@ "errorUrl": { "type": "string" }, "response_url": { "type": "string" } }, + "dependencies": { + "errorMsg": { + "properties" : { "errorType": { "const": "message" } } + }, + "errorUrl": { + "properties": { "errorType": { "const": "response_url" } } + }, + "errorCode": { + "properties": { "errorType": { "const": "status_code" } } + } + }, "additionalProperties": false } } From 80d0434bb361072f5863f80beddf9d94f57fcf93 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 6 May 2024 18:49:48 -0400 Subject: [PATCH 5/8] Require keys based on errorType value --- sherlock/resources/data.schema.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sherlock/resources/data.schema.json b/sherlock/resources/data.schema.json index a3f270dde..4e4a22291 100644 --- a/sherlock/resources/data.schema.json +++ b/sherlock/resources/data.schema.json @@ -69,6 +69,12 @@ "properties": { "errorType": { "const": "status_code" } } } }, + "if": { "properties": { "errorType": { "const": "message" } } }, + "then": { "required": [ "errorMsg" ] }, + "else": { + "if": { "properties": { "errorType": { "const": "response_url" } } }, + "then": { "required": [ "errorUrl" ] } + }, "additionalProperties": false } } From e4cbd3d8ad8b0ccb67891160983b28b72fed4221 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 6 May 2024 18:56:53 -0400 Subject: [PATCH 6/8] Add target top-level hint --- sherlock/resources/data.schema.json | 1 + 1 file changed, 1 insertion(+) diff --git a/sherlock/resources/data.schema.json b/sherlock/resources/data.schema.json index 4e4a22291..62418722c 100644 --- a/sherlock/resources/data.schema.json +++ b/sherlock/resources/data.schema.json @@ -6,6 +6,7 @@ "patternProperties": { "^(?!\\$).*?$": { "type": "object", + "description": "User-friendly target name", "required": [ "url", "urlMain", "errorType", "username_claimed" ], "properties": { "url": { "type": "string" }, From 0446779a3c14f93a9491f1be7b29a73d03c29c06 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 6 May 2024 19:25:38 -0400 Subject: [PATCH 7/8] Simplify schema with defs --- sherlock/resources/data.schema.json | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/sherlock/resources/data.schema.json b/sherlock/resources/data.schema.json index 62418722c..f75891347 100644 --- a/sherlock/resources/data.schema.json +++ b/sherlock/resources/data.schema.json @@ -23,17 +23,8 @@ }, "tags": { "oneOf": [ - { - "type": "string", - "enum": [ "adult", "gaming" ] - }, - { - "type": "array", - "items": { - "type": "string", - "enum": [ "adult", "gaming" ] - } - } + { "$ref": "#/$defs/tag" }, + { "type": "array", "items": { "$ref": "#/$defs/tag" } } ] }, "request_method": { @@ -78,5 +69,8 @@ }, "additionalProperties": false } + }, + "$defs": { + "tag": { "type": "string", "enum": [ "adult", "gaming" ] } } } From d118c1c43cb858109b40b260798346d2af00efdf Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 6 May 2024 19:58:40 -0400 Subject: [PATCH 8/8] Disallow other $ keys --- sherlock/resources/data.schema.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sherlock/resources/data.schema.json b/sherlock/resources/data.schema.json index f75891347..4453500c0 100644 --- a/sherlock/resources/data.schema.json +++ b/sherlock/resources/data.schema.json @@ -3,6 +3,9 @@ "title": "Sherlock Targets", "description": "Social media target to probe for existence of usernames", "type": "object", + "properties": { + "$schema": { "type": "string" } + }, "patternProperties": { "^(?!\\$).*?$": { "type": "object", @@ -70,6 +73,7 @@ "additionalProperties": false } }, + "additionalProperties": false, "$defs": { "tag": { "type": "string", "enum": [ "adult", "gaming" ] } }