From b2566452a9a15a4ffe8cc724bdfcd379571eb6e9 Mon Sep 17 00:00:00 2001 From: "Stephen (Alex) Wallen" Date: Wed, 29 May 2024 20:55:16 -1000 Subject: [PATCH] refactor: entrypoint of script --- sherlock/sherlock.py | 431 +++++++++++++++++++++++-------------------- 1 file changed, 231 insertions(+), 200 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index cbd258667..bd1a531a5 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -158,7 +158,17 @@ def sherlock( username, site_data, query_notify, - tor=False, + tor:bool=False, + no_color:bool=False, + output=None, + folderoutput=None, + local=False, + nsfw=False, + json_file=None, + site_list=[], + print_all=False, + print_found=True, + xlsx=False, unique_tor=False, proxy=None, timeout=60, @@ -194,6 +204,206 @@ def sherlock( there was an HTTP error when checking for existence. """ + # Argument check + # TODO regex check on args.proxy + if tor and (proxy is not None): + raise Exception("Tor and Proxy cannot be set at the same time.") + + # Make prompts + if proxy is not None: + print("Using the proxy: " + proxy) + + if tor or unique_tor: + print("Using Tor to make requests") + + print( + "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors." + ) + + if no_color: + # Disable color output. + init(strip=True, convert=False) + else: + # Enable color output. + init(autoreset=True) + + # Check if both output methods are entered as input. + if output is not None and folderoutput is not None: + print("You can only use one of the output methods.") + sys.exit(1) + + # Check validity for single username output. + if output is not None and len(username) != 1: + print("You can only use --output with a single username") + sys.exit(1) + + # Create object with all information about sites we are aware of. + try: + if local: + sites = SitesInformation( + os.path.join(os.path.dirname(__file__), "resources/data.json") + ) + else: + sites = SitesInformation(json_file) + except Exception as error: + print(f"ERROR: {error}") + sys.exit(1) + + if not nsfw: + sites.remove_nsfw_sites(do_not_remove=site_list) + + # Create original dictionary from SitesInformation() object. + # Eventually, the rest of the code will be updated to use the new object + # directly, but this will glue the two pieces together. + site_data_all = {site.name: site.information for site in sites} + if site_list == []: + # Not desired to look at a sub-set of sites + site_data = site_data_all + else: + # User desires to selectively run queries on a sub-set of the site list. + # Make sure that the sites are supported & build up pruned site database. + site_data = {} + site_missing = [] + for site in site_list: + counter = 0 + for existing_site in site_data_all: + if site.lower() == existing_site.lower(): + site_data[existing_site] = site_data_all[existing_site] + counter += 1 + if counter == 0: + # Build up list of sites not supported for future error message. + site_missing.append(f"'{site}'") + + if site_missing: + print(f"Error: Desired sites not found: {', '.join(site_missing)}.") + + if not site_data: + sys.exit(1) + + # Run report on all specified users. + all_usernames = [] + for username in username: + if check_for_parameter(username): + for name in multiple_usernames(username): + all_usernames.append(name) + else: + all_usernames.append(username) + for username in all_usernames: + results = sherlock( + username, + site_data, + query_notify, + tor=tor, + unique_tor=unique_tor, + proxy=proxy, + timeout=timeout, + ) + + if output: + result_file = output + elif folderoutput: + # The usernames results should be stored in a targeted folder. + # If the folder doesn't exist, create it first + os.makedirs(folderoutput, exist_ok=True) + result_file = os.path.join(folderoutput, f"{username}.txt") + else: + result_file = f"{username}.txt" + + with open(result_file, "w", encoding="utf-8") as file: + exists_counter = 0 + for website_name in results: + dictionary = results[website_name] + if dictionary.get("status").status == QueryStatus.CLAIMED: + exists_counter += 1 + file.write(dictionary["url_user"] + "\n") + file.write(f"Total Websites Username Detected On : {exists_counter}\n") + + if csv: + result_file = f"{username}.csv" + if folderoutput: + # The usernames results should be stored in a targeted folder. + # If the folder doesn't exist, create it first + os.makedirs(folderoutput, exist_ok=True) + result_file = os.path.join(folderoutput, result_file) + + with open(result_file, "w", newline="", encoding="utf-8") as csv_report: + writer = csv.writer(csv_report) + writer.writerow( + [ + "username", + "name", + "url_main", + "url_user", + "exists", + "http_status", + "response_time_s", + ] + ) + for site in results: + if ( + print_found + and not print_all + and results[site]["status"].status != QueryStatus.CLAIMED + ): + continue + + response_time_s = results[site]["status"].query_time + if response_time_s is None: + response_time_s = "" + writer.writerow( + [ + username, + site, + results[site]["url_main"], + results[site]["url_user"], + str(results[site]["status"].status), + results[site]["http_status"], + response_time_s, + ] + ) + if xlsx: + usernames = [] + names = [] + url_main = [] + url_user = [] + exists = [] + http_status = [] + response_time_s = [] + + for site in results: + if ( + print_found + and not print_all + and results[site]["status"].status != QueryStatus.CLAIMED + ): + continue + + if response_time_s is None: + response_time_s.append("") + else: + response_time_s.append(results[site]["status"].query_time) + usernames.append(username) + names.append(site) + url_main.append(results[site]["url_main"]) + url_user.append(results[site]["url_user"]) + exists.append(str(results[site]["status"].status)) + http_status.append(results[site]["http_status"]) + + DataFrame = pd.DataFrame( + { + "username": usernames, + "name": names, + "url_main": url_main, + "url_user": url_user, + "exists": exists, + "http_status": http_status, + "response_time_s": response_time_s, + } + ) + DataFrame.to_excel(f"{username}.xlsx", sheet_name="sheet1", index=False) + + print() + # Notify caller that we are starting the query. query_notify.start(username) # Create session based on request methodology @@ -685,212 +895,33 @@ def main(): except Exception as error: print(f"A problem occurred while checking for an update: {error}") - # Argument check - # TODO regex check on args.proxy - if args.tor and (args.proxy is not None): - raise Exception("Tor and Proxy cannot be set at the same time.") - - # Make prompts - if args.proxy is not None: - print("Using the proxy: " + args.proxy) - - if args.tor or args.unique_tor: - print("Using Tor to make requests") - - print( - "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors." - ) - - if args.no_color: - # Disable color output. - init(strip=True, convert=False) - else: - # Enable color output. - init(autoreset=True) - - # Check if both output methods are entered as input. - if args.output is not None and args.folderoutput is not None: - print("You can only use one of the output methods.") - sys.exit(1) - - # Check validity for single username output. - if args.output is not None and len(args.username) != 1: - print("You can only use --output with a single username") - sys.exit(1) - - # Create object with all information about sites we are aware of. - try: - if args.local: - sites = SitesInformation( - os.path.join(os.path.dirname(__file__), "resources/data.json") - ) - else: - sites = SitesInformation(args.json_file) - except Exception as error: - print(f"ERROR: {error}") - sys.exit(1) - - if not args.nsfw: - sites.remove_nsfw_sites(do_not_remove=args.site_list) - - # Create original dictionary from SitesInformation() object. - # Eventually, the rest of the code will be updated to use the new object - # directly, but this will glue the two pieces together. - site_data_all = {site.name: site.information for site in sites} - if args.site_list == []: - # Not desired to look at a sub-set of sites - site_data = site_data_all - else: - # User desires to selectively run queries on a sub-set of the site list. - # Make sure that the sites are supported & build up pruned site database. - site_data = {} - site_missing = [] - for site in args.site_list: - counter = 0 - for existing_site in site_data_all: - if site.lower() == existing_site.lower(): - site_data[existing_site] = site_data_all[existing_site] - counter += 1 - if counter == 0: - # Build up list of sites not supported for future error message. - site_missing.append(f"'{site}'") - - if site_missing: - print(f"Error: Desired sites not found: {', '.join(site_missing)}.") - - if not site_data: - sys.exit(1) - # Create notify object for query results. query_notify = QueryNotifyPrint( result=None, verbose=args.verbose, print_all=args.print_all, browse=args.browse ) - # Run report on all specified users. - all_usernames = [] - for username in args.username: - if check_for_parameter(username): - for name in multiple_usernames(username): - all_usernames.append(name) - else: - all_usernames.append(username) - for username in all_usernames: - results = sherlock( - username, - site_data, - query_notify, - tor=args.tor, - unique_tor=args.unique_tor, - proxy=args.proxy, - timeout=args.timeout, - ) - - if args.output: - result_file = args.output - elif args.folderoutput: - # The usernames results should be stored in a targeted folder. - # If the folder doesn't exist, create it first - os.makedirs(args.folderoutput, exist_ok=True) - result_file = os.path.join(args.folderoutput, f"{username}.txt") - else: - result_file = f"{username}.txt" - - with open(result_file, "w", encoding="utf-8") as file: - exists_counter = 0 - for website_name in results: - dictionary = results[website_name] - if dictionary.get("status").status == QueryStatus.CLAIMED: - exists_counter += 1 - file.write(dictionary["url_user"] + "\n") - file.write(f"Total Websites Username Detected On : {exists_counter}\n") - - if args.csv: - result_file = f"{username}.csv" - if args.folderoutput: - # The usernames results should be stored in a targeted folder. - # If the folder doesn't exist, create it first - os.makedirs(args.folderoutput, exist_ok=True) - result_file = os.path.join(args.folderoutput, result_file) - - with open(result_file, "w", newline="", encoding="utf-8") as csv_report: - writer = csv.writer(csv_report) - writer.writerow( - [ - "username", - "name", - "url_main", - "url_user", - "exists", - "http_status", - "response_time_s", - ] - ) - for site in results: - if ( - args.print_found - and not args.print_all - and results[site]["status"].status != QueryStatus.CLAIMED - ): - continue - - response_time_s = results[site]["status"].query_time - if response_time_s is None: - response_time_s = "" - writer.writerow( - [ - username, - site, - results[site]["url_main"], - results[site]["url_user"], - str(results[site]["status"].status), - results[site]["http_status"], - response_time_s, - ] - ) - if args.xlsx: - usernames = [] - names = [] - url_main = [] - url_user = [] - exists = [] - http_status = [] - response_time_s = [] - - for site in results: - if ( - args.print_found - and not args.print_all - and results[site]["status"].status != QueryStatus.CLAIMED - ): - continue - - if response_time_s is None: - response_time_s.append("") - else: - response_time_s.append(results[site]["status"].query_time) - usernames.append(username) - names.append(site) - url_main.append(results[site]["url_main"]) - url_user.append(results[site]["url_user"]) - exists.append(str(results[site]["status"].status)) - http_status.append(results[site]["http_status"]) - - DataFrame = pd.DataFrame( - { - "username": usernames, - "name": names, - "url_main": url_main, - "url_user": url_user, - "exists": exists, - "http_status": http_status, - "response_time_s": response_time_s, - } - ) - DataFrame.to_excel(f"{username}.xlsx", sheet_name="sheet1", index=False) + # Run Sherlock analysis. + sherlock( + args.username, + None, + query_notify, + tor=args.tor, + no_color=args.no_color, + output=args.output, + folderoutput=args.folderoutput, + local=args.local, + nsfw=args.nsfw, + json_file=args.json_file, + site_list=args.site_list, + print_all=args.print_all, + print_found=args.print_found, + xlsx=args.xlsx, + unique_tor=args.unique_tor, + proxy=args.proxy, + timeout=args.timeout, + ) - print() query_notify.finish() - if __name__ == "__main__": main()