From b2566452a9a15a4ffe8cc724bdfcd379571eb6e9 Mon Sep 17 00:00:00 2001
From: "Stephen (Alex) Wallen" <wallenstephen@outlook.com>
Date: Wed, 29 May 2024 20:55:16 -1000
Subject: [PATCH] refactor: entrypoint of script

---
 sherlock/sherlock.py | 431 +++++++++++++++++++++++--------------------
 1 file changed, 231 insertions(+), 200 deletions(-)

diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py
index cbd258667..bd1a531a5 100644
--- a/sherlock/sherlock.py
+++ b/sherlock/sherlock.py
@@ -158,7 +158,17 @@ def sherlock(
     username,
     site_data,
     query_notify,
-    tor=False,
+    tor:bool=False,
+    no_color:bool=False,
+    output=None,
+    folderoutput=None,
+    local=False,
+    nsfw=False,
+    json_file=None,
+    site_list=[],
+    print_all=False,
+    print_found=True,
+    xlsx=False,
     unique_tor=False,
     proxy=None,
     timeout=60,
@@ -194,6 +204,206 @@ def sherlock(
                        there was an HTTP error when checking for existence.
     """
 
+    # Argument check
+    # TODO regex check on args.proxy
+    if tor and (proxy is not None):
+        raise Exception("Tor and Proxy cannot be set at the same time.")
+
+    # Make prompts
+    if proxy is not None:
+        print("Using the proxy: " + proxy)
+
+    if tor or unique_tor:
+        print("Using Tor to make requests")
+
+        print(
+            "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors."
+        )
+
+    if no_color:
+        # Disable color output.
+        init(strip=True, convert=False)
+    else:
+        # Enable color output.
+        init(autoreset=True)
+
+    # Check if both output methods are entered as input.
+    if output is not None and folderoutput is not None:
+        print("You can only use one of the output methods.")
+        sys.exit(1)
+
+    # Check validity for single username output.
+    if output is not None and len(username) != 1:
+        print("You can only use --output with a single username")
+        sys.exit(1)
+
+    # Create object with all information about sites we are aware of.
+    try:
+        if local:
+            sites = SitesInformation(
+                os.path.join(os.path.dirname(__file__), "resources/data.json")
+            )
+        else:
+            sites = SitesInformation(json_file)
+    except Exception as error:
+        print(f"ERROR:  {error}")
+        sys.exit(1)
+
+    if not nsfw:
+        sites.remove_nsfw_sites(do_not_remove=site_list)
+
+    # Create original dictionary from SitesInformation() object.
+    # Eventually, the rest of the code will be updated to use the new object
+    # directly, but this will glue the two pieces together.
+    site_data_all = {site.name: site.information for site in sites}
+    if site_list == []:
+        # Not desired to look at a sub-set of sites
+        site_data = site_data_all
+    else:
+        # User desires to selectively run queries on a sub-set of the site list.
+        # Make sure that the sites are supported & build up pruned site database.
+        site_data = {}
+        site_missing = []
+        for site in site_list:
+            counter = 0
+            for existing_site in site_data_all:
+                if site.lower() == existing_site.lower():
+                    site_data[existing_site] = site_data_all[existing_site]
+                    counter += 1
+            if counter == 0:
+                # Build up list of sites not supported for future error message.
+                site_missing.append(f"'{site}'")
+
+        if site_missing:
+            print(f"Error: Desired sites not found: {', '.join(site_missing)}.")
+
+        if not site_data:
+            sys.exit(1)
+
+    # Run report on all specified users.
+    all_usernames = []
+    for username in username:
+        if check_for_parameter(username):
+            for name in multiple_usernames(username):
+                all_usernames.append(name)
+        else:
+            all_usernames.append(username)
+    for username in all_usernames:
+        results = sherlock(
+            username,
+            site_data,
+            query_notify,
+            tor=tor,
+            unique_tor=unique_tor,
+            proxy=proxy,
+            timeout=timeout,
+        )
+
+        if output:
+            result_file = output
+        elif folderoutput:
+            # The usernames results should be stored in a targeted folder.
+            # If the folder doesn't exist, create it first
+            os.makedirs(folderoutput, exist_ok=True)
+            result_file = os.path.join(folderoutput, f"{username}.txt")
+        else:
+            result_file = f"{username}.txt"
+
+        with open(result_file, "w", encoding="utf-8") as file:
+            exists_counter = 0
+            for website_name in results:
+                dictionary = results[website_name]
+                if dictionary.get("status").status == QueryStatus.CLAIMED:
+                    exists_counter += 1
+                    file.write(dictionary["url_user"] + "\n")
+            file.write(f"Total Websites Username Detected On : {exists_counter}\n")
+
+        if csv:
+            result_file = f"{username}.csv"
+            if folderoutput:
+                # The usernames results should be stored in a targeted folder.
+                # If the folder doesn't exist, create it first
+                os.makedirs(folderoutput, exist_ok=True)
+                result_file = os.path.join(folderoutput, result_file)
+
+            with open(result_file, "w", newline="", encoding="utf-8") as csv_report:
+                writer = csv.writer(csv_report)
+                writer.writerow(
+                    [
+                        "username",
+                        "name",
+                        "url_main",
+                        "url_user",
+                        "exists",
+                        "http_status",
+                        "response_time_s",
+                    ]
+                )
+                for site in results:
+                    if (
+                        print_found
+                        and not print_all
+                        and results[site]["status"].status != QueryStatus.CLAIMED
+                    ):
+                        continue
+
+                    response_time_s = results[site]["status"].query_time
+                    if response_time_s is None:
+                        response_time_s = ""
+                    writer.writerow(
+                        [
+                            username,
+                            site,
+                            results[site]["url_main"],
+                            results[site]["url_user"],
+                            str(results[site]["status"].status),
+                            results[site]["http_status"],
+                            response_time_s,
+                        ]
+                    )
+        if xlsx:
+            usernames = []
+            names = []
+            url_main = []
+            url_user = []
+            exists = []
+            http_status = []
+            response_time_s = []
+
+            for site in results:
+                if (
+                    print_found
+                    and not print_all
+                    and results[site]["status"].status != QueryStatus.CLAIMED
+                ):
+                    continue
+
+                if response_time_s is None:
+                    response_time_s.append("")
+                else:
+                    response_time_s.append(results[site]["status"].query_time)
+                usernames.append(username)
+                names.append(site)
+                url_main.append(results[site]["url_main"])
+                url_user.append(results[site]["url_user"])
+                exists.append(str(results[site]["status"].status))
+                http_status.append(results[site]["http_status"])
+
+            DataFrame = pd.DataFrame(
+                {
+                    "username": usernames,
+                    "name": names,
+                    "url_main": url_main,
+                    "url_user": url_user,
+                    "exists": exists,
+                    "http_status": http_status,
+                    "response_time_s": response_time_s,
+                }
+            )
+            DataFrame.to_excel(f"{username}.xlsx", sheet_name="sheet1", index=False)
+
+        print()
+
     # Notify caller that we are starting the query.
     query_notify.start(username)
     # Create session based on request methodology
@@ -685,212 +895,33 @@ def main():
     except Exception as error:
         print(f"A problem occurred while checking for an update: {error}")
 
-    # Argument check
-    # TODO regex check on args.proxy
-    if args.tor and (args.proxy is not None):
-        raise Exception("Tor and Proxy cannot be set at the same time.")
-
-    # Make prompts
-    if args.proxy is not None:
-        print("Using the proxy: " + args.proxy)
-
-    if args.tor or args.unique_tor:
-        print("Using Tor to make requests")
-
-        print(
-            "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors."
-        )
-
-    if args.no_color:
-        # Disable color output.
-        init(strip=True, convert=False)
-    else:
-        # Enable color output.
-        init(autoreset=True)
-
-    # Check if both output methods are entered as input.
-    if args.output is not None and args.folderoutput is not None:
-        print("You can only use one of the output methods.")
-        sys.exit(1)
-
-    # Check validity for single username output.
-    if args.output is not None and len(args.username) != 1:
-        print("You can only use --output with a single username")
-        sys.exit(1)
-
-    # Create object with all information about sites we are aware of.
-    try:
-        if args.local:
-            sites = SitesInformation(
-                os.path.join(os.path.dirname(__file__), "resources/data.json")
-            )
-        else:
-            sites = SitesInformation(args.json_file)
-    except Exception as error:
-        print(f"ERROR:  {error}")
-        sys.exit(1)
-
-    if not args.nsfw:
-        sites.remove_nsfw_sites(do_not_remove=args.site_list)
-
-    # Create original dictionary from SitesInformation() object.
-    # Eventually, the rest of the code will be updated to use the new object
-    # directly, but this will glue the two pieces together.
-    site_data_all = {site.name: site.information for site in sites}
-    if args.site_list == []:
-        # Not desired to look at a sub-set of sites
-        site_data = site_data_all
-    else:
-        # User desires to selectively run queries on a sub-set of the site list.
-        # Make sure that the sites are supported & build up pruned site database.
-        site_data = {}
-        site_missing = []
-        for site in args.site_list:
-            counter = 0
-            for existing_site in site_data_all:
-                if site.lower() == existing_site.lower():
-                    site_data[existing_site] = site_data_all[existing_site]
-                    counter += 1
-            if counter == 0:
-                # Build up list of sites not supported for future error message.
-                site_missing.append(f"'{site}'")
-
-        if site_missing:
-            print(f"Error: Desired sites not found: {', '.join(site_missing)}.")
-
-        if not site_data:
-            sys.exit(1)
-
     # Create notify object for query results.
     query_notify = QueryNotifyPrint(
         result=None, verbose=args.verbose, print_all=args.print_all, browse=args.browse
     )
 
-    # Run report on all specified users.
-    all_usernames = []
-    for username in args.username:
-        if check_for_parameter(username):
-            for name in multiple_usernames(username):
-                all_usernames.append(name)
-        else:
-            all_usernames.append(username)
-    for username in all_usernames:
-        results = sherlock(
-            username,
-            site_data,
-            query_notify,
-            tor=args.tor,
-            unique_tor=args.unique_tor,
-            proxy=args.proxy,
-            timeout=args.timeout,
-        )
-
-        if args.output:
-            result_file = args.output
-        elif args.folderoutput:
-            # The usernames results should be stored in a targeted folder.
-            # If the folder doesn't exist, create it first
-            os.makedirs(args.folderoutput, exist_ok=True)
-            result_file = os.path.join(args.folderoutput, f"{username}.txt")
-        else:
-            result_file = f"{username}.txt"
-
-        with open(result_file, "w", encoding="utf-8") as file:
-            exists_counter = 0
-            for website_name in results:
-                dictionary = results[website_name]
-                if dictionary.get("status").status == QueryStatus.CLAIMED:
-                    exists_counter += 1
-                    file.write(dictionary["url_user"] + "\n")
-            file.write(f"Total Websites Username Detected On : {exists_counter}\n")
-
-        if args.csv:
-            result_file = f"{username}.csv"
-            if args.folderoutput:
-                # The usernames results should be stored in a targeted folder.
-                # If the folder doesn't exist, create it first
-                os.makedirs(args.folderoutput, exist_ok=True)
-                result_file = os.path.join(args.folderoutput, result_file)
-
-            with open(result_file, "w", newline="", encoding="utf-8") as csv_report:
-                writer = csv.writer(csv_report)
-                writer.writerow(
-                    [
-                        "username",
-                        "name",
-                        "url_main",
-                        "url_user",
-                        "exists",
-                        "http_status",
-                        "response_time_s",
-                    ]
-                )
-                for site in results:
-                    if (
-                        args.print_found
-                        and not args.print_all
-                        and results[site]["status"].status != QueryStatus.CLAIMED
-                    ):
-                        continue
-
-                    response_time_s = results[site]["status"].query_time
-                    if response_time_s is None:
-                        response_time_s = ""
-                    writer.writerow(
-                        [
-                            username,
-                            site,
-                            results[site]["url_main"],
-                            results[site]["url_user"],
-                            str(results[site]["status"].status),
-                            results[site]["http_status"],
-                            response_time_s,
-                        ]
-                    )
-        if args.xlsx:
-            usernames = []
-            names = []
-            url_main = []
-            url_user = []
-            exists = []
-            http_status = []
-            response_time_s = []
-
-            for site in results:
-                if (
-                    args.print_found
-                    and not args.print_all
-                    and results[site]["status"].status != QueryStatus.CLAIMED
-                ):
-                    continue
-
-                if response_time_s is None:
-                    response_time_s.append("")
-                else:
-                    response_time_s.append(results[site]["status"].query_time)
-                usernames.append(username)
-                names.append(site)
-                url_main.append(results[site]["url_main"])
-                url_user.append(results[site]["url_user"])
-                exists.append(str(results[site]["status"].status))
-                http_status.append(results[site]["http_status"])
-
-            DataFrame = pd.DataFrame(
-                {
-                    "username": usernames,
-                    "name": names,
-                    "url_main": url_main,
-                    "url_user": url_user,
-                    "exists": exists,
-                    "http_status": http_status,
-                    "response_time_s": response_time_s,
-                }
-            )
-            DataFrame.to_excel(f"{username}.xlsx", sheet_name="sheet1", index=False)
+    # Run Sherlock analysis.
+    sherlock(
+        args.username,
+        None,
+        query_notify,
+        tor=args.tor,
+        no_color=args.no_color,
+        output=args.output,
+        folderoutput=args.folderoutput,
+        local=args.local,
+        nsfw=args.nsfw,
+        json_file=args.json_file,
+        site_list=args.site_list,
+        print_all=args.print_all,
+        print_found=args.print_found,
+        xlsx=args.xlsx,
+        unique_tor=args.unique_tor,
+        proxy=args.proxy,
+        timeout=args.timeout,
+    )
 
-        print()
     query_notify.finish()
 
-
 if __name__ == "__main__":
     main()