From fba6432f40eb9ed95a6d892a796f93210c5064b5 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Wed, 15 May 2024 23:34:37 -0400 Subject: [PATCH 01/55] Switch to Poetry --- docs/README.md | 8 ++-- docs/pyproj/README.md | 26 +++++----- pyproject.toml | 58 +++++++++++++---------- sherlock/notify.py | 2 +- sherlock/sherlock.py | 8 ++-- sherlock/tests/base.py | 6 +-- sherlock/tests/test_multiple_usernames.py | 1 - 7 files changed, 56 insertions(+), 53 deletions(-) diff --git a/docs/README.md b/docs/README.md index fe3bcafc5..9f3c413c9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -27,11 +27,11 @@ [![PyPI - Version](https://img.shields.io/pypi/v/sherlock-project?logo=PyPi&label=PyPI&color=darkgreen)][ext_pypi] [![Docker Image Version](https://img.shields.io/docker/v/sherlock/sherlock?sort=semver&logo=docker&label=Docker&color=darkgreen)][docs_docker] [![homebrew version](https://img.shields.io/homebrew/v/sherlock?logo=Homebrew&color=darkgreen)][ext_brew] -| Method | Command | Notes | +| | Command | Notes | | - | - | - | -| pypi | `pipx install sherlock-project` | `pip` may be used in place of `pipx` | -| brew | `brew install sherlock` | Community supported | -| docker | `docker pull sherlock/sherlock` | | +| PyPI | `pipx install sherlock-project` | `pip` may be used in place of `pipx` | +| Homebrew | `brew install sherlock` | Community supported | +| Docker | `docker pull sherlock/sherlock` | | ### Alternative guides and methods diff --git a/docs/pyproj/README.md b/docs/pyproj/README.md index 446382f5f..f33358a55 100644 --- a/docs/pyproj/README.md +++ b/docs/pyproj/README.md @@ -2,17 +2,12 @@


- +
- Hunt down social media accounts by username across social networks + Hunt down social media accounts by username across 400+ social networks +

+ Additional documentation can be found at our GitHub repository
- Additional documentation can be found on our GitHub repository -
-

- -

- -

## Usage @@ -36,11 +31,12 @@ To search for more than one user: ```bash $ sherlock user1 user2 user3 ``` +
-## Star History +___ - - - - Sherlock Project Star History Chart - +
+

+ + +

diff --git a/pyproject.toml b/pyproject.toml index 7356c0d11..11228285e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,30 +1,26 @@ [build-system] -requires = ["setuptools >= 61.0"] -build-backend = "setuptools.build_meta" +requires = [ "poetry-core>=1.8.0", "poetry-dynamic-versioning>1.0.0,<2.0.0" ] +build-backend = "poetry_dynamic_versioning.backend" -[project.scripts] -sherlock = "sherlock:main" +[tool.poetry-dynamic-versioning] +enable = true +vcs = "git" -[project.urls] -Homepage = "http://sherlock-project.github.io/" -Repository = "https://github.com/sherlock-project/sherlock.git" -Issues = "https://github.com/sherlock-project/sherlock/issues" - -[project] +[tool.poetry] name = "sherlock-project" +version = "0.0.0" +description = "Hunt down social media accounts by username across social networks" +license = "MIT" authors = [ - { name = "Siddharth Dushantha" } + "Siddharth Dushantha" ] maintainers = [ - { name = "Matheus Felipe" }, - { name = "Sondre Karlsen Dyrnes" }, - { name = "Paul Pfeister" } + "Paul Pfeister", + "Matheus Felipe", + "Sondre Karlsen Dyrnes" ] -description = "Hunt down social media accounts by username across social networks" readme = "docs/pyproj/README.md" -# Do not set license to file. Causes issues with rpm packaging for some reason. 
-license = {text = "MIT"} -dynamic = ["dependencies", "version"] +packages = [ { include = "sherlock" } ] keywords = [ "osint", "reconnaissance", "information gathering" ] classifiers = [ "Development Status :: 5 - Production/Stable", @@ -37,12 +33,24 @@ classifiers = [ "Topic :: Security" ] -[tool.setuptools.dynamic] -dependencies = { file = [ "requirements.txt" ] } -version = { attr = "sherlock.__version__" } -[tool.setuptools] -package-dir = {"" = "sherlock"} +[tool.poetry.urls] +Homepage = "http://sherlock-project.github.io/" +Repository = "https://github.com/sherlock-project/sherlock.git" +Issues = "https://github.com/sherlock-project/sherlock/issues" + +[tool.poetry.dependencies] +python = "^3.8" +certifi = "^2019.6.16" +colorama = "^0.4.1" +PySocks = "^1.7.0" +requests = "^2.22.0" +requests-futures = "^1.0.0" +stem = "^1.8.0" +torrequest = "^0.1.0" +pandas = ">1.0.0" +openpyxl = "^3.0.10" +exrex = "^0.11.0" -[tool.setuptools.package-data] -"*" = ["*.json"] +[tool.poetry.scripts] +sherlock = 'sherlock.sherlock:main' diff --git a/sherlock/notify.py b/sherlock/notify.py index 4af1ff18e..c198fe620 100644 --- a/sherlock/notify.py +++ b/sherlock/notify.py @@ -3,7 +3,7 @@ This module defines the objects for notifying the caller about the results of queries. 
""" -from result import QueryStatus +from sherlock.result import QueryStatus from colorama import Fore, Style import webbrowser diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 3f7a93391..d48791942 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -21,10 +21,10 @@ from requests_futures.sessions import FuturesSession from torrequest import TorRequest -from result import QueryStatus -from result import QueryResult -from notify import QueryNotifyPrint -from sites import SitesInformation +from sherlock.result import QueryStatus +from sherlock.result import QueryResult +from sherlock.notify import QueryNotifyPrint +from sherlock.sites import SitesInformation from colorama import init from argparse import ArgumentTypeError diff --git a/sherlock/tests/base.py b/sherlock/tests/base.py index de958b9db..204c19c0a 100644 --- a/sherlock/tests/base.py +++ b/sherlock/tests/base.py @@ -6,9 +6,9 @@ import os.path import unittest import sherlock -from result import QueryStatus -from notify import QueryNotify -from sites import SitesInformation +from sherlock.result import QueryStatus +from sherlock.notify import QueryNotify +from sherlock.sites import SitesInformation import warnings diff --git a/sherlock/tests/test_multiple_usernames.py b/sherlock/tests/test_multiple_usernames.py index 418110877..3be803330 100644 --- a/sherlock/tests/test_multiple_usernames.py +++ b/sherlock/tests/test_multiple_usernames.py @@ -1,4 +1,3 @@ -import importlib import unittest import sys sys.path.append('../') From de8ebb1577035bfbf44c7a4b3df537d98abba077 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Wed, 15 May 2024 23:53:37 -0400 Subject: [PATCH 02/55] Relocate unit tests --- docs/CONTRIBUTING.md | 2 +- {sherlock/tests => tests}/__init__.py | 0 {sherlock/tests => tests}/all.py | 0 {sherlock/tests => tests}/base.py | 4 ++-- {sherlock/tests => tests}/test_multiple_usernames.py | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) rename {sherlock/tests => 
tests}/__init__.py (100%) rename {sherlock/tests => tests}/all.py (100%) rename {sherlock/tests => tests}/base.py (98%) rename {sherlock/tests => tests}/test_multiple_usernames.py (90%) diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 4f1ff4c32..2d3d6733d 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -68,7 +68,7 @@ Sherlock. This invocation hides the progress text that Sherlock normally outputs, and instead shows the verbose output of the tests. ```console -$ cd sherlock/sherlock +# Assumes pwd is repository root $ python3 -m unittest tests.all --verbose ``` diff --git a/sherlock/tests/__init__.py b/tests/__init__.py similarity index 100% rename from sherlock/tests/__init__.py rename to tests/__init__.py diff --git a/sherlock/tests/all.py b/tests/all.py similarity index 100% rename from sherlock/tests/all.py rename to tests/all.py diff --git a/sherlock/tests/base.py b/tests/base.py similarity index 98% rename from sherlock/tests/base.py rename to tests/base.py index 204c19c0a..2146b88b2 100644 --- a/sherlock/tests/base.py +++ b/tests/base.py @@ -5,7 +5,7 @@ import os import os.path import unittest -import sherlock +from sherlock import sherlock from sherlock.result import QueryStatus from sherlock.notify import QueryNotify from sherlock.sites import SitesInformation @@ -30,7 +30,7 @@ def setUp(self): warnings.simplefilter("ignore", ResourceWarning) # Create object with all information about sites we are aware of. - sites = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../resources/data.json")) + sites = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")) # Create original dictionary from SitesInformation() object. 
# Eventually, the rest of the code will be updated to use the new object diff --git a/sherlock/tests/test_multiple_usernames.py b/tests/test_multiple_usernames.py similarity index 90% rename from sherlock/tests/test_multiple_usernames.py rename to tests/test_multiple_usernames.py index 3be803330..3e8a307ee 100644 --- a/sherlock/tests/test_multiple_usernames.py +++ b/tests/test_multiple_usernames.py @@ -1,7 +1,7 @@ import unittest import sys sys.path.append('../') -import sherlock as sh +from sherlock import sherlock as sh checksymbols = [] checksymbols = ["_", "-", "."] From b2a69b519895a2acc6926164e3e9b707c184dd22 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Thu, 16 May 2024 00:06:49 -0400 Subject: [PATCH 03/55] Remove setup.[cfg|py] --- setup.cfg | 4 ---- setup.py | 8 -------- 2 files changed, 12 deletions(-) delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 8929bbae7..000000000 --- a/setup.cfg +++ /dev/null @@ -1,4 +0,0 @@ -[metadata] -name = Sherlock -author = Sherlock Project -url = http://sherlock-project.github.io/ diff --git a/setup.py b/setup.py deleted file mode 100644 index 434d3f95e..000000000 --- a/setup.py +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env python3 - -"""Sometimes required for rpm spec - particularly when using pyp2rpm -""" - -from setuptools import setup - -setup() From caed51e268132c4e495ef25d1d09039d93eb2c1f Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Thu, 16 May 2024 00:08:00 -0400 Subject: [PATCH 04/55] Remove rpm spec --- .github/CODEOWNERS | 1 - sherlock-project.spec | 71 ------------------------------------------- 2 files changed, 72 deletions(-) delete mode 100644 sherlock-project.spec diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 76001ae9a..e2a358d90 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -8,4 +8,3 @@ /pyproject.toml @ppfeister @sdushantha /setup.cfg @ppfeister @sdushantha /setup.py @ppfeister -/*.spec @ppfeister 
diff --git a/sherlock-project.spec b/sherlock-project.spec deleted file mode 100644 index b07dded1a..000000000 --- a/sherlock-project.spec +++ /dev/null @@ -1,71 +0,0 @@ -# Packager: Paul Pfeister (GitHub @ppfeister) -%global source_ref master - -Name: sherlock-project -Version: 0.14.4 -Release: %autorelease -Summary: Hunt down social media accounts by username across social networks - -License: MIT -URL: https://github.com/sherlock-project/sherlock -Source: %{url}/archive/%{source_ref}.tar.gz -# Switch to new Source URL after adoption of tagged releases - -BuildArch: noarch -BuildRequires: python3-devel -BuildRequires: help2man - -%global _description %{expand: -Hunt down social media accounts by username across 400+ social networks and -websites. New targets are tested and implemented regularly. -} - -%description %{_description} - - -%prep -%autosetup -n sherlock-%{source_ref} - - -%generate_buildrequires -%pyproject_buildrequires - - -%build -%pyproject_wheel - - -%install -%pyproject_install -%pyproject_save_files -l sherlock sites result notify __init__ __main__ - -sed -r -i '1{/^#!/d}' '%{buildroot}%{python3_sitelib}/__main__.py' -sed -r -i '1{/^#!/d}' '%{buildroot}%{python3_sitelib}/sherlock.py' - -install -d '%{buildroot}%{_mandir}/man1' -PYTHONPATH='%{buildroot}%{python3_sitelib}' help2man \ - --no-info \ - --version-string='%{version}' \ - --name='%{summary}' \ - --output='%{buildroot}%{_mandir}/man1/sherlock.1' \ - '%{buildroot}%{_bindir}/sherlock' - - -%check -# Tests fail when pwd isn't sherlock. Relative pathing need fix upstream. -cd sherlock -%{py3_test_envvars} %{python3} -m unittest tests.all --verbose -cd .. - - -%files -f %{pyproject_files} -%doc README.md -%{_bindir}/sherlock -%{python3_sitelib}/resources -%pycached %{python3_sitelib}/tests/*.py -%{_mandir}/man1/sherlock.1* - - -%changelog -* Tue May 14 2024 Paul Pfeister 0.14.4-1 -- Initial package. 
From 44bc8523dd89e8ae38179361526730463558a9c3 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Thu, 16 May 2024 00:08:40 -0400 Subject: [PATCH 05/55] Remove setup.[cfg|py] from codeowners --- .github/CODEOWNERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index e2a358d90..ae6c8b316 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -6,5 +6,3 @@ # Changes made to these items without code owner approval may negatively # impact packaging pipelines. Code owners may need time to verify or adapt. /pyproject.toml @ppfeister @sdushantha -/setup.cfg @ppfeister @sdushantha -/setup.py @ppfeister From 9f58f93562a066c04d5f110f0cde57b0a22a735e Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Thu, 16 May 2024 00:09:23 -0400 Subject: [PATCH 06/55] Remove requirements.txt --- requirements.txt | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index b1dd62522..000000000 --- a/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -certifi>=2019.6.16 -colorama>=0.4.1 -PySocks>=1.7.0 -requests>=2.22.0 -requests-futures>=1.0.0 -stem>=1.8.0 -torrequest>=0.1.0 -pandas>=1.0.0 -openpyxl>=3.0.10 -exrex>=0.11.0 From 6b0995599ac56ed30fe5d228b9b8230965f92dae Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Thu, 16 May 2024 00:16:46 -0400 Subject: [PATCH 07/55] Remove install redirect --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 11228285e..0aaf259e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ maintainers = [ "Sondre Karlsen Dyrnes" ] readme = "docs/pyproj/README.md" -packages = [ { include = "sherlock" } ] +packages = [ { include = "sherlock"} ] keywords = [ "osint", "reconnaissance", "information gathering" ] classifiers = [ "Development Status :: 5 - Production/Stable", From 07227e4a9a87a4b08cce9722401d8539be7d4cce Mon Sep 17 00:00:00 
2001 From: Paul Pfeister Date: Thu, 16 May 2024 02:09:41 -0400 Subject: [PATCH 08/55] Add dynamic versioning support (disabled) --- pyproject.toml | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0aaf259e5..d17374902 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,12 +3,20 @@ requires = [ "poetry-core>=1.8.0", "poetry-dynamic-versioning>1.0.0,<2.0.0" ] build-backend = "poetry_dynamic_versioning.backend" [tool.poetry-dynamic-versioning] -enable = true +enable = false +metadata = false +strict = true vcs = "git" +# Matched tags: +# v0.0.0 followed by optional -rc0 (release candidate) OR -b0 (public beta) +pattern = "^v(?P\\d+(?:\\.\\d+){2})(?:-(?P(?:rc|b)\\d+?))??$" + +[tool.poetry-dynamic-versioning.substitution] +folders = [ { path = "sherlock" } ] [tool.poetry] name = "sherlock-project" -version = "0.0.0" +version = "0.14.4" description = "Hunt down social media accounts by username across social networks" license = "MIT" authors = [ @@ -32,12 +40,13 @@ classifiers = [ "Programming Language :: Python :: 3", "Topic :: Security" ] +homepage = "http://sherlock-project.github.io/" +repository = "https://github.com/sherlock-project/sherlock.git" [tool.poetry.urls] -Homepage = "http://sherlock-project.github.io/" -Repository = "https://github.com/sherlock-project/sherlock.git" -Issues = "https://github.com/sherlock-project/sherlock/issues" + +"Bug Reporting" = "https://github.com/sherlock-project/sherlock/issues" +"Contributors" = "https://github.com/sherlock-project/sherlock/graphs/contributors" [tool.poetry.dependencies] python = "^3.8" From af7565ec3d830057773279f12a9abb6521e370da Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Thu, 16 May 2024 02:51:06 -0400 Subject: [PATCH 09/55] Allow higher certifi --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d17374902..1176963bd 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -50,7 +50,7 @@ repository = "https://github.com/sherlock-project/sherlock.git" [tool.poetry.dependencies] python = "^3.8" -certifi = "^2019.6.16" +certifi = ">2019.6.16" colorama = "^0.4.1" PySocks = "^1.7.0" requests = "^2.22.0" From 36c274ec19c9c7e6ce1bdcb888898e54649cce9f Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Thu, 16 May 2024 03:07:31 -0400 Subject: [PATCH 10/55] Remove License classifier --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1176963bd..858681b39 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,6 @@ classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Intended Audience :: Information Technology", - "License :: OSI Approved :: MIT License", "Natural Language :: English", "Operating System :: OS Independent", "Programming Language :: Python :: 3", From e58057820fc486d28c626f1ee5e49ece0d7e5e36 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Thu, 16 May 2024 04:00:05 -0400 Subject: [PATCH 11/55] Downgrade poetry-core --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 858681b39..2f5cfd19c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,7 @@ [build-system] -requires = [ "poetry-core>=1.8.0", "poetry-dynamic-versioning>1.0.0,<2.0.0" ] +requires = [ "poetry-core>=1.6.0", "poetry-dynamic-versioning>1.0.0,<2.0.0" ] build-backend = "poetry_dynamic_versioning.backend" +# poetry-core 1.8 not available in .fc39. 
Can upgrade to 1.8.0 at .fc39 EOL [tool.poetry-dynamic-versioning] enable = false From 3b713ed0084c5dea7ce985e4bf2a2de77c3c4be3 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Thu, 16 May 2024 23:36:55 -0400 Subject: [PATCH 12/55] Fix cli mod exec import --- sherlock/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock/__main__.py b/sherlock/__main__.py index b10bd6aef..39068a06a 100644 --- a/sherlock/__main__.py +++ b/sherlock/__main__.py @@ -18,5 +18,5 @@ print(f"Sherlock requires Python 3.6+\nYou are using Python {python_version}, which is not supported by Sherlock.") sys.exit(1) - import sherlock + from sherlock import sherlock sherlock.main() From 63a1c1448a19643386ee71867f135978df4f97d4 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Thu, 16 May 2024 23:37:03 -0400 Subject: [PATCH 13/55] Fix version string --- sherlock/sherlock.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index d48791942..e3474d1db 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -510,9 +510,9 @@ def handler(signal_received, frame): def main(): version_string = ( - f"%(prog)s {__version__}\n" - + f"{requests.__description__}: {requests.__version__}\n" - + f"Python: {platform.python_version()}" + f"Sherlock {__version__}\n" + + f"Requests {requests.__version__}\n" + + f"Python {platform.python_version()}" ) parser = ArgumentParser( From ea1c4218382985f5148388d2d27609412c976389 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Thu, 16 May 2024 23:47:10 -0400 Subject: [PATCH 14/55] Update docs with module changes --- docs/README.md | 5 ----- docs/install.md | 19 ++++++++++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/docs/README.md b/docs/README.md index 9f3c413c9..5835de446 100644 --- a/docs/README.md +++ b/docs/README.md @@ -133,15 +133,10 @@ Original Creator - [Siddharth Dushantha](https://github.com/sdushantha) [docs_install]: 
/docs/install.md [docs_docker]: /docs/install.md#docker -[docs_docker_dockerhub]: /docs/install.md#docker -[docs_docker_compose]: /docs/install.md#using-compose -[docs_docker_source]: /docs/install.md#build-image-from-source-useful-for-contributors [docs_py]: /docs/install.md#python -[docs_py_build]: /docs/install.md#build-python-package-from-source-useful-for-contributors [docs_contrib]: /docs/CONTRIBUTING.md [docs_contrib_adding_targets]: /docs/CONTRIBUTING.md#adding-targets [docs_contrib_removing_targets]: /docs/CONTRIBUTING.md#removing-targets [docs_contrib_restoring_targets]: /docs/CONTRIBUTING.md#restoring-targets [ext_pypi]: https://pypi.org/project/sherlock-project/ [ext_brew]: https://formulae.brew.sh/formula/sherlock - diff --git a/docs/install.md b/docs/install.md index 9813a09c0..0a5069d92 100644 --- a/docs/install.md +++ b/docs/install.md @@ -37,13 +37,22 @@ Python pipx install sherlock-project ``` -### Build python package from source (useful for contributors) +### Build live package from source (useful for contributors) + +Building an editable (or live) package links the entry point to your current directory, rather than to the standard install location. This is often useful when working with the code base, as changes are reflected immediately without reinstallation. ```bash -# pipx is recommended, but pip may suffice if pipx is unavailable -git clone https://github.com/sherlock-project/sherlock.git -cd sherlock -pipx install . +# Assumes repository cloned, and pwd is repository root +pipx install -e . +``` + +### Run package from source (no install) + +If you'd rather not install directly to your system, you can import the module at runtime with `-m`. + +```bash +# Assumes repository cloned, and pwd is repository root +python3 -m sherlock user123 user789 ```

From 7867e26868e87c8a6f4feee23104a05f8cac82cf Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Fri, 17 May 2024 00:19:08 -0400 Subject: [PATCH 15/55] Remove upper limit --- pyproject.toml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2f5cfd19c..f3a8d74ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,16 +50,16 @@ repository = "https://github.com/sherlock-project/sherlock.git" [tool.poetry.dependencies] python = "^3.8" -certifi = ">2019.6.16" -colorama = "^0.4.1" -PySocks = "^1.7.0" -requests = "^2.22.0" -requests-futures = "^1.0.0" -stem = "^1.8.0" -torrequest = "^0.1.0" -pandas = ">1.0.0" -openpyxl = "^3.0.10" -exrex = "^0.11.0" +certifi = ">=2019.6.16" +colorama = ">=0.4.1" +PySocks = ">=1.7.0" +requests = ">=2.22.0" +requests-futures = ">=1.0.0" +stem = ">=1.8.0" +torrequest = ">=0.1.0" +pandas = ">=1.0.0" +openpyxl = ">=3.0.10" +exrex = ">=0.11.0" [tool.poetry.scripts] sherlock = 'sherlock.sherlock:main' From 8fee9a971437eb55334fb720380f0b5b01e00e24 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Fri, 17 May 2024 20:02:14 -0400 Subject: [PATCH 16/55] Ignore Poetry --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 4d47421ae..c91732b47 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,9 @@ -# Virtual Environment +# Virtual Environments venv/ bin/ lib/ pyvenv.cfg +poetry.lock # Editor Configurations .vscode/ From b485001fcb7b99c6eb76334693c4a32d54d78f89 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Fri, 17 May 2024 20:57:37 -0400 Subject: [PATCH 17/55] Switch versioning plugin --- pyproject.toml | 19 +++++-------------- sherlock/__init__.py | 4 ++++ sherlock/__main__.py | 4 ++-- sherlock/sherlock.py | 20 ++++++++++++++------ 4 files changed, 25 insertions(+), 22 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f3a8d74ba..04844acee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,23 
+1,14 @@ [build-system] -requires = [ "poetry-core>=1.6.0", "poetry-dynamic-versioning>1.0.0,<2.0.0" ] -build-backend = "poetry_dynamic_versioning.backend" +requires = [ "poetry-core>=1.2.0" ] +build-backend = "poetry.core.masonry.api" # poetry-core 1.8 not available in .fc39. Can upgrade to 1.8.0 at .fc39 EOL -[tool.poetry-dynamic-versioning] -enable = false -metadata = false -strict = true -vcs = "git" -# Matched tags: -# v0.0.0 followed by optional -rc0 (release candidate) OR -b0 (public beta) -pattern = "^v(?P\\d+(?:\\.\\d+){2})(?:-(?P(?:rc|b)\\d+?))??$" - -[tool.poetry-dynamic-versioning.substitution] -folders = [ { path = "sherlock" } ] +[tool.poetry-version-plugin] +source = "init" [tool.poetry] name = "sherlock-project" -version = "0.14.4" +version = "0" # single source of truth is __version__.py description = "Hunt down social media accounts by username across social networks" license = "MIT" authors = [ diff --git a/sherlock/__init__.py b/sherlock/__init__.py index 219dcaec7..5bf79e243 100644 --- a/sherlock/__init__.py +++ b/sherlock/__init__.py @@ -4,3 +4,7 @@ networks. 
""" + +__shortname__ = "Sherlock" +__longname__ = "Sherlock: Find Usernames Across Social Networks" +__version__ = "0.14.4" diff --git a/sherlock/__main__.py b/sherlock/__main__.py index 39068a06a..014d8b353 100644 --- a/sherlock/__main__.py +++ b/sherlock/__main__.py @@ -14,8 +14,8 @@ # Check if the user is using the correct version of Python python_version = sys.version.split()[0] - if sys.version_info < (3, 6): - print(f"Sherlock requires Python 3.6+\nYou are using Python {python_version}, which is not supported by Sherlock.") + if sys.version_info < (3, 8): + print(f"Sherlock requires Python 3.8+\nYou are using Python {python_version}, which is not supported by Sherlock.") sys.exit(1) from sherlock import sherlock diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index e3474d1db..9846cb5a5 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -19,6 +19,17 @@ import requests +# Removing __version__ here will trigger update message for users +# Do not remove until ready to trigger that message +__version__ = "0.14.4" +del __version__ + +from .__init__ import ( + __shortname__, + __longname__, + __version__ +) + from requests_futures.sessions import FuturesSession from torrequest import TorRequest from sherlock.result import QueryStatus @@ -28,9 +39,6 @@ from colorama import init from argparse import ArgumentTypeError -module_name = "Sherlock: Find Usernames Across Social Networks" -__version__ = "0.14.4" - class SherlockFuturesSession(FuturesSession): def request(self, method, url, hooks=None, *args, **kwargs): @@ -510,14 +518,14 @@ def handler(signal_received, frame): def main(): version_string = ( - f"Sherlock {__version__}\n" + f"{__shortname__} {__version__}\n" + f"Requests {requests.__version__}\n" - + f"Python {platform.python_version()}" + + f"Python {platform.python_version()}" ) parser = ArgumentParser( formatter_class=RawDescriptionHelpFormatter, - description=f"{module_name} (Version {__version__})", + description=f"{__longname__} 
(Version {__version__})", ) parser.add_argument( "--version", From 41f798a34e3e807d6a8d7b00cd6469b24bd4e442 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Fri, 17 May 2024 21:29:52 -0400 Subject: [PATCH 18/55] Ignore dist --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c91732b47..c53e34ad9 100644 --- a/.gitignore +++ b/.gitignore @@ -15,8 +15,9 @@ __pycache__/ # Pip src/ -# Pip / PyProject Devel & Installation +# Devel, Build, and Installation *.egg-info/ +dist/** # Jupyter Notebook .ipynb_checkpoints From 33bbb4e720a92e496fcf46f900476c75f2339123 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 18 May 2024 00:35:24 -0400 Subject: [PATCH 19/55] Fix pyproject URLs Co-authored-by: Matheus Felipe --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 04844acee..3b396b315 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,8 +31,8 @@ classifiers = [ "Programming Language :: Python :: 3", "Topic :: Security" ] -homepage = "http://sherlock-project.github.io/" -repository = "https://github.com/sherlock-project/sherlock.git" +homepage = "https://sherlock-project.github.io/" +repository = "https://github.com/sherlock-project/sherlock" [tool.poetry.urls] From 16e6ee639b1cdc9e5b045724d5b8140ac92e36e4 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 18 May 2024 01:01:37 -0400 Subject: [PATCH 20/55] Add email to maintainers Co-authored-by: Matheus Felipe --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3b396b315..db760506c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,9 +15,9 @@ authors = [ "Siddharth Dushantha" ] maintainers = [ - "Paul Pfeister", - "Matheus Felipe", - "Sondre Karlsen Dyrnes" + "Paul Pfeister ", + "Matheus Felipe ", + "Sondre Karlsen Dyrnes " ] readme = "docs/pyproj/README.md" packages = [ { include = "sherlock"} ] 
From 606743b99d7d8b42b40eefdfea4d2d233d305bb9 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 18 May 2024 01:29:11 -0400 Subject: [PATCH 21/55] Remove tor --- sherlock/sherlock.py | 58 +++----------------------------------------- tests/base.py | 4 --- 2 files changed, 4 insertions(+), 58 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 9846cb5a5..7d894246c 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -31,7 +31,6 @@ ) from requests_futures.sessions import FuturesSession -from torrequest import TorRequest from sherlock.result import QueryStatus from sherlock.result import QueryResult from sherlock.notify import QueryNotifyPrint @@ -167,8 +166,6 @@ def sherlock( username, site_data, query_notify, - tor=False, - unique_tor=False, proxy=None, timeout=60, ): @@ -183,8 +180,6 @@ def sherlock( query_notify -- Object with base type of QueryNotify(). This will be used to notify the caller about query results. - tor -- Boolean indicating whether to use a tor circuit for the requests. - unique_tor -- Boolean indicating whether to use a new tor circuit for each request. proxy -- String indicating the proxy URL timeout -- Time in seconds to wait before timing out request. Default is 60 seconds. @@ -205,20 +200,10 @@ def sherlock( # Notify caller that we are starting the query. query_notify.start(username) - # Create session based on request methodology - if tor or unique_tor: - # Requests using Tor obfuscation - try: - underlying_request = TorRequest() - except OSError: - print("Tor not found in system path. Unable to continue.\n") - sys.exit(query_notify.finish()) - underlying_session = underlying_request.session - else: - # Normal requests - underlying_session = requests.session() - underlying_request = requests.Request() + # Normal requests + underlying_session = requests.session() + underlying_request = requests.Request() # Limit number of workers to 20. # This is probably vastly overkill. 
@@ -342,15 +327,10 @@ def sherlock( # Store future in data for access later net_info["request_future"] = future - # Reset identify for tor (if needed) - if unique_tor: - underlying_request.reset_identity() - # Add this site's results into final dictionary with all the other results. results_total[social_network] = results_site # Open the file containing account links - # Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses for social_network, net_info in site_data.items(): # Retrieve results again results_site = results_total.get(social_network) @@ -554,23 +534,7 @@ def main(): "-o", dest="output", help="If using single username, the output of the result will be saved to this file.", - ) - parser.add_argument( - "--tor", - "-t", - action="store_true", - dest="tor", - default=False, - help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.", - ) - parser.add_argument( - "--unique-tor", - "-u", - action="store_true", - dest="unique_tor", - default=False, - help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.", - ) + ), parser.add_argument( "--csv", action="store_true", @@ -694,22 +658,10 @@ def main(): except Exception as error: print(f"A problem occurred while checking for an update: {error}") - # Argument check - # TODO regex check on args.proxy - if args.tor and (args.proxy is not None): - raise Exception("Tor and Proxy cannot be set at the same time.") - # Make prompts if args.proxy is not None: print("Using the proxy: " + args.proxy) - if args.tor or args.unique_tor: - print("Using Tor to make requests") - - print( - "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors." - ) - if args.no_color: # Disable color output. 
init(strip=True, convert=False) @@ -788,8 +740,6 @@ def main(): username, site_data, query_notify, - tor=args.tor, - unique_tor=args.unique_tor, proxy=args.proxy, timeout=args.timeout, ) diff --git a/tests/base.py b/tests/base.py index 2146b88b2..bda809948 100644 --- a/tests/base.py +++ b/tests/base.py @@ -51,8 +51,6 @@ def setUp(self): # Create notify object for query results. self.query_notify = QueryNotify() - self.tor = False - self.unique_tor = False self.timeout = None self.skip_error_sites = True @@ -115,8 +113,6 @@ def username_check(self, username_list, site_list, exist_check=True): results = sherlock.sherlock(username, site_data, self.query_notify, - tor=self.tor, - unique_tor=self.unique_tor, timeout=self.timeout ) for site, result in results.items(): From 4b7fd8b59dd134f116f81544a4f7b01f149bf1a9 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 18 May 2024 02:22:18 -0400 Subject: [PATCH 22/55] Fix typos --- pyproject.toml | 3 ++- sherlock/sherlock.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index db760506c..6c0eb1940 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,8 @@ source = "init" [tool.poetry] name = "sherlock-project" -version = "0" # single source of truth is __version__.py +# single source of truth for version is __init__.py +version = "0" description = "Hunt down social media accounts by username across social networks" license = "MIT" authors = [ diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 7d894246c..88fab987a 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -534,7 +534,7 @@ def main(): "-o", dest="output", help="If using single username, the output of the result will be saved to this file.", - ), + ) parser.add_argument( "--csv", action="store_true", From ba2eef7b0e650335841c2140ae794354a8d65198 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 18 May 2024 02:41:25 -0400 Subject: [PATCH 23/55] Revert "Remove tor" This reverts 
commit 606743b99d7d8b42b40eefdfea4d2d233d305bb9. --- sherlock/sherlock.py | 56 +++++++++++++++++++++++++++++++++++++++++--- tests/base.py | 4 ++++ 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 88fab987a..9846cb5a5 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -31,6 +31,7 @@ ) from requests_futures.sessions import FuturesSession +from torrequest import TorRequest from sherlock.result import QueryStatus from sherlock.result import QueryResult from sherlock.notify import QueryNotifyPrint @@ -166,6 +167,8 @@ def sherlock( username, site_data, query_notify, + tor=False, + unique_tor=False, proxy=None, timeout=60, ): @@ -180,6 +183,8 @@ def sherlock( query_notify -- Object with base type of QueryNotify(). This will be used to notify the caller about query results. + tor -- Boolean indicating whether to use a tor circuit for the requests. + unique_tor -- Boolean indicating whether to use a new tor circuit for each request. proxy -- String indicating the proxy URL timeout -- Time in seconds to wait before timing out request. Default is 60 seconds. @@ -200,10 +205,20 @@ def sherlock( # Notify caller that we are starting the query. query_notify.start(username) + # Create session based on request methodology + if tor or unique_tor: + # Requests using Tor obfuscation + try: + underlying_request = TorRequest() + except OSError: + print("Tor not found in system path. Unable to continue.\n") + sys.exit(query_notify.finish()) - # Normal requests - underlying_session = requests.session() - underlying_request = requests.Request() + underlying_session = underlying_request.session + else: + # Normal requests + underlying_session = requests.session() + underlying_request = requests.Request() # Limit number of workers to 20. # This is probably vastly overkill. 
@@ -327,10 +342,15 @@ def sherlock( # Store future in data for access later net_info["request_future"] = future + # Reset identify for tor (if needed) + if unique_tor: + underlying_request.reset_identity() + # Add this site's results into final dictionary with all the other results. results_total[social_network] = results_site # Open the file containing account links + # Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses for social_network, net_info in site_data.items(): # Retrieve results again results_site = results_total.get(social_network) @@ -535,6 +555,22 @@ def main(): dest="output", help="If using single username, the output of the result will be saved to this file.", ) + parser.add_argument( + "--tor", + "-t", + action="store_true", + dest="tor", + default=False, + help="Make requests over Tor; increases runtime; requires Tor to be installed and in system path.", + ) + parser.add_argument( + "--unique-tor", + "-u", + action="store_true", + dest="unique_tor", + default=False, + help="Make requests over Tor with new Tor circuit after each request; increases runtime; requires Tor to be installed and in system path.", + ) parser.add_argument( "--csv", action="store_true", @@ -658,10 +694,22 @@ def main(): except Exception as error: print(f"A problem occurred while checking for an update: {error}") + # Argument check + # TODO regex check on args.proxy + if args.tor and (args.proxy is not None): + raise Exception("Tor and Proxy cannot be set at the same time.") + # Make prompts if args.proxy is not None: print("Using the proxy: " + args.proxy) + if args.tor or args.unique_tor: + print("Using Tor to make requests") + + print( + "Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors." + ) + if args.no_color: # Disable color output. 
init(strip=True, convert=False) @@ -740,6 +788,8 @@ def main(): username, site_data, query_notify, + tor=args.tor, + unique_tor=args.unique_tor, proxy=args.proxy, timeout=args.timeout, ) diff --git a/tests/base.py b/tests/base.py index bda809948..2146b88b2 100644 --- a/tests/base.py +++ b/tests/base.py @@ -51,6 +51,8 @@ def setUp(self): # Create notify object for query results. self.query_notify = QueryNotify() + self.tor = False + self.unique_tor = False self.timeout = None self.skip_error_sites = True @@ -113,6 +115,8 @@ def username_check(self, username_list, site_list, exist_check=True): results = sherlock.sherlock(username, site_data, self.query_notify, + tor=self.tor, + unique_tor=self.unique_tor, timeout=self.timeout ) for site, result in results.items(): From 377e0766a251d55aa2f33a70729146e3eb69daee Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 18 May 2024 14:29:33 -0400 Subject: [PATCH 24/55] Update pyproject.toml Co-authored-by: Matheus Felipe --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6c0eb1940..8bb9f3d2c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ version = "0" description = "Hunt down social media accounts by username across social networks" license = "MIT" authors = [ - "Siddharth Dushantha" + "Siddharth Dushantha " ] maintainers = [ "Paul Pfeister ", From 313d2a9080703366d6cd763c5e25b12c360e4609 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 18 May 2024 17:28:50 -0400 Subject: [PATCH 25/55] Caret depends --- pyproject.toml | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8bb9f3d2c..5439fd959 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,21 +37,20 @@ repository = "https://github.com/sherlock-project/sherlock" [tool.poetry.urls] -"Bug Reporting" = "https://github.com/sherlock-project/sherlock/issues" -"Contributors" = 
"https://github.com/sherlock-project/sherlock/graphs/contributors" +"Bug Tracker" = "https://github.com/sherlock-project/sherlock/issues" [tool.poetry.dependencies] python = "^3.8" certifi = ">=2019.6.16" -colorama = ">=0.4.1" -PySocks = ">=1.7.0" -requests = ">=2.22.0" -requests-futures = ">=1.0.0" -stem = ">=1.8.0" -torrequest = ">=0.1.0" -pandas = ">=1.0.0" -openpyxl = ">=3.0.10" -exrex = ">=0.11.0" +colorama = "^0.4.1" +PySocks = "^1.7.0" +requests = "^2.22.0" +requests-futures = "^1.0.0" +stem = "^1.8.0" +torrequest = "^0.1.0" +pandas = "^1.0.0" +openpyxl = "^3.0.10" +exrex = "^0.11.0" [tool.poetry.scripts] sherlock = 'sherlock.sherlock:main' From 1b0e50854f6a51bd50a5fb94b425a29dd40177df Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 18 May 2024 17:41:01 -0400 Subject: [PATCH 26/55] Allow pandas upgrade --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5439fd959..c91b92261 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,8 @@ requests = "^2.22.0" requests-futures = "^1.0.0" stem = "^1.8.0" torrequest = "^0.1.0" -pandas = "^1.0.0" +# pandas can likely be bumped up to ^2.0.0 after fc39 EOL +pandas = ">=1.0.0,<3.0.0" openpyxl = "^3.0.10" exrex = "^0.11.0" From 07274a9a2ce0067a4d6a78b014c9e907a1fe9842 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 18 May 2024 18:06:16 -0400 Subject: [PATCH 27/55] Add note about version number --- docs/install.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/install.md b/docs/install.md index 0a5069d92..52c70086e 100644 --- a/docs/install.md +++ b/docs/install.md @@ -41,6 +41,8 @@ pipx install sherlock-project Building an editable (or live) package links the entry point to your current directory, rather than to the standard install location. This is often useful when working with the code base, as changes are reflected immediately without reinstallation. 
+Note that the version number will be 0.0.0 for pipx local builds unless manually changed in the pyproject file (it will prompt the user for an update). + ```bash # Assumes repository cloned, and pwd is repository root pipx install -e . From 04ce7aa0bb841fe2a6675cf3766133d1d30559a0 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 20 May 2024 00:51:28 -0400 Subject: [PATCH 28/55] Change remote version URI --- sherlock/sherlock.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 9846cb5a5..e029a59da 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -679,10 +679,10 @@ def main(): # Check for newer version of Sherlock. If it exists, let the user know about it try: r = requests.get( - "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/sherlock.py" + "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/__init__.py" ) - remote_version = str(re.findall('__version__ = "(.*)"', r.text)[0]) + remote_version = str(re.findall('__version__ *= *"(.*)"', r.text)[0]) local_version = __version__ if remote_version != local_version: From b8ca8aade48dd4b6497ef114e41067aac476da65 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Fri, 31 May 2024 14:42:28 -0400 Subject: [PATCH 29/55] Add Sponsor --- .github/FUNDING.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/FUNDING.yml diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 000000000..b21b31ed8 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: [ sdushantha, ppfeister ] \ No newline at end of file From 1de8ad0fc436f973b853d4558e669bf1e8901817 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 18 May 2024 23:09:04 -0400 Subject: [PATCH 30/55] Fix linter flags --- sherlock/sites.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock/sites.py b/sherlock/sites.py index 2b360afad..112b6d023 100644 --- 
a/sherlock/sites.py +++ b/sherlock/sites.py @@ -174,7 +174,7 @@ def __init__(self, data_file_path=None): raise ValueError( f"Problem parsing json contents at '{data_file_path}': Missing attribute {error}." ) - except TypeError as error: + except TypeError: print(f"Encountered TypeError parsing json contents for target '{site_name}' at {data_file_path}\nSkipping target.\n") return From b34461137142122b7601918c6e235a170b7f049f Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 18 May 2024 23:09:17 -0400 Subject: [PATCH 31/55] Add basic tox --- tox.ini | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 tox.ini diff --git a/tox.ini b/tox.ini new file mode 100644 index 000000000..b7fe32355 --- /dev/null +++ b/tox.ini @@ -0,0 +1,25 @@ +[tox] +requires = + tox >= 4 +envlist = + lint + py313 + py312 + py311 + py310 + py39 + py38 + +[testenv] +whitelist_externals = poetry +commands = + poetry plugin add poetry-version-plugin + poetry install --no-root --with dev + +[testenv:lint] +description = Lint with Ruff +deps = + ruff +commands = + ruff check + From 85ec59e25525566110423feee9b9c2dc38c852ca Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 18 May 2024 23:14:25 -0400 Subject: [PATCH 32/55] whitelist -> allowlist Seriously, tox? 
--- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index b7fe32355..bd9e5c4ce 100644 --- a/tox.ini +++ b/tox.ini @@ -11,7 +11,7 @@ envlist = py38 [testenv] -whitelist_externals = poetry +allowlist_externals = poetry commands = poetry plugin add poetry-version-plugin poetry install --no-root --with dev From a785a5931f96e37bb540c6898e3a8c4ae86788ad Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 20 May 2024 04:44:52 -0400 Subject: [PATCH 33/55] Adopt tox and pytest --- pyproject.toml | 3 + sherlock/sherlock.py | 26 ++-- tests/__init__.py | 4 - tests/all.py | 213 ----------------------------- tests/base.py | 224 ------------------------------- tests/conftest.py | 14 ++ tests/few_test_basic.py | 7 + tests/sherlock_interactives.py | 23 ++++ tests/test_manifest.py | 28 ++++ tests/test_multiple_usernames.py | 28 ---- tests/test_probes.py | 102 ++++++++++++++ tests/test_ux.py | 38 ++++++ tests/test_version.py | 9 ++ tox.ini | 19 ++- 14 files changed, 254 insertions(+), 484 deletions(-) delete mode 100644 tests/__init__.py delete mode 100644 tests/all.py delete mode 100644 tests/base.py create mode 100644 tests/conftest.py create mode 100644 tests/few_test_basic.py create mode 100644 tests/sherlock_interactives.py create mode 100644 tests/test_manifest.py delete mode 100644 tests/test_multiple_usernames.py create mode 100644 tests/test_probes.py create mode 100644 tests/test_ux.py create mode 100644 tests/test_version.py diff --git a/pyproject.toml b/pyproject.toml index c91b92261..279ab3089 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,5 +53,8 @@ pandas = ">=1.0.0,<3.0.0" openpyxl = "^3.0.10" exrex = "^0.11.0" +[tool.poetry.group.dev.dependencies] +jsonschema = "^4.0.0" + [tool.poetry.scripts] sherlock = 'sherlock.sherlock:main' diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index e029a59da..5b90e464a 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -21,23 +21,25 @@ # Removing 
__version__ here will trigger update message for users # Do not remove until ready to trigger that message +# When removed, also remove all the noqa: E402 comments for linting __version__ = "0.14.4" del __version__ -from .__init__ import ( +from .__init__ import ( # noqa: E402 __shortname__, __longname__, __version__ ) -from requests_futures.sessions import FuturesSession -from torrequest import TorRequest -from sherlock.result import QueryStatus -from sherlock.result import QueryResult -from sherlock.notify import QueryNotifyPrint -from sherlock.sites import SitesInformation -from colorama import init -from argparse import ArgumentTypeError +from requests_futures.sessions import FuturesSession # noqa: E402 +from torrequest import TorRequest # noqa: E402 +from sherlock.result import QueryStatus # noqa: E402 +from sherlock.result import QueryResult # noqa: E402 +from sherlock.notify import QueryNotify # noqa: E402 +from sherlock.notify import QueryNotifyPrint # noqa: E402 +from sherlock.sites import SitesInformation # noqa: E402 +from colorama import init # noqa: E402 +from argparse import ArgumentTypeError # noqa: E402 class SherlockFuturesSession(FuturesSession): @@ -166,9 +168,9 @@ def multiple_usernames(username): def sherlock( username, site_data, - query_notify, - tor=False, - unique_tor=False, + query_notify: QueryNotify, + tor: bool = False, + unique_tor: bool = False, proxy=None, timeout=60, ): diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index 944e27ce6..000000000 --- a/tests/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -"""Sherlock Tests - -This package contains various submodules used to run tests. -""" diff --git a/tests/all.py b/tests/all.py deleted file mode 100644 index 926946fd5..000000000 --- a/tests/all.py +++ /dev/null @@ -1,213 +0,0 @@ -"""Sherlock Tests - -This module contains various tests. 
-""" -from tests.base import SherlockBaseTest -import exrex - - -class SherlockDetectTests(SherlockBaseTest): - def test_detect_true_via_message(self): - """Test Username Does Exist (Via Message). - - This test ensures that the "message" detection mechanism of - ensuring that a Username does exist works properly. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if detection mechanism did not work as expected. - """ - - site = "AllMyLinks" - site_data = self.site_data_all[site] - - # Ensure that the site's detection method has not changed. - self.assertEqual("message", site_data["errorType"]) - - self.username_check([site_data["username_claimed"]], [site], exist_check=True) - - return - - def test_detect_false_via_message(self): - """Test Username Does Not Exist (Via Message). - - This test ensures that the "message" detection mechanism of - ensuring that a Username does *not* exist works properly. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if detection mechanism did not work as expected. - """ - - site = "AllMyLinks" - site_data = self.site_data_all[site] - - # Ensure that the site's detection method has not changed. - self.assertEqual("message", site_data["errorType"]) - - # Generate a valid username based on the regex for a username that the - # site supports that is *most likely* not taken. The regex is slightly - # modified version of site_data["regexCheck"] as we want a username - # that has the maximum length that is supported by the site. This way, - # we wont generate a random username that might actually exist. 
This - # method is very hacky, but it does the job as having hardcoded - # usernames that dont exists will lead to people with ill intent to - # create an account with that username which will break the tests - valid_username = exrex.getone(r"^[a-z0-9][a-z0-9-]{32}$") - self.username_check([valid_username], [site], exist_check=False) - - return - - def test_detect_true_via_status_code(self): - """Test Username Does Exist (Via Status Code). - - This test ensures that the "status code" detection mechanism of - ensuring that a Username does exist works properly. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if detection mechanism did not work as expected. - """ - - site = "BitBucket" - site_data = self.site_data_all[site] - - # Ensure that the site's detection method has not changed. - self.assertEqual("status_code", site_data["errorType"]) - - self.username_check([site_data["username_claimed"]], [site], exist_check=True) - - return - - def test_detect_false_via_status_code(self): - """Test Username Does Not Exist (Via Status Code). - - This test ensures that the "status code" detection mechanism of - ensuring that a Username does *not* exist works properly. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if detection mechanism did not work as expected. - """ - - site = "BitBucket" - site_data = self.site_data_all[site] - - # Ensure that the site's detection method has not changed. - self.assertEqual("status_code", site_data["errorType"]) - - # Generate a valid username based on the regex for a username that the - # site supports that is *most likely* not taken. The regex is slightly - # modified version of site_data["regexCheck"] as we want a username - # that has the maximum length that is supported by the site. This way, - # we wont generate a random username that might actually exist. 
This - # method is very hacky, but it does the job as having hardcoded - # usernames that dont exists will lead to people with ill intent to - # create an account with that username which will break the tests - valid_username = exrex.getone(r"^[a-zA-Z0-9-_]{30}") - self.username_check([valid_username], [site], exist_check=False) - - return - - -class SherlockSiteCoverageTests(SherlockBaseTest): - def test_coverage_false_via_status(self): - """Test Username Does Not Exist Site Coverage (Via HTTP Status). - - This test checks all sites with the "HTTP Status" detection mechanism - to ensure that a Username that does not exist is reported that way. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if detection mechanism did not work as expected. - """ - - self.detect_type_check("status_code", exist_check=False) - - return - - def test_coverage_true_via_status(self): - """Test Username Does Exist Site Coverage (Via HTTP Status). - - This test checks all sites with the "HTTP Status" detection mechanism - to ensure that a Username that does exist is reported that way. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if detection mechanism did not work as expected. - """ - - self.detect_type_check("status_code", exist_check=True) - - return - - def test_coverage_false_via_message(self): - """Test Username Does Not Exist Site Coverage (Via Error Message). - - This test checks all sites with the "Error Message" detection mechanism - to ensure that a Username that does not exist is reported that way. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if detection mechanism did not work as expected. - """ - - self.detect_type_check("message", exist_check=False) - - return - - def test_coverage_true_via_message(self): - """Test Username Does Exist Site Coverage (Via Error Message). 
- - This test checks all sites with the "Error Message" detection mechanism - to ensure that a Username that does exist is reported that way. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if detection mechanism did not work as expected. - """ - - self.detect_type_check("message", exist_check=True) - - return - - def test_coverage_total(self): - """Test Site Coverage Is Total. - - This test checks that all sites have test data available. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Will trigger an assert if we do not have total coverage. - """ - - self.coverage_total_check() - - return diff --git a/tests/base.py b/tests/base.py deleted file mode 100644 index 2146b88b2..000000000 --- a/tests/base.py +++ /dev/null @@ -1,224 +0,0 @@ -"""Sherlock Base Tests - -This module contains various utilities for running tests. -""" -import os -import os.path -import unittest -from sherlock import sherlock -from sherlock.result import QueryStatus -from sherlock.notify import QueryNotify -from sherlock.sites import SitesInformation -import warnings - - -class SherlockBaseTest(unittest.TestCase): - def setUp(self): - """Sherlock Base Test Setup. - - Does common setup tasks for base Sherlock tests. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - """ - - # This ignores the ResourceWarning from an unclosed SSLSocket. - # TODO: Figure out how to fix the code so this is not needed. - warnings.simplefilter("ignore", ResourceWarning) - - # Create object with all information about sites we are aware of. - sites = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")) - - # Create original dictionary from SitesInformation() object. - # Eventually, the rest of the code will be updated to use the new object - # directly, but this will glue the two pieces together. 
- site_data_all = {} - for site in sites: - site_data_all[site.name] = site.information - self.site_data_all = site_data_all - - # Load excluded sites list, if any - excluded_sites_path = os.path.join(os.path.dirname(os.path.realpath(sherlock.__file__)), "tests/.excluded_sites") - try: - with open(excluded_sites_path, "r", encoding="utf-8") as excluded_sites_file: - self.excluded_sites = excluded_sites_file.read().splitlines() - except FileNotFoundError: - self.excluded_sites = [] - - # Create notify object for query results. - self.query_notify = QueryNotify() - - self.tor = False - self.unique_tor = False - self.timeout = None - self.skip_error_sites = True - - return - - def site_data_filter(self, site_list): - """Filter Site Data. - - Keyword Arguments: - self -- This object. - site_list -- List of strings corresponding to sites which - should be filtered. - - Return Value: - Dictionary containing sub-set of site data specified by "site_list". - """ - - # Create new dictionary that has filtered site data based on input. - # Note that any site specified which is not understood will generate - # an error. - site_data = {} - for site in site_list: - with self.subTest(f"Checking test vector Site '{site}' " - f"exists in total site data." - ): - site_data[site] = self.site_data_all[site] - - return site_data - - def username_check(self, username_list, site_list, exist_check=True): - """Username Exist Check. - - Keyword Arguments: - self -- This object. - username_list -- List of strings corresponding to usernames - which should exist on *all* of the sites. - site_list -- List of strings corresponding to sites which - should be filtered. - exist_check -- Boolean which indicates if this should be - a check for Username existence, - or non-existence. - - Return Value: - Nothing. - Will trigger an assert if Username does not have the expected - existence state. - """ - - # Filter all site data down to just what is needed for this test. 
- site_data = self.site_data_filter(site_list) - - if exist_check: - check_type_text = "claimed" - exist_result_desired = QueryStatus.CLAIMED - else: - check_type_text = "available" - exist_result_desired = QueryStatus.AVAILABLE - - for username in username_list: - results = sherlock.sherlock(username, - site_data, - self.query_notify, - tor=self.tor, - unique_tor=self.unique_tor, - timeout=self.timeout - ) - for site, result in results.items(): - with self.subTest(f"Checking Username '{username}' " - f"{check_type_text} on Site '{site}'" - ): - if ( - (self.skip_error_sites == True) and - (result["status"].status == QueryStatus.UNKNOWN) - ): - #Some error connecting to site. - self.skipTest(f"Skipping Username '{username}' " - f"{check_type_text} on Site '{site}': " - f"Site returned error status." - ) - - self.assertEqual(exist_result_desired, - result["status"].status) - - return - - def detect_type_check(self, detect_type, exist_check=True): - """Username Exist Check. - - Keyword Arguments: - self -- This object. - detect_type -- String corresponding to detection algorithm - which is desired to be tested. - Note that only sites which have documented - usernames which exist and do not exist - will be tested. - exist_check -- Boolean which indicates if this should be - a check for Username existence, - or non-existence. - - Return Value: - Nothing. - Runs tests on all sites using the indicated detection algorithm - and which also has test vectors specified. - Will trigger an assert if Username does not have the expected - existence state. - """ - - # Dictionary of sites that should be tested for having a username. - # This will allow us to test sites with a common username in parallel. 
- sites_by_username = {} - - for site, site_data in self.site_data_all.items(): - if ( - (site in self.excluded_sites) or - (site_data["errorType"] != detect_type) or - (site_data.get("username_claimed") is None) or - (site_data.get("username_unclaimed") is None) - ): - # This is either not a site we are interested in, or the - # site does not contain the required information to do - # the tests. - pass - else: - # We should run a test on this site. - - # Figure out which type of user - if exist_check: - username = site_data.get("username_claimed") - else: - username = site_data.get("username_unclaimed") - - # Add this site to the list of sites corresponding to this - # username. - if username in sites_by_username: - sites_by_username[username].append(site) - else: - sites_by_username[username] = [site] - - # Check on the username availability against all of the sites. - for username, site_list in sites_by_username.items(): - self.username_check([username], - site_list, - exist_check=exist_check - ) - - return - - def coverage_total_check(self): - """Total Coverage Check. - - Keyword Arguments: - self -- This object. - - Return Value: - Nothing. - Counts up all Sites with full test data available. - Will trigger an assert if any Site does not have test coverage. - """ - - site_no_tests_list = [] - - for site, site_data in self.site_data_all.items(): - if site_data.get("username_claimed") is None: - # Test information not available on this site. 
- site_no_tests_list.append(site) - - self.assertEqual("", ", ".join(site_no_tests_list)) - - return diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 000000000..ecf949742 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,14 @@ +import os +import pytest +from sherlock.sites import SitesInformation + +@pytest.fixture() +def sites_obj(): + sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")) + yield sites_obj + +@pytest.fixture(scope="session") +def sites_info(): + sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")) + sites_iterable = {site.name: site.information for site in sites_obj} + yield sites_iterable diff --git a/tests/few_test_basic.py b/tests/few_test_basic.py new file mode 100644 index 000000000..7ec81ac12 --- /dev/null +++ b/tests/few_test_basic.py @@ -0,0 +1,7 @@ +import sherlock + +#from sherlock.sites import SitesInformation +#local_manifest = data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json") + +def test_username_via_message(): + sherlock.__main__("--version") diff --git a/tests/sherlock_interactives.py b/tests/sherlock_interactives.py new file mode 100644 index 000000000..6646c7b96 --- /dev/null +++ b/tests/sherlock_interactives.py @@ -0,0 +1,23 @@ +import os +import re +import subprocess + +class Interactives: + def run_cli(args: str = "") -> str: + command = [f"sherlock {args}"] + proc_out = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT) + return proc_out.decode() + + def walk_sherlock_for_files_with(pattern: str) -> list[str]: + pattern: re.Pattern = re.compile(pattern) + matching_files: list[str] = [] + for root, dirs, files in os.walk("sherlock"): + for file in files: + file_path = os.path.join(root,file) + if "__pycache__" in file_path: + continue + with open(file_path, 'r', errors='ignore') as f: + if 
pattern.search(f.read()): + matching_files.append(file_path) + return matching_files + \ No newline at end of file diff --git a/tests/test_manifest.py b/tests/test_manifest.py new file mode 100644 index 000000000..689a0d471 --- /dev/null +++ b/tests/test_manifest.py @@ -0,0 +1,28 @@ +import os +import json +import pytest +from jsonschema import validate + +def validate_json(jsonfile: str, schemafile: str) -> bool: + with open(jsonfile, 'r') as f: + jsondat = json.load(f) + with open(schemafile, 'r') as f: + schemadat = json.load(f) + validate(instance=jsondat, schema=schemadat) + return True + +def test_validate_manifest_against_schema(): + json_relative: str = '../sherlock/resources/data.json' + schema_relative: str = '../sherlock/resources/data.schema.json' + + json_path: str = os.path.join(os.path.dirname(__file__), json_relative) + schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative) + validate_json(jsonfile=json_path, schemafile=schema_path) + +# Ensure that the expected values are beind returned by the site list +@pytest.mark.parametrize("target_name,target_expected_err_type", [ + ('GitHub', 'status_code'), + ('GitLab', 'message'), +]) +def test_site_list_iterability (sites_info, target_name, target_expected_err_type): + assert sites_info[target_name]['errorType'] == target_expected_err_type diff --git a/tests/test_multiple_usernames.py b/tests/test_multiple_usernames.py deleted file mode 100644 index 3e8a307ee..000000000 --- a/tests/test_multiple_usernames.py +++ /dev/null @@ -1,28 +0,0 @@ -import unittest -import sys -sys.path.append('../') -from sherlock import sherlock as sh - -checksymbols = [] -checksymbols = ["_", "-", "."] - -"""Test for multiple usernames. - - This test ensures that the function multiple_usernames works properly. More specific, - different scenarios are tested and only usernames that contain this specific sequence: {?} - should return positive. - - Keyword Arguments: - self -- This object. 
- - Return Value: - Nothing. - """ -class TestMultipleUsernames(unittest.TestCase): - def test_area(self): - test_usernames = ["test{?}test" , "test{?feo" , "test"] - for name in test_usernames: - if(sh.check_for_parameter(name)): - self.assertAlmostEqual(sh.multiple_usernames(name), ["test_test" , "test-test" , "test.test"]) - else: - self.assertAlmostEqual(name, name) \ No newline at end of file diff --git a/tests/test_probes.py b/tests/test_probes.py new file mode 100644 index 000000000..4c7527a2b --- /dev/null +++ b/tests/test_probes.py @@ -0,0 +1,102 @@ +import pytest +import random +import string +import re +from sherlock.sherlock import sherlock +from sherlock.notify import QueryNotify +from sherlock.result import QueryStatus +#from sherlock_interactives import Interactives + + +def simple_query(sites_info: dict, site: str, username: str) -> QueryStatus: + query_notify = QueryNotify() + site_data: dict = {} + site_data[site] = sites_info[site] + return sherlock( + username=username, + site_data=site_data, + query_notify=query_notify, + )[site]['status'].status + + +# Known positives should only use sites trusted to be reliable and unchanging +@pytest.mark.parametrize('site,username',[ + ('GitLab', 'ppfeister'), + ('AllMyLinks', 'blue'), +]) +def test_known_positives_via_message(sites_info, site, username): + assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED + + +# Known positives should only use sites trusted to be reliable and unchanging +@pytest.mark.parametrize('site,username',[ + ('GitHub', 'ppfeister'), + ('GitHub', 'sherlock-project'), + ('Docker Hub', 'ppfeister'), + ('Docker Hub', 'sherlock'), +]) +def test_known_positives_via_status_code(sites_info, site, username): + assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED + + +# Known positives should only use sites trusted to be reliable and unchanging +@pytest.mark.parametrize('site,username',[ + 
('BodyBuilding', 'blue'), + ('labpentestit', 'CSV'), +]) +def test_known_positives_via_response_url(sites_info, site, username): + assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED + + +# Randomly generate usernames of high length and test for positive availability +# Randomly generated usernames should be simple alnum for simplicity and high +# compatibility. Several attempts may be made ~just in case~ a real username is +# generated. +@pytest.mark.parametrize('site,random_len',[ + ('GitLab', 255), + ('Codecademy', 30) +]) +def test_likely_negatives_via_message(sites_info, site, random_len): + num_attempts: int = 3 + attempted_usernames: list[str] = [] + status: QueryStatus = QueryStatus.CLAIMED + for i in range(num_attempts): + acceptable_types = string.ascii_letters + string.digits + random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len)) + attempted_usernames.append(random_handle) + status = simple_query(sites_info=sites_info, site=site, username=random_handle) + if status is QueryStatus.AVAILABLE: + break + assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." + + +# Randomly generate usernames of high length and test for positive availability +# Randomly generated usernames should be simple alnum for simplicity and high +# compatibility. Several attempts may be made ~just in case~ a real username is +# generated. 
+@pytest.mark.parametrize('site,random_len',[ + ('GitHub', 39), + ('Docker Hub', 30) +]) +def test_likely_negatives_via_status_code(sites_info, site, random_len): + num_attempts: int = 3 + attempted_usernames: list[str] = [] + status: QueryStatus = QueryStatus.CLAIMED + for i in range(num_attempts): + acceptable_types = string.ascii_letters + string.digits + random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len)) + attempted_usernames.append(random_handle) + status = simple_query(sites_info=sites_info, site=site, username=random_handle) + if status is QueryStatus.AVAILABLE: + break + assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." + + +def test_username_illegal_regex(sites_info): + site: str = 'BitBucket' + invalid_handle: str = '*#$Y&*JRE' + pattern = re.compile(sites_info[site]['regexCheck']) + # Ensure that the username actually fails regex before testing sherlock + assert pattern.match(invalid_handle) is None + assert simple_query(sites_info=sites_info, site=site, username=invalid_handle) is QueryStatus.ILLEGAL + diff --git a/tests/test_ux.py b/tests/test_ux.py new file mode 100644 index 000000000..08f6948f7 --- /dev/null +++ b/tests/test_ux.py @@ -0,0 +1,38 @@ +import pytest +from sherlock import sherlock + +def test_remove_nsfw(sites_obj): + nsfw_target: str = 'Pornhub' + assert nsfw_target in {site.name: site.information for site in sites_obj} + sites_obj.remove_nsfw_sites() + assert nsfw_target not in {site.name: site.information for site in sites_obj} + + +# Parametrized sites should *not* include Motherless, which is acting as the control +@pytest.mark.parametrize('nsfwsites', [ + ['Pornhub'], + ['Pornhub', 'Xvideos'], +]) +def test_nsfw_explicit_selection(sites_obj, nsfwsites): + for site in nsfwsites: + assert site in {site.name: site.information for site in sites_obj} + 
sites_obj.remove_nsfw_sites(do_not_remove=nsfwsites) + for site in nsfwsites: + assert site in {site.name: site.information for site in sites_obj} + assert 'Motherless' not in {site.name: site.information for site in sites_obj} + +def test_wildcard_username_expansion(): + assert sherlock.check_for_parameter('test{?}test') is True + assert sherlock.check_for_parameter('test{.}test') is False + assert sherlock.check_for_parameter('test{}test') is False + assert sherlock.multiple_usernames('test{?}test') == ["test_test" , "test-test" , "test.test"] + + + +#def test_area(self): +# test_usernames = ["test{?}test" , "test{?feo" , "test"] +# for name in test_usernames: +# if(sh.check_for_parameter(name)): +# self.assertAlmostEqual(sh.multiple_usernames(name), ["test_test" , "test-test" , "test.test"]) +# else: +# self.assertAlmostEqual(name, name) diff --git a/tests/test_version.py b/tests/test_version.py new file mode 100644 index 000000000..872f5ea1d --- /dev/null +++ b/tests/test_version.py @@ -0,0 +1,9 @@ +from sherlock_interactives import Interactives +import sherlock + +def test_versioning() -> None: + # Ensure __version__ matches version presented to the user + assert sherlock.__version__ in Interactives.run_cli("--version") + # Ensure __init__ is single source of truth for __version__ in package + # Temporarily allows sherlock.py so as to not trigger early upgrades + assert Interactives.walk_sherlock_for_files_with(r'__version__ *= *') == [ "sherlock/__init__.py", "sherlock/sherlock.py" ] \ No newline at end of file diff --git a/tox.ini b/tox.ini index bd9e5c4ce..d0a5d84d2 100644 --- a/tox.ini +++ b/tox.ini @@ -11,10 +11,15 @@ envlist = py38 [testenv] -allowlist_externals = poetry +description = Attempt to build and install the package +deps = + coverage + jsonschema + pytest +allowlist_externals = coverage commands = - poetry plugin add poetry-version-plugin - poetry install --no-root --with dev + coverage run --source=sherlock --module pytest -v + coverage 
report --show-missing [testenv:lint] description = Lint with Ruff @@ -23,3 +28,11 @@ deps = commands = ruff check +[gh] +python = + 3.13 = py31 + 3.12 = py312 + 3.11 = py311 + 3.10 = py310 + 3.9 = py39 + 3.8 = py38 From 9f75d5ed1420cde8e769c1ae24b39ebc4d3a0900 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 20 May 2024 04:47:10 -0400 Subject: [PATCH 34/55] Fix newline --- tests/sherlock_interactives.py | 1 - tests/test_version.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/sherlock_interactives.py b/tests/sherlock_interactives.py index 6646c7b96..548cade65 100644 --- a/tests/sherlock_interactives.py +++ b/tests/sherlock_interactives.py @@ -20,4 +20,3 @@ def walk_sherlock_for_files_with(pattern: str) -> list[str]: if pattern.search(f.read()): matching_files.append(file_path) return matching_files - \ No newline at end of file diff --git a/tests/test_version.py b/tests/test_version.py index 872f5ea1d..4088c0b0b 100644 --- a/tests/test_version.py +++ b/tests/test_version.py @@ -6,4 +6,4 @@ def test_versioning() -> None: assert sherlock.__version__ in Interactives.run_cli("--version") # Ensure __init__ is single source of truth for __version__ in package # Temporarily allows sherlock.py so as to not trigger early upgrades - assert Interactives.walk_sherlock_for_files_with(r'__version__ *= *') == [ "sherlock/__init__.py", "sherlock/sherlock.py" ] \ No newline at end of file + assert Interactives.walk_sherlock_for_files_with(r'__version__ *= *') == [ "sherlock/__init__.py", "sherlock/sherlock.py" ] From 08a12912c72f5a5467c0c000449e3881e8a01eb2 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 20 May 2024 15:10:00 -0400 Subject: [PATCH 35/55] Remove exrex depend --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 279ab3089..fafa9f85f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,6 @@ torrequest = "^0.1.0" # pandas can likely be bumped up to ^2.0.0 after fc39 
EOL pandas = ">=1.0.0,<3.0.0" openpyxl = "^3.0.10" -exrex = "^0.11.0" [tool.poetry.group.dev.dependencies] jsonschema = "^4.0.0" From 2b24cca3405c3e6e0ff716012b3678dc6734ba1a Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 20 May 2024 16:30:08 -0400 Subject: [PATCH 36/55] Add tests --- .github/CODEOWNERS | 4 ++++ tests/conftest.py | 9 +++++++++ tests/sherlock_interactives.py | 13 +++++++++++-- tests/test_manifest.py | 28 +++++++++++++++++++--------- tests/test_ux.py | 16 ++++++++++++++++ 5 files changed, 59 insertions(+), 11 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index ae6c8b316..71b3e4deb 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -6,3 +6,7 @@ # Changes made to these items without code owner approval may negatively # impact packaging pipelines. Code owners may need time to verify or adapt. /pyproject.toml @ppfeister @sdushantha + +### REGRESSION +/tox.ini @ppfeister +/tests/ @ppfeister diff --git a/tests/conftest.py b/tests/conftest.py index ecf949742..a13388bb2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,6 @@ import os +import json +import urllib import pytest from sherlock.sites import SitesInformation @@ -12,3 +14,10 @@ def sites_info(): sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")) sites_iterable = {site.name: site.information for site in sites_obj} yield sites_iterable + +@pytest.fixture(scope="session") +def remote_schema(): + schema_url: str = 'https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock/resources/data.schema.json' + with urllib.request.urlopen(schema_url) as remoteschema: + schemadat = json.load(remoteschema) + yield schemadat diff --git a/tests/sherlock_interactives.py b/tests/sherlock_interactives.py index 548cade65..0e7124ce6 100644 --- a/tests/sherlock_interactives.py +++ b/tests/sherlock_interactives.py @@ -4,11 +4,17 @@ class Interactives: def run_cli(args: str = "") -> 
str: + """Pass arguments to Sherlock as a normal user on the command line""" command = [f"sherlock {args}"] - proc_out = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT) - return proc_out.decode() + proc_out: str = "" + try: + proc_out = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT) + return proc_out.decode() + except subprocess.CalledProcessError as e: + raise InteractivesSubprocessError(e.output.decode()) def walk_sherlock_for_files_with(pattern: str) -> list[str]: + """Check all files within the Sherlock package for matching patterns""" pattern: re.Pattern = re.compile(pattern) matching_files: list[str] = [] for root, dirs, files in os.walk("sherlock"): @@ -20,3 +26,6 @@ def walk_sherlock_for_files_with(pattern: str) -> list[str]: if pattern.search(f.read()): matching_files.append(file_path) return matching_files + +class InteractivesSubprocessError(Exception): + pass diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 689a0d471..0d72c6383 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -3,21 +3,31 @@ import pytest from jsonschema import validate -def validate_json(jsonfile: str, schemafile: str) -> bool: - with open(jsonfile, 'r') as f: +def test_validate_manifest_against_local_schema(): + """Ensures that the manifest matches the local schema, for situations where the schema is being changed.""" + json_relative: str = '../sherlock/resources/data.json' + schema_relative: str = '../sherlock/resources/data.schema.json' + + json_path: str = os.path.join(os.path.dirname(__file__), json_relative) + schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative) + + with open(json_path, 'r') as f: jsondat = json.load(f) - with open(schemafile, 'r') as f: + with open(schema_path, 'r') as f: schemadat = json.load(f) + validate(instance=jsondat, schema=schemadat) - return True -def test_validate_manifest_against_schema(): + +def 
test_validate_manifest_against_remote_schema(remote_schema): + """Ensures that the manifest matches the remote schema, so as to not unexpectedly break clients.""" json_relative: str = '../sherlock/resources/data.json' - schema_relative: str = '../sherlock/resources/data.schema.json' - json_path: str = os.path.join(os.path.dirname(__file__), json_relative) - schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative) - validate_json(jsonfile=json_path, schemafile=schema_path) + + with open(json_path, 'r') as f: + jsondat = json.load(f) + + validate(instance=jsondat, schema=remote_schema) # Ensure that the expected values are beind returned by the site list @pytest.mark.parametrize("target_name,target_expected_err_type", [ diff --git a/tests/test_ux.py b/tests/test_ux.py index 08f6948f7..34230d9c2 100644 --- a/tests/test_ux.py +++ b/tests/test_ux.py @@ -1,5 +1,8 @@ import pytest +import subprocess from sherlock import sherlock +from sherlock_interactives import Interactives +from sherlock_interactives import InteractivesSubprocessError def test_remove_nsfw(sites_obj): nsfw_target: str = 'Pornhub' @@ -25,9 +28,22 @@ def test_wildcard_username_expansion(): assert sherlock.check_for_parameter('test{?}test') is True assert sherlock.check_for_parameter('test{.}test') is False assert sherlock.check_for_parameter('test{}test') is False + assert sherlock.check_for_parameter('testtest') is False + assert sherlock.check_for_parameter('test{?test') is False + assert sherlock.check_for_parameter('test?}test') is False assert sherlock.multiple_usernames('test{?}test') == ["test_test" , "test-test" , "test.test"] +@pytest.mark.parametrize('cliargs', [ + '', + '--site urghrtuight --egiotr', + '--', +]) +def test_no_usernames_provided(cliargs): + with pytest.raises(InteractivesSubprocessError, match=r"error: the following arguments are required: USERNAMES"): + Interactives.run_cli(cliargs) + + #def test_area(self): # test_usernames = ["test{?}test" , "test{?feo" , 
"test"] From 030860c0a109cb991c22519dd345624b4b0f4d93 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 20 May 2024 16:33:05 -0400 Subject: [PATCH 37/55] Cleanup --- tests/test_ux.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/test_ux.py b/tests/test_ux.py index 34230d9c2..b78148242 100644 --- a/tests/test_ux.py +++ b/tests/test_ux.py @@ -42,13 +42,3 @@ def test_wildcard_username_expansion(): def test_no_usernames_provided(cliargs): with pytest.raises(InteractivesSubprocessError, match=r"error: the following arguments are required: USERNAMES"): Interactives.run_cli(cliargs) - - - -#def test_area(self): -# test_usernames = ["test{?}test" , "test{?feo" , "test"] -# for name in test_usernames: -# if(sh.check_for_parameter(name)): -# self.assertAlmostEqual(sh.multiple_usernames(name), ["test_test" , "test-test" , "test.test"]) -# else: -# self.assertAlmostEqual(name, name) From 790305bc077ccf358d2df8658d2a5604acbde251 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 20 May 2024 16:56:38 -0400 Subject: [PATCH 38/55] Fix ci --- .github/workflows/pull_request.yml | 68 +++++++++++++----------------- tox.ini | 14 +++--- 2 files changed, 37 insertions(+), 45 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index a70ad44d7..b8ba1d45e 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -1,47 +1,39 @@ -name: Pull Request Action +name: Regression Testing on: pull_request: branches: [ master ] + push: + branches: [ master, feature/tox ] jobs: - getchange: - runs-on: ubuntu-latest - outputs: - matrix: ${{ steps.changes.outputs.matrix }} - steps: - - id: changes - run: | - URL="https://api.github.com/repos/sherlock-project/sherlock/pulls/${{ github.event.pull_request.number }}/files" - FILES=$(curl -s -X GET -G $URL | jq -r '.[] | .filename') - if echo $FILES | grep -q ".json"; then - echo "::set-output name=matrix::{\"include\":[{\"python\":\"3.x\"}]}" - 
else - echo "::set-output name=matrix::{\"include\":[{\"python\":\"3.7\"},{\"python\":\"3.8\"}]},{\"python\":\"3.9\"},{\"python\":\"3.10\"}]},{\"python\":\"3.11\"},{\"python\":\"3.12\"}]}" - fi - tests: - needs: [getchange] - runs-on: ubuntu-latest + regression-testing: + runs-on: ${{ matrix.os }} strategy: - matrix: ${{ fromJson(needs.getchange.outputs.matrix) }} + matrix: + os: [ + ubuntu-latest, + windows-latest, + ] + python-version: [ + '3.8', + '3.9', + '3.10', + '3.11', + '3.12', + '3.13', + ] steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python }} - - name: Install Dependencies - run: | - python -m pip install --upgrade pip - pip install ruff flake8 pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint With Ruff - run: | - # stop the build if there are Python syntax errors or undefined names - ruff check . --output-format=github --select=E9,F63,F7,F82 - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Sherlock Site Detect Tests - run: | - cd sherlock && python -m unittest tests.all.SherlockDetectTests --verbose + - uses: actions/checkout@v4 + - name: Set up environment ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install tox and related dependencies + run: | + python -m pip install --upgrade pip + pip install tox + pip install tox-gh-actions + - name: Run tox + run: tox diff --git a/tox.ini b/tox.ini index d0a5d84d2..8b518609b 100644 --- a/tox.ini +++ b/tox.ini @@ -28,11 +28,11 @@ deps = commands = ruff check -[gh] +[gh-actions] python = - 3.13 = py31 - 3.12 = py312 - 3.11 = py311 - 3.10 = py310 - 3.9 = py39 - 3.8 = py38 + 3.13: py31 + 3.12: py312 + 3.11: py311 + 3.10: py310 + 3.9: py39 + 3.8: py38 From 249bab36ebcd65fe7ad85c90ee9ac057ecf0ee4a Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 20 May 2024 16:58:06 -0400 Subject: [PATCH 39/55] Drop py313 from ci --- .github/workflows/pull_request.yml | 1 - tox.ini | 1 - 2 files changed, 2 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index b8ba1d45e..029fba302 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -21,7 +21,6 @@ jobs: '3.10', '3.11', '3.12', - '3.13', ] steps: diff --git a/tox.ini b/tox.ini index 8b518609b..58371324f 100644 --- a/tox.ini +++ b/tox.ini @@ -30,7 +30,6 @@ commands = [gh-actions] python = - 3.13: py31 3.12: py312 3.11: py311 3.10: py310 From e32a84ea0526ccca7d21f8496a5d8b828f2fce0c Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 20 May 2024 17:04:04 -0400 Subject: [PATCH 40/55] Fix tox caught errors --- tests/sherlock_interactives.py | 3 ++- tests/test_ux.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/sherlock_interactives.py b/tests/sherlock_interactives.py index 0e7124ce6..a05440b95 100644 
--- a/tests/sherlock_interactives.py +++ b/tests/sherlock_interactives.py @@ -13,7 +13,8 @@ def run_cli(args: str = "") -> str: except subprocess.CalledProcessError as e: raise InteractivesSubprocessError(e.output.decode()) - def walk_sherlock_for_files_with(pattern: str) -> list[str]: + # -> list[str] is prefered, but will require deprecation of support for Python 3.8 + def walk_sherlock_for_files_with(pattern: str) -> list: """Check all files within the Sherlock package for matching patterns""" pattern: re.Pattern = re.compile(pattern) matching_files: list[str] = [] diff --git a/tests/test_ux.py b/tests/test_ux.py index b78148242..c14035f58 100644 --- a/tests/test_ux.py +++ b/tests/test_ux.py @@ -1,5 +1,4 @@ import pytest -import subprocess from sherlock import sherlock from sherlock_interactives import Interactives from sherlock_interactives import InteractivesSubprocessError From beb4f3eaf6f4432d18e6dcb0c5bff9ddcc072dd5 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 20 May 2024 17:07:07 -0400 Subject: [PATCH 41/55] Disable fail-fast --- .github/workflows/pull_request.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 029fba302..00619e55c 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -10,6 +10,7 @@ jobs: regression-testing: runs-on: ${{ matrix.os }} strategy: + fail-fast: false # We want to know what version it fails on matrix: os: [ ubuntu-latest, From 8bd8b20f9cbe4a64a5caed385a3b1d886717a1ee Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 20 May 2024 17:09:45 -0400 Subject: [PATCH 42/55] Remove Windows --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 00619e55c..8fb797890 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -14,7 +14,7 @@ jobs: 
matrix: os: [ ubuntu-latest, - windows-latest, + #windows-latest, # Need to adapt test_no_usernames_provided, test_versioning ] python-version: [ '3.8', From 906575df3a5e634ad6abbfa4cee3816e8eee5a71 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 20 May 2024 17:11:35 -0400 Subject: [PATCH 43/55] Remove test branch from ci --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 8fb797890..033d59543 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -4,7 +4,7 @@ on: pull_request: branches: [ master ] push: - branches: [ master, feature/tox ] + branches: [ master ] jobs: regression-testing: From 4aaf0583c52fb4dd76a77c1a4a86a0f4fe85b1a4 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 20 May 2024 23:24:03 -0400 Subject: [PATCH 44/55] Fix platform differences --- .github/workflows/pull_request.yml | 5 +++-- tests/sherlock_interactives.py | 17 ++++++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 033d59543..6862819ff 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -4,7 +4,7 @@ on: pull_request: branches: [ master ] push: - branches: [ master ] + branches: [ master, feature/tox ] jobs: regression-testing: @@ -14,7 +14,8 @@ jobs: matrix: os: [ ubuntu-latest, - #windows-latest, # Need to adapt test_no_usernames_provided, test_versioning + windows-latest, + macos-latest, ] python-version: [ '3.8', diff --git a/tests/sherlock_interactives.py b/tests/sherlock_interactives.py index a05440b95..d0424af11 100644 --- a/tests/sherlock_interactives.py +++ b/tests/sherlock_interactives.py @@ -1,23 +1,30 @@ import os +import platform import re import subprocess class Interactives: - def run_cli(args: str = "") -> str: + def run_cli(args:str = "") -> str: 
"""Pass arguments to Sherlock as a normal user on the command line""" - command = [f"sherlock {args}"] - proc_out: str = "" + # Adapt for platform differences (Windows likes to be special) + if platform.system == "Windows": + command:str = f"py -m sherlock {args}" + else: + command:str = f"sherlock {args}" + + proc_out:str = "" try: proc_out = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT) return proc_out.decode() except subprocess.CalledProcessError as e: raise InteractivesSubprocessError(e.output.decode()) + # -> list[str] is prefered, but will require deprecation of support for Python 3.8 def walk_sherlock_for_files_with(pattern: str) -> list: """Check all files within the Sherlock package for matching patterns""" - pattern: re.Pattern = re.compile(pattern) - matching_files: list[str] = [] + pattern:re.Pattern = re.compile(pattern) + matching_files:list[str] = [] for root, dirs, files in os.walk("sherlock"): for file in files: file_path = os.path.join(root,file) From e5736d388894f073bceab21e74d64a51be2a5b3e Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 20 May 2024 23:35:45 -0400 Subject: [PATCH 45/55] Fix platform dependant issues --- tests/test_version.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/test_version.py b/tests/test_version.py index 4088c0b0b..68119c9ed 100644 --- a/tests/test_version.py +++ b/tests/test_version.py @@ -1,3 +1,4 @@ +import os from sherlock_interactives import Interactives import sherlock @@ -6,4 +7,11 @@ def test_versioning() -> None: assert sherlock.__version__ in Interactives.run_cli("--version") # Ensure __init__ is single source of truth for __version__ in package # Temporarily allows sherlock.py so as to not trigger early upgrades - assert Interactives.walk_sherlock_for_files_with(r'__version__ *= *') == [ "sherlock/__init__.py", "sherlock/sherlock.py" ] + found:list = Interactives.walk_sherlock_for_files_with(r'__version__ *= *') + expected:list = [ + # 
Normalization is REQUIRED for Windows ( / vs \ ) + os.path.normpath("sherlock/__init__.py"), + os.path.normpath("sherlock/sherlock.py"), + ] + # Sorting is REQUIRED for Mac + assert sorted(found) == sorted(expected) From 9579f941bec7a9cd76f27a03239247aa31164443 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Tue, 21 May 2024 00:04:27 -0400 Subject: [PATCH 46/55] Remove test branch --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 6862819ff..e3bb803a9 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -4,7 +4,7 @@ on: pull_request: branches: [ master ] push: - branches: [ master, feature/tox ] + branches: [ master ] jobs: regression-testing: From d46775802e6bae68983f018efb40c00da1e1359c Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Tue, 21 May 2024 04:04:05 -0400 Subject: [PATCH 47/55] Simple docu change Co-authored-by: Siddharth Dushantha --- docs/INSTALL.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/INSTALL.md b/docs/INSTALL.md index 52c70086e..a16f51742 100644 --- a/docs/INSTALL.md +++ b/docs/INSTALL.md @@ -48,7 +48,7 @@ Note that the version number will be 0.0.0 for pipx local builds unless manually pipx install -e . ``` -### Run package from source (no install) +### Run package from source (without installing) If you'd rather not install directly to your system, you can import the module at runtime with `-m`. From 67258b58a48e1211cd9755a0037b6b54f4c9b1c9 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Tue, 21 May 2024 21:19:35 -0400 Subject: [PATCH 48/55] Adapt for online testing When using tox, pass `-e offline` to exclude online tests. When using pytest, pass `-m "not online"` to do the same. 
--- pytest.ini | 4 ++ tests/test_manifest.py | 1 + tests/test_probes.py | 129 +++++++++++++++++++++-------------------- tox.ini | 7 +++ 4 files changed, 78 insertions(+), 63 deletions(-) create mode 100644 pytest.ini diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..e4bb93a38 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +addopts = --strict-markers +markers = + online: mark tests are requiring interest access. \ No newline at end of file diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 0d72c6383..177af21ee 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -19,6 +19,7 @@ def test_validate_manifest_against_local_schema(): validate(instance=jsondat, schema=schemadat) +@pytest.mark.online def test_validate_manifest_against_remote_schema(remote_schema): """Ensures that the manifest matches the remote schema, so as to not unexpectedly break clients.""" json_relative: str = '../sherlock/resources/data.json' diff --git a/tests/test_probes.py b/tests/test_probes.py index 4c7527a2b..39e0ef0b5 100644 --- a/tests/test_probes.py +++ b/tests/test_probes.py @@ -19,77 +19,80 @@ def simple_query(sites_info: dict, site: str, username: str) -> QueryStatus: )[site]['status'].status -# Known positives should only use sites trusted to be reliable and unchanging -@pytest.mark.parametrize('site,username',[ - ('GitLab', 'ppfeister'), - ('AllMyLinks', 'blue'), -]) -def test_known_positives_via_message(sites_info, site, username): - assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED +@pytest.mark.online +class TestLiveTargets: + """Actively test probes against live and trusted targets""" + # Known positives should only use sites trusted to be reliable and unchanging + @pytest.mark.parametrize('site,username',[ + ('GitLab', 'ppfeister'), + ('AllMyLinks', 'blue'), + ]) + def test_known_positives_via_message(self, sites_info, site, username): + assert 
simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED -# Known positives should only use sites trusted to be reliable and unchanging -@pytest.mark.parametrize('site,username',[ - ('GitHub', 'ppfeister'), - ('GitHub', 'sherlock-project'), - ('Docker Hub', 'ppfeister'), - ('Docker Hub', 'sherlock'), -]) -def test_known_positives_via_status_code(sites_info, site, username): - assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED + # Known positives should only use sites trusted to be reliable and unchanging + @pytest.mark.parametrize('site,username',[ + ('GitHub', 'ppfeister'), + ('GitHub', 'sherlock-project'), + ('Docker Hub', 'ppfeister'), + ('Docker Hub', 'sherlock'), + ]) + def test_known_positives_via_status_code(self, sites_info, site, username): + assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED -# Known positives should only use sites trusted to be reliable and unchanging -@pytest.mark.parametrize('site,username',[ - ('BodyBuilding', 'blue'), - ('labpentestit', 'CSV'), -]) -def test_known_positives_via_response_url(sites_info, site, username): - assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED + # Known positives should only use sites trusted to be reliable and unchanging + @pytest.mark.parametrize('site,username',[ + ('BodyBuilding', 'blue'), + ('labpentestit', 'CSV'), + ]) + def test_known_positives_via_response_url(self, sites_info, site, username): + assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED -# Randomly generate usernames of high length and test for positive availability -# Randomly generated usernames should be simple alnum for simplicity and high -# compatibility. Several attempts may be made ~just in case~ a real username is -# generated. 
-@pytest.mark.parametrize('site,random_len',[ - ('GitLab', 255), - ('Codecademy', 30) -]) -def test_likely_negatives_via_message(sites_info, site, random_len): - num_attempts: int = 3 - attempted_usernames: list[str] = [] - status: QueryStatus = QueryStatus.CLAIMED - for i in range(num_attempts): - acceptable_types = string.ascii_letters + string.digits - random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len)) - attempted_usernames.append(random_handle) - status = simple_query(sites_info=sites_info, site=site, username=random_handle) - if status is QueryStatus.AVAILABLE: - break - assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." + # Randomly generate usernames of high length and test for positive availability + # Randomly generated usernames should be simple alnum for simplicity and high + # compatibility. Several attempts may be made ~just in case~ a real username is + # generated. + @pytest.mark.parametrize('site,random_len',[ + ('GitLab', 255), + ('Codecademy', 30) + ]) + def test_likely_negatives_via_message(self, sites_info, site, random_len): + num_attempts: int = 3 + attempted_usernames: list[str] = [] + status: QueryStatus = QueryStatus.CLAIMED + for i in range(num_attempts): + acceptable_types = string.ascii_letters + string.digits + random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len)) + attempted_usernames.append(random_handle) + status = simple_query(sites_info=sites_info, site=site, username=random_handle) + if status is QueryStatus.AVAILABLE: + break + assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." 
-# Randomly generate usernames of high length and test for positive availability -# Randomly generated usernames should be simple alnum for simplicity and high -# compatibility. Several attempts may be made ~just in case~ a real username is -# generated. -@pytest.mark.parametrize('site,random_len',[ - ('GitHub', 39), - ('Docker Hub', 30) -]) -def test_likely_negatives_via_status_code(sites_info, site, random_len): - num_attempts: int = 3 - attempted_usernames: list[str] = [] - status: QueryStatus = QueryStatus.CLAIMED - for i in range(num_attempts): - acceptable_types = string.ascii_letters + string.digits - random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len)) - attempted_usernames.append(random_handle) - status = simple_query(sites_info=sites_info, site=site, username=random_handle) - if status is QueryStatus.AVAILABLE: - break - assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." + # Randomly generate usernames of high length and test for positive availability + # Randomly generated usernames should be simple alnum for simplicity and high + # compatibility. Several attempts may be made ~just in case~ a real username is + # generated. 
+ @pytest.mark.parametrize('site,random_len',[ + ('GitHub', 39), + ('Docker Hub', 30) + ]) + def test_likely_negatives_via_status_code(self, sites_info, site, random_len): + num_attempts: int = 3 + attempted_usernames: list[str] = [] + status: QueryStatus = QueryStatus.CLAIMED + for i in range(num_attempts): + acceptable_types = string.ascii_letters + string.digits + random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len)) + attempted_usernames.append(random_handle) + status = simple_query(sites_info=sites_info, site=site, username=random_handle) + if status is QueryStatus.AVAILABLE: + break + assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." def test_username_illegal_regex(sites_info): diff --git a/tox.ini b/tox.ini index 58371324f..0c309410b 100644 --- a/tox.ini +++ b/tox.ini @@ -21,6 +21,13 @@ commands = coverage run --source=sherlock --module pytest -v coverage report --show-missing +[testenv:offline] +deps = + jsonschema + pytest +commands = + pytest -v -m "not online" + [testenv:lint] description = Lint with Ruff deps = From b2ddd9a3961f529d837d76c25e133b8152be246a Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Fri, 31 May 2024 17:08:50 -0400 Subject: [PATCH 49/55] Re-ID Regression CI --- .github/workflows/{pull_request.yml => regression.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename .github/workflows/{pull_request.yml => regression.yml} (97%) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/regression.yml similarity index 97% rename from .github/workflows/pull_request.yml rename to .github/workflows/regression.yml index e3bb803a9..af12916c3 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/regression.yml @@ -7,7 +7,7 @@ on: branches: [ master ] jobs: - regression-testing: + tox-matrix: runs-on: ${{ matrix.os }} strategy: fail-fast: false # We want to 
know what version it fails on From 9701e4face434938269ef06d15750578672982dd Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Fri, 31 May 2024 18:24:13 -0400 Subject: [PATCH 50/55] Remove Nightly Workflow found to be ineffective after the removal of unclaimed_usernames. All sites skipped by tests due to the lack of this value, leading to false success of this test. Workflow will be eventually rewritten following the new standard. --- .github/workflows/nightly.yml | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 .github/workflows/nightly.yml diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml deleted file mode 100644 index ed55a168e..000000000 --- a/.github/workflows/nightly.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: Nightly - -on: - schedule: - # Run Nightly Tests At 3AM (The Hour Of The Wolf) Every Day - - cron: '0 3 * * *' - -jobs: - tests: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [3.x] - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install Dependencies - run: | - python -m pip install --upgrade pip - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Sherlock Site Coverage Tests - run: | - cd sherlock && python -m unittest tests.all.SherlockSiteCoverageTests --verbose From c812216cc5fecbc7add7c1362df106e2a580955d Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Fri, 31 May 2024 18:24:53 -0400 Subject: [PATCH 51/55] Remove Main Workflow made redundant with the addition of Regression workflow, which runs on both push to and PR against master. 
--- .github/workflows/main.yml | 38 -------------------------------------- 1 file changed, 38 deletions(-) delete mode 100644 .github/workflows/main.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index abd49f43f..000000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: Tests - -on: - push: - branches: [ master ] - -jobs: - tests: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: - - '3.12' - - '3.11' - - '3.10' - - '3.9' - - '3.8' - - '3.7' - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install Dependencies - run: | - python -m pip install --upgrade pip - pip install ruff flake8 pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with ruff - run: | - # stop the build if there are Python syntax errors or undefined names - ruff . --output-format=github --select=E9,F63,F7,F82 - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Sherlock Site Detect Tests - run: | - cd sherlock && python -m unittest tests.all.SherlockDetectTests --verbose From b0521d6f5ca42486141bb767f78e4831691b81b2 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 1 Jun 2024 16:02:40 -0400 Subject: [PATCH 52/55] 'pwd' -> 'current working directory' Co-authored-by: Siddharth Dushantha --- docs/CONTRIBUTING.md | 2 +- docs/INSTALL.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index fe91e7ca4..1056ec8c9 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -68,7 +68,7 @@ Sherlock. This invocation hides the progress text that Sherlock normally outputs, and instead shows the verbose output of the tests. 
```console -# Assumes pwd is respository root +# Assumes current working directory is repository root $ python3 -m unittest tests.all --verbose ``` diff --git a/docs/INSTALL.md b/docs/INSTALL.md index a16f51742..d00be9158 100644 --- a/docs/INSTALL.md +++ b/docs/INSTALL.md @@ -44,7 +44,7 @@ Building an editable (or live) package links the entry point to your current dir Note that the version number will be 0.0.0 for pipx local builds unless manually changed in the pyproject file (it will prompt the user for an update). ```bash -# Assumes repository cloned, and pwd is repository root +# Assumes repository cloned, and current working directory is repository root pipx install -e . ``` @@ -53,7 +53,7 @@ pipx install -e . If you'd rather not install directly to your system, you can import the module at runtime with `-m`. ```bash -# Assumes repository cloned, and pwd is repository root +# Assumes repository cloned, and current working directory is repository root python3 -m sherlock user123 user789 ``` From b728ce06599c350fed91d5ef47d3b599e5dba4f6 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sat, 1 Jun 2024 16:09:39 -0400 Subject: [PATCH 53/55] Cleanup --- .github/CODEOWNERS | 5 ++++- .github/FUNDING.yml | 2 +- pytest.ini | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 71b3e4deb..31fade06b 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,12 +1,15 @@ ### REPOSITORY /.github/CODEOWNERS @sdushantha +/.github/FUNDING.yml @sdushantha /LICENSE @sdushantha ### PACKAGING # Changes made to these items without code owner approval may negatively -# impact packaging pipelines. Code owners may need time to verify or adapt. +# impact packaging pipelines. 
/pyproject.toml @ppfeister @sdushantha ### REGRESSION +/.github/workflows/regression.yml @ppfeister /tox.ini @ppfeister +/pytest.ini @ppfeister /tests/ @ppfeister diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index b21b31ed8..fa3cf449a 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1 +1 @@ -github: [ sdushantha, ppfeister ] \ No newline at end of file +github: [ sdushantha, ppfeister ] diff --git a/pytest.ini b/pytest.ini index e4bb93a38..11cfcf6eb 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] addopts = --strict-markers markers = - online: mark tests are requiring interest access. \ No newline at end of file + online: mark tests as requiring internet access. \ No newline at end of file From 87f2f08f237430adbf16ae3c546cbf2aea3b4d90 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sun, 2 Jun 2024 02:17:04 -0400 Subject: [PATCH 54/55] Add matheusfelipeog --- .github/FUNDING.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index fa3cf449a..b15d1a23a 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1 +1 @@ -github: [ sdushantha, ppfeister ] +github: [ sdushantha, ppfeister, matheusfelipeog ] From 850528fb87f6f59892f5707f439c3853db5aeda7 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Sun, 2 Jun 2024 02:19:39 -0400 Subject: [PATCH 55/55] Add newline --- pytest.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index 11cfcf6eb..bc1df7de2 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] addopts = --strict-markers markers = - online: mark tests as requiring internet access. \ No newline at end of file + online: mark tests as requiring internet access.