diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..41f6315
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,2 @@
+OPENAI_API_KEY=""
+LAKERA_GUARD_ACCESS_KEY=""
\ No newline at end of file
diff --git a/README.md b/README.md
index f2fdc71..eb5f268 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,7 @@ To run and interact with this notebook locally, follow the instructions below.
 
 1. Python `>=3.11`
 2. [OpenAI API Key](https://platform.openai.com/account/api-keys)
+3. [Lakera Guard Access Key](https://platform.lakera.ai/account/api-keys) (Optional)
 
 ### Installation
 
@@ -43,10 +44,16 @@ To run and interact with this notebook locally, follow the instructions below.
 
    **Note**: you can use whatever Python environment manager you prefer.
 
-4. Run the notebook
+4. Create a `.env` file and fill in your API keys
+
+   ```shell
+   cp .env.example .env
+   ```
+
+5. Run the notebook
 
    ```shell
    jupyter notebook
    ```
 
-5. Open the notebook in your browser and follow along
+6. Open the notebook in your browser and follow along
diff --git a/SentimentAnalysisWorkshop.ipynb b/SentimentAnalysisWorkshop.ipynb
index 7946372..42cf796 100644
--- a/SentimentAnalysisWorkshop.ipynb
+++ b/SentimentAnalysisWorkshop.ipynb
@@ -14,7 +14,11 @@
   "source": [
    "%%capture\n",
    "\n",
-    "%pip install openai nltk ipywidgets numpy requests-cache backoff tiktoken nrclex pandas"
+    "%pip install openai nltk ipywidgets numpy requests-cache backoff tiktoken nrclex pandas python-dotenv\n",
+    "\n",
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "load_dotenv()"
   ]
  },
  {
@@ -82,7 +86,8 @@
    "nltk.download(\"punkt\")\n",
    "\n",
    "# globals\n",
-    "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
+    "OPENAI_API_KEY = os.environ.get(\"OPENAI_API_KEY\")\n",
+    "LAKERA_GUARD_ACCESS_KEY = os.environ.get(\"LAKERA_GUARD_ACCESS_KEY\")\n",
    "TEMPERATURE = 0.37\n",
    "STORY_SAMPLE_SIZE = 10"
   ]
@@ -125,6 +130,9 @@
    "    sampleSizeWarningLabel,\n",
    "    openAiHeader,\n",
    "    hackerNewsHeader,\n",
+    "    lakeraKeyInput,\n",
+    "    lakeraKeyUpdateButton,\n",
+    "    lakeraHeader,\n",
    ")\n",
    "\n",
    "# project-specific widgets\n",
@@ -136,16 +144,25 @@
    "from utils.obfuscate import obfuscateKey\n",
    "from utils.array import checkArrayLengths\n",
    "from utils.modelName import getModelNameFromId\n",
-    "from utils.dataset import Story, StoryData, collateSentimentData, collateModelData\n",
+    "from utils.dataset import (\n",
+    "    Story,\n",
+    "    StoryData,\n",
+    "    collateSentimentData,\n",
+    "    collateModelData,\n",
+    "    collateSafetyData,\n",
+    ")\n",
    "\n",
    "# we don't want to display too many entries in our DataFrames\n",
    "# if the sample size is too large\n",
    "DATAFRAME_LIMIT = 20\n",
    "\n",
-    "# we'll use this session to cache our hacker news api requests\n",
+    "# we'll use these sessions to cache our hacker news and lakera api requests\n",
    "REQUEST_CACHE_EXPIRATION_SECONDS = 60 * 15\n",
    "session = requests_cache.CachedSession(\n",
    "    \"hackernews_cache\", expire_after=REQUEST_CACHE_EXPIRATION_SECONDS\n",
+    ")\n",
+    "lakera = requests_cache.CachedSession(\n",
+    "    \"lakera_cache\", expire_after=REQUEST_CACHE_EXPIRATION_SECONDS\n",
    ")"
   ]
  },
@@ -163,7 +180,10 @@
    "- **Open AI API Key**: Your [OpenAI API key](https://platform.openai.com/account/api-keys) is read from the `$OPENAI_API_KEY` environment variable if it's set, but you can override it in this notebook; when you click the **Update Key** button the key you entered will be obfuscated and stored in the `OPENAI_API_KEY` global variable\n",
    "- **Model**: The [OpenAI model](https://platform.openai.com/docs/models) that the demo should use; you can choose between the `gtp-3.5-turbo` and `gpt-4` models for this demo\n",
    "- **Temperature**: A model's [temperature](https://platform.openai.com/docs/guides/gpt/how-should-i-set-the-temperature-parameter) is a measure of how \"creative\" it's response will be; you can set this to `0` for something pretty close to deterministic responses to simple queries\n",
-    "- **Sample Size**: We'll be gathering the top stories from the [Hacker News API](https://github.com/HackerNews/API) and then analyzing the sentiment of a sample of those stories' titles; this controls how large that sample is\n"
+    "- **Lakera Guard Access Key**: (_optional_) Your [Lakera Guard Access Key](https://platform.lakera.ai/account/api-keys) is read from the `$LAKERA_GUARD_ACCESS_KEY` environment variable if it's set, but you can override it in this notebook; when you click the **Update Lakera Key** button the key you entered will be obfuscated and stored in the `LAKERA_GUARD_ACCESS_KEY` global variable\n",
+    "- **Sample Size**: We'll be gathering the top stories from the [Hacker News API](https://github.com/HackerNews/API) and then analyzing the sentiment of a sample of those stories' titles; this controls how large that sample is\n",
+    "\n",
+    "**Note**: For the environment variables, you can copy the `.env.example` file to `.env` and fill in your keys, or rely on the variables already being set in your shell at runtime.\n"
   ]
  },
  {
@@ -182,6 +202,7 @@
    "# configure some settings that other cells in this notebook rely on\n",
    "# you can just ignore/collapse it if you would prefer\n",
    "apiKeyInput.value = obfuscateKey(OPENAI_API_KEY)\n",
+    "lakeraKeyInput.value = obfuscateKey(LAKERA_GUARD_ACCESS_KEY)\n",
    "sampleSizeSlider.value = STORY_SAMPLE_SIZE\n",
    "temperatureSlider.value = TEMPERATURE\n",
    "\n",
@@ -192,6 +213,12 @@
    "    apiKeyInput.value = obfuscateKey(OPENAI_API_KEY)\n",
    "\n",
    "\n",
+    "def updateLakeraKey(event):\n",
+    "    global LAKERA_GUARD_ACCESS_KEY\n",
+    "    LAKERA_GUARD_ACCESS_KEY = lakeraKeyInput.value\n",
+    "    lakeraKeyInput.value = obfuscateKey(LAKERA_GUARD_ACCESS_KEY)\n",
+    "\n",
+    "\n",
    "def updateSampleSize(change):\n",
    "    global STORY_SAMPLE_SIZE\n",
    "    STORY_SAMPLE_SIZE = change[\"new\"]\n",
@@ -205,15 +232,20 @@
    "temperatureSlider.observe(updateTemperature, names=\"value\")\n",
    "sampleSizeSlider.observe(updateSampleSize, names=\"value\")\n",
    "apiKeyUpdateButton.on_click(updateApiKey)\n",
+    "lakeraKeyUpdateButton.on_click(updateLakeraKey)\n",
    "\n",
    "apiKeyConfigWidget = pywidgets.HBox([apiKeyInput, apiKeyUpdateButton])\n",
    "openAiConfigWidget = pywidgets.VBox(\n",
    "    [openAiHeader, apiKeyConfigWidget, modelDropdown, temperatureSlider]\n",
    ")\n",
+    "lakeraKeyConfigWidget = pywidgets.HBox([lakeraKeyInput, lakeraKeyUpdateButton])\n",
+    "lakeraConfigWidget = pywidgets.VBox([lakeraHeader, lakeraKeyConfigWidget])\n",
    "hackerNewsConfigWidget = pywidgets.VBox(\n",
    "    [hackerNewsHeader, sampleSizeSlider, sampleSizeWarningLabel]\n",
    ")\n",
-    "configWidget = pywidgets.VBox([openAiConfigWidget, hackerNewsConfigWidget])\n",
+    "configWidget = pywidgets.VBox(\n",
+    "    [openAiConfigWidget, lakeraConfigWidget, hackerNewsConfigWidget]\n",
+    ")\n",
    "\n",
    "display(configWidget)"
   ]
@@ -888,8 +920,6 @@
    "\n",
    "### How do we prevent it?\n",
    "\n",
-    "There are different strategies to try to mitigate this issue, but defending against prompt injeciton is a much larger topic - Prompt Injection is the #1 vulnerability in the Open Worldwide Application Security Project (OWASP) [Top 10 for LLM Applications list](https://owasp.org/www-project-top-10-for-large-language-model-applications/assets/PDF/OWASP-Top-10-for-LLMs-2023-v1_0_1.pdf). For critical applicaitons, it's worth considering a tool like [Lakera Guard](https://www.lakera.ai/insights/lakera-guard-overview) to help identify prompt injection attempts before sending them to your model.\n",
-    "\n",
    "For this demo, I just added some clarifying language to the prompt to try to get the model to avoid this issue with other article titles in the future:\n",
    "\n",
    "```\n",
@@ -908,7 +938,47 @@
    "\n",
    "While this example is harmless, it's an important reminder that just like we sanitize and guard against input from users in our applications, we'll need to do the same thing with our prompts.\n",
    "\n",
-    "If you're interested in learning more about Prompt Injection, the [Gandalf](https://gandalf.lakera.ai/) Capture the Flag (CTF) game from [Lakera](https://www.lakera.ai/) is a great way to learn more about it and explore its implications.\n"
+    "If you're interested in learning more about Prompt Injection, the [Gandalf](https://gandalf.lakera.ai/) Capture the Flag (CTF) game from [Lakera](https://www.lakera.ai/) is a great way to learn more about it and explore its implications.\n",
+    "\n",
+    "### Lakera Guard\n",
+    "\n",
+    "There are different strategies to try to mitigate this issue, but defending against prompt injection is a much larger topic - Prompt Injection is the #1 vulnerability in the Open Worldwide Application Security Project (OWASP) [Top 10 for LLM Applications list](https://owasp.org/www-project-top-10-for-large-language-model-applications/assets/PDF/OWASP-Top-10-for-LLMs-2023-v1_0_1.pdf).\n",
+    "\n",
+    "For critical applications, it's worth considering a tool like [Lakera Guard](https://www.lakera.ai/insights/lakera-guard-overview) to help identify prompt injection attempts before sending them to your model.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a6b5159c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def guard(text: str) -> dict:\n",
+    "    try:\n",
+    "        checkInput = lakera.post(\n",
+    "            \"https://api.lakera.ai/v1/guard\",\n",
+    "            json={\n",
+    "                \"input\": text\n",
+    "            },\n",
+    "            headers={\n",
+    "                \"Content-Type\": \"application/json\",\n",
+    "                \"Authorization\": f\"Bearer {LAKERA_GUARD_ACCESS_KEY}\",\n",
+    "            },\n",
+    "        )\n",
+    "\n",
+    "        return checkInput.json()\n",
+    "    except Exception:\n",
+    "        print(\"Error: Lakera Guard did not respond\")\n",
+    "        return {\"results\": [{}]}\n",
+    "\n",
+    "for _, story in stories.items():\n",
+    "    guardResults = guard(story[\"title\"])[\"results\"][0]\n",
+    "    story[\"guard\"] = guardResults\n",
+    "\n",
+    "    print(story[\"title\"])\n",
+    "    print(guardResults)\n",
+    "    print(\"---\")"
+   ]
+  },
+  {
@@ -1511,7 +1581,7 @@
   "source": [
    "### Prompting strategies\n",
    "\n",
-    "Finally, let's compare the zero shot, one shot, and few shot approaches to our emoji analyzer.\n"
+    "Let's compare the zero shot, one shot, and few shot approaches to our emoji analyzer.\n"
   ]
  },
  {
@@ -1552,6 +1622,91 @@
    "    )"
   ]
  },
+ {
+  "cell_type": "markdown",
+  "id": "cfb0df4b",
+  "metadata": {},
+  "source": [
+   "### Prompt safety\n",
+   "\n",
+   "Finally, let's take a look at the Lakera Guard findings for our stories.\n",
+   "\n",
+   "#### Gathering our data\n"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "4b1aa966",
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "# this cell is used to gather our data into an object that's easier to work with\n",
+   "# when displaying some dataframes with slices of what we've explored\n",
+   "\n",
+   "safetyData: dict[str, list[str]] = collateSafetyData(stories)\n",
+   "\n",
+   "print(safetyData)"
+  ]
+ },
+ {
+  "cell_type": "markdown",
+  "id": "a6e8779f",
+  "metadata": {},
+  "source": [
+   "Now let's render it out.\n"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "bcf71ac9",
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "# this cell is just used to display a dataframe with our Lakera Guard safety results\n",
+   "try:\n",
+   "    if checkArrayLengths(\n",
+   "        safetyData[\"Story\"],\n",
+   "        safetyData[\"Flagged\"],\n",
+   "        safetyData[\"Prompt Injection\"],\n",
+   "        safetyData[\"Jailbreak\"],\n",
+   "        safetyData[\"Sexual Content\"],\n",
+   "        safetyData[\"Hate Speech\"],\n",
+   "        safetyData[\"PII\"],\n",
+   "        safetyData[\"Unknown Links\"],\n",
+   "        safetyData[\"Relevant Language\"],\n",
+   "    ):\n",
+   "        safetyDataFrame = pd.DataFrame(\n",
+   "            data=safetyData,\n",
+   "            columns=[\n",
+   "                \"Story\",\n",
+   "                \"Flagged\",\n",
+   "                \"Prompt Injection\",\n",
+   "                \"Jailbreak\",\n",
+   "                \"Sexual Content\",\n",
+   "                \"Hate Speech\",\n",
+   "                \"PII\",\n",
+   "                \"Unknown Links\",\n",
+   "                \"Relevant Language\",\n",
+   "            ],\n",
+   "        )\n",
+   "\n",
+   "        display(\n",
+   "            safetyDataFrame\n",
+   "            if STORY_SAMPLE_SIZE <= DATAFRAME_LIMIT\n",
+   "            else safetyDataFrame.head(DATAFRAME_LIMIT)\n",
+   "        )\n",
+   "    else:\n",
+   "        print(\n",
+   "            \"Error: Different number of stories and Lakera Guard results. Please rerun the Lakera Guard and Gathering Our Data cells above and then rerun this cell.\"\n",
+   "        )\n",
+   "except NameError:\n",
+   "    print(\n",
+   "        \"Error: No safety data to display. Please rerun the Gathering Our Data cell above and then rerun this cell.\"\n",
+   "    )"
+  ]
+ },
  {
   "cell_type": "markdown",
   "id": "a6f4828a",
diff --git a/requirements.txt b/requirements.txt
index 39caff0..d5ce4b0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,5 +7,6 @@ nrclex
 numpy
 openai
 pandas
+python-dotenv
 requests-cache
 tiktoken
\ No newline at end of file
diff --git a/utils/dataset.py b/utils/dataset.py
index 1f9a92f..91c7bd4 100644
--- a/utils/dataset.py
+++ b/utils/dataset.py
@@ -16,6 +16,39 @@
                 "openai": dict[str, str],
             },
         ),
+        "guard": TypedDict(
+            "LakeraGuardResults",
+            {
+                "categories": TypedDict(
+                    "LakeraGuardCategories",
+                    {
+                        "prompt_injection": bool,
+                        "jailbreak": bool,
+                        "sex": bool,
+                        "hate": bool,
+                        "pii": bool,
+                        "unknown_links": bool,
+                        "relevant_language": bool,
+                    },
+                ),
+                "category_scores": TypedDict(
+                    "LakeraGuardCategoryScores",
+                    {
+                        "prompt_injection": float,
+                        "jailbreak": float,
+                        "sex": float,
+                        "hate": float,
+                        "pii": float,
+                        "unknown_links": float,
+                        "relevant_language": float,
+                    },
+                ),
+                "flagged": bool,
+                "payload": dict[str, str],
+                "pii": str,
+                "links": list[str],
+            },
+        ),
     },
 )
 
@@ -91,7 +124,7 @@ def collateModelData(stories: StoryData) -> dict[str, list[str]]:
     modelData: dict[str, list[str]] = {"Story": []}
 
     try:
-        for storyId, story in stories.items():
+        for _, story in stories.items():
             if "openai" in story["sentiment"]:
                 # if we only have one model result, we'll skip this story
                 if not len(story["sentiment"]["openai"].keys()) > 1:
@@ -158,3 +191,69 @@
         print(
             "Error: Please rerun the ChatGPT example cells with each model and then rerun this cell."
         )
+
+
+def collateSafetyData(stories: StoryData) -> dict[str, list[str]]:
+    safetyData: dict[str, list[str]] = {
+        "Story": [],
+        "Flagged": [],
+        "Prompt Injection": [],
+        "Jailbreak": [],
+        "Sexual Content": [],
+        "Hate Speech": [],
+        "PII": [],
+        "Unknown Links": [],
+        "Relevant Language": [],
+    }
+
+    try:
+        for _, story in stories.items():
+            if "guard" in story:
+                safetyData["Story"].append(story["title"])
+
+                if "flagged" in story["guard"]:
+                    safetyData["Flagged"].append(story["guard"]["flagged"])
+
+                if "category_scores" in story["guard"]:
+                    if "prompt_injection" in story["guard"]["category_scores"]:
+                        safetyData["Prompt Injection"].append(
+                            story["guard"]["category_scores"]["prompt_injection"]
+                        )
+
+                    if "jailbreak" in story["guard"]["category_scores"]:
+                        safetyData["Jailbreak"].append(
+                            story["guard"]["category_scores"]["jailbreak"]
+                        )
+
+                    if "sex" in story["guard"]["category_scores"]:
+                        safetyData["Sexual Content"].append(
+                            story["guard"]["category_scores"]["sex"]
+                        )
+
+                    if "hate" in story["guard"]["category_scores"]:
+                        safetyData["Hate Speech"].append(
+                            story["guard"]["category_scores"]["hate"]
+                        )
+
+                    if "pii" in story["guard"]["category_scores"]:
+                        safetyData["PII"].append(
+                            story["guard"]["category_scores"]["pii"]
+                        )
+
+                    if "unknown_links" in story["guard"]["category_scores"]:
+                        safetyData["Unknown Links"].append(
+                            story["guard"]["category_scores"]["unknown_links"]
+                        )
+
+                    if "relevant_language" in story["guard"]["category_scores"]:
+                        safetyData["Relevant Language"].append(
+                            story["guard"]["category_scores"]["relevant_language"]
+                        )
+
+        return safetyData
+
+    except NameError:
+        print("Please run the cells above to gather and analyze some stories.")
+
+    if not len(safetyData["Story"]):
+        print("Error: Please rerun the Lakera Guard cell and then rerun this cell.")
diff --git a/widgets/config.py b/widgets/config.py
index 6c2e527..18e9ed8 100644
--- a/widgets/config.py
+++ b/widgets/config.py
@@ -3,6 +3,9 @@
 openAiHeader = widgets.Label(
     "OpenAI API", style=dict(font_size="1.2rem", font_weight="bold")
 )
+lakeraHeader = widgets.Label(
+    "Lakera Guard API", style=dict(font_size="1.2rem", font_weight="bold")
+)
 hackerNewsHeader = widgets.Label(
     "Hacker News API", style=dict(font_size="1.2rem", font_weight="bold")
 )
@@ -14,11 +17,23 @@
 )
 
 apiKeyUpdateButton = widgets.Button(
-    description="Update Key",
+    description="Update OpenAI Key",
     disabled=False,
     button_style="primary" if not apiKeyInput.value else "danger",
 )
 
+lakeraKeyInput = widgets.Text(
+    value="",
+    placeholder="Enter your Lakera Guard API key",
+    description="Lakera Key",
+)
+
+lakeraKeyUpdateButton = widgets.Button(
+    description="Update Lakera Key",
+    disabled=False,
+    button_style="primary" if not lakeraKeyInput.value else "danger",
+)
+
 modelDropdown = widgets.Dropdown(
     options=["gpt-3.5-turbo", "gpt-4"],
     value="gpt-3.5-turbo",