diff --git a/bigbang/ingress/w3c.py b/bigbang/ingress/w3c.py index c9d0262..07b9f4b 100644 --- a/bigbang/ingress/w3c.py +++ b/bigbang/ingress/w3c.py @@ -103,10 +103,10 @@ def _get_header_from_html(self, soup: BeautifulSoup) -> Dict[str, str]: soup : HTML code from which the Email header can be obtained. """ header = { - "message-ID": "#message-id", - "Date": "#date", - "To": "#to", - "Cc": "#cc", + "message-ID": ".message-id", + "Date": ".date", + "To": ".to", + "Cc": ".cc", } for key, value in header.items(): try: @@ -116,9 +116,9 @@ def _get_header_from_html(self, soup: BeautifulSoup) -> Dict[str, str]: continue header["Subject"] = text_for_selector(soup, "h1") - from_text = parse_dfn_header(text_for_selector(soup, "#from")) + from_text = parse_dfn_header(text_for_selector(soup, ".from")) from_name = from_text.split("<")[0].strip() - from_address = text_for_selector(soup, "#from a") + from_address = text_for_selector(soup, ".from a") header["From"] = email.utils.formataddr( (from_name, email.header.Header(from_address).encode()) ) @@ -144,7 +144,7 @@ def _get_body_from_html( """ # TODO re-write using email.parser.Parser try: - return text_for_selector(soup, "#body") + return text_for_selector(soup, ".body") except Exception: logger.exception(f"The message body of {url} could not be loaded.") return None