diff options
Diffstat (limited to 'autogpts/autogpt/autogpt/processing/html.py')
-rw-r--r-- | autogpts/autogpt/autogpt/processing/html.py | 33 |
1 files changed, 33 insertions, 0 deletions
"""HTML processing functions"""
from __future__ import annotations

from typing import TYPE_CHECKING
from urllib.parse import urljoin

if TYPE_CHECKING:
    # BeautifulSoup is referenced only in annotations, which are never
    # evaluated at runtime thanks to `from __future__ import annotations`.
    from bs4 import BeautifulSoup


def extract_hyperlinks(soup: BeautifulSoup, base_url: str) -> list[tuple[str, str]]:
    """Extract hyperlinks from a BeautifulSoup object

    Every ``<a>`` tag that carries an ``href`` attribute yields one
    ``(text, url)`` pair, where ``url`` is the ``href`` resolved against
    ``base_url``. Links whose ``href`` cannot be resolved are skipped.

    Args:
        soup (BeautifulSoup): The BeautifulSoup object
        base_url (str): The base URL that relative hrefs are joined onto

    Returns:
        list[tuple[str, str]]: The extracted hyperlinks as (link text,
            resolved URL) pairs, in the order returned by ``soup.find_all``.
            The link text is returned as-is (not stripped).
    """
    hyperlinks: list[tuple[str, str]] = []
    for link in soup.find_all("a", href=True):
        try:
            resolved_url = urljoin(base_url, link["href"])
        except ValueError:
            # urljoin rejects malformed hrefs (e.g. an unbalanced "[" in the
            # host part). The markup is untrusted, so one broken anchor must
            # not discard every other link on the page.
            continue
        hyperlinks.append((link.text, resolved_url))
    return hyperlinks


def format_hyperlinks(hyperlinks: list[tuple[str, str]]) -> list[str]:
    """Format hyperlinks to be displayed to the user

    Args:
        hyperlinks (list[tuple[str, str]]): The hyperlinks to format, as
            (link text, URL) pairs

    Returns:
        list[str]: One ``"<text> (<url>)"`` string per hyperlink, with
            surrounding whitespace stripped from the link text
    """
    return [f"{link_text.strip()} ({link_url})" for link_text, link_url in hyperlinks]