Source code for steamship.agents.tools.audio_transcription.fetch_audio_urls_from_rss_tool

import re
from typing import Any, List, Union
from xml.sax.saxutils import unescape

import requests

from steamship import Block, Steamship, Task
from steamship.agents.llms import OpenAI
from steamship.agents.schema import AgentContext, Tool
from steamship.agents.utils import with_llm
from steamship.utils.repl import ToolREPL



[docs]
class FetchAudioUrlsFromRssTool(Tool):
    """Given an RSS feed, this tool will extract episode URLs.

    Every text input block is treated as the URL of an RSS feed. The feed is
    downloaded and the ``url`` attribute of each ``<enclosure>`` element found
    in it is returned as its own text block.
    """

    name: str = "FetchAudioUrlsFromRssTool"
    human_description: str = "Fetches the episode URLs from a Podcast RSS feed."
    agent_description: str = (
        "Used to fetch the podcast episode URLs from a podcast RSS feed. "
        "The input is the URL of the RSS feed. "
        "The output is the URLs of the episode audio."
    )

    @staticmethod
    def _extract_enclosure_urls(feed_xml: str) -> List[str]:
        """Return the ``url`` attribute of every ``<enclosure>`` tag in *feed_xml*.

        Args:
            feed_xml: Raw text of an RSS document.

        Returns:
            The enclosure URLs in document order, with XML entities decoded.
        """
        # XML permits both single- and double-quoted attribute values, so each
        # quoting style gets its own capture group. The whitespace required
        # before ``url`` keeps attributes that merely end in "url" from matching.
        pattern = re.compile(
            r"""<enclosure\b[^>]*?\surl\s*=\s*(?:"([^"]+)"|'([^']+)')""",
            re.IGNORECASE,
        )
        # Only the five predefined XML entities are decoded (and only when
        # terminated by ';'), so a raw '&' in a sloppy feed is left untouched.
        extra_entities = {"&quot;": '"', "&apos;": "'"}
        return [
            unescape(match.group(1) or match.group(2), extra_entities)
            for match in pattern.finditer(feed_xml)
        ]

    def _get_audio_urls(self, url: str, context: AgentContext) -> List[str]:
        """Download the RSS feed at *url* and return its enclosure URLs.

        Args:
            url: Address of the RSS feed.
            context: Agent context; accepted for interface symmetry but not used here.

        Returns:
            The enclosure URLs found in the feed, in document order.

        Raises:
            requests.RequestException: If the request fails, times out, or the
                server answers with an HTTP error status.
        """
        # Without a timeout a stalled server would block the tool forever.
        response = requests.get(url, timeout=30)
        # Do not scrape an error page as if it were the feed.
        response.raise_for_status()
        return self._extract_enclosure_urls(response.text)

    def run(self, tool_input: List[Block], context: AgentContext) -> Union[List[Block], Task[Any]]:
        """Fetch episode audio URLs for every RSS feed URL in *tool_input*.

        Args:
            tool_input: Blocks whose text is the URL of an RSS feed. Non-text
                blocks and blocks with blank text are skipped.
            context: Agent context forwarded to the feed fetcher.

        Returns:
            One text block per episode audio URL, in feed order.
        """
        blocks = []
        for input_block in tool_input:
            if not input_block.is_text():
                continue
            # Surrounding whitespace (e.g. a trailing newline from interactive
            # input) is not part of the feed URL.
            feed_url = (input_block.text or "").strip()
            if not feed_url:
                continue
            audio_urls = self._get_audio_urls(feed_url, context)
            blocks.extend(Block(text=audio_url) for audio_url in audio_urls)
        return blocks




if __name__ == "__main__":
    # Manual smoke test: drive the tool from an interactive REPL inside a
    # temporary Steamship workspace.
    rss_tool = FetchAudioUrlsFromRssTool()

    with Steamship.temporary_workspace() as workspace_client:
        repl = ToolREPL(rss_tool)
        llm = OpenAI(client=workspace_client)
        agent_context = with_llm(llm=llm)
        repl.run_with_client(client=workspace_client, context=agent_context)


# Try with https://anchor.fm/s/e1369b4c/podcast/rss