Source code for steamship.agents.tools.audio_transcription.fetch_audio_urls_from_rss_tool

import html
import re
from typing import Any, List, Union

import requests

from steamship import Block, Steamship, Task
from steamship.agents.llms import OpenAI
from steamship.agents.schema import AgentContext, Tool
from steamship.agents.utils import with_llm
from steamship.utils.repl import ToolREPL


class FetchAudioUrlsFromRssTool(Tool):
    """Given an RSS feed, this tool will extract episode URLs.

    Each text input block is treated as the URL of an RSS feed. The feed is
    downloaded and the ``url`` attribute of every ``<enclosure>`` tag found in
    the response body is returned as its own text block.
    """

    name: str = "FetchAudioUrlsFromRssTool"
    human_description: str = "Fetches the episode URLs from a Podcast RSS feed."
    agent_description: str = (
        "Used to fetch the podcast episode URLs from a podcast RSS feed. "
        "The input is the URL of the RSS feed. "
        "The output is the URLs of the episode audio."
    )

    # Upper bound (seconds) on how long a single feed download may stall.
    # Without a timeout, ``requests`` waits forever on an unresponsive server.
    request_timeout_seconds: float = 30.0

    def _get_audio_urls(self, url: str, context: AgentContext) -> List[str]:
        """Download the feed at *url* and return its enclosure URLs.

        Args:
            url: Address of the RSS feed to download.
            context: Agent context; unused here, kept for interface parity.

        Returns:
            The ``url`` attribute values of all ``<enclosure>`` tags, in
            document order, with XML character entities decoded.

        Raises:
            requests.RequestException: If the download fails or times out.
        """
        response = requests.get(url, timeout=self.request_timeout_seconds)

        # A regex (rather than a strict XML parser) tolerates the malformed
        # feeds that are common in the wild. The attribute name must be
        # preceded by whitespace so that e.g. ``data-url`` is not mistaken for
        # ``url``, and both quoting styles permitted by XML are accepted.
        pattern = re.compile(
            r"<enclosure\b[^>]*?\surl\s*=\s*(?:\"([^\"]+)\"|'([^']+)')",
            re.IGNORECASE,
        )

        audio_urls = []
        for match in pattern.finditer(response.text):
            raw_value = match.group(1) or match.group(2)
            # Attribute values are XML-escaped in the feed (``&amp;`` etc.);
            # decode them so the returned URL is directly usable.
            audio_urls.append(html.unescape(raw_value).strip())
        return audio_urls

    def run(self, tool_input: List[Block], context: AgentContext) -> Union[List[Block], Task[Any]]:
        """Extract episode audio URLs from every RSS feed URL in *tool_input*.

        Args:
            tool_input: Blocks whose text is the URL of an RSS feed. Blocks
                that are not text, or whose text is blank, are skipped.
            context: Agent context forwarded to the feed fetcher.

        Returns:
            One text block per enclosure URL found, in input order.
        """
        blocks = []
        for input_block in tool_input:
            if not input_block.is_text():
                continue

            # Tolerate stray whitespace/newlines around the URL (typical of
            # REPL or LLM-produced input) and skip blocks with no URL at all.
            feed_url = (input_block.text or "").strip()
            if not feed_url:
                continue

            audio_urls = self._get_audio_urls(feed_url, context)
            blocks.extend(Block(text=audio_url) for audio_url in audio_urls)
        return blocks
if __name__ == "__main__":
    # Manual entry point: wrap the tool in an interactive REPL backed by a
    # workspace obtained from Steamship.temporary_workspace().
    # Try with https://anchor.fm/s/e1369b4c/podcast/rss
    rss_tool = FetchAudioUrlsFromRssTool()
    with Steamship.temporary_workspace() as client:
        repl = ToolREPL(rss_tool)
        llm = OpenAI(client=client)
        repl_context = with_llm(llm=llm)
        repl.run_with_client(client=client, context=repl_context)