steamship.agents.tools.video_generation package#

Submodules#

steamship.agents.tools.video_generation.did_video_generator_tool module#

Tool for generating videos.

class steamship.agents.tools.video_generation.did_video_generator_tool.DIDVideoGeneratorTool(*, name: str = 'DIDVideoGeneratorTool', agent_description: str = 'Used to generate a video of you from text. Use if the user has asked for a video response.  The input is the text that you want to say. The output is the video of you saying it.', human_description: str = 'Generates a video of you speaking a response to a user.', is_final: bool = False, cacheable: bool = True, generator_plugin_handle: str = 'did-video-generator', generator_plugin_instance_handle: str | None = None, generator_plugin_config: dict = {}, merge_blocks: bool = False, make_output_public: bool = True, source_url: str | None = 'https://www.steamship.com/images/agents/man-in-suit-midjourney.png', stitch: bool = True, voice_provider: str | None = 'microsoft', voice_id: str | None = 'en-US-GuyNeural', voice_style: str | None = 'Default', driver_url: str | None = None, expressions: List[Expression] | None = None, transition_frames: int | None = 20)[source]#

Bases: VideoGeneratorTool

Tool to generate talking avatars from text using D-ID.

class Expression(*, start_frame: ConstrainedIntValue, expression: Expressions, intensity: ConstrainedFloatValue = 1.0)[source]#

Bases: BaseModel

class Expressions(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Bases: str, Enum

HAPPY = 'happy'#
NEUTRAL = 'neutral'#
SERIOUS = 'serious'#
SURPRISE = 'surprise'#
expression: Expressions#
intensity: float#
start_frame: int#
driver_url: str | None#

The URL of the D-ID driver video. If not provided a driver video will be selected automatically.

expressions: List[Expression] | None#

A list of expressions to apply.

generator_plugin_config: dict#
generator_plugin_handle: str#
human_description: str#

Human-friendly description. Used for logging, tool indices, etc.

name: str#

The short name for the tool. This will be used by Agents to refer to this tool during action selection.

run(tool_input: List[Block], context: AgentContext) List[Block] | Task[Any][source]#

Run the tool. Copied from base class to enable generate-time config overrides.

source_url: str | None#

The URL of the source image to be animated.

stitch: bool#
transition_frames: int | None#

How many frames to use for expression transition.

voice_id: str | None#

The voice ID. E.g. en-US-AshleyNeural for Microsoft or Amy for Amazon.

voice_provider: str | None#

The voice provider. Must be either microsoft or amazon.

voice_style: str | None#

Module contents#

class steamship.agents.tools.video_generation.DIDVideoGeneratorTool(*, name: str = 'DIDVideoGeneratorTool', agent_description: str = 'Used to generate a video of you from text. Use if the user has asked for a video response.  The input is the text that you want to say. The output is the video of you saying it.', human_description: str = 'Generates a video of you speaking a response to a user.', is_final: bool = False, cacheable: bool = True, generator_plugin_handle: str = 'did-video-generator', generator_plugin_instance_handle: str | None = None, generator_plugin_config: dict = {}, merge_blocks: bool = False, make_output_public: bool = True, source_url: str | None = 'https://www.steamship.com/images/agents/man-in-suit-midjourney.png', stitch: bool = True, voice_provider: str | None = 'microsoft', voice_id: str | None = 'en-US-GuyNeural', voice_style: str | None = 'Default', driver_url: str | None = None, expressions: List[Expression] | None = None, transition_frames: int | None = 20)[source]#

Bases: VideoGeneratorTool

Tool to generate talking avatars from text using D-ID.

class Expression(*, start_frame: ConstrainedIntValue, expression: Expressions, intensity: ConstrainedFloatValue = 1.0)[source]#

Bases: BaseModel

class Expressions(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Bases: str, Enum

HAPPY = 'happy'#
NEUTRAL = 'neutral'#
SERIOUS = 'serious'#
SURPRISE = 'surprise'#
expression: Expressions#
intensity: float#
start_frame: int#
agent_description: str#

Description for use in an agent in order to enable Action selection. It should include a short summary of what the Tool does, what the inputs to the Tool should be, and what the outputs of the tool are.

cacheable: bool#

Whether runs of this Tool should be cached based on inputs (if caching is enabled in the AgentContext for a run). Setting this to False will make prevent any Actions that involve this tool from being cached, meaning that every Action using this Tool will result in a call to run. By default, Tools are considered cacheable.

driver_url: str | None#

The URL of the D-ID driver video. If not provided a driver video will be selected automatically.

expressions: List[Expression] | None#

A list of expressions to apply.

generator_plugin_config: dict#
generator_plugin_handle: str#
generator_plugin_instance_handle: str | None#
human_description: str#

Human-friendly description. Used for logging, tool indices, etc.

is_final: bool#

Whether actions performed by this tool should have their is_final bit marked.

Setting this to True means that the output of this tool will halt the reasoning loop. Its output will be returned directly to the user.

make_output_public: bool#
merge_blocks: bool#
name: str#

The short name for the tool. This will be used by Agents to refer to this tool during action selection.

run(tool_input: List[Block], context: AgentContext) List[Block] | Task[Any][source]#

Run the tool. Copied from base class to enable generate-time config overrides.

source_url: str | None#

The URL of the source image to be animated.

stitch: bool#
transition_frames: int | None#

How many frames to use for expression transition.

voice_id: str | None#

The voice ID. E.g. en-US-AshleyNeural for Microsoft or Amy for Amazon.

voice_provider: str | None#

The voice provider. Must be either microsoft or amazon.

voice_style: str | None#