steamship.agents.tools.video_generation package#

Submodules#

steamship.agents.tools.video_generation.did_video_generator_tool module#

Tool for generating videos.

class steamship.agents.tools.video_generation.did_video_generator_tool.DIDVideoGeneratorTool(*, name: str = 'DIDVideoGeneratorTool', agent_description: str = 'Used to generate a video of you from text. Use if the user has asked for a video response.  The input is the text that you want to say. The output is the video of you saying it.', human_description: str = 'Generates a video of you speaking a response to a user.', is_final: bool = False, cacheable: bool = True, generator_plugin_handle: str = 'did-video-generator', generator_plugin_instance_handle: str | None = None, generator_plugin_config: dict = {}, merge_blocks: bool = False, make_output_public: bool = True, source_url: str | None = 'https://www.steamship.com/images/agents/man-in-suit-midjourney.png', stitch: bool = True, voice_provider: str | None = 'microsoft', voice_id: str | None = 'en-US-GuyNeural', voice_style: str | None = 'Default', driver_url: str | None = None, expressions: List[Expression] | None = None, transition_frames: int | None = 20)[source]#

Bases: VideoGeneratorTool

Tool to generate talking avatars from text using D-ID.

class Expression(*, start_frame: ConstrainedIntValue, expression: Expressions, intensity: ConstrainedFloatValue = 1.0)[source]#

Bases: BaseModel

class Expressions(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Bases: str, Enum

HAPPY = 'happy'#
NEUTRAL = 'neutral'#
SERIOUS = 'serious'#
SURPRISE = 'surprise'#
expression: Expressions#
intensity: float#
start_frame: int#
driver_url: str | None#

The URL of the D-ID driver video. If not provided a driver video will be selected automatically.

expressions: List[Expression] | None#

A list of expressions to apply.

generator_plugin_config: dict#
generator_plugin_handle: str#
human_description: str#

Human-friendly description. Used for logging, tool indices, etc.

name: str#

The short name for the tool. This will be used by Agents to refer to this tool during action selection.

run(tool_input: List[Block], context: AgentContext) List[Block] | Task[Any][source]#

Run the tool. Copied from base class to enable generate-time config overrides.

source_url: str | None#

The URL of the source image to be animated.

stitch: bool#
transition_frames: int | None#

How many frames to use for expression transition.

voice_id: str | None#

The voice ID. E.g. en-US-AshleyNeural for Microsoft or Amy for Amazon.

voice_provider: str | None#

The voice provider. Must be either microsoft or amazon.

voice_style: str | None#

Module contents#

class steamship.agents.tools.video_generation.DIDVideoGeneratorTool(*, name: str = 'DIDVideoGeneratorTool', agent_description: str = 'Used to generate a video of you from text. Use if the user has asked for a video response.  The input is the text that you want to say. The output is the video of you saying it.', human_description: str = 'Generates a video of you speaking a response to a user.', is_final: bool = False, cacheable: bool = True, generator_plugin_handle: str = 'did-video-generator', generator_plugin_instance_handle: str | None = None, generator_plugin_config: dict = {}, merge_blocks: bool = False, make_output_public: bool = True, source_url: str | None = 'https://www.steamship.com/images/agents/man-in-suit-midjourney.png', stitch: bool = True, voice_provider: str | None = 'microsoft', voice_id: str | None = 'en-US-GuyNeural', voice_style: str | None = 'Default', driver_url: str | None = None, expressions: List[Expression] | None = None, transition_frames: int | None = 20)[source]#

Bases: VideoGeneratorTool

Tool to generate talking avatars from text using D-ID.

class Expression(*, start_frame: ConstrainedIntValue, expression: Expressions, intensity: ConstrainedFloatValue = 1.0)[source]#

Bases: BaseModel

class Expressions(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Bases: str, Enum

HAPPY = 'happy'#
NEUTRAL = 'neutral'#
SERIOUS = 'serious'#
SURPRISE = 'surprise'#
expression: Expressions#
intensity: float#
start_frame: int#
agent_description: str#

Description for use in an agent in order to enable Action selection. It should include a short summary of what the Tool does, what the inputs to the Tool should be, and what the outputs of the tool are.

cacheable: bool#

Whether runs of this Tool should be cached based on inputs (if caching is enabled in the AgentContext for a run). Setting this to False will make prevent any Actions that involve this tool from being cached, meaning that every Action using this Tool will result in a call to run. By default, Tools are considered cacheable.

driver_url: str | None#

The URL of the D-ID driver video. If not provided a driver video will be selected automatically.

expressions: List[Expression] | None#

A list of expressions to apply.

generator_plugin_config: dict#
generator_plugin_handle: str#
generator_plugin_instance_handle: str | None#
human_description: str#

Human-friendly description. Used for logging, tool indices, etc.

is_final: bool#

Whether actions performed by this tool should have their is_final bit marked.

Setting this to True means that the output of this tool will halt the reasoning loop. Its output will be returned directly to the user.

make_output_public: bool#
merge_blocks: bool#
name: str#

The short name for the tool. This will be used by Agents to refer to this tool during action selection.

run(tool_input: List[Block], context: AgentContext) List[Block] | Task[Any][source]#

Run the tool. Copied from base class to enable generate-time config overrides.

source_url: str | None#

The URL of the source image to be animated.

stitch: bool#
transition_frames: int | None#

How many frames to use for expression transition.

voice_id: str | None#

The voice ID. E.g. en-US-AshleyNeural for Microsoft or Amy for Amazon.

voice_provider: str | None#

The voice provider. Must be either microsoft or amazon.

voice_style: str | None#