zsxkib/wan-2.2-with-sound

wan-video/wan-2.2 (all variants) + topazlabs/video-upscale + zsxkib/smart-thinksound

Public

34 runs

Run zsxkib/wan-2.2-with-sound with an API

Use one of our client libraries to get started quickly. Clicking on a library will take you to the Playground tab where you can tweak different inputs, see the results, and copy the corresponding code to use in your own project.

Input schema

The fields you can use to run this model with an API. If you don't give a value for a field, its default value will be used.

Field	Type	Default value	Description
prompt	string		Text prompt for video generation
image	string		Input image for image-to-video generation (automatically switches to I2V model)
quality	string	fast	Quality setting: 'fast' (PrunaAI optimized, fastest) or 'standard' (higher quality, slower)
resolution	string	16:9	Video resolution: aspect ratio (16:9/9:16) or direct resolution (480p/720p)
num_frames	integer	81 (min: 81, max: 100)	Number of video frames (81 recommended for optimal results)
frames_per_second	integer	16 (min: 5, max: 24)	Video frame rate (16fps optimal for Wan 2.2 models)
seed	integer		Random seed for reproducible generation
go_fast	boolean	True	Enable faster processing (PrunaAI optimized models only)
sample_shift	number	12 (min: 1, max: 20)	Sample shift factor (auto-optimized per model)
sample_steps	integer	30 (min: 1, max: 50)	Generation steps (standard quality models only)
enable_sound	boolean	False	Add AI-generated sound effects and ambient audio
context_hint	string		Context hint for better audio generation (e.g., 'ocean waves', 'city traffic')
enable_upscaling	boolean	False	Upscale video to higher resolution and frame rate
target_resolution	string	4k	Target resolution for upscaling
target_fps	integer	60 (min: 15, max: 120)	Target frame rate for upscaling

{
  "type": "object",
  "title": "Input",
  "required": [
    "prompt"
  ],
  "properties": {
    "seed": {
      "type": "integer",
      "title": "Seed",
      "x-order": 6,
      "nullable": true,
      "description": "Random seed for reproducible generation"
    },
    "image": {
      "type": "string",
      "title": "Image",
      "format": "uri",
      "x-order": 1,
      "nullable": true,
      "description": "Input image for image-to-video generation (automatically switches to I2V model)"
    },
    "prompt": {
      "type": "string",
      "title": "Prompt",
      "x-order": 0,
      "description": "Text prompt for video generation"
    },
    "go_fast": {
      "type": "boolean",
      "title": "Go Fast",
      "default": true,
      "x-order": 7,
      "description": "Enable faster processing (PrunaAI optimized models only)"
    },
    "quality": {
      "enum": [
        "fast",
        "standard"
      ],
      "type": "string",
      "title": "quality",
      "description": "Quality setting: 'fast' (PrunaAI optimized, fastest) or 'standard' (higher quality, slower)",
      "default": "fast",
      "x-order": 2
    },
    "num_frames": {
      "type": "integer",
      "title": "Num Frames",
      "default": 81,
      "maximum": 100,
      "minimum": 81,
      "x-order": 4,
      "description": "Number of video frames (81 recommended for optimal results)"
    },
    "resolution": {
      "enum": [
        "16:9",
        "9:16",
        "480p",
        "720p"
      ],
      "type": "string",
      "title": "resolution",
      "description": "Video resolution: aspect ratio (16:9/9:16) or direct resolution (480p/720p)",
      "default": "16:9",
      "x-order": 3
    },
    "target_fps": {
      "type": "integer",
      "title": "Target Fps",
      "default": 60,
      "maximum": 120,
      "minimum": 15,
      "x-order": 14,
      "description": "Target frame rate for upscaling"
    },
    "context_hint": {
      "type": "string",
      "title": "Context Hint",
      "default": "",
      "x-order": 11,
      "description": "Context hint for better audio generation (e.g., 'ocean waves', 'city traffic')"
    },
    "enable_sound": {
      "type": "boolean",
      "title": "Enable Sound",
      "default": false,
      "x-order": 10,
      "description": "Add AI-generated sound effects and ambient audio"
    },
    "sample_shift": {
      "type": "number",
      "title": "Sample Shift",
      "default": 12,
      "maximum": 20,
      "minimum": 1,
      "x-order": 8,
      "description": "Sample shift factor (auto-optimized per model)"
    },
    "sample_steps": {
      "type": "integer",
      "title": "Sample Steps",
      "default": 30,
      "maximum": 50,
      "minimum": 1,
      "x-order": 9,
      "description": "Generation steps (standard quality models only)"
    },
    "enable_upscaling": {
      "type": "boolean",
      "title": "Enable Upscaling",
      "default": false,
      "x-order": 12,
      "description": "Upscale video to higher resolution and frame rate"
    },
    "frames_per_second": {
      "type": "integer",
      "title": "Frames Per Second",
      "default": 16,
      "maximum": 24,
      "minimum": 5,
      "x-order": 5,
      "description": "Video frame rate (16fps optimal for Wan 2.2 models)"
    },
    "target_resolution": {
      "enum": [
        "1080p",
        "2k",
        "4k"
      ],
      "type": "string",
      "title": "target_resolution",
      "description": "Target resolution for upscaling",
      "default": "4k",
      "x-order": 13
    }
  }
}

Output schema

The shape of the response you’ll get when you run this model with an API.

Schema

{
  "type": "string",
  "title": "Output",
  "format": "uri"
}

Example API response

View prediction

'https://replicate.delivery/xezq/bIA7nTHWIUJ7NFl0dU3QY0fz09eBRAladfpCKR56wu0a71MqA/tmp3iwlibn_.mp4'