tmappdev/lipsync_1.5 | API reference

tmappdev / lipsync_1.5

Public
19 runs

Run tmappdev/lipsync_1.5 with an API

Use one of our client libraries to get started quickly. Clicking on a library will take you to the Playground tab where you can tweak different inputs, see the results, and copy the corresponding code to use in your own project.

Input schema

The fields you can use to run this model with an API. If you don't give a value for a field, its default value will be used.

Field	Type	Default value	Description
audio_input	string (uri)		Upload your audio file here.
video_input	string (uri)		Upload your video file here.
bbox_shift	integer	0	BBox shift value in pixels (mainly for v1, v15 uses 0)
fps	integer	25	Output video FPS
version	string (enum)	v15 Options: v1, v15	MuseTalk version to use
batch_size	integer	8 Min: 1 Max: 16	Batch size for inference
use_float16	boolean	True	Use float16 for faster inference
extra_margin	integer	10 Min: 0 Max: 40	Extra margin for jaw movement (v15 only)
parsing_mode	string (enum)	jaw Options: jaw, raw	Face parsing mode (v15 only)
left_cheek_width	integer	90 Min: 20 Max: 160	Left cheek width (v15 only)
right_cheek_width	integer	90 Min: 20 Max: 160	Right cheek width (v15 only)
use_saved_coord	boolean	False	Use saved coordinates to save time

{
  "type": "object",
  "title": "Input",
  "properties": {
    "fps": {
      "type": "integer",
      "title": "Fps",
      "default": 25,
      "x-order": 3,
      "description": "Output video FPS"
    },
    "version": {
      "enum": [
        "v1",
        "v15"
      ],
      "type": "string",
      "title": "version",
      "description": "MuseTalk version to use",
      "default": "v15",
      "x-order": 4
    },
    "batch_size": {
      "type": "integer",
      "title": "Batch Size",
      "default": 8,
      "maximum": 16,
      "minimum": 1,
      "x-order": 5,
      "description": "Batch size for inference"
    },
    "bbox_shift": {
      "type": "integer",
      "title": "Bbox Shift",
      "default": 0,
      "x-order": 2,
      "description": "BBox shift value in pixels (mainly for v1, v15 uses 0)"
    },
    "audio_input": {
      "type": "string",
      "title": "Audio Input",
      "format": "uri",
      "x-order": 0,
      "description": "Upload your audio file here."
    },
    "use_float16": {
      "type": "boolean",
      "title": "Use Float16",
      "default": true,
      "x-order": 6,
      "description": "Use float16 for faster inference"
    },
    "video_input": {
      "type": "string",
      "title": "Video Input",
      "format": "uri",
      "x-order": 1,
      "description": "Upload your video file here."
    },
    "extra_margin": {
      "type": "integer",
      "title": "Extra Margin",
      "default": 10,
      "maximum": 40,
      "minimum": 0,
      "x-order": 7,
      "description": "Extra margin for jaw movement (v15 only)"
    },
    "parsing_mode": {
      "enum": [
        "jaw",
        "raw"
      ],
      "type": "string",
      "title": "parsing_mode",
      "description": "Face parsing mode (v15 only)",
      "default": "jaw",
      "x-order": 8
    },
    "use_saved_coord": {
      "type": "boolean",
      "title": "Use Saved Coord",
      "default": false,
      "x-order": 11,
      "description": "Use saved coordinates to save time"
    },
    "left_cheek_width": {
      "type": "integer",
      "title": "Left Cheek Width",
      "default": 90,
      "maximum": 160,
      "minimum": 20,
      "x-order": 9,
      "description": "Left cheek width (v15 only)"
    },
    "right_cheek_width": {
      "type": "integer",
      "title": "Right Cheek Width",
      "default": 90,
      "maximum": 160,
      "minimum": 20,
      "x-order": 10,
      "description": "Right cheek width (v15 only)"
    }
  }
}

Output schema

The shape of the response you’ll get when you run this model with an API.

Schema

{
  "type": "string",
  "title": "Output",
  "format": "uri"
}