Readme
This model doesn't have a readme.
Run this model in Node.js with one line of code:
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import { writeFile } from "node:fs/promises";
import Replicate from "replicate";
// Build the API client. The token is taken from the REPLICATE_API_TOKEN
// environment variable so it never has to be hard-coded in the script.
const { REPLICATE_API_TOKEN: apiToken } = process.env;
const replicate = new Replicate({ auth: apiToken });
Run ttsds/metavoice using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
// Run a pinned version of ttsds/metavoice: `text` is the sentence to
// synthesize and `speaker_reference` is a URL to a reference audio clip.
const output = await replicate.run(
  "ttsds/metavoice:3495610f45204d13509ef709586d9badd3bc4bd895aa712a252b249df6693143",
  {
    input: {
      text: "With tenure, Suzie'd have all the more leisure for yachting, but her publications are no good.",
      speaker_reference: "https://replicate.delivery/pbxt/MNFXdPaUPOwYCZjZM4azsymbzE2TCV2WJXfGpeV2DrFWaSq8/example_en.wav"
    }
  }
);

// To access the file URL:
console.log(output.url()); //=> "http://example.com"

// To write the file to disk:
// Uses the promise-based writeFile (imported from "node:fs/promises") and
// awaits it, so a failed write rejects here instead of being lost. The model
// returns a WAV audio file, hence the .wav extension.
await writeFile("output.wav", output);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import replicate
Run ttsds/metavoice using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Model reference in "owner/name:version" form, pinned to one exact version.
model_ref = "ttsds/metavoice:3495610f45204d13509ef709586d9badd3bc4bd895aa712a252b249df6693143"

# Inputs for the run: the sentence to synthesize and a URL to a reference
# audio clip of the speaker.
model_input = dict(
    text="With tenure, Suzie'd have all the more leisure for yachting, but her publications are no good.",
    speaker_reference="https://replicate.delivery/pbxt/MNFXdPaUPOwYCZjZM4azsymbzE2TCV2WJXfGpeV2DrFWaSq8/example_en.wav",
)

output = replicate.run(model_ref, input=model_input)
print(output)
To learn more, take a look at the guide on getting started with Python.
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
Run ttsds/metavoice using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Create a prediction by POSTing a JSON body to the predictions endpoint.
# - Authenticates with the bearer token held in $REPLICATE_API_TOKEN.
# - "Prefer: wait" presumably asks the API to hold the connection open until
#   the prediction finishes -- confirm against the HTTP API reference.
# - The body uses bash ANSI-C quoting ($'...') so that the apostrophe in the
#   text can be written as \' inside the single-quoted string.
curl -s -X POST \
-H "Authorization: Bearer $REPLICATE_API_TOKEN" \
-H "Content-Type: application/json" \
-H "Prefer: wait" \
-d $'{
"version": "ttsds/metavoice:3495610f45204d13509ef709586d9badd3bc4bd895aa712a252b249df6693143",
"input": {
"text": "With tenure, Suzie\'d have all the more leisure for yachting, but her publications are no good.",
"speaker_reference": "https://replicate.delivery/pbxt/MNFXdPaUPOwYCZjZM4azsymbzE2TCV2WJXfGpeV2DrFWaSq8/example_en.wav"
}
}' \
https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
Add a payment method to run this model.
By signing in, you agree to our terms of service and privacy policy.
This is a modal window.
Beginning of dialog window. Escape will cancel and close the window.
End of dialog window.
{
"completed_at": "2025-01-31T09:15:24.593890Z",
"created_at": "2025-01-31T09:10:51.770000Z",
"data_removed": false,
"error": null,
"id": "evp9tr4rf9rj00cmqeg8mng7rr",
"input": {
"text": "With tenure, Suzie'd have all the more leisure for yachting, but her publications are no good.",
"speaker_reference": "https://replicate.delivery/pbxt/MNFXdPaUPOwYCZjZM4azsymbzE2TCV2WJXfGpeV2DrFWaSq8/example_en.wav"
},
"logs": "0%| | 0/2005 [00:00<?, ?it/s]\n 1%|▏ | 30/2005 [00:00<00:06, 294.61it/s]\n 3%|▎ | 60/2005 [00:00<00:06, 290.72it/s]\n 4%|▍ | 90/2005 [00:00<00:06, 289.47it/s]\n 6%|▌ | 119/2005 [00:00<00:06, 288.95it/s]\n 7%|▋ | 148/2005 [00:00<00:06, 288.68it/s]\n 9%|▉ | 177/2005 [00:00<00:06, 288.49it/s]\n 10%|█ | 206/2005 [00:00<00:06, 288.35it/s]\n 12%|█▏ | 235/2005 [00:00<00:06, 288.35it/s]\n 13%|█▎ | 264/2005 [00:00<00:06, 288.20it/s]\n 15%|█▍ | 293/2005 [00:01<00:05, 288.18it/s]\n 16%|█▌ | 322/2005 [00:01<00:05, 288.20it/s]\n 18%|█▊ | 351/2005 [00:01<00:05, 288.10it/s]\n 19%|█▉ | 380/2005 [00:01<00:05, 287.93it/s]\n 20%|██ | 409/2005 [00:01<00:05, 287.99it/s]\n 22%|██▏ | 438/2005 [00:01<00:05, 287.91it/s]\n 23%|██▎ | 467/2005 [00:01<00:05, 287.78it/s]\n 25%|██▍ | 496/2005 [00:01<00:05, 287.88it/s]\n 26%|██▌ | 525/2005 [00:01<00:05, 287.89it/s]\n 28%|██▊ | 554/2005 [00:01<00:05, 287.99it/s]\n 29%|██▉ | 583/2005 [00:02<00:04, 288.01it/s]\n 31%|███ | 612/2005 [00:02<00:04, 287.97it/s]\n 32%|███▏ | 641/2005 [00:02<00:04, 287.99it/s]\n 33%|███▎ | 670/2005 [00:02<00:04, 288.03it/s]\n 35%|███▍ | 699/2005 [00:02<00:04, 288.01it/s]\n 36%|███▋ | 728/2005 [00:02<00:04, 287.94it/s]\n 38%|███▊ | 757/2005 [00:02<00:04, 287.94it/s]\n 39%|███▉ | 786/2005 [00:02<00:04, 287.91it/s]\n40%|███▉ | 798/2005 [00:02<00:04, 287.90it/s]\nTime for 1st stage LLM inference: 2.78 sec total, 287.29 tokens/sec\nBandwidth achieved: 717.34 GB/s\nMemory used: 8.94 GB\nNon-causal batching: 0%| | 0/1 [00:00<?, ?it/s]\nNon-causal batching: 100%|██████████| 1/1 [00:00<00:00, 7.63it/s]\nNon-causal batching: 100%|██████████| 1/1 [00:00<00:00, 7.62it/s]\n2025-01-31 09:15:24 | WARNING | DF | Audio sampling rate does not match model sampling rate (24000, 48000). Resampling...\nSaved audio to /src/outputs/synth_25-01-31--09-15-24_With_tenure,_Suzie'd_have_ff59dbfc-89c0-4f09-8c4b-a815958f5cfd.wav\nTotal time to synth (s): 6.059656143188477\nReal-time factor: 1.14",
"metrics": {
"predict_time": 12.372130789,
"total_time": 272.82389
},
"output": "https://replicate.delivery/yhqm/asBO7CuXZSZkERYg7xiX80ttehvccmlYAGKkd5jJqpZWNSFKA/output.wav",
"started_at": "2025-01-31T09:15:12.221759Z",
"status": "succeeded",
"urls": {
"stream": "https://stream.replicate.com/v1/files/yswh-wnk564iuh4qmjadxywvuzzial4lv7wdf3yn4fjkog26lguget3zq",
"get": "https://api.replicate.com/v1/predictions/evp9tr4rf9rj00cmqeg8mng7rr",
"cancel": "https://api.replicate.com/v1/predictions/evp9tr4rf9rj00cmqeg8mng7rr/cancel"
},
"version": "3495610f45204d13509ef709586d9badd3bc4bd895aa712a252b249df6693143"
}
0%| | 0/2005 [00:00<?, ?it/s]
1%|▏ | 30/2005 [00:00<00:06, 294.61it/s]
3%|▎ | 60/2005 [00:00<00:06, 290.72it/s]
4%|▍ | 90/2005 [00:00<00:06, 289.47it/s]
6%|▌ | 119/2005 [00:00<00:06, 288.95it/s]
7%|▋ | 148/2005 [00:00<00:06, 288.68it/s]
9%|▉ | 177/2005 [00:00<00:06, 288.49it/s]
10%|█ | 206/2005 [00:00<00:06, 288.35it/s]
12%|█▏ | 235/2005 [00:00<00:06, 288.35it/s]
13%|█▎ | 264/2005 [00:00<00:06, 288.20it/s]
15%|█▍ | 293/2005 [00:01<00:05, 288.18it/s]
16%|█▌ | 322/2005 [00:01<00:05, 288.20it/s]
18%|█▊ | 351/2005 [00:01<00:05, 288.10it/s]
19%|█▉ | 380/2005 [00:01<00:05, 287.93it/s]
20%|██ | 409/2005 [00:01<00:05, 287.99it/s]
22%|██▏ | 438/2005 [00:01<00:05, 287.91it/s]
23%|██▎ | 467/2005 [00:01<00:05, 287.78it/s]
25%|██▍ | 496/2005 [00:01<00:05, 287.88it/s]
26%|██▌ | 525/2005 [00:01<00:05, 287.89it/s]
28%|██▊ | 554/2005 [00:01<00:05, 287.99it/s]
29%|██▉ | 583/2005 [00:02<00:04, 288.01it/s]
31%|███ | 612/2005 [00:02<00:04, 287.97it/s]
32%|███▏ | 641/2005 [00:02<00:04, 287.99it/s]
33%|███▎ | 670/2005 [00:02<00:04, 288.03it/s]
35%|███▍ | 699/2005 [00:02<00:04, 288.01it/s]
36%|███▋ | 728/2005 [00:02<00:04, 287.94it/s]
38%|███▊ | 757/2005 [00:02<00:04, 287.94it/s]
39%|███▉ | 786/2005 [00:02<00:04, 287.91it/s]
40%|███▉ | 798/2005 [00:02<00:04, 287.90it/s]
Time for 1st stage LLM inference: 2.78 sec total, 287.29 tokens/sec
Bandwidth achieved: 717.34 GB/s
Memory used: 8.94 GB
Non-causal batching: 0%| | 0/1 [00:00<?, ?it/s]
Non-causal batching: 100%|██████████| 1/1 [00:00<00:00, 7.63it/s]
Non-causal batching: 100%|██████████| 1/1 [00:00<00:00, 7.62it/s]
2025-01-31 09:15:24 | WARNING | DF | Audio sampling rate does not match model sampling rate (24000, 48000). Resampling...
Saved audio to /src/outputs/synth_25-01-31--09-15-24_With_tenure,_Suzie'd_have_ff59dbfc-89c0-4f09-8c4b-a815958f5cfd.wav
Total time to synth (s): 6.059656143188477
Real-time factor: 1.14
This model costs approximately $0.0096 to run on Replicate, or 104 runs per $1, but this varies depending on your inputs. It is also open source and you can run it on your own computer with Docker.
This model runs on Nvidia A100 (80GB) GPU hardware. Predictions typically complete within 7 seconds.
This model doesn't have a readme.
This model is cold. You'll get a fast response if the model is warm and already running, and a slower response if the model is cold and starting up.
Choose a file from your machine
Hint: you can also drag files onto the input
0%| | 0/2005 [00:00<?, ?it/s]
1%|▏ | 30/2005 [00:00<00:06, 294.61it/s]
3%|▎ | 60/2005 [00:00<00:06, 290.72it/s]
4%|▍ | 90/2005 [00:00<00:06, 289.47it/s]
6%|▌ | 119/2005 [00:00<00:06, 288.95it/s]
7%|▋ | 148/2005 [00:00<00:06, 288.68it/s]
9%|▉ | 177/2005 [00:00<00:06, 288.49it/s]
10%|█ | 206/2005 [00:00<00:06, 288.35it/s]
12%|█▏ | 235/2005 [00:00<00:06, 288.35it/s]
13%|█▎ | 264/2005 [00:00<00:06, 288.20it/s]
15%|█▍ | 293/2005 [00:01<00:05, 288.18it/s]
16%|█▌ | 322/2005 [00:01<00:05, 288.20it/s]
18%|█▊ | 351/2005 [00:01<00:05, 288.10it/s]
19%|█▉ | 380/2005 [00:01<00:05, 287.93it/s]
20%|██ | 409/2005 [00:01<00:05, 287.99it/s]
22%|██▏ | 438/2005 [00:01<00:05, 287.91it/s]
23%|██▎ | 467/2005 [00:01<00:05, 287.78it/s]
25%|██▍ | 496/2005 [00:01<00:05, 287.88it/s]
26%|██▌ | 525/2005 [00:01<00:05, 287.89it/s]
28%|██▊ | 554/2005 [00:01<00:05, 287.99it/s]
29%|██▉ | 583/2005 [00:02<00:04, 288.01it/s]
31%|███ | 612/2005 [00:02<00:04, 287.97it/s]
32%|███▏ | 641/2005 [00:02<00:04, 287.99it/s]
33%|███▎ | 670/2005 [00:02<00:04, 288.03it/s]
35%|███▍ | 699/2005 [00:02<00:04, 288.01it/s]
36%|███▋ | 728/2005 [00:02<00:04, 287.94it/s]
38%|███▊ | 757/2005 [00:02<00:04, 287.94it/s]
39%|███▉ | 786/2005 [00:02<00:04, 287.91it/s]
40%|███▉ | 798/2005 [00:02<00:04, 287.90it/s]
Time for 1st stage LLM inference: 2.78 sec total, 287.29 tokens/sec
Bandwidth achieved: 717.34 GB/s
Memory used: 8.94 GB
Non-causal batching: 0%| | 0/1 [00:00<?, ?it/s]
Non-causal batching: 100%|██████████| 1/1 [00:00<00:00, 7.63it/s]
Non-causal batching: 100%|██████████| 1/1 [00:00<00:00, 7.62it/s]
2025-01-31 09:15:24 | WARNING | DF | Audio sampling rate does not match model sampling rate (24000, 48000). Resampling...
Saved audio to /src/outputs/synth_25-01-31--09-15-24_With_tenure,_Suzie'd_have_ff59dbfc-89c0-4f09-8c4b-a815958f5cfd.wav
Total time to synth (s): 6.059656143188477
Real-time factor: 1.14