Run this model in Node.js with one line of code. First, install Replicate's Node.js client library:
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import Replicate from "replicate"; const replicate = new Replicate({ auth: process.env.REPLICATE_API_TOKEN, });
Run fakturi/bge-reranker-large using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
const output = await replicate.run( "fakturi/bge-reranker-large:9eceaf21b0ec09399bbd3a9672f06a354814dd04c6bb941ee2b408c46cce0f1e", { input: {} } ); console.log(output);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
import replicate
output = replicate.run( "fakturi/bge-reranker-large:9eceaf21b0ec09399bbd3a9672f06a354814dd04c6bb941ee2b408c46cce0f1e", input={} ) print(output)
To learn more, take a look at the guide on getting started with Python.
curl -s -X POST \ -H "Authorization: Bearer $REPLICATE_API_TOKEN" \ -H "Content-Type: application/json" \ -H "Prefer: wait" \ -d $'{ "version": "9eceaf21b0ec09399bbd3a9672f06a354814dd04c6bb941ee2b408c46cce0f1e", "input": {} }' \ https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
{ "completed_at": "2023-10-10T12:36:25.233702Z", "created_at": "2023-10-10T12:36:25.240911Z", "data_removed": true, "error": null, "id": "xlwnzcbbk6awfhqvz6asr36ts4", "input": {}, "logs": null, "metrics": { "predict_time": 0.070091, "total_time": -0.007209 }, "output": null, "started_at": "2023-10-10T12:36:25.163611Z", "status": "succeeded", "urls": { "get": "https://api.replicate.com/v1/predictions/xlwnzcbbk6awfhqvz6asr36ts4", "cancel": "https://api.replicate.com/v1/predictions/xlwnzcbbk6awfhqvz6asr36ts4/cancel" }, "version": "9eceaf21b0ec09399bbd3a9672f06a354814dd04c6bb941ee2b408c46cce0f1e" }
This model runs on Nvidia T4 GPU hardware. We don't yet have enough runs of this model to provide performance information.
This model doesn't have a readme.
This model is cold. You'll get a fast response if the model is warm and already running, and a slower response if the model is cold and starting up.