
How to use the REST API

Recommended reading

Before diving into this content, it may be helpful to read the following:

We strongly recommend using the Python or TypeScript SDK to run evals. The SDKs include many optimizations and features that significantly improve the performance and reliability of your evals. However, if you are unable to use the SDK, either because you are using a different programming language or because you are running in a restricted environment, you can call the REST API directly.

This guide shows you how to run evals using the REST API, using the requests library in Python as an example. The same principles apply to any programming language.
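
Throughout this guide, every request goes to https://api.smith.langchain.com (update this for self-hosted installations or the EU region) and authenticates by passing your API key in the x-api-key header. If you prefer not to repeat the header in every call, one option is a shared requests.Session; a minimal sketch:

import os

import requests

# All endpoints in this guide live under this base URL; update it for
# self-hosted installations or the EU region.
API_URL = "https://api.smith.langchain.com/api/v1"

# A shared session attaches the API key header to every request automatically.
session = requests.Session()
session.headers.update({"x-api-key": os.environ["LANGSMITH_API_KEY"]})

The snippets below keep the explicit headers argument so that each call is self-contained.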

Create a dataset

For convenience, we use the Python SDK here. You can also create the dataset via the API directly or through the UI; see this guide for more information.

import openai
import os
import requests

from datetime import datetime
from langsmith import Client
from uuid import uuid4


client = Client()

# Create a dataset
examples = [
    {
        "inputs": {"text": "Shut up, idiot"},
        "outputs": {"label": "Toxic"},
    },
    {
        "inputs": {"text": "You're a wonderful person"},
        "outputs": {"label": "Not toxic"},
    },
    {
        "inputs": {"text": "This is the worst thing ever"},
        "outputs": {"label": "Toxic"},
    },
    {
        "inputs": {"text": "I had a great day today"},
        "outputs": {"label": "Not toxic"},
    },
    {
        "inputs": {"text": "Nobody likes you"},
        "outputs": {"label": "Toxic"},
    },
    {
        "inputs": {"text": "This is unacceptable. I want to speak to the manager."},
        "outputs": {"label": "Not toxic"},
    },
]

dataset_name = "Toxic Queries - API Example"
dataset = client.create_dataset(dataset_name=dataset_name)
client.create_examples(dataset_id=dataset.id, examples=examples)
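If you cannot use the SDK at all, the dataset can also be created over REST. The following is a minimal sketch, assuming the POST /api/v1/datasets and POST /api/v1/examples endpoints accept payloads mirroring the SDK arguments above; verify the exact field names against the spec at https://api.smith.langchain.com/redoc before relying on them.

# Hypothetical REST equivalent of the SDK calls above -- verify the payload
# shapes against the redoc spec.
headers = {"x-api-key": os.environ["LANGSMITH_API_KEY"]}

resp = requests.post(
    "https://api.smith.langchain.com/api/v1/datasets",
    json={"name": dataset_name},
    headers=headers,
)
resp.raise_for_status()
dataset_id = resp.json()["id"]

# Create each example against the new dataset (assumed payload shape).
for ex in examples:
    resp = requests.post(
        "https://api.smith.langchain.com/api/v1/examples",
        json={
            "dataset_id": dataset_id,
            "inputs": ex["inputs"],
            "outputs": ex["outputs"],
        },
        headers=headers,
    )
    resp.raise_for_status()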

Run a single experiment

First, pull all of the examples you want to use in the experiment.

# Pick a dataset id. In this case, we are using the dataset we created above.
# Spec: https://api.smith.langchain.com/redoc#tag/examples
dataset_id = dataset.id
params = {"dataset": dataset_id}

resp = requests.get(
    "https://api.smith.langchain.com/api/v1/examples",
    params=params,
    headers={"x-api-key": os.environ["LANGSMITH_API_KEY"]},
)

examples = resp.json()
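
The response is a JSON list of example objects; the fields the code below relies on are id and inputs. A quick sanity check:

# Each example carries (among other fields) the "id" and the "inputs"
# we created above.
for example in examples:
    print(example["id"], example["inputs"]["text"])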

Next, we'll define a function that creates a run for a single example.

os.environ["OPENAI_API_KEY"] = "sk-..."

def run_completion_on_example(example, model_name, experiment_id):
"""Run completions on a list of examples."""
# We are using the OpenAI API here, but you can use any model you like

def _post_run(run_id, name, run_type, inputs, parent_id=None):
"""Function to post a new run to the API."""
data = {
"id": run_id.hex,
"name": name,
"run_type": run_type,
"inputs": inputs,
"start_time": datetime.utcnow().isoformat(),
"reference_example_id": example["id"],
"session_id": experiment_id,
}
if parent_id:
data["parent_run_id"] = parent_id.hex
resp = requests.post(
"https://api.smith.langchain.com/api/v1/runs", # Update appropriately for self-hosted installations or the EU region
json=data,
headers=headers
)
resp.raise_for_status()

def _patch_run(run_id, outputs):
"""Function to patch a run with outputs."""
resp = requests.patch(
f"https://api.smith.langchain.com/api/v1/runs/{run_id}",
json={
"outputs": outputs,
"end_time": datetime.utcnow().isoformat(),
},
headers=headers,
)
resp.raise_for_status()

# Send your API Key in the request headers
headers = {"x-api-key": os.environ["LANGSMITH_API_KEY"]}

text = example["inputs"]["text"]

messages = [
{
"role": "system",
"content": "Please review the user query below and determine if it contains any form of toxic behavior, such as insults, threats, or highly negative comments. Respond with 'Toxic' if it does, and 'Not toxic' if it doesn't.",
},
{"role": "user", "content": text},
]


# Create parent run
parent_run_id = uuid4()
_post_run(parent_run_id, "LLM Pipeline", "chain", {"text": text})

# Create child run
child_run_id = uuid4()
_post_run(child_run_id, "OpenAI Call", "llm", {"messages": messages}, parent_run_id)

# Generate a completion
client = openai.Client()
chat_completion = client.chat.completions.create(model=model_name, messages=messages)

# End runs
_patch_run(child_run_id, chat_completion.dict())
_patch_run(parent_run_id, {"label": chat_completion.choices[0].message.content})
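
The POST-then-PATCH pattern above lets the trace appear in LangSmith before the model call finishes. If you only log runs after the fact, you can likely collapse the two requests into one; a minimal sketch, assuming the POST body for /api/v1/runs accepts the same outputs and end_time fields used by the PATCH (verify against the runs spec before relying on this):

# Sketch: post a run that is already complete, assuming the creation payload
# accepts the "outputs" and "end_time" fields used in the PATCH above.
def _post_completed_run(run_id, name, run_type, inputs, outputs, example_id, experiment_id, headers):
    now = datetime.utcnow().isoformat()
    resp = requests.post(
        "https://api.smith.langchain.com/api/v1/runs",
        json={
            "id": run_id.hex,
            "name": name,
            "run_type": run_type,
            "inputs": inputs,
            "outputs": outputs,
            "start_time": now,
            "end_time": now,
            "reference_example_id": example_id,
            "session_id": experiment_id,
        },
        headers=headers,
    )
    resp.raise_for_status()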

We'll run completions over all of the examples with two models: gpt-3.5-turbo and gpt-4o-mini.

# Create a new experiment using the /sessions endpoint
# An experiment is a collection of runs with a reference to the dataset used
# Spec: https://api.smith.langchain.com/redoc#tag/tracer-sessions/operation/create_tracer_session_api_v1_sessions_post

model_names = ("gpt-3.5-turbo", "gpt-4o-mini")
experiment_ids = []
for model_name in model_names:
    resp = requests.post(
        "https://api.smith.langchain.com/api/v1/sessions",
        json={
            "start_time": datetime.utcnow().isoformat(),
            "reference_dataset_id": str(dataset_id),
            "description": "An optional description for the experiment",
            "name": f"Toxicity detection - API Example - {model_name} - {str(uuid4())[0:8]}",  # A name for the experiment
            "extra": {
                "metadata": {"foo": "bar"},  # Optional metadata
            },
        },
        headers={"x-api-key": os.environ["LANGSMITH_API_KEY"]},
    )

    experiment = resp.json()
    experiment_ids.append(experiment["id"])

    # Run completions on all examples
    for example in examples:
        run_completion_on_example(example, model_name, experiment["id"])

    # Issue a patch request to "end" the experiment by updating the end_time
    requests.patch(
        f"https://api.smith.langchain.com/api/v1/sessions/{experiment['id']}",
        json={"end_time": datetime.utcnow().isoformat()},
        headers={"x-api-key": os.environ["LANGSMITH_API_KEY"]},
    )
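
At this point, both experiments exist in LangSmith with their runs attached. Beyond the pairwise rankings shown in the next section, you can also attach ordinary evaluator scores to individual runs through the same /api/v1/feedback endpoint used below. A minimal sketch; the helper name and the "correctness" key are illustrative choices, not part of the original walkthrough:

# Sketch: record an evaluator score for a single run. The feedback key and
# score are arbitrary; "correctness" is just an illustrative choice.
def post_feedback(run_id, key, score):
    resp = requests.post(
        "https://api.smith.langchain.com/api/v1/feedback",
        json={"run_id": str(run_id), "key": key, "score": score},
        headers={"x-api-key": os.environ["LANGSMITH_API_KEY"]},
    )
    resp.raise_for_status()

# e.g. post_feedback(some_run_id, "correctness", 1)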

Run a pairwise experiment

Next, we'll demonstrate how to run a pairwise experiment. In a pairwise experiment, you compare the outputs of two experiments against each other. For more information, see this guide.

# A comparative experiment allows you to provide a preferential ranking on the outputs of two or more experiments
# Spec: https://api.smith.langchain.com/redoc#tag/datasets/operation/create_comparative_experiment_api_v1_datasets_comparative_post
resp = requests.post(
    "https://api.smith.langchain.com/api/v1/datasets/comparative",
    json={
        "experiment_ids": experiment_ids,
        "name": "Toxicity detection - API Example - Comparative - " + str(uuid4())[0:8],
        "description": "An optional description for the comparative experiment",
        "extra": {
            "metadata": {"foo": "bar"},  # Optional metadata
        },
        "reference_dataset_id": str(dataset_id),
    },
    headers={"x-api-key": os.environ["LANGSMITH_API_KEY"]},
)

comparative_experiment = resp.json()
comparative_experiment_id = comparative_experiment["id"]

# You can iterate over the runs in the experiments belonging to the comparative experiment and preferentially rank the outputs

# Fetch the comparative experiment
resp = requests.get(
    f"https://api.smith.langchain.com/api/v1/datasets/{str(dataset_id)}/comparative",
    params={"id": comparative_experiment_id},
    headers={"x-api-key": os.environ["LANGSMITH_API_KEY"]},
)

comparative_experiment = resp.json()[0]
experiment_ids = [info["id"] for info in comparative_experiment["experiments_info"]]

from collections import defaultdict

example_id_to_runs_map = defaultdict(list)

# Spec: https://api.smith.langchain.com/redoc#tag/run/operation/query_runs_api_v1_runs_query_post
runs = requests.post(
    "https://api.smith.langchain.com/api/v1/runs/query",
    headers={"x-api-key": os.environ["LANGSMITH_API_KEY"]},
    json={
        "session": experiment_ids,
        "is_root": True,  # Only fetch root runs (spans), which contain the end outputs
        "select": ["id", "reference_example_id", "outputs"],
    },
).json()
runs = runs["runs"]
for run in runs:
    example_id = run["reference_example_id"]
    example_id_to_runs_map[example_id].append(run)

for example_id, runs in example_id_to_runs_map.items():
    print(f"Example ID: {example_id}")
    # Preferentially rank the outputs; in this case we will always prefer the first output.
    # In reality, you can use an LLM to rank the outputs.
    feedback_group_id = uuid4()

    # Post a feedback score for each run, with the first run being the preferred one
    # Spec: https://api.smith.langchain.com/redoc#tag/feedback/operation/create_feedback_api_v1_feedback_post
    # We'll use the feedback group ID to associate the feedback scores with the same group
    for i, run in enumerate(runs):
        print(f"Run ID: {run['id']}")
        feedback = {
            "score": 1 if i == 0 else 0,
            "run_id": str(run["id"]),
            "key": "ranked_preference",
            "feedback_group_id": str(feedback_group_id),
            "comparative_experiment_id": comparative_experiment_id,
        }
        resp = requests.post(
            "https://api.smith.langchain.com/api/v1/feedback",
            json=feedback,
            headers={"x-api-key": os.environ["LANGSMITH_API_KEY"]},
        )
        resp.raise_for_status()
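
Once the feedback has been posted, the preferential rankings should be visible on the comparative experiment in the LangSmith UI, alongside the two underlying experiments.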
