I am repeatedly (over several days) getting rate_limit_exceeded
error when calling gpt
model. My understanding is that the error means that I am hitting RPM
limit. However, my model deployment allows around 100 RPM while the current RPM is under 10. What is the issue?
Code
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
project_connection_string="mystring"
project = AIProjectClient.from_connection_string(
conn_str=project_connection_string,
credential=DefaultAzureCredential())
print ("testing AI agent service ")
import os
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import CodeInterpreterTool
from azure.identity import DefaultAzureCredential
from typing import Any
from pathlib import Path
# Create an Azure AI Client from a connection string, copied from your Azure AI Foundry project.
# At the moment, it should be in the format "<HostName>;<AzureSubscriptionId>;<ResourceGroup>;<ProjectName>"
# HostName can be found by navigating to your discovery_url and removing the leading "https://" and trailing "/discovery"
# To find your discovery_url, run the CLI command: az ml workspace show -n {project_name} --resource-group {resource_group_name} --query discovery_url
# Project Connection example: eastus.api.azureml.ms;12345678-abcd-1234-9fc6-62780b3d3e05;my-resource-group;my-project-name
# Customer needs to login to Azure subscription via Azure CLI and set the environment variables
project_client = project
with project_client:
# Create an instance of the CodeInterpreterTool
code_interpreter = CodeInterpreterTool()
# The CodeInterpreterTool needs to be included in creation of the agent
agent = project_client.agents.create_agent(
model="gpt-4o-mini-standardtriedhublevel",#"gpt-4o-mini",
name="my-agent",
instructions="You are helpful agent",
tools=code_interpreter.definitions,
tool_resources=code_interpreter.resources,
)
print(f"Created agent, agent ID: {agent.id}")
# Create a thread
thread = project_client.agents.create_thread()
print(f"Created thread, thread ID: {thread.id}")
# Create a message
message = project_client.agents.create_message(
thread_id=thread.id,
role="user",
content="Could you please create a bar chart for the operating profit using the following data and provide the file to me? Company A: $1.2 million, Company B: $2.5 million, Company C: $3.0 million, Company D: $1.8 million",
)
print(f"Created message, message ID: {message.id}")
# Run the agent
run = project_client.agents.create_and_process_run(thread_id=thread.id, assistant_id=agent.id)
print(f"Run finished with status: {run.status}")
if run.status == "failed":
# Check if you got "Rate limit is exceeded.", then you want to get more quota
print(f"Run failed: {run.last_error}")
# Get messages from the thread
messages = project_client.agents.list_messages(thread_id=thread.id)
print(f"Messages: {messages}")
# Get the last message from the sender
last_msg = messages.get_last_text_message_by_sender("assistant")
if last_msg:
print(f"Last Message: {last_msg.text.value}")
# Generate an image file for the bar chart
for image_content in messages.image_contents:
print(f"Image File ID: {image_content.image_file.file_id}")
file_name = f"{image_content.image_file.file_id}_image_file.png"
project_client.agents.save_file(file_id=image_content.image_file.file_id, file_name=file_name)
print(f"Saved image file to: {Path.cwd() / file_name}")
# Print the file path(s) from the messages
for file_path_annotation in messages.file_path_annotations:
print(f"File Paths:")
print(f"Type: {file_path_annotation.type}")
print(f"Text: {file_path_annotation.text}")
print(f"File ID: {file_path_annotation.file_path.file_id}")
print(f"Start Index: {file_path_annotation.start_index}")
print(f"End Index: {file_path_annotation.end_index}")
project_client.agents.save_file(file_id=file_path_annotation.file_path.file_id, file_name=Path(file_path_annotation.text).name)
# Delete the agent once done
project_client.agents.delete_agent(agent.id)
print("Deleted agent")
Error
testing AI agent service
Created agent, agent ID: asst_ZhagK90WpAVEzfYNTD1Thebb
Created thread, thread ID: thread_VXxeHYDgTd1Cl2Jn4pTlc0k2
Created message, message ID: msg_gIvOE2mClicg0FfkfJ8eUC7A
Run finished with status: RunStatus.FAILED
Run failed: {'code': 'rate_limit_exceeded', 'message': 'Rate limit is exceeded. Try again in 86400 seconds.'}
Messages: {'object': 'list', 'data': [{'id': 'msg_gIvOE2mClicg0FfkfJ8eUC7A', 'object': 'thread.message', 'created_at': 1737400903, 'assistant_id': None, 'thread_id': 'thread_VXxeHYDgTd1Cl2Jn4pTlc0k2', 'run_id': None, 'role': 'user', 'content': [{'type': 'text', 'text': {'value': 'Could you please create a bar chart for the operating profit using the following data and provide the file to me? Company A: $1.2 million, Company B: $2.5 million, Company C: $3.0 million, Company D: $1.8 million', 'annotations': []}}], 'attachments': [], 'metadata': {}}], 'first_id': 'msg_gIvOE2mClicg0FfkfJ8eUC7A', 'last_id': 'msg_gIvOE2mClicg0FfkfJ8eUC7A', 'has_more': False}
Deleted agent
Quota
Current usage
I am repeatedly (over several days) getting rate_limit_exceeded
error when calling gpt
model. My understanding is that the error means that I am hitting RPM
limit. However, my model deployment allows around 100 RPM while the current RPM is under 10. What is the issue?
Code
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
project_connection_string="mystring"
project = AIProjectClient.from_connection_string(
conn_str=project_connection_string,
credential=DefaultAzureCredential())
print ("testing AI agent service ")
import os
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import CodeInterpreterTool
from azure.identity import DefaultAzureCredential
from typing import Any
from pathlib import Path
# Create an Azure AI Client from a connection string, copied from your Azure AI Foundry project.
# At the moment, it should be in the format "<HostName>;<AzureSubscriptionId>;<ResourceGroup>;<ProjectName>"
# HostName can be found by navigating to your discovery_url and removing the leading "https://" and trailing "/discovery"
# To find your discovery_url, run the CLI command: az ml workspace show -n {project_name} --resource-group {resource_group_name} --query discovery_url
# Project Connection example: eastus.api.azureml.ms;12345678-abcd-1234-9fc6-62780b3d3e05;my-resource-group;my-project-name
# Customer needs to login to Azure subscription via Azure CLI and set the environment variables
project_client = project
with project_client:
# Create an instance of the CodeInterpreterTool
code_interpreter = CodeInterpreterTool()
# The CodeInterpreterTool needs to be included in creation of the agent
agent = project_client.agents.create_agent(
model="gpt-4o-mini-standardtriedhublevel",#"gpt-4o-mini",
name="my-agent",
instructions="You are helpful agent",
tools=code_interpreter.definitions,
tool_resources=code_interpreter.resources,
)
print(f"Created agent, agent ID: {agent.id}")
# Create a thread
thread = project_client.agents.create_thread()
print(f"Created thread, thread ID: {thread.id}")
# Create a message
message = project_client.agents.create_message(
thread_id=thread.id,
role="user",
content="Could you please create a bar chart for the operating profit using the following data and provide the file to me? Company A: $1.2 million, Company B: $2.5 million, Company C: $3.0 million, Company D: $1.8 million",
)
print(f"Created message, message ID: {message.id}")
# Run the agent
run = project_client.agents.create_and_process_run(thread_id=thread.id, assistant_id=agent.id)
print(f"Run finished with status: {run.status}")
if run.status == "failed":
# Check if you got "Rate limit is exceeded.", then you want to get more quota
print(f"Run failed: {run.last_error}")
# Get messages from the thread
messages = project_client.agents.list_messages(thread_id=thread.id)
print(f"Messages: {messages}")
# Get the last message from the sender
last_msg = messages.get_last_text_message_by_sender("assistant")
if last_msg:
print(f"Last Message: {last_msg.text.value}")
# Generate an image file for the bar chart
for image_content in messages.image_contents:
print(f"Image File ID: {image_content.image_file.file_id}")
file_name = f"{image_content.image_file.file_id}_image_file.png"
project_client.agents.save_file(file_id=image_content.image_file.file_id, file_name=file_name)
print(f"Saved image file to: {Path.cwd() / file_name}")
# Print the file path(s) from the messages
for file_path_annotation in messages.file_path_annotations:
print(f"File Paths:")
print(f"Type: {file_path_annotation.type}")
print(f"Text: {file_path_annotation.text}")
print(f"File ID: {file_path_annotation.file_path.file_id}")
print(f"Start Index: {file_path_annotation.start_index}")
print(f"End Index: {file_path_annotation.end_index}")
project_client.agents.save_file(file_id=file_path_annotation.file_path.file_id, file_name=Path(file_path_annotation.text).name)
# Delete the agent once done
project_client.agents.delete_agent(agent.id)
print("Deleted agent")
Error
testing AI agent service
Created agent, agent ID: asst_ZhagK90WpAVEzfYNTD1Thebb
Created thread, thread ID: thread_VXxeHYDgTd1Cl2Jn4pTlc0k2
Created message, message ID: msg_gIvOE2mClicg0FfkfJ8eUC7A
Run finished with status: RunStatus.FAILED
Run failed: {'code': 'rate_limit_exceeded', 'message': 'Rate limit is exceeded. Try again in 86400 seconds.'}
Messages: {'object': 'list', 'data': [{'id': 'msg_gIvOE2mClicg0FfkfJ8eUC7A', 'object': 'thread.message', 'created_at': 1737400903, 'assistant_id': None, 'thread_id': 'thread_VXxeHYDgTd1Cl2Jn4pTlc0k2', 'run_id': None, 'role': 'user', 'content': [{'type': 'text', 'text': {'value': 'Could you please create a bar chart for the operating profit using the following data and provide the file to me? Company A: $1.2 million, Company B: $2.5 million, Company C: $3.0 million, Company D: $1.8 million', 'annotations': []}}], 'attachments': [], 'metadata': {}}], 'first_id': 'msg_gIvOE2mClicg0FfkfJ8eUC7A', 'last_id': 'msg_gIvOE2mClicg0FfkfJ8eUC7A', 'has_more': False}
Deleted agent
Quota
Current usage
Share Improve this question asked Jan 20 at 19:29 Manu ChadhaManu Chadha 16.7k24 gold badges113 silver badges238 bronze badges1 Answer
Reset to default 0I was wrong . I ended up increasing the quote limit by editing the deployment to make the code work.