rate_limit_exceeding error even though the request is within rpm limit - azure openai

I am repeatedly (over several days) getting rate_limit_exceeded error when calling gpt model. My understanding is that the error means that I am hitting RPM limit. However, my model deployment allows around 100 RPM while the current RPM is under 10. What is the issue?

Code

from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient

project_connection_string="mystring"

project = AIProjectClient.from_connection_string(
  conn_str=project_connection_string,
  credential=DefaultAzureCredential())


print ("testing AI agent service ")

import os
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import CodeInterpreterTool
from azure.identity import DefaultAzureCredential
from typing import Any
from pathlib import Path

# Create an Azure AI Client from a connection string, copied from your Azure AI Foundry project.
# At the moment, it should be in the format "<HostName>;<AzureSubscriptionId>;<ResourceGroup>;<ProjectName>"
# HostName can be found by navigating to your discovery_url and removing the leading "https://" and trailing "/discovery"
# To find your discovery_url, run the CLI command: az ml workspace show -n {project_name} --resource-group {resource_group_name} --query discovery_url
# Project Connection example: eastus.api.azureml.ms;12345678-abcd-1234-9fc6-62780b3d3e05;my-resource-group;my-project-name
# Customer needs to login to Azure subscription via Azure CLI and set the environment variables

project_client = project


with project_client:
    # Create an instance of the CodeInterpreterTool
    code_interpreter = CodeInterpreterTool()

    # The CodeInterpreterTool needs to be included in creation of the agent
    agent = project_client.agents.create_agent(
        model="gpt-4o-mini-standardtriedhublevel",#"gpt-4o-mini",
        name="my-agent",
        instructions="You are helpful agent",
        tools=code_interpreter.definitions,
        tool_resources=code_interpreter.resources,
    )
    print(f"Created agent, agent ID: {agent.id}")

    # Create a thread
    thread = project_client.agents.create_thread()
    print(f"Created thread, thread ID: {thread.id}")

    # Create a message
    message = project_client.agents.create_message(
        thread_id=thread.id,
        role="user",
        content="Could you please create a bar chart for the operating profit using the following data and provide the file to me? Company A: $1.2 million, Company B: $2.5 million, Company C: $3.0 million, Company D: $1.8 million",
    )
    print(f"Created message, message ID: {message.id}")

    # Run the agent
    run = project_client.agents.create_and_process_run(thread_id=thread.id, assistant_id=agent.id)
    print(f"Run finished with status: {run.status}")

    if run.status == "failed":
        # Check if you got "Rate limit is exceeded.", then you want to get more quota
        print(f"Run failed: {run.last_error}")

    # Get messages from the thread
    messages = project_client.agents.list_messages(thread_id=thread.id)
    print(f"Messages: {messages}")

    # Get the last message from the sender
    last_msg = messages.get_last_text_message_by_sender("assistant")
    if last_msg:
        print(f"Last Message: {last_msg.text.value}")

    # Generate an image file for the bar chart
    for image_content in messages.image_contents:
        print(f"Image File ID: {image_content.image_file.file_id}")
        file_name = f"{image_content.image_file.file_id}_image_file.png"
        project_client.agents.save_file(file_id=image_content.image_file.file_id, file_name=file_name)
        print(f"Saved image file to: {Path.cwd() / file_name}")

    # Print the file path(s) from the messages
    for file_path_annotation in messages.file_path_annotations:
        print(f"File Paths:")
        print(f"Type: {file_path_annotation.type}")
        print(f"Text: {file_path_annotation.text}")
        print(f"File ID: {file_path_annotation.file_path.file_id}")
        print(f"Start Index: {file_path_annotation.start_index}")
        print(f"End Index: {file_path_annotation.end_index}")
        project_client.agents.save_file(file_id=file_path_annotation.file_path.file_id, file_name=Path(file_path_annotation.text).name)

    # Delete the agent once done
    project_client.agents.delete_agent(agent.id)
    print("Deleted agent")

Error

testing AI agent service 
Created agent, agent ID: asst_ZhagK90WpAVEzfYNTD1Thebb
Created thread, thread ID: thread_VXxeHYDgTd1Cl2Jn4pTlc0k2
Created message, message ID: msg_gIvOE2mClicg0FfkfJ8eUC7A
Run finished with status: RunStatus.FAILED
Run failed: {'code': 'rate_limit_exceeded', 'message': 'Rate limit is exceeded. Try again in 86400 seconds.'}
Messages: {'object': 'list', 'data': [{'id': 'msg_gIvOE2mClicg0FfkfJ8eUC7A', 'object': 'thread.message', 'created_at': 1737400903, 'assistant_id': None, 'thread_id': 'thread_VXxeHYDgTd1Cl2Jn4pTlc0k2', 'run_id': None, 'role': 'user', 'content': [{'type': 'text', 'text': {'value': 'Could you please create a bar chart for the operating profit using the following data and provide the file to me? Company A: $1.2 million, Company B: $2.5 million, Company C: $3.0 million, Company D: $1.8 million', 'annotations': []}}], 'attachments': [], 'metadata': {}}], 'first_id': 'msg_gIvOE2mClicg0FfkfJ8eUC7A', 'last_id': 'msg_gIvOE2mClicg0FfkfJ8eUC7A', 'has_more': False}
Deleted agent

Quota

Current usage

Code

from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient

project_connection_string="mystring"

project = AIProjectClient.from_connection_string(
  conn_str=project_connection_string,
  credential=DefaultAzureCredential())


print ("testing AI agent service ")

import os
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import CodeInterpreterTool
from azure.identity import DefaultAzureCredential
from typing import Any
from pathlib import Path

# Create an Azure AI Client from a connection string, copied from your Azure AI Foundry project.
# At the moment, it should be in the format "<HostName>;<AzureSubscriptionId>;<ResourceGroup>;<ProjectName>"
# HostName can be found by navigating to your discovery_url and removing the leading "https://" and trailing "/discovery"
# To find your discovery_url, run the CLI command: az ml workspace show -n {project_name} --resource-group {resource_group_name} --query discovery_url
# Project Connection example: eastus.api.azureml.ms;12345678-abcd-1234-9fc6-62780b3d3e05;my-resource-group;my-project-name
# Customer needs to login to Azure subscription via Azure CLI and set the environment variables

project_client = project


with project_client:
    # Create an instance of the CodeInterpreterTool
    code_interpreter = CodeInterpreterTool()

    # The CodeInterpreterTool needs to be included in creation of the agent
    agent = project_client.agents.create_agent(
        model="gpt-4o-mini-standardtriedhublevel",#"gpt-4o-mini",
        name="my-agent",
        instructions="You are helpful agent",
        tools=code_interpreter.definitions,
        tool_resources=code_interpreter.resources,
    )
    print(f"Created agent, agent ID: {agent.id}")

    # Create a thread
    thread = project_client.agents.create_thread()
    print(f"Created thread, thread ID: {thread.id}")

    # Create a message
    message = project_client.agents.create_message(
        thread_id=thread.id,
        role="user",
        content="Could you please create a bar chart for the operating profit using the following data and provide the file to me? Company A: $1.2 million, Company B: $2.5 million, Company C: $3.0 million, Company D: $1.8 million",
    )
    print(f"Created message, message ID: {message.id}")

    # Run the agent
    run = project_client.agents.create_and_process_run(thread_id=thread.id, assistant_id=agent.id)
    print(f"Run finished with status: {run.status}")

    if run.status == "failed":
        # Check if you got "Rate limit is exceeded.", then you want to get more quota
        print(f"Run failed: {run.last_error}")

    # Get messages from the thread
    messages = project_client.agents.list_messages(thread_id=thread.id)
    print(f"Messages: {messages}")

    # Get the last message from the sender
    last_msg = messages.get_last_text_message_by_sender("assistant")
    if last_msg:
        print(f"Last Message: {last_msg.text.value}")

    # Generate an image file for the bar chart
    for image_content in messages.image_contents:
        print(f"Image File ID: {image_content.image_file.file_id}")
        file_name = f"{image_content.image_file.file_id}_image_file.png"
        project_client.agents.save_file(file_id=image_content.image_file.file_id, file_name=file_name)
        print(f"Saved image file to: {Path.cwd() / file_name}")

    # Print the file path(s) from the messages
    for file_path_annotation in messages.file_path_annotations:
        print(f"File Paths:")
        print(f"Type: {file_path_annotation.type}")
        print(f"Text: {file_path_annotation.text}")
        print(f"File ID: {file_path_annotation.file_path.file_id}")
        print(f"Start Index: {file_path_annotation.start_index}")
        print(f"End Index: {file_path_annotation.end_index}")
        project_client.agents.save_file(file_id=file_path_annotation.file_path.file_id, file_name=Path(file_path_annotation.text).name)

    # Delete the agent once done
    project_client.agents.delete_agent(agent.id)
    print("Deleted agent")

Error

testing AI agent service 
Created agent, agent ID: asst_ZhagK90WpAVEzfYNTD1Thebb
Created thread, thread ID: thread_VXxeHYDgTd1Cl2Jn4pTlc0k2
Created message, message ID: msg_gIvOE2mClicg0FfkfJ8eUC7A
Run finished with status: RunStatus.FAILED
Run failed: {'code': 'rate_limit_exceeded', 'message': 'Rate limit is exceeded. Try again in 86400 seconds.'}
Messages: {'object': 'list', 'data': [{'id': 'msg_gIvOE2mClicg0FfkfJ8eUC7A', 'object': 'thread.message', 'created_at': 1737400903, 'assistant_id': None, 'thread_id': 'thread_VXxeHYDgTd1Cl2Jn4pTlc0k2', 'run_id': None, 'role': 'user', 'content': [{'type': 'text', 'text': {'value': 'Could you please create a bar chart for the operating profit using the following data and provide the file to me? Company A: $1.2 million, Company B: $2.5 million, Company C: $3.0 million, Company D: $1.8 million', 'annotations': []}}], 'attachments': [], 'metadata': {}}], 'first_id': 'msg_gIvOE2mClicg0FfkfJ8eUC7A', 'last_id': 'msg_gIvOE2mClicg0FfkfJ8eUC7A', 'has_more': False}
Deleted agent

Quota

Current usage

Share Improve this question asked Jan 20 at 19:29 Manu Chadha 16.7k24 gold badges113 silver badges238 bronze badges

Add a comment |

1 Answer 1

Sorted by: Reset to default 0

I was wrong . I ended up increasing the quote limit by editing the deployment to make the code work.

科技改变生活-雨落星辰 - 所有的伟大,都源于一个勇敢的开始

rate_limit_exceeding error even though the request is within rpm limit - azure openai - Stack Overflow

1 Answer 1

与本文相关的文章

评论列表(0)