I am following this tutorial to get started with Azure AI Studio. The create_search_index.py script ran successfully and created an index. But the get_product_documents.py and chat_with_products.py scripts produce no output and no error when run; they just keep running indefinitely.
Any idea what might be going on, and what could I try to fix it?
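To narrow it down, a check like the following should show whether credential acquisition is the part that hangs (my own sketch, not from the tutorial; the scope is the standard ARM management scope):

### quick credential check (my own debugging sketch, not a tutorial file)
import logging
from azure.identity import DefaultAzureCredential

# Surface azure.identity's own logs so a credential that is stuck probing
# (e.g. the managed-identity step of the chain) becomes visible.
logging.basicConfig(level=logging.INFO)
logging.getLogger("azure.identity").setLevel(logging.DEBUG)

credential = DefaultAzureCredential()
# If this call also hangs, the problem is authentication itself rather than
# anything in the search or inference code.
token = credential.get_token("https://management.azure.com/.default")
print("token acquired, expires_on =", token.expires_on)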
Here are the scripts from the link:
### config.py
# ruff: noqa: ANN201, ANN001
import os
import sys
import pathlib
import logging
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.inference.tracing import AIInferenceInstrumentor
# load environment variables from the .env file
from dotenv import load_dotenv
load_dotenv()
# Set "./assets" as the path where assets are stored, resolving the absolute path:
ASSET_PATH = pathlib.Path(__file__).parent.resolve() / "assets"
# Configure a root app logger that prints INFO-level logs to stdout
logger = logging.getLogger("app")
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler(stream=sys.stdout))
# Returns a module-specific logger, inheriting from the root app logger
def get_logger(module_name):
    return logging.getLogger(f"app.{module_name}")
# Enable instrumentation and logging of telemetry to the project
def enable_telemetry(log_to_project: bool = False):
    AIInferenceInstrumentor().instrument()

    # enable logging message contents
    os.environ["AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED"] = "true"

    if log_to_project:
        from azure.monitor.opentelemetry import configure_azure_monitor

        project = AIProjectClient.from_connection_string(
            conn_str=os.environ["AIPROJECT_CONNECTION_STRING"], credential=DefaultAzureCredential()
        )
        # link to the tracing UI for this project
        tracing_link = f"https://ai.azure.com/tracing?wsid=/subscriptions/{project.scope['subscription_id']}/resourceGroups/{project.scope['resource_group_name']}/providers/Microsoft.MachineLearningServices/workspaces/{project.scope['project_name']}"
        application_insights_connection_string = project.telemetry.get_connection_string()
        if not application_insights_connection_string:
            logger.warning(
                "No application insights configured, telemetry will not be logged to project. Add application insights at:"
            )
            logger.warning(tracing_link)
            return

        configure_azure_monitor(connection_string=application_insights_connection_string)
        logger.info("Enabled telemetry logging to project, view traces at:")
        logger.info(tracing_link)
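For context, the other scripts consume these helpers roughly like this (a sketch of the usage, not a tutorial file):

### how the config.py helpers are used (sketch)
from config import get_logger, enable_telemetry

logger = get_logger(__name__)           # child of the "app" logger, logs to stdout
enable_telemetry(log_to_project=False)  # instruments azure-ai-inference locally

logger.info("starting")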
### create_search_index.py
import os
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import ConnectionType
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from config import get_logger
# initialize logging object
logger = get_logger(__name__)
# create a project client using environment variables loaded from the .env file
# project = AIProjectClient.from_connection_string(
# conn_str=os.environ["AIPROJECT_CONNECTION_STRING"], credential=DefaultAzureCredential()
# )
API_ENDPOINT = os.environ["API_ENDPOINT"]
API_KEY = os.environ["API_KEY"]
SUBSCRIPTION_ID = os.environ["SUBSCRIPTION_ID"]
RESOURCE_GROUP_NAME = os.environ["RESOURCE_GROUP_NAME"]
PROJECT_NAME = os.environ["PROJECT_NAME"]
# Initialize the AIProjectClient
project = AIProjectClient(
    endpoint=API_ENDPOINT,
    credential=DefaultAzureCredential(),
    subscription_id=SUBSCRIPTION_ID,
    resource_group_name=RESOURCE_GROUP_NAME,
    project_name=PROJECT_NAME,
)
# create a vector embeddings client that will be used to generate vector embeddings
embeddings = project.inference.get_embeddings_client()
# use the project client to get the default search connection
search_connection = project.connections.get_default(
    connection_type=ConnectionType.AZURE_AI_SEARCH, include_credentials=True
)
# Create a search index client using the search connection
# This client will be used to create and delete search indexes
index_client = SearchIndexClient(
    endpoint=search_connection.endpoint_url, credential=AzureKeyCredential(key=search_connection.key)
)
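As a quick sanity check that the search connection itself responds (my own sketch; list_index_names is part of azure-search-documents):

### quick search-connection check (my own debugging sketch)
# If this prints the index names promptly, the search endpoint and key are
# fine and the hang is somewhere else (credential or inference calls).
print(list(index_client.list_index_names()))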
### Define a search index
import pandas as pd
from azure.search.documents.indexes.models import (
    SemanticSearch,
    SearchField,
    SimpleField,
    SearchableField,
    SearchFieldDataType,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchAlgorithmKind,
    HnswParameters,
    VectorSearchAlgorithmMetric,
    ExhaustiveKnnAlgorithmConfiguration,
    ExhaustiveKnnParameters,
    VectorSearchProfile,
    SearchIndex,
)
def create_index_definition(index_name: str, model: str) -> SearchIndex:
    dimensions = 1536  # text-embedding-ada-002
    if model == "text-embedding-3-large":
        dimensions = 3072

    # The fields we want to index. The "contentVector" field is a vector field
    # that will be used for vector search.
    fields = [
        SimpleField(name="id", type=SearchFieldDataType.String, key=True),
        SearchableField(name="content", type=SearchFieldDataType.String),
        SimpleField(name="filepath", type=SearchFieldDataType.String),
        SearchableField(name="title", type=SearchFieldDataType.String),
        SimpleField(name="url", type=SearchFieldDataType.String),
        SearchField(
            name="contentVector",
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            # Size of the vector created by the embedding model selected above.
            vector_search_dimensions=dimensions,
            vector_search_profile_name="myHnswProfile",
        ),
    ]

    # The "content" field should be prioritized for semantic ranking.
    semantic_config = SemanticConfiguration(
        name="default",
        prioritized_fields=SemanticPrioritizedFields(
            title_field=SemanticField(field_name="title"),
            keywords_fields=[],
            content_fields=[SemanticField(field_name="content")],
        ),
    )

    # For vector search, we want to use the HNSW (Hierarchical Navigable Small World)
    # algorithm (a type of approximate nearest neighbor search algorithm) with cosine
    # distance.
    vector_search = VectorSearch(
        algorithms=[
            HnswAlgorithmConfiguration(
                name="myHnsw",
                kind=VectorSearchAlgorithmKind.HNSW,
                parameters=HnswParameters(
                    m=4,
                    ef_construction=1000,
                    ef_search=1000,
                    metric=VectorSearchAlgorithmMetric.COSINE,
                ),
            ),
            ExhaustiveKnnAlgorithmConfiguration(
                name="myExhaustiveKnn",
                kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
                parameters=ExhaustiveKnnParameters(metric=VectorSearchAlgorithmMetric.COSINE),
            ),
        ],
        profiles=[
            VectorSearchProfile(
                name="myHnswProfile",
                algorithm_configuration_name="myHnsw",
            ),
            VectorSearchProfile(
                name="myExhaustiveKnnProfile",
                algorithm_configuration_name="myExhaustiveKnn",
            ),
        ],
    )

    # Create the semantic settings with the configuration
    semantic_search = SemanticSearch(configurations=[semantic_config])

    # Create the search index definition
    return SearchIndex(
        name=index_name,
        fields=fields,
        semantic_search=semantic_search,
        vector_search=vector_search,
    )
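To sanity-check the definition on its own, a quick call like this works (the index and model names here are just for illustration):

### quick check of the index definition (sketch)
index_def = create_index_definition("test-index", model="text-embedding-3-large")
# expect 6 fields; "contentVector" should carry 3072 dimensions for this model
print([f.name for f in index_def.fields])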
### Add a CSV file to the index
# Define a function for indexing a CSV file that adds each row as a document
# and generates vector embeddings for the specified content_column.
def create_docs_from_csv(path: str, content_column: str, model: str) -> list[dict[str, any]]:
    products = pd.read_csv(path)
    items = []
    for product in products.to_dict("records"):
        content = product[content_column]
        id = str(product["id"])
        title = product["name"]
        url = f"/products/{title.lower().replace(' ', '-')}"
        emb = embeddings.embed(input=content, model=model)
        rec = {
            "id": id,
            "content": content,
            "filepath": f"{title.lower().replace(' ', '-')}",
            "title": title,
            "url": url,
            "contentVector": emb.data[0].embedding,
        }
        items.append(rec)
    return items
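While debugging, it is also worth confirming that the embedding size matches the index's vector_search_dimensions, since a mismatch fails at upload time (my own sketch; I'm assuming the tutorial's assets/products.csv with its "description" column, and EMBEDDINGS_MODEL is the variable name from my .env):

### embedding size sanity check (my own sketch)
docs = create_docs_from_csv("assets/products.csv", content_column="description", model=os.environ["EMBEDDINGS_MODEL"])
print(len(docs), "docs, vector size:", len(docs[0]["contentVector"]))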
def create_index_from_csv(index_name, csv_file):
    # If a search index already exists, delete it:
    try:
        index_definition = index_client.get_index(index_name)
        index_client.delete_index(index_name)
        logger.info(f"