I am using Milvus as the document store with Haystack.
The MilvusDocumentStore connection object is created with:
from functools import lru_cache

from milvus_haystack import MilvusDocumentStore


@lru_cache
def get_vector_db():
    # Get the document store backed by a local Milvus Lite database file
    return MilvusDocumentStore(
        connection_args={
            "uri": get_settings().milvus_db_path
        },  # Milvus Lite
        drop_old=True,
    )
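The store instance is then fetched once and reused when building the pipeline; the call site is simplified here:
# Shared store instance passed to the DocumentWriter below
document_store = get_vector_db()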
And the Haystack indexing pipeline is defined as below:
from haystack import Pipeline
from haystack.components.converters import TextFileToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.components.joiners import DocumentJoiner
from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter
from haystack.components.routers import FileTypeRouter
from haystack.components.writers import DocumentWriter
from haystack.document_stores.types import DuplicatePolicy

# Route incoming files by MIME type
file_type_router = FileTypeRouter(
    mime_types=[
        "text/plain"
    ]
)
# Convert plain text files to Document objects
text_converter = TextFileToDocument()
# Join Documents coming from different branches of a pipeline
document_joiner = DocumentJoiner()
# Clean the text of the documents
document_cleaner = DocumentCleaner()
# Split the documents into smaller documents
document_splitter = DocumentSplitter(split_by="sentence", split_length=2)
# Create embeddings from the Documents
document_embedder = SentenceTransformersDocumentEmbedder(
    model="sentence-transformers/all-MiniLM-L6-v2"
)
# Write the documents to the DocumentStore
document_writer = DocumentWriter(document_store, policy=DuplicatePolicy.NONE)
# Build the Indexing pipeline
preprocessing_pipeline = Pipeline()
preprocessing_pipeline.add_component(
    name="file_type_router", instance=file_type_router
)
preprocessing_pipeline.add_component(name="text_converter", instance=text_converter)
preprocessing_pipeline.add_component(
    name="document_joiner", instance=document_joiner
)
preprocessing_pipeline.add_component(
    name="document_cleaner", instance=document_cleaner
)
preprocessing_pipeline.add_component(
    name="document_splitter", instance=document_splitter
)
preprocessing_pipeline.add_component(
    name="document_embedder", instance=document_embedder
)
preprocessing_pipeline.add_component(
    name="document_writer", instance=document_writer
)
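The custom duplicate_checker component referenced in the connections below is registered the same way (its definition is omitted for brevity):
preprocessing_pipeline.add_component(
    name="duplicate_checker", instance=duplicate_checker
)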
# Connect components
preprocessing_pipeline.connect(
    "file_type_router.text/plain", "text_converter.sources"
)
preprocessing_pipeline.connect("text_converter", "document_joiner")
preprocessing_pipeline.connect("document_joiner", "document_cleaner")
preprocessing_pipeline.connect("document_cleaner", "document_splitter")
preprocessing_pipeline.connect("document_splitter", "document_embedder")
preprocessing_pipeline.connect("document_embedder", "duplicate_checker")
preprocessing_pipeline.connect(
    "duplicate_checker.documents_to_index", "document_writer.documents"
)
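Indexing is then triggered with a run call along these lines (the file path here is a placeholder; the real sources come from the application):
preprocessing_pipeline.run(
    {"file_type_router": {"sources": ["example.txt"]}}  # placeholder path
)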
When I try to write to the db I get the following error:
Failed to create collection: HaystackCollection error: <MilvusException:
(code=2000, message=Assert "!name_ids_.count(field_name)" at
/Users/zilliz/milvus-lite/thirdparty/milvus/internal/core/src/common/Schema.h:172
=> duplicated field name: segcore error)>
ERROR: Exception in ASGI application
Stepping through the error with the debugger, it looks like there is an attempt to recreate the default database collection.