
redis - RediSearch for matching file path regex pattern - Stack Overflow


How can I use RediSearch to match a more complex wildcard pattern such as the following?

import uuid

from pydantic import BaseModel
from redis.asyncio import Redis
from redis.commands.search.field import TagField, TextField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import Query
from rich import print as pprint

from app.config.config import load_config


class Indexation(BaseModel):
    prefix: str
    document_id: str
    chunk_ids: list[str]


class IndexationDAO:
    def __init__(self, redis_client: Redis):
        self.redis_client = redis_client
        self.key_prefix = "indexation:"

    async def create_indexes(self):
        await self.redis_client.ft("indexation-idx").create_index(
            fields=[
                TextField(
                    name="$.prefix",
                    no_stem=False,
                    # withsuffixtrie=True,  # ?
                    as_name="prefix",
                ),
                TagField(name="$.document_id", as_name="document_id"),
            ],
            definition=IndexDefinition(prefix=self.key_prefix, index_type=IndexType.JSON),
        )

    async def add_indexation(self, indexation: Indexation):
        key = f"{self.key_prefix}{uuid.uuid4()}"
        __added: bool = await self.redis_client.json().set(key, "$", indexation.model_dump())  # type: ignore
        return key

    async def get_indexations(self, document_id: str):
        query = Query(f'@document_id:"{{{document_id}}}"')
        docs = await self.redis_client.ft("indexation-idx").search(query=query)
        return docs

    async def search_indexations(self, prefix: str):
        # query = Query(f"@prefix:{prefix}").dialect(2)
        query = Query(f"@prefix:{prefix}")
        docs = await self.redis_client.ft("indexation-idx").search(query=query)
        return docs


async def test_indexation_dao():
    config = await load_config()

    async with Redis(host=config.redis_host, port=config.redis_port) as redis_client:
        await redis_client.flushall()

        indexation_dao = IndexationDAO(redis_client=redis_client)
        await indexation_dao.create_indexes()

        await indexation_dao.add_indexation(
            Indexation(prefix="folder/animals", document_id="fileA", chunk_ids=["chunk0", "chunk1"]),
        )
        await indexation_dao.add_indexation(
            Indexation(prefix="animals/folder", document_id="fileB", chunk_ids=["chunk2", "chunk3"]),
        )

        # indexations = await indexation_dao.search_indexations(prefix="fo*nimals")
        indexations = await indexation_dao.search_indexations(prefix="fo*/*nimals")
        pprint(indexations)

The result is 2 documents instead of 1.

tests/integration/vector_stores/test_indexation_dao.py
Result{2 total, docs: [
    Document {'id': 'indexation:403768c6-0a44-4cec-a354-417a559fda5a', 'payload': None, 'json': '{"prefix":"folder/animals","document_id":"fileA","chunk_ids":["chunk0","chunk1"]}'},
    Document {'id': 'indexation:2fc81fb3-6937-4c11-9f9a-c3c4045c4f4b', 'payload': None, 'json': '{"prefix":"animals/folder","document_id":"fileB","chunk_ids":["chunk2","chunk3"]}'}
]}
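My guess (unverified) is that both documents match because TEXT fields tokenize on punctuation, so "folder/animals" and "animals/folder" are both indexed as the terms "folder" and "animals", and the query fo*/*nimals is likewise split into two wildcard terms that each document satisfies. A quick diagnostic sketch I ran inside the same async test, reusing the DAO from above:

# Diagnostic sketch (assumption: the TEXT field splits "folder/animals" on "/"
# into the terms "folder" and "animals"). If that is right, querying a single
# term should already return both documents, which would explain the result above.
for term in ("folder", "animals", "fo*"):
    result = await indexation_dao.search_indexations(prefix=term)
    pprint(term, result.total)  # expected under this assumption: 2 for every term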

The goal is to group documents without changing the keys, since prefix matching on keys would require a SCAN. However, I am confused about how TEXT indexes handle token order, infix wildcards, nesting depth (folder/subfolder vs folder/project/subfolder), and partial matches (matching just "*der" rather than the full "subfolder" token).
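One direction I am considering (a sketch only, not verified) is indexing the path as a TAG field instead, so the whole path stays a single untokenized value, and using the wildcard syntax available with DIALECT 2 since RediSearch 2.6. The snippet below reuses the imports, redis_client, and field names from my code above; the index name "indexation-tag-idx" is just an illustration:

# Sketch under the assumption that a TAG field keeps "folder/animals" as one value
# and that the w'...' wildcard syntax (RediSearch >= 2.6, DIALECT 2) applies to it.
await redis_client.ft("indexation-tag-idx").create_index(
    fields=[TagField(name="$.prefix", as_name="prefix")],
    definition=IndexDefinition(prefix="indexation:", index_type=IndexType.JSON),
)

# The whole path is matched as a single tag value, so order and "/" depth are
# preserved; whether "/" needs escaping inside w'...' is something I still need to check.
query = Query("@prefix:{w'fo*nimals'}").dialect(2)
docs = await redis_client.ft("indexation-tag-idx").search(query=query)
pprint(docs)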
