
redis - RediSearch for matching file path regex pattern - Stack Overflow


How can I use RediSearch to match a more complex wildcard pattern such as the following?

import uuid

from pydantic import BaseModel
from redis.asyncio import Redis
from redis.commands.search.field import TagField, TextField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType
from redis.commands.search.query import Query
from rich import print as pprint

from app.config.config import load_config


class Indexation(BaseModel):
    prefix: str
    document_id: str
    chunk_ids: list[str]


class IndexationDAO:
    def __init__(self, redis_client: Redis):
        self.redis_client = redis_client
        self.key_prefix = "indexation:"

    async def create_indexes(self):
        await self.redis_client.ft("indexation-idx").create_index(
            fields=[
                TextField(
                    name="$.prefix",
                    no_stem=False,
                    # withsuffixtrie=True,  # ?
                    as_name="prefix",
                ),
                TagField(name="$.document_id", as_name="document_id"),
            ],
            definition=IndexDefinition(prefix=self.key_prefix, index_type=IndexType.JSON),
        )

    async def add_indexation(self, indexation: Indexation):
        key = f"{self.key_prefix}{uuid.uuid4()}"
        __added: bool = await self.redis_client.json().set(key, "$", indexation.model_dump())  # type: ignore
        return key

    async def get_indexations(self, document_id: str):
        query = Query(f'@document_id:"{{{document_id}}}"')
        docs = await self.redis_client.ft("indexation-idx").search(query=query)
        return docs

    async def search_indexations(self, prefix: str):
        # query = Query(f"@prefix:{prefix}").dialect(2)
        query = Query(f"@prefix:{prefix}")
        docs = await self.redis_client.ft("indexation-idx").search(query=query)
        return docs


async def test_indexation_dao():
    config = await load_config()

    async with Redis(host=config.redis_host, port=config.redis_port) as redis_client:
        await redis_client.flushall()

        indexation_dao = IndexationDAO(redis_client=redis_client)
        await indexation_dao.create_indexes()

        await indexation_dao.add_indexation(
            Indexation(prefix="folder/animals", document_id="fileA", chunk_ids=["chunk0", "chunk1"]),
        )
        await indexation_dao.add_indexation(
            Indexation(prefix="animals/folder", document_id="fileB", chunk_ids=["chunk2", "chunk3"]),
        )

        # indexations = await indexation_dao.search_indexations(prefix="fo*nimals")
        indexations = await indexation_dao.search_indexations(prefix="fo*/*nimals")
        pprint(indexations)

The result is 2 documents instead of 1.

tests/integration/vector_stores/test_indexation_dao.py
Result{2 total, docs: [
    Document {'id': 'indexation:403768c6-0a44-4cec-a354-417a559fda5a', 'payload': None, 'json': '{"prefix":"folder/animals","document_id":"fileA","chunk_ids":["chunk0","chunk1"]}'},
    Document {'id': 'indexation:2fc81fb3-6937-4c11-9f9a-c3c4045c4f4b', 'payload': None, 'json': '{"prefix":"animals/folder","document_id":"fileB","chunk_ids":["chunk2","chunk3"]}'}
]}
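My guess (unverified) is that both documents match because TEXT fields tokenize on punctuation, so "folder/animals" and "animals/folder" are both indexed as the terms "folder" and "animals", and the query fo*/*nimals is likewise split into two wildcard terms that each document satisfies. A quick diagnostic sketch I ran inside the same async test, reusing the DAO from above:

# Diagnostic sketch (assumption: the TEXT field splits "folder/animals" on "/"
# into the terms "folder" and "animals"). If that is right, querying a single
# term should already return both documents, which would explain the result above.
for term in ("folder", "animals", "fo*"):
    result = await indexation_dao.search_indexations(prefix=term)
    pprint(term, result.total)  # expected under this assumption: 2 for every term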

The goal is to group documents without changing the keys, since prefix matching on keys would require a SCAN. However, I am confused about how TEXT indexes handle token order, infix wildcards, nesting depth (folder/subfolder vs folder/project/subfolder), and partial matches (matching just "*der" rather than the full "subfolder" token).
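One direction I am considering (a sketch only, not verified) is indexing the path as a TAG field instead, so the whole path stays a single untokenized value, and using the wildcard syntax available with DIALECT 2 since RediSearch 2.6. The snippet below reuses the imports, redis_client, and field names from my code above; the index name "indexation-tag-idx" is just an illustration:

# Sketch under the assumption that a TAG field keeps "folder/animals" as one value
# and that the w'...' wildcard syntax (RediSearch >= 2.6, DIALECT 2) applies to it.
await redis_client.ft("indexation-tag-idx").create_index(
    fields=[TagField(name="$.prefix", as_name="prefix")],
    definition=IndexDefinition(prefix="indexation:", index_type=IndexType.JSON),
)

# The whole path is matched as a single tag value, so order and "/" depth are
# preserved; whether "/" needs escaping inside w'...' is something I still need to check.
query = Query("@prefix:{w'fo*nimals'}").dialect(2)
docs = await redis_client.ft("indexation-tag-idx").search(query=query)
pprint(docs)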
