query

Generate completion using relevant chunks as context.
def query(
    query: str,
    filters: Optional[Dict[str, Any]] = None,
    k: int = 4,
    min_score: float = 0.0,
    max_tokens: Optional[int] = None,
    temperature: Optional[float] = None,
    use_colpali: bool = True,
    use_reranking: Optional[bool] = None,
    graph_name: Optional[str] = None,
    hop_depth: int = 1,
    include_paths: bool = False,
    prompt_overrides: Optional[Union[QueryPromptOverrides, Dict[str, Any]]] = None,
    folder_name: Optional[Union[str, List[str]]] = None,
    folder_depth: Optional[int] = None,
    chat_id: Optional[str] = None,
    schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
    llm_config: Optional[Dict[str, Any]] = None,
    padding: int = 0,
) -> CompletionResponse

Parameters

  • query (str): Query text
  • filters (Dict[str, Any], optional): Optional metadata filters
  • k (int, optional): Number of chunks to use as context. Defaults to 4.
  • min_score (float, optional): Minimum similarity threshold. Defaults to 0.0.
  • max_tokens (int, optional): Maximum tokens in completion
  • temperature (float, optional): Model temperature
  • use_colpali (bool, optional): Whether to use the ColPali-style multimodal embedding model to retrieve context for the completion (only works for documents ingested with use_colpali=True). Defaults to True.
  • use_reranking (bool, optional): Override workspace reranking configuration for this request.
  • graph_name (str, optional): Optional name of the graph to use for knowledge graph-enhanced retrieval
  • hop_depth (int, optional): Number of relationship hops to traverse in the graph (1-3). Defaults to 1.
  • include_paths (bool, optional): Whether to include relationship paths in the response. Defaults to False.
  • prompt_overrides (QueryPromptOverrides | Dict[str, Any], optional): Optional customizations for entity extraction, resolution, and query prompts
  • folder_name (str | List[str], optional): Optional folder scope. Accepts canonical paths (e.g., /projects/alpha/specs) or a list of paths/names.
  • folder_depth (int, optional): Folder scope depth. None/0 = exact match, -1 = include all descendants, n > 0 = include descendants up to n levels deep.
  • chat_id (str, optional): Optional chat session ID for persisting conversation history.
  • schema (Type[BaseModel] | Dict[str, Any], optional): Optional schema for structured output, can be a Pydantic model or a JSON schema dict
  • llm_config (Dict[str, Any], optional): Optional LiteLLM-compatible model configuration (e.g., model name, API key, base URL). Allows overriding the default LLM configuration on a per-query basis. Defaults to None.
  • padding (int, optional): Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only). Defaults to 0.

Metadata Filters

Use the same JSON filters described in the Metadata Filtering concept to restrict which documents feed into the completion. Example:
filters = {
    "$and": [
        {"project": {"$eq": "delta"}},
        {"start_date": {"$lte": "2024-06-01T00:00:00Z"}},
        {"end_date": {"$gte": "2024-06-01T00:00:00Z"}}
    ]
}

response = db.query(
    "Summarize current scope changes",
    filters=filters,
    k=6,
    temperature=0.3
)
You can also filter by folder name and use expressive operators like $in, $regex, and $nin:
# Query across multiple folders
filters = {
    "$and": [
        {"folder_name": {"$in": ["reports", "invoices"]}},
        {"year": 2024},
        {"priority": {"$gte": 50}}
    ]
}

response = db.query("What are the key financial highlights?", filters=filters)
For more advanced filtering patterns, see the Complex Metadata Filtering cookbook.

Returns

  • CompletionResponse: Response containing the completion, source information, and potentially structured output.

Examples

Standard Query

from morphik import Morphik

db = Morphik()

response = db.query(
    "What are the key findings about customer satisfaction?",
    filters={"department": "research"},
    temperature=0.7
)

nested = db.query(
    "List open design questions",
    folder_name="/projects/alpha",
    folder_depth=-1,
    k=6,
)

print(response.completion)

# Print the sources used for the completion
for source in response.sources:
    print(f"Document ID: {source.document_id}, Chunk: {source.chunk_number}, Score: {source.score}")

Knowledge Graph Enhanced Query

from morphik import Morphik

db = Morphik()

# Use a knowledge graph to enhance the query
response = db.query(
    "How does product X relate to customer segment Y?",
    graph_name="market_graph",
    hop_depth=2,
    include_paths=True
)

print(response.completion)

# If include_paths=True, you can inspect the graph paths
if response.metadata and "graph" in response.metadata:
    for path in response.metadata["graph"]["paths"]:
        print(" -> ".join(path))

With Custom Prompt Overrides

from morphik import Morphik
from morphik.models import QueryPromptOverride, QueryPromptOverrides

db = Morphik()

# Using the QueryPromptOverrides object
response = db.query(
    "What are the key findings?",
    filters={"category": "research"},
    prompt_overrides=QueryPromptOverrides(
        query=QueryPromptOverride(
            prompt_template="Answer the question in a formal, academic tone: {question}\n\nContext:\n{context}\n\nAnswer:"
        )
    )
)

# Alternatively, using a dictionary
response = db.query(
    "What are the key findings?",
    filters={"category": "research"},
    prompt_overrides={
        "query": {
            "prompt_template": "Answer the question in a formal, academic tone: {question}\n\nContext:\n{context}\n\nAnswer:"
        }
    }
)

print(response.completion)

CompletionResponse Properties

The CompletionResponse object returned by this method has the following properties:
  • completion (str | Dict[str, Any] | None): The generated completion text or the structured output dictionary.
  • usage (Dict[str, int]): Token usage information
  • sources (List[ChunkSource]): Sources of chunks used in the completion
  • metadata (Dict[str, Any], optional): Additional metadata about the completion. When using a knowledge graph with include_paths=True, this contains graph traversal information.
  • finish_reason (Optional[str]): Reason the generation finished (e.g., 'stop', 'length')

ChunkSource Properties

Each ChunkSource object in the sources list has the following properties:
  • document_id (str): ID of the source document
  • chunk_number (int): Chunk number within the document
  • score (Optional[float]): Relevance score (if available)

Using Custom LLM Configuration

The llm_config parameter is available in SDK version 0.2.5 and later.
Use the llm_config parameter to override the default LLM configuration on a per-query basis. This allows you to use different models, API keys, or other LiteLLM-compatible settings for specific queries.
from morphik import Morphik

db = Morphik()

# Use GPT-4 for a specific query
response = db.query(
    "What are the key findings?",
    llm_config={
        "model": "gpt-4",
        "api_key": "sk-...",
        "base_url": "https://api.openai.com/v1",
        "temperature": 0.7,
        "max_tokens": 2000
    }
)

print(response.completion)

# Use Claude for another query
response = db.query(
    "Summarize the research findings",
    llm_config={
        "model": "claude-3-opus-20240229",
        "api_key": "your-anthropic-key"
    }
)

print(response.completion)

Using Structured Output

Use the schema parameter to get the completion response in a structured format according to a Pydantic model or a JSON schema dictionary.
from morphik import Morphik
from pydantic import BaseModel
from typing import List

# Define the desired output structure
class ResearchFindings(BaseModel):
    main_finding: str
    supporting_evidence: List[str]
    limitations: List[str]

db = Morphik()

response = db.query(
    "Summarize the key research findings from these documents",
    filters={"department": "research"},
    schema=ResearchFindings
)

# Check if the completion is a dictionary (structured output)
if isinstance(response.completion, dict):
    try:
        # Parse the dictionary into the Pydantic model
        findings = ResearchFindings(**response.completion)
        print(f"Main finding: {findings.main_finding}")
        print("Supporting evidence:")
        for evidence in findings.supporting_evidence:
            print(f"- {evidence}")
        print("Limitations:")
        for limitation in findings.limitations:
            print(f"- {limitation}")
    except Exception as e:
        print(f"Error parsing structured output: {e}")
        # Fallback: print the raw dictionary
        print(response.completion)
elif isinstance(response.completion, str):
    # Fallback to text completion
    print(response.completion)