query

Generate completion using relevant chunks as context.
def query(
    query: str,
    filters: Optional[Dict[str, Any]] = None,
    k: int = 4,
    min_score: float = 0.0,
    max_tokens: Optional[int] = None,
    temperature: Optional[float] = None,
    use_colpali: bool = True,
    use_reranking: Optional[bool] = None,
    graph_name: Optional[str] = None,
    hop_depth: int = 1,
    include_paths: bool = False,
    prompt_overrides: Optional[Union[QueryPromptOverrides, Dict[str, Any]]] = None,
    folder_name: Optional[Union[str, List[str]]] = None,
    folder_depth: Optional[int] = None,
    chat_id: Optional[str] = None,
    schema: Optional[Union[Type[BaseModel], Dict[str, Any]]] = None,
    llm_config: Optional[Dict[str, Any]] = None,
    padding: int = 0,
) -> CompletionResponse

Parameters

  • query (str): Query text
  • filters (Dict[str, Any], optional): Optional metadata filters
  • k (int, optional): Number of chunks to use as context. Defaults to 4.
  • min_score (float, optional): Minimum similarity threshold. Defaults to 0.0.
  • max_tokens (int, optional): Maximum tokens in completion
  • temperature (float, optional): Model temperature
  • use_colpali (bool, optional): Whether to use the ColPali-style multimodal embedding model to retrieve context for the completion (only works for documents ingested with use_colpali=True). Defaults to True.
  • use_reranking (bool, optional): Override workspace reranking configuration for this request.
  • graph_name (str, optional): Optional name of the graph to use for knowledge graph-enhanced retrieval
  • hop_depth (int, optional): Number of relationship hops to traverse in the graph (1-3). Defaults to 1.
  • include_paths (bool, optional): Whether to include relationship paths in the response. Defaults to False.
  • prompt_overrides (QueryPromptOverrides | Dict[str, Any], optional): Optional customizations for entity extraction, resolution, and query prompts
  • folder_name (str | List[str], optional): Optional folder scope. Accepts canonical paths (e.g., /projects/alpha/specs) or a list of paths/names.
  • folder_depth (int, optional): Folder scope depth. None/0 = exact match, -1 = include all descendants, n > 0 = include descendants up to n levels deep.
  • chat_id (str, optional): Optional chat session ID for persisting conversation history.
  • schema (Type[BaseModel] | Dict[str, Any], optional): Optional schema for structured output, can be a Pydantic model or a JSON schema dict
  • llm_config (Dict[str, Any], optional): Optional LiteLLM-compatible model configuration (e.g., model name, API key, base URL). Allows overriding the default LLM configuration on a per-query basis. Defaults to None.
  • padding (int, optional): Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only). Defaults to 0.

Metadata Filters

Use the same JSON filters described in the Metadata Filtering concept to restrict which documents feed into the completion. Example:
filters = {
    "$and": [
        {"project": {"$eq": "delta"}},
        {"start_date": {"$lte": "2024-06-01T00:00:00Z"}},
        {"end_date": {"$gte": "2024-06-01T00:00:00Z"}}
    ]
}

response = db.query(
    "Summarize current scope changes",
    filters=filters,
    k=6,
    temperature=0.3
)
You can also filter by folder name and use expressive operators like $in, $regex, and $nin:
# Query across multiple folders
filters = {
    "$and": [
        {"folder_name": {"$in": ["reports", "invoices"]}},
        {"year": 2024},
        {"priority": {"$gte": 50}}
    ]
}

response = db.query("What are the key financial highlights?", filters=filters)
For more advanced filtering patterns, see the Complex Metadata Filtering cookbook.

Returns

  • CompletionResponse: Response containing the completion, source information, and potentially structured output.

Examples

Standard Query

from morphik import Morphik

db = Morphik()

response = db.query(
    "What are the key findings about customer satisfaction?",
    filters={"department": "research"},
    temperature=0.7
)

nested = db.query(
    "List open design questions",
    folder_name="/projects/alpha",
    folder_depth=-1,
    k=6,
)

print(response.completion)

# Print the sources used for the completion
for source in response.sources:
    print(f"Document ID: {source.document_id}, Chunk: {source.chunk_number}, Score: {source.score}")

Knowledge Graph Enhanced Query

from morphik import Morphik

db = Morphik()

# Use a knowledge graph to enhance the query
response = db.query(
    "How does product X relate to customer segment Y?",
    graph_name="market_graph",
    hop_depth=2,
    include_paths=True
)

print(response.completion)

# If include_paths=True, you can inspect the graph paths
if response.metadata and "graph" in response.metadata:
    for path in response.metadata["graph"]["paths"]:
        print(" -> ".join(path))

With Custom Prompt Overrides

from morphik import Morphik
from morphik.models import QueryPromptOverride, QueryPromptOverrides

db = Morphik()

# Using the QueryPromptOverrides object
response = db.query(
    "What are the key findings?",
    filters={"category": "research"},
    prompt_overrides=QueryPromptOverrides(
        query=QueryPromptOverride(
            prompt_template="Answer the question in a formal, academic tone: {question}\n\nContext:\n{context}\n\nAnswer:"
        )
    )
)

# Alternatively, using a dictionary
response = db.query(
    "What are the key findings?",
    filters={"category": "research"},
    prompt_overrides={
        "query": {
            "prompt_template": "Answer the question in a formal, academic tone: {question}\n\nContext:\n{context}\n\nAnswer:"
        }
    }
)

print(response.completion)

CompletionResponse Properties

The CompletionResponse object returned by this method has the following properties:
  • completion (str | Dict[str, Any] | None): The generated completion text or the structured output dictionary.
  • usage (Dict[str, int]): Token usage information
  • sources (List[ChunkSource]): Sources of chunks used in the completion
  • metadata (Dict[str, Any], optional): Additional metadata about the completion. When using a knowledge graph with include_paths=True, this contains graph traversal information.
  • finish_reason (Optional[str]): Reason the generation finished (e.g., 'stop', 'length')

ChunkSource Properties

Each ChunkSource object in the sources list has the following properties:
  • document_id (str): ID of the source document
  • chunk_number (int): Chunk number within the document
  • score (Optional[float]): Relevance score (if available)

Using Custom LLM Configuration

The llm_config parameter is available in SDK version 0.2.5 and later.
Use the llm_config parameter to override the default LLM configuration on a per-query basis. This allows you to use different models, API keys, or other LiteLLM-compatible settings for specific queries.
from morphik import Morphik

db = Morphik()

# Use GPT-4 for a specific query
response = db.query(
    "What are the key findings?",
    llm_config={
        "model": "gpt-4",
        "api_key": "sk-...",
        "base_url": "https://api.openai.com/v1",
        "temperature": 0.7,
        "max_tokens": 2000
    }
)

print(response.completion)

# Use Claude for another query
response = db.query(
    "Summarize the research findings",
    llm_config={
        "model": "claude-3-opus-20240229",
        "api_key": "your-anthropic-key"
    }
)

print(response.completion)

Using Structured Output

Use the schema parameter to get the completion response in a structured format according to a Pydantic model or a JSON schema dictionary.
from morphik import Morphik
from pydantic import BaseModel
from typing import List

# Define the desired output structure
class ResearchFindings(BaseModel):
    main_finding: str
    supporting_evidence: List[str]
    limitations: List[str]

db = Morphik()

response = db.query(
    "Summarize the key research findings from these documents",
    filters={"department": "research"},
    schema=ResearchFindings
)

# Check if the completion is a dictionary (structured output)
if isinstance(response.completion, dict):
    try:
        # Parse the dictionary into the Pydantic model
        findings = ResearchFindings(**response.completion)
        print(f"Main finding: {findings.main_finding}")
        print("Supporting evidence:")
        for evidence in findings.supporting_evidence:
            print(f"- {evidence}")
        print("Limitations:")
        for limitation in findings.limitations:
            print(f"- {limitation}")
    except Exception as e:
        print(f"Error parsing structured output: {e}")
        # Fallback: print the raw dictionary
        print(response.completion)
elif isinstance(response.completion, str):
    # Fallback to text completion
    print(response.completion)