Skip to main content

Structured Output in FlockMTL

FlockMTL enables users to obtain structured JSON responses from Large Language Models (LLMs) by defining JSON schemas within SQL queries. This ensures LLM outputs conform to specific data structures, making them more reliable and easier to process.

Overview

Structured output allows you to define the exact format and structure of the JSON response you expect from an LLM. Instead of receiving free-form text, you can specify a JSON schema that the model must follow.

Compatibility: Works with all FlockMTL LLM functions - llm_complete, llm_filter, llm_reduce, llm_rerank, llm_first, llm_last

Prerequisites

To extract values from structured JSON responses using dot notation (e.g., response.category), you need to load the JSON extension:

LOAD JSON;

OpenAI Structured Output

OpenAI uses the response_format field with type: "json_schema" and strict: true to enforce schema compliance. For more details check OpenAI Structured Output.

Syntax

{
'model_name': 'your-model-name',
'model_parameters': '{
"response_format": {
"type": "json_schema",
"json_schema": {
"name": "response_schema",
"schema": {
"type": "object",
"properties": {
"field_name": { "type": "string" }
},
"required": ["field_name"],
"additionalProperties": false
}
},
"strict": true
}
}'
}

Basic Example

SELECT llm_complete(
{
'model_name': 'gpt-4o',
'model_parameters': '{
"response_format": {
"type": "json_schema",
"json_schema": {
"name": "product_categorization",
"schema": {
"type": "object",
"properties": {
"category": { "type": "string" },
"confidence": { "type": "number" }
},
"required": ["category", "confidence"],
"additionalProperties": false
}
},
"strict": true
}
}'
},
{ 'prompt': 'Categorize this product and provide a confidence score.' },
{ 'product_name': 'Wireless Bluetooth Headphones' }
) AS response;

Advanced Example

WITH product_analysis AS (
SELECT
product_id,
product_name,
llm_complete(
{
'model_name': 'gpt-4o',
'model_parameters': '{
"response_format": {
"type": "json_schema",
"json_schema": {
"name": "product_analysis",
"schema": {
"type": "object",
"properties": {
"category": { "type": "string" },
"price_range": { "type": "string" },
"features": {
"type": "array",
"items": { "type": "string" }
},
"target_audience": { "type": "string" },
"rating_prediction": { "type": "number" }
},
"required": ["category", "price_range", "features", "target_audience", "rating_prediction"],
"additionalProperties": false
}
},
"strict": true
}
}'
},
{ 'prompt_name': 'product-analysis', 'version': 1 },
{ 'product_name': product_name, 'product_description': product_description }
) AS analysis
FROM products
)
SELECT
product_id,
product_name,
analysis.category::VARCHAR,
analysis.price_range::VARCHAR,
analysis.features,
analysis.target_audience::VARCHAR,
analysis.rating_prediction::DOUBLE
FROM product_analysis;

Ollama Structured Output

Ollama uses the format field with an object schema definition to structure responses.

Syntax

{
'model_name': 'your-model-name',
'model_parameters': '{
"format": {
"type": "object",
"properties": {
"field_name": { "type": "string" }
},
"required": ["field_name"],
"additionalProperties": false
}
}'
}

Basic Example

SELECT llm_complete(
{
'model_name': 'llama3.1',
'model_parameters': '{
"format": {
"type": "object",
"properties": {
"summary": { "type": "string" },
"sentiment": { "type": "string" }
},
"required": ["summary", "sentiment"],
"additionalProperties": false
}
}'
},
{ 'prompt': 'Summarize this review and determine its sentiment.' },
{ 'review_text': review_text }
) AS response
FROM customer_reviews;

Advanced Example

WITH content_generation AS (
SELECT
topic_id,
topic_name,
llm_complete(
{
'model_name': 'ollama-writer',
'model_parameters': '{
"format": {
"type": "object",
"properties": {
"title": { "type": "string" },
"content": { "type": "string" },
"tags": {
"type": "array",
"items": { "type": "string" }
},
"word_count": { "type": "number" },
"difficulty_level": { "type": "string" }
},
"required": ["title", "content", "tags", "word_count", "difficulty_level"],
"additionalProperties": false
}
}'
},
{ 'prompt_name': 'content-generator', 'version': 2 },
{ 'topic': topic_name, 'target_audience': target_audience }
) AS generated_content
FROM content_topics
)
SELECT
topic_id,
topic_name,
generated_content.title::VARCHAR,
generated_content.content::VARCHAR,
generated_content.tags,
generated_content.word_count::INTEGER,
generated_content.difficulty_level::VARCHAR
FROM content_generation;

Aggregate Functions

Structured output works with aggregate functions like llm_reduce:

SELECT llm_reduce(
{
'model_name': 'gpt-4o',
'model_parameters': '{
"response_format": {
"type": "json_schema",
"json_schema": {
"name": "review_aggregation",
"schema": {
"type": "object",
"properties": {
"overall_summary": { "type": "string" },
"key_themes": {
"type": "array",
"items": { "type": "string" }
},
"sentiment_distribution": {
"type": "object",
"properties": {
"positive": { "type": "number" },
"negative": { "type": "number" },
"neutral": { "type": "number" }
}
}
},
"required": ["overall_summary", "key_themes", "sentiment_distribution"],
"additionalProperties": false
}
},
"strict": true
}
}'
},
{ 'prompt_name': 'review-aggregation' },
{ 'review_text': review_text }
) AS aggregated_analysis
FROM customer_reviews
GROUP BY product_id;

Extracting Values

Use dot notation to extract values from structured responses:

-- Basic extraction
SELECT response.category FROM table_name;

-- With type casting
SELECT response.category::VARCHAR FROM table_name;

-- Common type casts
SELECT
response.text_field::VARCHAR,
response.number_field::DOUBLE,
response.integer_field::INTEGER,
response.boolean_field::BOOLEAN
FROM table_name;

Common Schema Patterns

Classification

{
"type": "object",
"properties": {
"category": { "type": "string" },
"confidence": { "type": "number" },
"reasoning": { "type": "string" }
},
"required": ["category", "confidence"],
"additionalProperties": false
}

Analysis

{
"type": "object",
"properties": {
"summary": { "type": "string" },
"key_points": {
"type": "array",
"items": { "type": "string" }
},
"sentiment": { "type": "string" },
"topics": {
"type": "array",
"items": { "type": "string" }
}
},
"required": ["summary", "sentiment"],
"additionalProperties": false
}

Entity Extraction

{
"type": "object",
"properties": {
"entities": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": { "type": "string" },
"type": { "type": "string" },
"confidence": { "type": "number" }
}
}
}
},
"required": ["entities"],
"additionalProperties": false
}