Skip to content

Data Quality Testing

Datablast provides a comprehensive data quality testing framework for SQL tasks, including built-in column tests and custom SQL tests.

Ensures that specified columns do not contain null values.

tests:
columns:
user_id:
- name: "not_null" # blocking: true (default)
email:
- name: "not_null"
blocking: false # Must specify to override default

Verifies that specified columns contain unique values.

tests:
columns:
user_id:
- name: "unique" # blocking: true (default)
order_id:
- name: "unique"
blocking: false # Must specify to override default

Checks that numeric columns contain only positive values.

tests:
columns:
revenue:
- name: "positive" # blocking: true (default)
quantity:
- name: "positive"
blocking: false # Must specify to override default

Validates that columns contain only specified values.

tests:
columns:
status:
- name: "accepted_values"
accepted_values: ["active", "inactive", "pending"]
# blocking: true (default)
category:
- name: "accepted_values"
accepted_values: ["electronics", "clothing", "books"]
blocking: false # Must specify to override default

Define custom validation logic using SQL queries.

tests:
custom:
- name: "revenue_consistency"
query: |
SELECT 1
FROM users u
JOIN revenue r ON u.user_id = r.user_id
WHERE u.status = 'active' AND r.revenue <= 0
# blocking: true (default)

Create tests that don’t block pipeline execution.

tests:
custom:
- name: "date_range_check"
equal_to: 0
query: |
SELECT count(*)
FROM events
WHERE event_date > CURRENT_DATE()
blocking: false # Must specify to override default

Implement complex business rules validation.

tests:
custom:
- name: "order_consistency"
query: |
SELECT 1
FROM orders o
JOIN order_items oi ON o.order_id = oi.order_id
WHERE o.total_amount != (
SELECT SUM(price * quantity)
FROM order_items
WHERE order_id = o.order_id
)
blocking: true
name: "analytics.user_metrics"
type: "bq.sql"
description: "Calculate user metrics with data quality tests"
run: "user_metrics.sql"
tests:
columns:
user_id:
- name: "not_null"
- name: "unique"
revenue:
- name: "not_null"
- name: "positive"
status:
- name: "accepted_values"
accepted_values: ["active", "inactive", "pending"]
name: "analytics.order_analysis"
type: "bq.sql"
description: "Analyze orders with comprehensive data quality tests"
run: "order_analysis.sql"
tests:
columns:
order_id:
- name: "not_null"
- name: "unique"
customer_id:
- name: "not_null"
total_amount:
- name: "not_null"
- name: "positive"
order_status:
- name: "accepted_values"
accepted_values: ["pending", "confirmed", "shipped", "delivered", "cancelled"]
custom:
- name: "order_amount_consistency"
query: |
SELECT 1
FROM orders o
JOIN order_items oi ON o.order_id = oi.order_id
WHERE o.total_amount != (
SELECT SUM(price * quantity)
FROM order_items
WHERE order_id = o.order_id
)
blocking: true
- name: "future_order_check"
equal_to: 0
query: |
SELECT count(*)
FROM orders
WHERE order_date > CURRENT_DATE()
blocking: false
name: "analytics.event_processing"
type: "bq.sql"
description: "Process events with mixed test types"
run: "event_processing.sql"
tests:
columns:
event_id:
- name: "not_null" # blocking: true (default)
- name: "unique" # blocking: true (default)
user_id:
- name: "not_null" # blocking: true (default)
event_type:
- name: "accepted_values"
accepted_values: ["click", "view", "purchase", "signup"]
# blocking: true (default)
event_timestamp:
- name: "not_null"
blocking: false # Must specify to override default
custom:
- name: "data_freshness"
query: |
SELECT 1
FROM events
WHERE event_timestamp < DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY)
blocking: false # Must specify to override default
  • name: Test name (not_null, unique, positive, accepted_values)
  • blocking: Whether test failure blocks pipeline (default: true)
  • accepted_values: Valid values for accepted_values test
  • name: Test name
  • query: SQL query for validation
  • blocking: Whether test failure blocks pipeline (default: true)