Skip to content

BigQuery Tasks

BigQuery SQL tasks (bq.sql) provide powerful data transformation capabilities with automatic materialization, cost tracking, and built-in data quality testing.

name: "analytics.daily_metrics"
type: "bq.sql"
description: "Calculate daily metrics from events"
depends:
- staging.events
run: "daily_metrics.sql"
# Materialization settings
materialization:
type: "table"
strategy: "create+replace"
partition_by: "dt"
cluster_by: ["user_id", "event_type"]
# Data quality tests
tests:
columns:
user_id:
- name: "not_null" # blocking: true (default)
- name: "unique"
blocking: false # Must specify to override default
revenue:
- name: "positive" # blocking: true (default)
-- @blast.name: analytics.daily_metrics
-- @blast.type: bq.sql
-- @blast.description: Calculate daily metrics from events
-- @blast.depends: staging.events
-- @blast.materialization.type: table
-- @blast.materialization.strategy: create+replace
-- @blast.materialization.partition_by: dt
-- @blast.materialization.cluster_by: user_id,event_type
-- @blast.tests.columns.user_id: not_null,unique
-- @blast.tests.columns.revenue: positive
WITH daily_events AS (
SELECT
DATE(event_timestamp) as event_date,
user_id,
event_type,
COUNT(*) as event_count
FROM staging.user_events
WHERE event_timestamp >= '{{ ds }}'
AND event_timestamp < '{{ next_ds }}'
GROUP BY 1, 2, 3
)
SELECT
event_date as dt,
user_id,
event_type,
event_count,
CURRENT_TIMESTAMP() as created_at
FROM daily_events
ORDER BY event_date, user_id
materialization:
type: "table"
strategy: "create+replace"
partition_by: "dt"
cluster_by: ["user_id", "event_type"]
materialization:
type: "table"
strategy: "delete+insert"
incremental_key: "dt"
materialization:
type: "table"
strategy: "append"
materialization:
type: "view"
tests:
columns:
user_id:
- name: "not_null" # blocking: true (default)
- name: "unique"
blocking: false # Must specify to override default
revenue:
- name: "not_null" # blocking: true (default)
- name: "positive" # blocking: true (default)
status:
- name: "accepted_values"
accepted_values: ["active", "inactive", "pending"]
# blocking: true (default)
tests:
custom:
- name: "revenue_consistency"
query: |
SELECT 1
FROM users u
JOIN revenue r ON u.user_id = r.user_id
WHERE u.status = 'active' AND r.revenue <= 0
# blocking: true (default)
- name: "date_range_check"
equal_to: 0
query: |
SELECT count(*)
FROM events
WHERE event_date > CURRENT_DATE()
blocking: false # Must specify to override default
  • name: Unique task identifier
  • type: Must be bq.sql
  • run: SQL file to execute
  • description: Human-readable description
  • depends: Task dependencies
  • materialization: Materialization settings
  • tests: Data quality tests
  • connections: Database connections