Determine Background Job Batches
Audience: System Administrators
Content Summary: This page provides a SQL query that determines batches of background jobs.
Query Overview
Below is a SQL query, which will help determine batches of background jobs.
Note: This query unions the job and archive table. Adjust the top interval appropriately for your needs.
Immuta >= 2022.1
This query is for all 2022.1 versions of Immuta and above.
WITH
vars AS (
SELECT
NOW() AS end_dt,
--TO_TIMESTAMP('2021-11-23 12:05:00', 'YYYY-MM-DD hh24:mi:ss')::timestamp without time zone at time zone 'EDT' AS start_dt
NOW() - INTERVAL '7 days' AS start_dt
),
all_jobs_raw AS (
SELECT name, state, startedon, createdon, completedon, data FROM pgboss.job
UNION SELECT name, state, startedon, createdon, completedon, data FROM pgboss.archive -- COMMENT OUT THIS TABLE IF LESS THAN 12 HOURS AND RUNNING SLOW
),
all_jobs AS (
SELECT
CASE
WHEN name ILIKE 'nativeSqlDataSourceSync_Redshift%' THEN 'Redshift Native Update'
WHEN name = 'nativeSqlAutomaticSubscriptionNotifications_Redshift' THEN 'Immuta Upd Redshift DS'
WHEN name = 'nativeSqlAutomaticSubscriptionNotifications_Snowflake' THEN 'Immuta Upd Snowflake DS'
WHEN name = 'updatePolicies' THEN 'updatePolicies'
WHEN name ILIKE 'nativeSqlProfileRefresh_Redshift%' THEN 'nativeSqlProfileRefresh_Redshift'
WHEN name ILIKE 'nativeSqlProfileRefresh_Snowflake%' THEN 'nativeSqlProfileRefresh_Snowflake'
ELSE name
END AS name, state, startedon, completedon, date_trunc('hour', createdon) AS createdon_trunc -- Change the date_trunc value to 'hour' or 'day'
FROM all_jobs_raw INNER JOIN vars ON TRUE
WHERE all_jobs_raw.createdon > vars.start_dt and all_jobs_raw.createdon < vars.end_dt
AND name NOT LIKE '\_\_%' AND name NOT ILIKE 'cleanUp%' AND name NOT ILIKE 'initiateScheduled%'
AND name NOT IN (
'bootstrapImmutaQueryEngine', 'checkForRollOver', 'checkInWithLicenseServer','clearMaxTTLTokens', 'columnEvolutionCheck', 'createPolicySearchRecords',
'dataSourceTest', 'entitlements_cache_job_root', 'entitlements_cache_job_worker','executeCustomerMetricsQueries', 'expirePolicyAdjustments',
'expirePolicyCertifications', 'loadIamGroups', 'nativeSqlMigration_Redshift','recomputeDslFromJsonPolicies', 'removeIamAuths', 'scheduleDbtUpdates',
'scheduleSchemaEvolutionChecks', 'schemaProjectsQueryableMigration', 'visibilitySchemaUpdate') -- ignore list
--AND (name ILIKE '%snowflake%') -- adjust to filter down to specific jobs
),
batches AS (
SELECT DISTINCT name, createdon_trunc, COUNT(*) AS total_count,
MIN(createdon_trunc) AS createdon, MAX(completedon) AS completedon
FROM all_jobs GROUP BY name, createdon_trunc),
created_jobs AS (
SELECT name, createdon_trunc, COUNT(*) AS created_count
FROM all_jobs WHERE state = 'created' GROUP BY name, createdon_trunc),
retry_jobs AS (
SELECT name, createdon_trunc, COUNT(*) AS retry_count
FROM all_jobs WHERE state = 'retry' GROUP BY name, createdon_trunc),
active_jobs AS (
SELECT name, createdon_trunc, COUNT(*) AS active_count
FROM all_jobs WHERE state = 'active' GROUP BY name, createdon_trunc),
completed_jobs AS (
SELECT name, createdon_trunc, COUNT(*) AS completed_count,
AVG(completedon - startedon) AS average_time
FROM all_jobs WHERE state = 'completed' GROUP BY name, createdon_trunc),
expired_jobs AS (
SELECT name, createdon_trunc, COUNT(*) AS expired_count
FROM all_jobs WHERE state = 'expired' GROUP BY name, createdon_trunc),
cancelled_jobs AS (
SELECT name, createdon_trunc, COUNT(*) AS cancelled_count
FROM all_jobs WHERE state = 'cancelled' GROUP BY name, createdon_trunc),
failed_jobs AS (
SELECT name, createdon_trunc, COUNT(*) AS failed_count
FROM all_jobs WHERE state = 'failed' GROUP BY name, createdon_trunc)
SELECT
batches.*,
completed_jobs.average_time,
batches.completedon - batches.createdon AS duration,
created_jobs.created_count,
retry_jobs.retry_count,
active_jobs.active_count,
completed_jobs.completed_count,
expired_jobs.expired_count,
cancelled_jobs.cancelled_count,
failed_jobs.failed_count
FROM batches
LEFT OUTER JOIN created_jobs ON batches.name = created_jobs.name AND batches.createdon_trunc = created_jobs.createdon_trunc
LEFT OUTER JOIN retry_jobs ON batches.name = retry_jobs.name AND batches.createdon_trunc = retry_jobs.createdon_trunc
LEFT OUTER JOIN active_jobs ON batches.name = active_jobs.name AND batches.createdon_trunc = active_jobs.createdon_trunc
LEFT OUTER JOIN completed_jobs ON batches.name = completed_jobs.name AND batches.createdon_trunc = completed_jobs.createdon_trunc
LEFT OUTER JOIN expired_jobs ON batches.name = expired_jobs.name AND batches.createdon_trunc = expired_jobs.createdon_trunc
LEFT OUTER JOIN cancelled_jobs ON batches.name = cancelled_jobs.name AND batches.createdon_trunc = cancelled_jobs.createdon_trunc
LEFT OUTER JOIN failed_jobs ON batches.name = failed_jobs.name AND batches.createdon_trunc = failed_jobs.createdon_trunc
ORDER BY name, createdon_trunc DESC;