Skip to content

You are viewing documentation for Immuta version 2024.1.

For the latest version, view our documentation for Immuta SaaS or the latest self-hosted version.

Determine Background Jobs Performance Over a Duration

Audience: System Administrators

Content Summary: This page contains support for administrators who are trying to determine the change in performance over a given duration for background jobs.

Query Overview

Below is a SQL query, which will help determine the change in performance over a given duration for background jobs.

Caveats

  • SELECT INTERVAL '1 hour' AS duration.
    • The duration of the interval can be altered, though durations of 12 hours are not advised.
  • / : The average duration increased.
  • \ : The average duration decreased.

Instructions

Run the following SQL query to determine the change in performance over a given duration for background jobs.

WITH
vars AS (SELECT INTERVAL '1 hour' AS duration, NOW() AS m0, 3 /* VALIDATE THIS */ AS webpods_count ),
intervals AS (
    SELECT
        m0,
        m0 - (duration / 10) * 1 AS m1,
        m0 - (duration / 10) * 2 AS m2,
        m0 - (duration / 10) * 3 AS m3,
        m0 - (duration / 10) * 4 AS m4,
        m0 - (duration / 10) * 5 AS m5,
        m0 - (duration / 10) * 6 AS m6,
        m0 - (duration / 10) * 7 AS m7,
        m0 - (duration / 10) * 8 AS m8,
        m0 - (duration / 10) * 9 AS m9,
        m0 - (duration / 10) * 10 AS m10
    FROM vars
),
all_jobs_raw AS (
    SELECT name, state, startedon, createdon, completedon, data FROM pgboss.job
    UNION SELECT name, state, startedon, createdon, completedon, data FROM pgboss.archive -- COMMENT OUT THIS TABLE IF LESS THAN 12 HOURS AND RUNNING SLOW
),
all_jobs AS (
    SELECT
        CASE
            WHEN name = 'automaticSubscription' AND data->>'profileId' IS NULL THEN 'autoSub - ByDataSource'
            WHEN name = 'automaticSubscription' AND data->>'profileId' IS NOT NULL THEN 'autoSub - ByUser'
            WHEN name ILIKE 'nativeSqlProfileRefresh_Snowflake%' THEN 'nativeSqlProfileRefresh_Snowflake%'
            WHEN name ILIKE 'nativeSqlProfileRefresh_Redshift%' THEN 'nativeSqlProfileRefresh_Redshift%'
            WHEN name ILIKE 'nativeSqlDataSourceSync_Snowflake%' THEN 'nativeSqlDataSourceSync_Snowflake%'
            WHEN name ILIKE 'nativeSqlDataSourceSync_Redshift%' THEN 'nativeSqlDataSourceSync_Redshift%'
            ELSE name
        END AS name, state, startedon, completedon
    FROM all_jobs_raw INNER JOIN intervals ON TRUE
    WHERE all_jobs_raw.createdon > intervals.m10
    AND name NOT LIKE '\_\_%' AND name NOT ILIKE 'cleanUp%' AND name NOT ILIKE 'initiateScheduled%'
    AND name NOT IN (
        'bootstrapImmutaQueryEngine', 'checkForRollOver', 'checkInWithLicenseServer', 'clearMaxTTLTokens','columnEvolutionCheck', 'createPolicySearchRecords',
        'dataSourceTest', 'entitlements_cache_job_root', 'entitlements_cache_job_worker', 'executeCustomerMetricsQueries', 'expirePolicyAdjustments',
        'expirePolicyCertifications', 'loadIamGroups', 'nativeSqlMigration_Redshift', 'recomputeDslFromJsonPolicies','removeIamAuths', 'scheduleDbtUpdates',
        'scheduleSchemaEvolutionChecks', 'schemaProjectsQueryableMigration', 'visibilitySchemaUpdate') -- ignore list
    --AND (name ILIKE '%redshift%' OR name ILIKE '%bulk%') -- adjust to filter down to specific jobs
),
distinct_jobs AS ( SELECT DISTINCT name FROM all_jobs),
completed_jobs AS ( SELECT name, startedon, completedon FROM all_jobs WHERE state = 'completed' ),
completed_jobs_m1 AS ( SELECT name, COUNT(*) AS total, AVG(completedon - startedon) AS average_time, AVG(EXTRACT(EPOCH FROM (completedon - startedon))) AS average_epoch_ms FROM completed_jobs JOIN intervals ON TRUE WHERE completedon BETWEEN intervals.m1 AND intervals.m0 GROUP BY name ),
completed_jobs_m2 AS ( SELECT name, COUNT(*) AS total, AVG(completedon - startedon) AS average_time,  AVG(EXTRACT(EPOCH FROM (completedon - startedon))) AS average_epoch_ms FROM completed_jobs JOIN intervals ON TRUE WHERE completedon BETWEEN intervals.m2 AND intervals.m1 GROUP BY name ),
completed_jobs_m3 AS ( SELECT name, COUNT(*) AS total, AVG(completedon - startedon) AS average_time,  AVG(EXTRACT(EPOCH FROM (completedon - startedon))) AS average_epoch_ms FROM completed_jobs JOIN intervals ON TRUE WHERE completedon BETWEEN intervals.m3 AND intervals.m2 GROUP BY name ),
completed_jobs_m4 AS ( SELECT name, COUNT(*) AS total, AVG(completedon - startedon) AS average_time,  AVG(EXTRACT(EPOCH FROM (completedon - startedon))) AS average_epoch_ms FROM completed_jobs JOIN intervals ON TRUE WHERE completedon BETWEEN intervals.m4 AND intervals.m3 GROUP BY name ),
completed_jobs_m5 AS ( SELECT name, COUNT(*) AS total, AVG(completedon - startedon) AS average_time,  AVG(EXTRACT(EPOCH FROM (completedon - startedon))) AS average_epoch_ms FROM completed_jobs JOIN intervals ON TRUE WHERE completedon BETWEEN intervals.m5 AND intervals.m4 GROUP BY name ),
completed_jobs_m6 AS ( SELECT name, COUNT(*) AS total, AVG(completedon - startedon) AS average_time,  AVG(EXTRACT(EPOCH FROM (completedon - startedon))) AS average_epoch_ms FROM completed_jobs JOIN intervals ON TRUE WHERE completedon BETWEEN intervals.m6 AND intervals.m5 GROUP BY name ),
completed_jobs_m7 AS ( SELECT name, COUNT(*) AS total, AVG(completedon - startedon) AS average_time,  AVG(EXTRACT(EPOCH FROM (completedon - startedon))) AS average_epoch_ms FROM completed_jobs JOIN intervals ON TRUE WHERE completedon BETWEEN intervals.m7 AND intervals.m6 GROUP BY name ),
completed_jobs_m8 AS ( SELECT name, COUNT(*) AS total, AVG(completedon - startedon) AS average_time,  AVG(EXTRACT(EPOCH FROM (completedon - startedon))) AS average_epoch_ms FROM completed_jobs JOIN intervals ON TRUE WHERE completedon BETWEEN intervals.m8 AND intervals.m7 GROUP BY name ),
completed_jobs_m9 AS ( SELECT name, COUNT(*) AS total, AVG(completedon - startedon) AS average_time,  AVG(EXTRACT(EPOCH FROM (completedon - startedon))) AS average_epoch_ms FROM completed_jobs JOIN intervals ON TRUE WHERE completedon BETWEEN intervals.m9 AND intervals.m8 GROUP BY name ),
completed_jobs_m10 AS ( SELECT name, COUNT(*) AS total, AVG(completedon - startedon) AS average_time,  AVG(EXTRACT(EPOCH FROM (completedon - startedon))) AS average_epoch_ms FROM completed_jobs JOIN intervals ON TRUE WHERE completedon BETWEEN intervals.m10 AND intervals.m9 GROUP BY name ),
queued_jobs AS ( SELECT name, count(*) AS total_todo FROM all_jobs WHERE state IN ('active', 'created') GROUP BY name )
SELECT
    distinct_jobs.name,
    vars.duration AS "Total Duration",
    vars.duration / 10 AS "Each Interval",
    CONCAT(' ', TO_CHAR(completed_jobs_m9.average_time, 'MI:SS.MS')) AS "Oldest",
    CONCAT(CASE WHEN completed_jobs_m10.average_time IS NULL OR  completed_jobs_m9.average_time IS NULL THEN '  ' WHEN completed_jobs_m9.average_time > completed_jobs_m10.average_time THEN '/ ' ELSE '\ ' END, TO_CHAR(completed_jobs_m9.average_time, 'MI:SS.MS'))  AS "_",
    CONCAT(CASE WHEN completed_jobs_m9.average_time IS NULL OR  completed_jobs_m8.average_time IS NULL THEN '  ' WHEN completed_jobs_m8.average_time > completed_jobs_m9.average_time THEN '/ ' ELSE '\ ' END, TO_CHAR(completed_jobs_m8.average_time, 'MI:SS.MS'))  AS "__",
    CONCAT(CASE WHEN completed_jobs_m8.average_time IS NULL OR  completed_jobs_m7.average_time IS NULL THEN '  ' WHEN completed_jobs_m7.average_time > completed_jobs_m8.average_time THEN '/ ' ELSE '\ ' END, TO_CHAR(completed_jobs_m7.average_time, 'MI:SS.MS'))  AS "___",
    CONCAT(CASE WHEN completed_jobs_m7.average_time IS NULL OR  completed_jobs_m6.average_time IS NULL THEN '  ' WHEN completed_jobs_m6.average_time > completed_jobs_m7.average_time THEN '/ ' ELSE '\ ' END, TO_CHAR(completed_jobs_m6.average_time, 'MI:SS.MS'))  AS "____",
    CONCAT(CASE WHEN completed_jobs_m6.average_time IS NULL OR  completed_jobs_m5.average_time IS NULL THEN '  ' WHEN completed_jobs_m5.average_time > completed_jobs_m6.average_time THEN '/ ' ELSE '\ ' END, TO_CHAR(completed_jobs_m5.average_time, 'MI:SS.MS'))  AS "----",
    CONCAT(CASE WHEN completed_jobs_m5.average_time IS NULL OR  completed_jobs_m4.average_time IS NULL THEN '  ' WHEN completed_jobs_m4.average_time > completed_jobs_m5.average_time THEN '/ ' ELSE '\ ' END, TO_CHAR(completed_jobs_m4.average_time, 'MI:SS.MS'))  AS "---",
    CONCAT(CASE WHEN completed_jobs_m4.average_time IS NULL OR  completed_jobs_m3.average_time IS NULL THEN '  ' WHEN completed_jobs_m3.average_time > completed_jobs_m4.average_time THEN '/ ' ELSE '\ ' END, TO_CHAR(completed_jobs_m3.average_time, 'MI:SS.MS'))  AS "--",
    CONCAT(CASE WHEN completed_jobs_m3.average_time IS NULL OR  completed_jobs_m2.average_time IS NULL THEN '  ' WHEN completed_jobs_m2.average_time > completed_jobs_m3.average_time THEN '/ ' ELSE '\ ' END, TO_CHAR(completed_jobs_m2.average_time, 'MI:SS.MS'))  AS "-",
    CONCAT(CASE WHEN completed_jobs_m2.average_time IS NULL OR  completed_jobs_m1.average_time IS NULL THEN '  ' WHEN completed_jobs_m1.average_time > completed_jobs_m2.average_time THEN '/ ' ELSE '\ ' END, TO_CHAR(completed_jobs_m1.average_time, 'MI:SS.MS'))  AS "Newest",
    vars.webpods_count AS "WebPods",
    to_char(queued_jobs.total_todo, 'FM9,999,999,999') AS "Queued",
    NOW() + (queued_jobs.total_todo / vars.webpods_count) * completed_jobs_m1.average_time AS "Should Complete By"
FROM distinct_jobs
INNER JOIN vars ON TRUE
LEFT OUTER JOIN completed_jobs_m1 ON distinct_jobs.name = completed_jobs_m1.name
LEFT OUTER JOIN completed_jobs_m2 ON distinct_jobs.name = completed_jobs_m2.name
LEFT OUTER JOIN completed_jobs_m3 ON distinct_jobs.name = completed_jobs_m3.name
LEFT OUTER JOIN completed_jobs_m4 ON distinct_jobs.name = completed_jobs_m4.name
LEFT OUTER JOIN completed_jobs_m5 ON distinct_jobs.name = completed_jobs_m5.name
LEFT OUTER JOIN completed_jobs_m6 ON distinct_jobs.name = completed_jobs_m6.name
LEFT OUTER JOIN completed_jobs_m7 ON distinct_jobs.name = completed_jobs_m7.name
LEFT OUTER JOIN completed_jobs_m8 ON distinct_jobs.name = completed_jobs_m8.name
LEFT OUTER JOIN completed_jobs_m9 ON distinct_jobs.name = completed_jobs_m9.name
LEFT OUTER JOIN completed_jobs_m10 ON distinct_jobs.name = completed_jobs_m10.name
LEFT OUTER JOIN queued_jobs ON distinct_jobs.name = queued_jobs.name;

Example Output

Job Performance Output