Hi there, I'm not being able to lint a bunch of dbt SQL files. The error I get is:
{%- set stuck_rows_in_a_row = 3 -%}
{%- set fcs_criteria_in_seconds = 30 -%}
{%- set low_fps = 20 -%}
WITH recording_started AS(SELECT * FROM {{ ref('fct_recording_started')}} WHERE NOT COALESCE(is_test_user, FALSE))
, recordings_stopped AS (SELECT * FROM {{ ref('fct_recording_stopped') }})
, archives AS(
SELECT
rso.archive_id,
rsa.session_id,
rso.stream_id,
rso.recording_id,
rso.studio_id,
rso.recording_stopped_reason,
rso.user_id IS NOT NULL AS is_user,
rso.expected_duration_in_seconds,
rsa.server_timestamp,
rsa.recorder_version,
rsa.audio_input_media_device_label,
rsa.video_input_media_device_label,
rsa.user_id,
rsa.is_test_user,
rsa.anonymous_id,
rsa.studio_role,
rsa.studio_owner_id,
rsa.studio_owner_email,
COALESCE(rsa.recording_type, rso.recording_type) AS recording_type,
COALESCE(rsa.live_call_id, rso.live_call_id) AS live_call_id,
COALESCE(rsa.client_id, rso.client_id) AS client_id,
rsa.timestamp AS recording_started_timestamp,
rso.timestamp AS recording_stopped_timestamp,
rso.uploaded_at AS batch
FROM recording_started rsa
JOIN recordings_stopped rso
ON rsa.archive_id = rso.archive_id
WHERE rsa.source = 'new'
)
, waiting_room AS(
SELECT
arc.archive_id,
count(jwr.client_id) AS number_participants_in_waiting_room_during_recording
FROM {{ ref('fct_joined_waiting_room') }} jwr
JOIN archives arc
ON arc.studio_id = jwr.studio_id
AND jwr.timestamp BETWEEN arc.recording_started_timestamp AND arc.recording_stopped_timestamp
WHERE jwr.studio_recording_status = 'recording'
GROUP BY 1
)
, max_upload_progress AS(
SELECT
archive_id,
MAX(upload_progress_uploaded_bytes) AS max_upload_progress_uploaded_bytes
FROM {{ ref('fct_recording_progression_stream') }}
GROUP BY 1
)
, sync_issues AS(
SELECT
s.archive_id,
upload_progress_uploaded_bytes,
CASE
WHEN upload_progress_uploaded_bytes = 0 THEN 'beginning'
WHEN upload_progress_uploaded_bytes = max_upload_progress_uploaded_bytes THEN 'end'
ELSE 'middle' END AS moment_flag,
COUNT(1) AS n_rows,
DATEDIFF('minute', MIN(timestamp), MAX(timestamp)) AS range_in_minutes
FROM {{ ref('fct_recording_progression_stream') }} s
LEFT JOIN max_upload_progress s2 ON s.archive_id = s2.archive_id
GROUP BY 1, 2, 3
HAVING n_rows >= {{ stuck_rows_in_a_row }}
AND range_in_minutes >= {{ stuck_rows_in_a_row }}
)
, sync_issues_pivoted AS(
SELECT
archive_id,
TRUE AS stuck, -- single value, any agg is ok
BOOLOR_AGG(moment_flag='beginning') AS stuck_at_0, -- single value, any agg is ok
BOOLOR_AGG(moment_flag='middle') AS stuck_at_middle, -- having at least 1
BOOLOR_AGG(moment_flag='end') AS stuck_at_end -- single value, any agg is ok
FROM sync_issues
GROUP BY 1
)
, streams_cadence_test AS(
{{ test_event_cadence(
model= ref('fct_recording_progression_stream'),
grouping_column='archive_id', time_column='timestamp',
date_part='minute', threshold=1) }}
)
, rec_prog_stream AS(
SELECT
rps.archive_id,
COUNT(DISTINCT rps.archive_id || rps.timestamp) AS stream_count,
BOOLOR_AGG(NULLIF(rps.frames_per_second, 0) < {{ low_fps }} ) AS any_stream_is_low_fps,
BOOLAND_AGG(NULLIF(rps.frames_per_second, 0) < {{ low_fps }} ) AS all_stream_is_low_fps,
IFF(any_stream_is_low_fps IS NOT NULL,
DIV0(
COUNT_IF(rps.frames_per_second < {{ low_fps }}),
COUNT(1)
), NULL) AS proportion_stream_low_fps,
IFF(any_stream_is_low_fps IS NOT NULL,
DIV0(
COUNT_IF(rps.frames_per_second = 0),
COUNT(1)
), NULL) AS proportion_stream_fps_equal_zero,
ANY_VALUE(recording_resolution) AS recording_resolution,
BOOLOR_AGG(e.archive_id IS NOT NULL) AS archive_has_stream_w_cadence_problem,
VAR_POP(upload_speed) AS var_uploading_speed,
MIN(upload_speed) AS min_uploading_speed,
MAX(upload_speed) AS max_uploading_speed,
VAR_POP(download_speed) AS var_downloading_speed,
MIN(download_speed) AS min_downloading_speed,
MAX(download_speed) AS max_downloading_speed,
VAR_POP(frames_per_second) AS var_frames_per_second,
STDDEV_POP(frames_per_second) AS stddev_frames_per_second,
MIN(frames_per_second) AS min_frames_per_second,
AVG(frames_per_second) AS avg_frames_per_second,
MAX(upload_progress_uploaded_bytes) AS max_upload_progress_uploaded_bytes,
MAX(upload_progress_upload_percentage) AS max_upload_progress_upload_percentage,
ANY_VALUE(IFNULL(stuck, FALSE)) AS got_stuck,
ANY_VALUE(IFNULL(stuck_at_0, FALSE)) AS got_stuck_at_zero,
ANY_VALUE(IFNULL(stuck_at_middle, FALSE)) AS got_stuck_at_middle,
ANY_VALUE(IFNULL(stuck_at_end, FALSE)) AS got_stuck_at_end
FROM {{ ref('fct_recording_progression_stream') }} rps
LEFT JOIN sync_issues_pivoted sip
ON sip.archive_id = rps.archive_id
LEFT JOIN streams_cadence_test e
ON e.archive_id = rps.archive_id AND e.timestamp = rps.timestamp
GROUP BY rps.archive_id
)
, errors_joined AS(
SELECT
arc.archive_id,
{{ pivot(
'error_type',
dbt_utils.get_column_values(table= ref('fct_error_notifications'), column= 'error_type'
, where= "error_type IN ('video_muted_error', 'storage_full')"),
agg= 'sum',
quote_identifiers= False,
bool= True,
prefix='has_'
) }}
FROM {{ ref('fct_error_notifications') }} e
JOIN archives arc
ON e.anonymous_id = arc.anonymous_id
AND e.timestamp BETWEEN arc.recording_started_timestamp AND arc.recording_stopped_timestamp
GROUP BY arc.archive_id
)
, live_calls_features AS( SELECT * FROM {{ ref('live_calls_features') }})
, record_media_error AS( SELECT * FROM {{ ref('fct_record_media_error_pivoted') }})
, recordings AS (
SELECT
r.*,
a.is_enterprise -- asessing whether the studio where the recording was created belongs to a currently enterprise account
FROM {{ ref('dim_recordings') }} r
LEFT JOIN {{ ref('dim_studios') }} s
ON r.studio_id = s.studio_id
LEFT JOIN {{ ref('dim_productions') }} p
ON s.production_id = p.production_id
LEFT JOIN {{ ref('dim_accounts') }} a
ON p.account_id = a.account_id
WHERE file_type = 'local'
)
, screen_share AS (SELECT DISTINCT session_id FROM {{ ref('dim_recordings') }} WHERE source = 'mediaboard-screenshare')
, identity AS (SELECT * FROM {{ ref('segment__identity') }} )
, studios AS (SELECT * FROM {{ ref('dim_studios') }} )
, users AS (SELECT * FROM {{ ref('dim_users') }} )
, final AS(
SELECT
s.archive_id,
s.session_id,
s.stream_id,
s.live_call_id,
s.recording_id,
s.studio_id,
s.client_id,
s.user_id,
s.user_id IS NOT NULL AS is_user,
r.is_enterprise,
s.recording_started_timestamp,
s.recording_stopped_timestamp,
s.recording_type,
s.recording_stopped_reason,
s.expected_duration_in_seconds,
s.studio_role,
r.duration_in_seconds,
r.duration_in_seconds = 0 AS duration_is_zero,
TRUNCATE(s.expected_duration_in_seconds - r.duration_in_seconds, 0) AS duration_difference,
r.client_status,
s.server_timestamp,
s.recorder_version,
SPLIT_PART(s.recorder_version, '.', 1) AS recorder_major_version,
SPLIT_PART(s.recorder_version, '.', 2) AS recorder_minor_version,
r.filename,
COUNT(s.archive_id) OVER(PARTITION BY s.session_id, s.client_id) = 1 AS is_only_archive_for_client_id_in_session,
COALESCE(lcf.mic_connected_during_recording, FALSE) AS mic_connected_during_recording,
COALESCE(lcf.camera_connected_during_recording, FALSE) AS camera_connected_during_recording,
COALESCE(lcf.mic_disconnected_during_recording, FALSE) AS mic_disconnected_during_recording,
COALESCE(lcf.camera_disconnected_during_recording, FALSE) AS camera_disconnected_during_recording,
-- ads.mic_connected_actually_set_during_recording,
-- ads.camera_connected_actually_set_during_recording,
lcf.number_of_initial_mics_connected,
lcf.number_of_initial_cameras_connected,
wr.number_participants_in_waiting_room_during_recording,
lcf.avg_mic_latency,
lcf.var_mic_latency,
lcf.mic_permission_changed,
lcf.camera_permission_changed,
lcf.initial_echo_cancellations_settings,
lcf.initial_operating_system AS operating_system,
lcf.initial_browser_type_version,
lcf.initial_graphic_card AS graphic_card,
lcf.initial_gpu_vendor AS gpu_vendor,
lcf.initial_device_permission_camera AS device_permission_camera,
lcf.initial_device_permission_mic AS device_permission_mic,
lcf.initial_hardware_concurrency AS hardware_concurrency,
lcf.initial_device_permission_notifactions AS device_permission_notifactions,
lcf.min_charging_level_during_recorded,
lcf.max_charging_level_during_recorded,
lcf.has_client_charged_during_recording,
lcf.var_perc_storage_space_available,
lcf.min_perc_storage_space_available,
lcf.max_storage_used,
lcf.min_storage_used,
lcf.var_used_storage_space,
lcf.var_available_storage_space,
lcf.storage_range,
lcf.max_storage_bins,
lcf.min_storage_bins,
lcf.range_storage_bins,
lcf.initial_storage_estimate_js_storage_usage,
lcf.initial_perc_storage_space_available,
lcf.initial_storage_bins,
rps.stream_count,
rps.proportion_stream_low_fps,
rps.proportion_stream_fps_equal_zero,
rps.any_stream_is_low_fps,
rps.all_stream_is_low_fps,
rps.recording_resolution,
rps.var_uploading_speed,
rps.min_uploading_speed,
rps.max_uploading_speed,
rps.var_downloading_speed,
rps.min_downloading_speed,
rps.max_downloading_speed,
rps.var_frames_per_second,
rps.stddev_frames_per_second,
rps.min_frames_per_second,
rps.avg_frames_per_second,
rps.max_upload_progress_uploaded_bytes,
rps.max_upload_progress_upload_percentage,
rps.got_stuck,
rps.got_stuck_at_zero,
rps.got_stuck_at_middle,
rps.got_stuck_at_end,
INITCAP(TRIM(REGEXP_REPLACE(s.audio_input_media_device_label, '\\(....:....\\)'))) AS audio_input_media_device_label,
INITCAP(TRIM(REGEXP_REPLACE(s.video_input_media_device_label, '\\(....:....\\)'))) AS video_input_media_device_label,
{{ star(ref('fct_record_media_error_pivoted'), relation_alias='rme', except=['archive_id'], prefix= 'media_error_') }},
ej.has_video_muted_error,
ej.has_storage_full AS has_storage_full_error,
r.is_first_recording,
r.auto_gain_control_enabled,
r.echo_cancelation_enabled,
r.noise_suppression_enabled,
r.persistence,
r.user_feature_flags_auto_adapt_encoder,
r.user_feature_flags_auto_adapt_resolution,
r.user_feature_flags_safe_media_stream,
r.user_feature_flags_snapshots,
r.user_feature_flags_web_recorder_raw_parts,
r.user_feature_flags_single_stream,
ss.session_id IS NOT NULL AS session_has_screen_shared,
s.studio_owner_id,
s.studio_owner_email,
rps.archive_id IS NULL AS has_no_associated_recording_progression_stream_event,
rps.archive_has_stream_w_cadence_problem AS has_stream_w_cadence_problem,
r.archive_id IS NULL AS has_no_associated_mongo_recording,
lcf.live_call_id IS NULL AS has_no_associated_live_call_data,
r.duration_in_seconds AS recording_duration_in_seconds,
r.shift_estimation,
r.git_version_tag,
HOUR(s.recording_started_timestamp) AS recording_started_hour_of_the_day,
ABS(TRUNCATE(s.expected_duration_in_seconds - r.duration_in_seconds, 0)) AS abs_duration_diff,
CASE
WHEN abs_duration_diff < 2 THEN 'A. <2 Seconds'
WHEN abs_duration_diff <= 5 THEN 'B. 2-5 Seconds'
WHEN abs_duration_diff <= 10 THEN 'C. 6-10 Seconds'
WHEN abs_duration_diff <= 30 THEN 'D. 11-30 Seconds'
WHEN abs_duration_diff <= 60 THEN 'E. 31-60 Seconds'
WHEN abs_duration_diff < 60*3 THEN 'F. 1-2 Minutes'
WHEN abs_duration_diff < 60*6 THEN 'G. 3-5 Minutes'
WHEN abs_duration_diff < 60*11 THEN 'H. 6-10 Minutes'
WHEN abs_duration_diff < 60*31 THEN 'I. 11-30 Minutes'
WHEN abs_duration_diff < 60*61 THEN 'J. 31-60 Minutes'
ELSE 'K. >60 Minutes'
END AS time_difference_bucket,
(abs_duration_diff > {{ fcs_criteria_in_seconds }}) AND NOT duration_is_zero AS file_cut_short,
BOOLOR_AGG((client_status = 'uploaded' OR client_status IS NULL) AND file_cut_short) OVER(PARTITION BY s.session_id) AS session_has_file_cut_short,
CASE
WHEN DAYOFWEEK(s.recording_started_timestamp) = 0 AND HOUR(s.recording_started_timestamp) BETWEEN 0 AND 6 THEN 'Sun 1'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 0 AND HOUR(s.recording_started_timestamp) BETWEEN 6 AND 12 THEN 'Sun 2'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 0 AND HOUR(s.recording_started_timestamp) BETWEEN 12 AND 18 THEN 'Sun 3'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 0 AND HOUR(s.recording_started_timestamp) BETWEEN 18 AND 24 THEN 'Sun 4'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 1 AND HOUR(s.recording_started_timestamp) BETWEEN 0 AND 6 THEN 'Mon 1'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 1 AND HOUR(s.recording_started_timestamp) BETWEEN 6 AND 12 THEN 'Mon 2'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 1 AND HOUR(s.recording_started_timestamp) BETWEEN 12 AND 18 THEN 'Mon 3'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 1 AND HOUR(s.recording_started_timestamp) BETWEEN 18 AND 24 THEN 'Mon 4'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 2 AND HOUR(s.recording_started_timestamp) BETWEEN 0 AND 6 THEN 'Tue 1'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 2 AND HOUR(s.recording_started_timestamp) BETWEEN 6 AND 12 THEN 'Tue 2'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 2 AND HOUR(s.recording_started_timestamp) BETWEEN 12 AND 18 THEN 'Tue 3'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 2 AND HOUR(s.recording_started_timestamp) BETWEEN 18 AND 24 THEN 'Tue 4'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 3 AND HOUR(s.recording_started_timestamp) BETWEEN 0 AND 6 THEN 'Wed 1'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 3 AND HOUR(s.recording_started_timestamp) BETWEEN 6 AND 12 THEN 'Wed 2'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 3 AND HOUR(s.recording_started_timestamp) BETWEEN 12 AND 18 THEN 'Wed 3'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 3 AND HOUR(s.recording_started_timestamp) BETWEEN 18 AND 24 THEN 'Wed 4'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 4 AND HOUR(s.recording_started_timestamp) BETWEEN 0 AND 6 THEN 'Thu 1'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 4 AND HOUR(s.recording_started_timestamp) BETWEEN 6 AND 12 THEN 'Thu 2'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 4 AND HOUR(s.recording_started_timestamp) BETWEEN 12 AND 18 THEN 'Thu 3'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 4 AND HOUR(s.recording_started_timestamp) BETWEEN 18 AND 24 THEN 'Thu 4'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 5 AND HOUR(s.recording_started_timestamp) BETWEEN 0 AND 6 THEN 'Fri 1'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 5 AND HOUR(s.recording_started_timestamp) BETWEEN 6 AND 12 THEN 'Fri 2'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 5 AND HOUR(s.recording_started_timestamp) BETWEEN 12 AND 18 THEN 'Fri 3'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 5 AND HOUR(s.recording_started_timestamp) BETWEEN 18 AND 24 THEN 'Fri 4'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 6 AND HOUR(s.recording_started_timestamp) BETWEEN 0 AND 6 THEN 'Sat 1'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 6 AND HOUR(s.recording_started_timestamp) BETWEEN 6 AND 12 THEN 'Sat 2'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 6 AND HOUR(s.recording_started_timestamp) BETWEEN 12 AND 18 THEN 'Sat 3'
WHEN DAYOFWEEK(s.recording_started_timestamp) = 6 AND HOUR(s.recording_started_timestamp) BETWEEN 18 AND 24 THEN 'Sat 4'
ELSE 'NA'
END AS day_and_part_of_the_day,
s.batch
FROM archives s
LEFT JOIN live_calls_features lcf
ON s.live_call_id = lcf.live_call_id
AND s.client_id = lcf.client_id
LEFT JOIN waiting_room wr
ON wr.archive_id = s.archive_id
LEFT JOIN rec_prog_stream rps
ON rps.archive_id = s.archive_id
LEFT JOIN record_media_error rme
ON rme.archive_id = s.archive_id
LEFT JOIN errors_joined ej
ON ej.archive_id = s.archive_id
LEFT JOIN recordings r
ON r.archive_id = s.archive_id
LEFT JOIN screen_share ss
ON ss.session_id = s.session_id
-- WHERE (r.client_status = 'uploaded'
-- OR r.client_status IS NULL)
)
SELECT *
FROM final
{{ deduplicate (key='archive_id',
time_column='recording_started_timestamp', criteria='first') }}