Regex match percent
The query for this check calculates the percentage of matching data with given regex from user. It performed by using SQL function:
- REGEXP_CONTAINS() in bigquery.
- REGEXP_LIKE() in snowflake.
These functions return true if data matching with given regex, else functions return false.
Successfully classified records are assigned value of 1, and any other values, 0. Those values are then summed
(so the counting of valid values is effectively performed), divided by the number of records, and multiplicated by
a 100.0 so that the results is in percent.
Jinja Template
{% import '/dialects/bigquery.sql.jinja2' as lib with context -%}
SELECT CASE
WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
ELSE 100.0 * SUM(
CASE
WHEN REGEXP_CONTAINS(
{{ lib.render_target_column('analyzed_table') }},
{{ lib.render_regex(parameters.named_regex, parameters.custom_regex) }}
) IS TRUE THEN 1
ELSE 0
END
) / COUNT({{ lib.render_target_column('analyzed_table') }})
END AS actual_value
{{- lib.render_dimension_projections('analyzed_table') }}
{{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table() }} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}
{% import '/dialects/snowflake.sql.jinja2' as lib with context -%}
SELECT
CASE
WHEN COUNT({{ lib.render_target_column('analyzed_table') }}) = 0 THEN NULL
ELSE 100.0 * SUM(
CASE
WHEN REGEXP_LIKE({{lib.render_target_column('analyzed_table')}}, {{ lib.render_regex(parameters.named_regex, parameters.custom_regex) }}) THEN 1 ELSE 0 END) / COUNT({{ lib.render_target_column('analyzed_table') }})
END AS actual_value
{{- lib.render_dimension_projections('analyzed_table') }}
{{- lib.render_time_dimension_projection('analyzed_table') }}
FROM {{ lib.render_target_table()}} AS analyzed_table
{{- lib.render_where_clause() -}}
{{- lib.render_group_by() -}}
{{- lib.render_order_by() -}}