Merge branch 'develop' of 10.10.31.70:/ervu-dashboard-etl into develop

This commit is contained in:
r.gaztdinov 2025-06-17 13:03:25 +03:00
commit 27fd94ddcb
3 changed files with 608 additions and 0 deletions

View file

@ -0,0 +1,272 @@
<?xml version="1.0" encoding="UTF-8"?>
<pipeline>
<!-- General pipeline metadata: name (kept in sync with the file name), pipeline type and audit fields. -->
<info>
  <name>main_profile</name>
  <name_sync_with_filename>Y</name_sync_with_filename>
  <description/>
  <extended_description/>
  <pipeline_version/>
  <pipeline_type>Normal</pipeline_type>
  <!-- No pipeline parameters are declared. -->
  <parameters/>
  <!-- Transform performance capturing is switched off; delay and size limit keep the values below. -->
  <capture_transform_performance>N</capture_transform_performance>
  <transform_performance_capturing_delay>1000</transform_performance_capturing_delay>
  <transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit>
  <created_user>-</created_user>
  <created_date>2025/06/17 10:17:01.438</created_date>
  <modified_user>-</modified_user>
  <modified_date>2025/06/17 10:17:01.438</modified_date>
</info>
<!-- No canvas notes. -->
<notepads/>
<!-- Hops between transforms. The enabled chain is: Table input, then Value mapper, then Insert / update. -->
<order>
  <!-- Disabled hop towards the "Table output" transform. -->
  <hop>
    <from>Table input main profile</from>
    <to>Table output</to>
    <enabled>N</enabled>
  </hop>
  <!-- Query result goes to the education-group value mapper. -->
  <hop>
    <from>Table input main profile</from>
    <to>Value mapper main profile</to>
    <enabled>Y</enabled>
  </hop>
  <!-- Mapped rows are written by the insert/update transform. -->
  <hop>
    <from>Value mapper main profile</from>
    <to>Insert / update main profile</to>
    <enabled>Y</enabled>
  </hop>
</order>
<!-- Reads one aggregated profile row per citizen.recruitment_id from the "ervu-dashboard" connection. -->
<transform>
  <name>Table input main profile</name>
  <type>TableInput</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <connection>ervu-dashboard</connection>
  <execute_each_row>N</execute_each_row>
  <limit>0</limit>
  <!--
    Output columns: recruitment_id, avg_age, most_common_gender, most_common_employment,
    avg_children, most_common_education_group (numeric group 1 to 4), recording_date
    (CURRENT_DATE) and the constant schema = 'Department'.
    The NOT NULL check on recruitment_id is a row-level predicate, so it lives in WHERE
    rather than HAVING.
  -->
  <sql>SELECT
    c.recruitment_id,
    ROUND(AVG(DATE_PART('year', AGE(CURRENT_DATE, c.birth_date)))::numeric, 0) AS avg_age,
    MODE() WITHIN GROUP (ORDER BY c.gender) AS most_common_gender,
    MODE() WITHIN GROUP (ORDER BY c.employment) AS most_common_employment,
    ROUND(AVG(COALESCE(child_counts.child_count, 0))::numeric, 0) AS avg_children,
    MODE() WITHIN GROUP (ORDER BY education_groups.highest_group) AS most_common_education_group,
    CURRENT_DATE AS recording_date,
    'Department' AS schema
FROM citizen c
LEFT JOIN (
    -- Children under 18 with no recorded death date, counted per citizen.
    SELECT
        recruit_id,
        COUNT(*) AS child_count
    FROM citizen_child
    WHERE death_date IS NULL
      AND DATE_PART('year', AGE(CURRENT_DATE, birth_date)) &lt; 18
    GROUP BY recruit_id
) child_counts ON c.recruit_id = child_counts.recruit_id
LEFT JOIN (
    -- Lowest-numbered (best) education group per citizen.
    SELECT
        recruit_id,
        MIN(CASE
            WHEN education_level_code IN ('1', '2', '3', '4', '5') THEN 1 -- higher education
            WHEN education_level_code IN ('10', '11') THEN 2 -- secondary vocational
            WHEN education_level_code IN ('7', '9') THEN 3 -- general
            ELSE 4 -- other / unknown
        END) AS highest_group
    FROM citizen_foreign_education_diploma
    GROUP BY recruit_id
) education_groups ON c.recruit_id = education_groups.recruit_id
--join appeals.appeals_list al on c.system_esia_id = al.system_esia_id
-- Citizens without a recruitment office are dropped before grouping.
WHERE c.recruitment_id IS NOT NULL
GROUP BY c.recruitment_id;</sql>
  <variables_active>N</variables_active>
  <attributes/>
  <GUI>
    <xloc>304</xloc>
    <yloc>256</yloc>
  </GUI>
</transform>
<!-- Batch insert into appeals.main_profile. The only hop leading into this transform is disabled in this pipeline's order section. -->
<transform>
  <name>Table output</name>
  <type>TableOutput</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <commit>1000</commit>
  <connection>ervu-dashboard</connection>
  <!-- Each field pairs a target table column (column_name) with a stream field (stream_name). -->
  <fields>
    <field>
      <column_name>gender</column_name>
      <stream_name>most_common_gender</stream_name>
    </field>
    <field>
      <column_name>age</column_name>
      <stream_name>avg_age</stream_name>
    </field>
    <field>
      <column_name>education</column_name>
      <stream_name>most_common_education_group</stream_name>
    </field>
    <field>
      <column_name>employment</column_name>
      <stream_name>most_common_employment</stream_name>
    </field>
    <field>
      <column_name>child_min_18</column_name>
      <stream_name>avg_children</stream_name>
    </field>
    <field>
      <column_name>schema</column_name>
      <stream_name>schema</stream_name>
    </field>
    <field>
      <column_name>recruitment_id</column_name>
      <stream_name>recruitment_id</stream_name>
    </field>
  </fields>
  <ignore_errors>N</ignore_errors>
  <only_when_have_rows>N</only_when_have_rows>
  <!-- Table partitioning is not enabled (partitioning_enabled is N). -->
  <partitioning_daily>N</partitioning_daily>
  <partitioning_enabled>N</partitioning_enabled>
  <partitioning_field/>
  <partitioning_monthly>Y</partitioning_monthly>
  <return_field/>
  <return_keys>N</return_keys>
  <!-- Target: appeals.main_profile, with the explicit field list above. -->
  <schema>appeals</schema>
  <specify_fields>Y</specify_fields>
  <table>main_profile</table>
  <tablename_field/>
  <tablename_in_field>N</tablename_in_field>
  <tablename_in_table>Y</tablename_in_table>
  <truncate>N</truncate>
  <use_batch>Y</use_batch>
  <attributes/>
  <GUI>
    <xloc>560</xloc>
    <yloc>400</yloc>
  </GUI>
</transform>
<!-- Upserts the Department-level profile rows into appeals.main_profile, matched on (recruitment_id, recording_date). -->
<transform>
  <name>Insert / update main profile</name>
  <type>InsertUpdate</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <commit>100</commit>
  <connection>ervu-dashboard</connection>
  <lookup>
    <!-- Lookup keys: table column (field) compared with stream field (name). -->
    <key>
      <condition>=</condition>
      <field>recruitment_id</field>
      <name>recruitment_id</name>
      <name2/>
    </key>
    <key>
      <condition>=</condition>
      <field>recording_date</field>
      <name>recording_date</name>
      <name2/>
    </key>
    <schema>appeals</schema>
    <table>main_profile</table>
    <!-- Written columns: table column (name) taken from stream field (rename). -->
    <value>
      <name>age</name>
      <rename>avg_age</rename>
      <update>Y</update>
    </value>
    <value>
      <name>child_min_18</name>
      <rename>avg_children</rename>
      <update>Y</update>
    </value>
    <value>
      <name>education</name>
      <rename>most_common_education_group</rename>
      <update>Y</update>
    </value>
    <value>
      <name>employment</name>
      <rename>most_common_employment</rename>
      <update>Y</update>
    </value>
    <value>
      <name>gender</name>
      <rename>most_common_gender</rename>
      <update>Y</update>
    </value>
    <!-- Key columns are inserted but not updated: a matched row already holds these exact values. -->
    <value>
      <name>recording_date</name>
      <rename>recording_date</rename>
      <update>N</update>
    </value>
    <value>
      <name>recruitment_id</name>
      <rename>recruitment_id</rename>
      <update>N</update>
    </value>
    <value>
      <name>schema</name>
      <rename>schema</rename>
      <update>Y</update>
    </value>
  </lookup>
  <update_bypassed>N</update_bypassed>
  <attributes/>
  <GUI>
    <xloc>800</xloc>
    <yloc>256</yloc>
  </GUI>
</transform>
<!-- Replaces the numeric education group in most_common_education_group with its text label (no separate target field). -->
<transform>
  <name>Value mapper main profile</name>
  <type>ValueMapper</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <field_to_use>most_common_education_group</field_to_use>
  <!-- One mapping per group produced by the CASE expression in the input query (1 to 4). -->
  <fields>
    <field>
      <source_value>1</source_value>
      <target_value>Высшее</target_value>
    </field>
    <field>
      <source_value>2</source_value>
      <target_value>Среднее проф.</target_value>
    </field>
    <field>
      <source_value>3</source_value>
      <target_value>Общее</target_value>
    </field>
    <!-- Group 4 is the ELSE branch of the query; without this entry it has no label mapping. -->
    <field>
      <source_value>4</source_value>
      <target_value>Прочее/неизвестное</target_value>
    </field>
  </fields>
  <non_match_default/>
  <target_field/>
  <attributes/>
  <GUI>
    <xloc>528</xloc>
    <yloc>256</yloc>
  </GUI>
</transform>
<!-- No transform error handling is configured and no extra pipeline attributes are stored. -->
<transform_error_handling/>
<attributes/>
</pipeline>

View file

@ -0,0 +1,126 @@
<?xml version="1.0" encoding="UTF-8"?>
<workflow>
<!-- Workflow identity (name kept in sync with the file name) and audit fields. -->
<name>main_profile</name>
<name_sync_with_filename>Y</name_sync_with_filename>
<description/>
<extended_description/>
<workflow_version/>
<created_user>-</created_user>
<created_date>2025/06/17 10:16:58.535</created_date>
<modified_user>-</modified_user>
<modified_date>2025/06/17 10:16:58.535</modified_date>
<!-- No workflow parameters are declared. -->
<parameters/>
<actions>
<!-- Start action (type SPECIAL). Repeat is N and schedulerType is 0; the other timing fields keep the values below. -->
<action>
  <name>Start</name>
  <description/>
  <type>SPECIAL</type>
  <attributes/>
  <DayOfMonth>1</DayOfMonth>
  <hour>12</hour>
  <intervalMinutes>60</intervalMinutes>
  <intervalSeconds>0</intervalSeconds>
  <minutes>0</minutes>
  <repeat>N</repeat>
  <schedulerType>0</schedulerType>
  <weekDay>1</weekDay>
  <parallel>N</parallel>
  <xloc>192</xloc>
  <yloc>368</yloc>
  <attributes_hac/>
</action>
<!-- Runs the main_profile pipeline file with the "local" run configuration and waits until it has finished. -->
<action>
  <name>main_profile.hpl</name>
  <description/>
  <type>PIPELINE</type>
  <attributes/>
  <add_date>N</add_date>
  <add_time>N</add_time>
  <clear_files>N</clear_files>
  <clear_rows>N</clear_rows>
  <create_parent_folder>N</create_parent_folder>
  <exec_per_row>N</exec_per_row>
  <!-- Pipeline location, resolved from the PROJECT_HOME variable. -->
  <filename>${PROJECT_HOME}/info_recruits/main_profile/main_profile.hpl</filename>
  <!-- No dedicated log file (set_logfile is N); log level is Basic. -->
  <logext/>
  <logfile/>
  <loglevel>Basic</loglevel>
  <parameters>
    <pass_all_parameters>Y</pass_all_parameters>
  </parameters>
  <params_from_previous>N</params_from_previous>
  <run_configuration>local</run_configuration>
  <set_append_logfile>N</set_append_logfile>
  <set_logfile>N</set_logfile>
  <wait_until_finished>Y</wait_until_finished>
  <parallel>N</parallel>
  <xloc>336</xloc>
  <yloc>368</yloc>
  <attributes_hac/>
</action>
<!-- Runs the nlvl_main_profile pipeline file with the "local" run configuration and waits until it has finished. -->
<action>
  <name>nlvl_main_profile.hpl</name>
  <description/>
  <type>PIPELINE</type>
  <attributes/>
  <add_date>N</add_date>
  <add_time>N</add_time>
  <clear_files>N</clear_files>
  <clear_rows>N</clear_rows>
  <create_parent_folder>N</create_parent_folder>
  <exec_per_row>N</exec_per_row>
  <!-- Pipeline location, resolved from the PROJECT_HOME variable. -->
  <filename>${PROJECT_HOME}/info_recruits/main_profile/nlvl_main_profile.hpl</filename>
  <!-- No dedicated log file (set_logfile is N); log level is Basic. -->
  <logext/>
  <logfile/>
  <loglevel>Basic</loglevel>
  <parameters>
    <pass_all_parameters>Y</pass_all_parameters>
  </parameters>
  <params_from_previous>N</params_from_previous>
  <run_configuration>local</run_configuration>
  <set_append_logfile>N</set_append_logfile>
  <set_logfile>N</set_logfile>
  <wait_until_finished>Y</wait_until_finished>
  <parallel>N</parallel>
  <xloc>496</xloc>
  <yloc>368</yloc>
  <attributes_hac/>
</action>
<!-- Terminal action of type SUCCESS; it is the target of the last hop. -->
<action>
  <name>Success</name>
  <description/>
  <type>SUCCESS</type>
  <attributes/>
  <parallel>N</parallel>
  <xloc>656</xloc>
  <yloc>368</yloc>
  <attributes_hac/>
</action>
</actions>
<!-- Execution order: Start, main_profile.hpl, nlvl_main_profile.hpl, Success. -->
<hops>
  <!-- Unconditional hop out of Start. -->
  <hop>
    <from>Start</from>
    <to>main_profile.hpl</to>
    <enabled>Y</enabled>
    <evaluation>Y</evaluation>
    <unconditional>Y</unconditional>
  </hop>
  <!-- Conditional hop (unconditional N, evaluation Y) between the two pipelines. -->
  <hop>
    <from>main_profile.hpl</from>
    <to>nlvl_main_profile.hpl</to>
    <enabled>Y</enabled>
    <evaluation>Y</evaluation>
    <unconditional>N</unconditional>
  </hop>
  <!-- Conditional hop (unconditional N, evaluation Y) into Success. -->
  <hop>
    <from>nlvl_main_profile.hpl</from>
    <to>Success</to>
    <enabled>Y</enabled>
    <evaluation>Y</evaluation>
    <unconditional>N</unconditional>
  </hop>
</hops>
<!-- No canvas notes and no extra workflow attributes. -->
<notepads/>
<attributes/>
</workflow>

View file

@ -0,0 +1,210 @@
<?xml version="1.0" encoding="UTF-8"?>
<pipeline>
<!-- General pipeline metadata: name (kept in sync with the file name), pipeline type and audit fields. -->
<info>
  <name>nlvl_main_profile</name>
  <name_sync_with_filename>Y</name_sync_with_filename>
  <description/>
  <extended_description/>
  <pipeline_version/>
  <pipeline_type>Normal</pipeline_type>
  <!-- No pipeline parameters are declared. -->
  <parameters/>
  <!-- Transform performance capturing is switched off; delay and size limit keep the values below. -->
  <capture_transform_performance>N</capture_transform_performance>
  <transform_performance_capturing_delay>1000</transform_performance_capturing_delay>
  <transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit>
  <created_user>-</created_user>
  <created_date>2025/06/17 10:49:22.576</created_date>
  <modified_user>-</modified_user>
  <modified_date>2025/06/17 10:49:22.576</modified_date>
</info>
<!-- No canvas notes. -->
<notepads/>
<!-- Single enabled hop: the query result is passed straight to the insert/update transform. -->
<order>
  <hop>
    <from>Table input nlvl main profile</from>
    <to>Insert / update nlvl main profile</to>
    <enabled>Y</enabled>
  </hop>
</order>
<!-- Reads the higher-level (Organization, Region, Ministry) profile rows from the "ervu-dashboard" connection. -->
<transform>
<name>Table input nlvl main profile</name>
<type>TableInput</type>
<description/>
<distribute>Y</distribute>
<custom_distribution/>
<copies>1</copies>
<partitioning>
<method>none</method>
<schema_name/>
</partitioning>
<connection>ervu-dashboard</connection>
<execute_each_row>N</execute_each_row>
<limit>0</limit>
<!--
  Query outline (the inline SQL comments are in Russian):
  * latest_hierarchy: one row per idm_id from ervu_dashboard.recruitment.
  * latest_date / filtered_records: rows of appeals.main_profile whose recording_date equals the table-wide maximum.
  * hierarchy_cte: for every recruitment row with schema = 'Department', its parent (organization_id),
    grandparent (region_id) and great-grandparent (ministry_id).
  * aggregated_counts: UNION ALL of three aggregations of filtered_records joined on
    recruitment_id = department_id, grouped by organization_id, region_id and ministry_id respectively.
  Output columns: level_id, avg_age, most_common_gender, most_common_employment, avg_child_min_18,
  most_common_education, recording_date, level.
  NOTE(review): latest_hierarchy uses DISTINCT ON (idm_id) with ORDER BY idm_id only, so the query does not
  determine which row is kept per idm_id, although its inline comment speaks of the maximum version.
  Confirm whether a version column should be added to the ORDER BY.
  NOTE(review): AVG(age) and AVG(child_min_18) average the stored per-row values of main_profile without
  weighting by the number of citizens behind each row. Confirm this is intended.
-->
<sql>--- next_level_org
WITH latest_hierarchy AS (
-- Берем только записи с максимальной версией для каждого idm_id
SELECT DISTINCT ON (idm_id) *
FROM ervu_dashboard.recruitment
ORDER BY idm_id
),
latest_date AS (
-- Определяем последнюю дату загрузки
SELECT MAX(recording_date) AS max_date FROM appeals.main_profile
),
filtered_records AS (
-- Фильтруем данные, оставляя только записи с последней датой загрузки
SELECT id_main_profile, gender, age, child_min_18, education, employment, recording_date, recruitment_id, "schema" FROM appeals.main_profile
WHERE recording_date = (SELECT max_date FROM latest_date)
),
hierarchy_cte AS (
-- Строим связи между уровнями (Department → Organization → Region → Ministry)
SELECT
h.idm_id AS department_id,
h.parent_id AS organization_id,
h2.parent_id AS region_id,
h3.parent_id AS ministry_id
FROM latest_hierarchy h
LEFT JOIN latest_hierarchy h2 ON h.parent_id = h2.idm_id -- Organization
LEFT JOIN latest_hierarchy h3 ON h2.parent_id = h3.idm_id -- Region
WHERE h.schema = 'Department' -- Начинаем с Department
),
aggregated_counts AS (
-- Агрегируем для Organization
SELECT
h.organization_id AS level_id,
ROUND(AVG(age)::numeric, 0) avg_age,
MODE() WITHIN GROUP (ORDER BY gender) AS most_common_gender,
MODE() WITHIN GROUP (ORDER BY employment) AS most_common_employment,
ROUND(AVG(child_min_18)::numeric, 0) AS avg_child_min_18,
MODE() WITHIN GROUP (ORDER BY education) AS most_common_education,
MAX(r.recording_date) as recording_date,
'Organization' AS level
FROM filtered_records r
JOIN hierarchy_cte h ON r.recruitment_id = h.department_id
WHERE h.organization_id IS NOT NULL
GROUP BY h.organization_id
UNION ALL
-- Агрегируем для Region
SELECT
h.region_id AS level_id,
ROUND(AVG(age)::numeric, 0) avg_age,
MODE() WITHIN GROUP (ORDER BY gender) AS most_common_gender,
MODE() WITHIN GROUP (ORDER BY employment) AS most_common_employment,
ROUND(AVG(child_min_18)::numeric, 0) AS avg_child_min_18,
MODE() WITHIN GROUP (ORDER BY education) AS most_common_education,
MAX(r.recording_date) as recording_date,
'Region' AS level
FROM filtered_records r
JOIN hierarchy_cte h ON r.recruitment_id = h.department_id
WHERE h.region_id IS NOT NULL
GROUP BY h.region_id
UNION ALL
-- Агрегируем для Ministry
SELECT
h.ministry_id AS level_id,
ROUND(AVG(age)::numeric, 0) avg_age,
MODE() WITHIN GROUP (ORDER BY gender) AS most_common_gender,
MODE() WITHIN GROUP (ORDER BY employment) AS most_common_employment,
ROUND(AVG(child_min_18)::numeric, 0) AS avg_child_min_18,
MODE() WITHIN GROUP (ORDER BY education) AS most_common_education,
MAX(r.recording_date) as recording_date,
'Ministry' AS level
FROM filtered_records r
JOIN hierarchy_cte h ON r.recruitment_id = h.department_id
WHERE h.ministry_id IS NOT NULL
GROUP BY h.ministry_id
)
SELECT * FROM aggregated_counts;
</sql>
<variables_active>N</variables_active>
<attributes/>
<GUI>
<xloc>608</xloc>
<yloc>352</yloc>
</GUI>
</transform>
<!-- Upserts the higher-level profile rows into appeals.main_profile, matched on (recruitment_id = level_id, recording_date). -->
<transform>
  <name>Insert / update nlvl main profile</name>
  <type>InsertUpdate</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <commit>100</commit>
  <connection>ervu-dashboard</connection>
  <lookup>
    <!-- Lookup keys: table column (field) compared with stream field (name). -->
    <key>
      <condition>=</condition>
      <field>recruitment_id</field>
      <name>level_id</name>
      <name2/>
    </key>
    <key>
      <condition>=</condition>
      <field>recording_date</field>
      <name>recording_date</name>
      <name2/>
    </key>
    <schema>appeals</schema>
    <table>main_profile</table>
    <!-- Written columns: table column (name) taken from stream field (rename). -->
    <value>
      <name>age</name>
      <rename>avg_age</rename>
      <update>Y</update>
    </value>
    <value>
      <name>child_min_18</name>
      <rename>avg_child_min_18</rename>
      <update>Y</update>
    </value>
    <value>
      <name>education</name>
      <rename>most_common_education</rename>
      <update>Y</update>
    </value>
    <value>
      <name>employment</name>
      <rename>most_common_employment</rename>
      <update>Y</update>
    </value>
    <value>
      <name>gender</name>
      <rename>most_common_gender</rename>
      <update>Y</update>
    </value>
    <!-- Key columns are inserted but not updated: a matched row already holds these exact values. -->
    <value>
      <name>recording_date</name>
      <rename>recording_date</rename>
      <update>N</update>
    </value>
    <value>
      <name>schema</name>
      <rename>level</rename>
      <update>Y</update>
    </value>
    <value>
      <name>recruitment_id</name>
      <rename>level_id</rename>
      <update>N</update>
    </value>
  </lookup>
  <update_bypassed>N</update_bypassed>
  <attributes/>
  <GUI>
    <xloc>784</xloc>
    <yloc>368</yloc>
  </GUI>
</transform>
<!-- No transform error handling is configured and no extra pipeline attributes are stored. -->
<transform_error_handling/>
<attributes/>
</pipeline>