ervu-dashboard-etl/map_v1/predictive_metrics/data_collection.hpl

<?xml version="1.0" encoding="UTF-8"?>
<pipeline>
  <info>
    <name>data_collection</name>
    <name_sync_with_filename>Y</name_sync_with_filename>
    <description/>
    <extended_description/>
    <pipeline_version/>
    <pipeline_type>Normal</pipeline_type>
    <parameters>
    </parameters>
    <capture_transform_performance>N</capture_transform_performance>
    <transform_performance_capturing_delay>1000</transform_performance_capturing_delay>
    <transform_performance_capturing_size_limit>100</transform_performance_capturing_size_limit>
    <created_user>-</created_user>
    <created_date>2025/02/27 12:45:14.184</created_date>
    <modified_user>-</modified_user>
    <modified_date>2025/02/27 12:45:14.184</modified_date>
  </info>
  <notepads>
  </notepads>
  <order>
    <hop>
      <from>Table input</from>
      <to>Merge join</to>
      <enabled>Y</enabled>
    </hop>
    <hop>
      <from>Table input 2</from>
      <to>Merge join</to>
      <enabled>Y</enabled>
    </hop>
    <hop>
      <from>Merge join</from>
      <to>Table output</to>
      <enabled>Y</enabled>
    </hop>
  </order>
  <transform>
    <name>Merge join</name>
    <type>MergeJoin</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
    <partitioning>
      <method>none</method>
      <schema_name/>
    </partitioning>
    <join_type>INNER</join_type>
    <keys_1>
      <key>recruitment_id</key>
    </keys_1>
    <keys_2>
      <key>idm_id</key>
    </keys_2>
    <transform1>Table input</transform1>
    <transform2>Table input 2</transform2>
    <attributes/>
    <GUI>
      <xloc>768</xloc>
      <yloc>368</yloc>
    </GUI>
  </transform>
  <transform>
    <name>Table input</name>
    <type>TableInput</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
    <partitioning>
      <method>none</method>
      <schema_name/>
    </partitioning>
    <connection>postgres.person_registry</connection>
    <execute_each_row>N</execute_each_row>
    <limit>0</limit>
    <sql>WITH year_series AS (
  SELECT generate_series(
    date_trunc('year', CURRENT_DATE) - INTERVAL '54 years',
    date_trunc('year', CURRENT_DATE) - INTERVAL '1 year',
    INTERVAL '1 year'
  )::date AS year_date
)
SELECT
  COALESCE(r.target_recruitment_id, r.current_recruitment_id) as recruitment_id,
  ys.year_date AS year,
  COALESCE(COUNT(r.id) FILTER (
    WHERE r.military_registration_date >= ys.year_date
    AND r.military_registration_date &lt; ys.year_date + INTERVAL '1 year'
  ), 0) AS count_all,
  COALESCE(COUNT(r.id) FILTER (
    WHERE r.military_registration_date >= ys.year_date
    AND r.military_registration_date &lt; ys.year_date + INTERVAL '1 year'
    AND DATE_PART('year', AGE(r.military_registration_date, r.birth_date)) &lt; 18
  ), 0) AS reaching_17_age,
  COALESCE(COUNT(r.id) FILTER (
    WHERE r.military_registration_date >= ys.year_date
    AND r.military_registration_date &lt; ys.year_date + INTERVAL '1 year'
    AND r.gender = 'FEMALE'
  ), 0) AS women_military_specialty,
  'Department' as schema
FROM year_series ys
LEFT JOIN recruits r ON 1=1
GROUP BY ys.year_date, COALESCE(r.target_recruitment_id, r.current_recruitment_id)
ORDER BY recruitment_id, year DESC;</sql>
    <variables_active>N</variables_active>
    <attributes/>
    <GUI>
      <xloc>544</xloc>
      <yloc>304</yloc>
    </GUI>
  </transform>
  <transform>
    <name>Table input 2</name>
    <type>TableInput</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
    <partitioning>
      <method>none</method>
      <schema_name/>
    </partitioning>
    <connection>ervu-dashboard</connection>
    <execute_each_row>N</execute_each_row>
    <limit>0</limit>
    <sql>-- Берем только записи с максимальной версией для каждого idm_id
SELECT DISTINCT ON (idm_id) idm_id
FROM ervu_dashboard.recruitment
WHERE schema = 'Department'
ORDER BY idm_id, updated_at DESC
</sql>
    <variables_active>N</variables_active>
    <attributes/>
    <GUI>
      <xloc>544</xloc>
      <yloc>496</yloc>
    </GUI>
  </transform>
  <transform>
    <name>Table output</name>
    <type>TableOutput</type>
    <description/>
    <distribute>Y</distribute>
    <custom_distribution/>
    <copies>1</copies>
    <partitioning>
      <method>none</method>
      <schema_name/>
    </partitioning>
    <commit>1000</commit>
    <connection>ervu-dashboard</connection>
    <fields>
      <field>
        <column_name>recruitment_id</column_name>
        <stream_name>recruitment_id</stream_name>
      </field>
      <field>
        <column_name>recording_date</column_name>
        <stream_name>year</stream_name>
      </field>
      <field>
        <column_name>count_all</column_name>
        <stream_name>count_all</stream_name>
      </field>
      <field>
        <column_name>reaching_17_age</column_name>
        <stream_name>reaching_17_age</stream_name>
      </field>
      <field>
        <column_name>women_military_specialty</column_name>
        <stream_name>women_military_specialty</stream_name>
      </field>
      <field>
        <column_name>schema</column_name>
        <stream_name>schema</stream_name>
      </field>
    </fields>
    <ignore_errors>N</ignore_errors>
    <only_when_have_rows>N</only_when_have_rows>
    <partitioning_daily>N</partitioning_daily>
    <partitioning_enabled>N</partitioning_enabled>
    <partitioning_monthly>Y</partitioning_monthly>
    <return_keys>N</return_keys>
    <schema>forecast</schema>
    <specify_fields>Y</specify_fields>
    <table>registered_citizens</table>
    <tablename_in_field>N</tablename_in_field>
    <tablename_in_table>Y</tablename_in_table>
    <truncate>Y</truncate>
    <use_batch>Y</use_batch>
    <attributes/>
    <GUI>
      <xloc>960</xloc>
      <yloc>368</yloc>
    </GUI>
  </transform>
  <transform_error_handling>
  </transform_error_handling>
  <attributes/>
</pipeline>