predictive_data Y Normal N 1000 100 - 2025/02/27 13:48:40.269 - 2025/02/27 13:48:40.269 Table input Table output Y Table input TableInput Y 1 none ervu-dashboard N 0 -- Адаптированный запрос для таблицы forecast.registered_citizens с учетом указанной структуры -- recording_date - год наблюдения -- recruitment_id - идентификатор военкомата -- count_all - всего на учете -- reaching_17_age - поставлено на учет по достижении 17 лет -- women_military_specialty - женщины, получившие ВУС -- schema - уровень военкомата -- 1. Проверяем наличие данных за весь необходимый период и создаем полную сетку лет WITH RECURSIVE years AS ( -- Теперь синтаксис правильный SELECT EXTRACT(YEAR FROM CURRENT_DATE)::int AS year, 'Текущий год' AS prediction_type UNION ALL SELECT year + 1, 'Прогнозный год' FROM years WHERE year < EXTRACT(YEAR FROM CURRENT_DATE)::int + 49 -- +49, т.к. первый год уже включён ), year_range AS ( SELECT EXTRACT(YEAR FROM CURRENT_DATE) - 54 AS min_year, -- Для первых двух показателей EXTRACT(YEAR FROM CURRENT_DATE) - 34 AS min_year_females, -- Для женщин с ВУС EXTRACT(YEAR FROM CURRENT_DATE) AS max_year ), all_years AS ( SELECT generate_series(min_year, max_year) AS year FROM year_range ), all_years_females AS ( SELECT generate_series(min_year_females, max_year) AS year FROM year_range ), all_regions AS ( SELECT DISTINCT recruitment_id, schema FROM forecast.registered_citizens ), -- 2. Создаем полную сетку регион-год для всех показателей full_grid_males AS ( SELECT r.recruitment_id, r.schema, y.year FROM all_regions r CROSS JOIN all_years y ), full_grid_females AS ( SELECT r.recruitment_id, r.schema, y.year FROM all_regions r CROSS JOIN all_years_females y ), -- 3. Объединяем сетку с имеющимися данными, чтобы выявить пропуски males_data AS ( SELECT g.recruitment_id, g.schema, g.year, COALESCE(h.count_all, NULL) AS count_all, COALESCE(h.reaching_17_age, NULL) AS reaching_17_age FROM full_grid_males g LEFT JOIN forecast.registered_citizens h ON g.recruitment_id = h.recruitment_id AND g.year = EXTRACT(YEAR FROM h.recording_date) ), females_data AS ( SELECT g.recruitment_id, g.schema, g.year, COALESCE(h.women_military_specialty, NULL) AS women_military_specialty FROM full_grid_females g LEFT JOIN forecast.registered_citizens h ON g.recruitment_id = h.recruitment_id AND g.year = EXTRACT(YEAR FROM h.recording_date) ), -- 4. Рассчитываем статистики для выявления выбросов males_stats AS ( SELECT recruitment_id, schema, AVG(count_all) AS avg_count_all, STDDEV(count_all) AS stddev_count_all, -- σ для фильтрации выбросов AVG(reaching_17_age) AS avg_reaching_17_age, STDDEV(reaching_17_age) AS stddev_reaching_17_age -- σ для фильтрации выбросов FROM males_data WHERE count_all IS NOT NULL AND reaching_17_age IS NOT NULL GROUP BY recruitment_id, schema ), females_stats AS ( SELECT recruitment_id, schema, AVG(women_military_specialty) AS avg_women_military_specialty, STDDEV(women_military_specialty) AS stddev_women_military_specialty -- σ для фильтрации выбросов FROM females_data WHERE women_military_specialty IS NOT NULL GROUP BY recruitment_id, schema ), -- 5. Выявляем выбросы и помечаем их для интерполяции males_outliers AS ( SELECT m.recruitment_id, m.schema, m.year, m.count_all, CASE WHEN m.count_all IS NULL OR ABS(m.count_all - s.avg_count_all) > s.stddev_count_all THEN TRUE ELSE FALSE END AS count_all_is_outlier, m.reaching_17_age, CASE WHEN m.reaching_17_age IS NULL OR ABS(m.reaching_17_age - s.avg_reaching_17_age) > s.stddev_reaching_17_age THEN TRUE ELSE FALSE END AS reaching_17_age_is_outlier FROM males_data m JOIN males_stats s ON m.recruitment_id = s.recruitment_id AND m.schema = s.schema ), females_outliers AS ( SELECT f.recruitment_id, f.schema, f.year, f.women_military_specialty, CASE WHEN f.women_military_specialty IS NULL OR ABS(f.women_military_specialty - s.avg_women_military_specialty) > s.stddev_women_military_specialty THEN TRUE ELSE FALSE END AS women_military_specialty_is_outlier FROM females_data f JOIN females_stats s ON f.recruitment_id = s.recruitment_id AND f.schema = s.schema ), -- 6. Подготовка данных для интерполяции (находим ближайшие НЕ выбросы) males_interpolation_prep AS ( SELECT mo.recruitment_id, mo.schema, mo.year, mo.count_all, mo.count_all_is_outlier, mo.reaching_17_age, mo.reaching_17_age_is_outlier, -- Для интерполяции "Всего на учете" ищем ближайшие не-выбросы (SELECT MAX(year) FROM males_outliers WHERE recruitment_id = mo.recruitment_id AND schema = mo.schema AND year < mo.year AND NOT count_all_is_outlier) AS prev_year_count_all, (SELECT MIN(year) FROM males_outliers WHERE recruitment_id = mo.recruitment_id AND schema = mo.schema AND year > mo.year AND NOT count_all_is_outlier) AS next_year_count_all, -- Для интерполяции "Поставлено на учет по достижении 17 лет" ищем ближайшие не-выбросы (SELECT MAX(year) FROM males_outliers WHERE recruitment_id = mo.recruitment_id AND schema = mo.schema AND year < mo.year AND NOT reaching_17_age_is_outlier) AS prev_year_reaching_17_age, (SELECT MIN(year) FROM males_outliers WHERE recruitment_id = mo.recruitment_id AND schema = mo.schema AND year > mo.year AND NOT reaching_17_age_is_outlier) AS next_year_reaching_17_age FROM males_outliers mo ), females_interpolation_prep AS ( SELECT fo.recruitment_id, fo.schema, fo.year, fo.women_military_specialty, fo.women_military_specialty_is_outlier, -- Для интерполяции "Женщины, получившие ВУС" ищем ближайшие не-выбросы (SELECT MAX(year) FROM females_outliers WHERE recruitment_id = fo.recruitment_id AND schema = fo.schema AND year < fo.year AND NOT women_military_specialty_is_outlier) AS prev_year_women_military_specialty, (SELECT MIN(year) FROM females_outliers WHERE recruitment_id = fo.recruitment_id AND schema = fo.schema AND year > fo.year AND NOT women_military_specialty_is_outlier) AS next_year_women_military_specialty FROM females_outliers fo ), -- 7. Выполняем линейную интерполяцию для выбросов и отсутствующих значений males_interpolated AS ( SELECT m.recruitment_id, m.schema, m.year, CASE WHEN m.count_all_is_outlier AND m.prev_year_count_all IS NOT NULL AND m.next_year_count_all IS NOT NULL THEN -- Linear interpolation formula: y = y1 + ((x - x1) / (x2 - x1)) * (y2 - y1) (SELECT p.count_all FROM males_outliers p WHERE p.recruitment_id = m.recruitment_id AND p.schema = m.schema AND p.year = m.prev_year_count_all) + ((m.year - m.prev_year_count_all)::float / (m.next_year_count_all - m.prev_year_count_all)) * ((SELECT n.count_all FROM males_outliers n WHERE n.recruitment_id = m.recruitment_id AND n.schema = m.schema AND n.year = m.next_year_count_all) - (SELECT p.count_all FROM males_outliers p WHERE p.recruitment_id = m.recruitment_id AND p.schema = m.schema AND p.year = m.prev_year_count_all)) ELSE m.count_all END AS count_all, CASE WHEN m.reaching_17_age_is_outlier AND m.prev_year_reaching_17_age IS NOT NULL AND m.next_year_reaching_17_age IS NOT NULL THEN -- Linear interpolation formula: y = y1 + ((x - x1) / (x2 - x1)) * (y2 - y1) (SELECT p.reaching_17_age FROM males_outliers p WHERE p.recruitment_id = m.recruitment_id AND p.schema = m.schema AND p.year = m.prev_year_reaching_17_age) + ((m.year - m.prev_year_reaching_17_age)::float / (m.next_year_reaching_17_age - m.prev_year_reaching_17_age)) * ((SELECT n.reaching_17_age FROM males_outliers n WHERE n.recruitment_id = m.recruitment_id AND n.schema = m.schema AND n.year = m.next_year_reaching_17_age) - (SELECT p.reaching_17_age FROM males_outliers p WHERE p.recruitment_id = m.recruitment_id AND p.schema = m.schema AND p.year = m.prev_year_reaching_17_age)) ELSE m.reaching_17_age END AS reaching_17_age FROM males_interpolation_prep m ), females_interpolated AS ( SELECT f.recruitment_id, f.schema, f.year, CASE WHEN f.women_military_specialty_is_outlier AND f.prev_year_women_military_specialty IS NOT NULL AND f.next_year_women_military_specialty IS NOT NULL THEN -- Linear interpolation formula: y = y1 + ((x - x1) / (x2 - x1)) * (y2 - y1) (SELECT p.women_military_specialty FROM females_outliers p WHERE p.recruitment_id = f.recruitment_id AND p.schema = f.schema AND p.year = f.prev_year_women_military_specialty) + ((f.year - f.prev_year_women_military_specialty)::float / (f.next_year_women_military_specialty - f.prev_year_women_military_specialty)) * ((SELECT n.women_military_specialty FROM females_outliers n WHERE n.recruitment_id = f.recruitment_id AND n.schema = f.schema AND n.year = f.next_year_women_military_specialty) - (SELECT p.women_military_specialty FROM females_outliers p WHERE p.recruitment_id = f.recruitment_id AND p.schema = f.schema AND p.year = f.prev_year_women_military_specialty)) ELSE f.women_military_specialty END AS women_military_specialty FROM females_interpolation_prep f ), -- 8. Объединяем интерполированные данные combined_data AS ( SELECT m.recruitment_id, m.schema, m.year, m.count_all, m.reaching_17_age, f.women_military_specialty FROM males_interpolated m LEFT JOIN females_interpolated f ON m.recruitment_id = f.recruitment_id AND m.schema = f.schema AND m.year = f.year ), -- 9. Рассчитываем параметры линейной регрессии average_values AS ( SELECT recruitment_id, schema, year, count_all, reaching_17_age, women_military_specialty, -- Вычисляем средние значения отдельно AVG(year) FILTER (WHERE count_all IS NOT NULL) OVER (PARTITION BY recruitment_id, schema) AS avg_x_count_all, AVG(count_all) FILTER (WHERE count_all IS NOT NULL) OVER (PARTITION BY recruitment_id, schema) AS avg_y_count_all, AVG(year) FILTER (WHERE reaching_17_age IS NOT NULL) OVER (PARTITION BY recruitment_id, schema) AS avg_x_reaching_17_age, AVG(reaching_17_age) FILTER (WHERE reaching_17_age IS NOT NULL) OVER (PARTITION BY recruitment_id, schema) AS avg_y_reaching_17_age, AVG(year) FILTER (WHERE women_military_specialty IS NOT NULL) OVER (PARTITION BY recruitment_id, schema) AS avg_x_women_military_specialty, AVG(women_military_specialty) FILTER (WHERE women_military_specialty IS NOT NULL) OVER (PARTITION BY recruitment_id, schema) AS avg_y_women_military_specialty FROM combined_data ), regression_stats AS ( SELECT recruitment_id, schema, -- count_all COUNT(*) FILTER (WHERE count_all IS NOT NULL) AS n_count_all, AVG(year) FILTER (WHERE count_all IS NOT NULL) AS avg_x_count_all, AVG(count_all) FILTER (WHERE count_all IS NOT NULL) AS avg_y_count_all, SUM((year - avg_x_count_all) * (count_all - avg_y_count_all)) FILTER (WHERE count_all IS NOT NULL) AS sum_xy_count_all, SUM(POWER(year - avg_x_count_all, 2)) FILTER (WHERE count_all IS NOT NULL) AS sum_x_squared_count_all, -- reaching_17_age COUNT(*) FILTER (WHERE reaching_17_age IS NOT NULL) AS n_reaching_17_age, AVG(year) FILTER (WHERE reaching_17_age IS NOT NULL) AS avg_x_reaching_17_age, AVG(reaching_17_age) FILTER (WHERE reaching_17_age IS NOT NULL) AS avg_y_reaching_17_age, SUM((year - avg_x_reaching_17_age) * (reaching_17_age - avg_y_reaching_17_age)) FILTER (WHERE reaching_17_age IS NOT NULL) AS sum_xy_reaching_17_age, SUM(POWER(year - avg_x_reaching_17_age, 2)) FILTER (WHERE reaching_17_age IS NOT NULL) AS sum_x_squared_reaching_17_age, -- women_military_specialty COUNT(*) FILTER (WHERE women_military_specialty IS NOT NULL) AS n_women_military_specialty, AVG(year) FILTER (WHERE women_military_specialty IS NOT NULL) AS avg_x_women_military_specialty, AVG(women_military_specialty) FILTER (WHERE women_military_specialty IS NOT NULL) AS avg_y_women_military_specialty, SUM((year - avg_x_women_military_specialty) * (women_military_specialty - avg_y_women_military_specialty)) FILTER (WHERE women_military_specialty IS NOT NULL) AS sum_xy_women_military_specialty, SUM(POWER(year - avg_x_women_military_specialty, 2)) FILTER (WHERE women_military_specialty IS NOT NULL) AS sum_x_squared_women_military_specialty FROM average_values GROUP BY recruitment_id, schema ), -- 10. Вычисляем параметры уравнения линейной регрессии regression_params AS ( SELECT recruitment_id, schema, -- Параметры для count_all CASE WHEN sum_x_squared_count_all <> 0 THEN sum_xy_count_all / sum_x_squared_count_all ELSE 0 END AS k_count_all, avg_y_count_all - (CASE WHEN sum_x_squared_count_all <> 0 THEN sum_xy_count_all / sum_x_squared_count_all ELSE 0 END) * avg_x_count_all AS b_count_all, -- Параметры для reaching_17_age CASE WHEN sum_x_squared_reaching_17_age <> 0 THEN sum_xy_reaching_17_age / sum_x_squared_reaching_17_age ELSE 0 END AS k_reaching_17_age, avg_y_reaching_17_age - (CASE WHEN sum_x_squared_reaching_17_age <> 0 THEN sum_xy_reaching_17_age / sum_x_squared_reaching_17_age ELSE 0 END) * avg_x_reaching_17_age AS b_reaching_17_age, -- Параметры для women_military_specialty CASE WHEN sum_x_squared_women_military_specialty <> 0 THEN sum_xy_women_military_specialty / sum_x_squared_women_military_specialty ELSE 0 END AS k_women_military_specialty, avg_y_women_military_specialty - (CASE WHEN sum_x_squared_women_military_specialty <> 0 THEN sum_xy_women_military_specialty / sum_x_squared_women_military_specialty ELSE 0 END) * avg_x_women_military_specialty AS b_women_military_specialty FROM regression_stats ) -- 11. Рассчитываем прогнозные значения на текущий и следующий годы SELECT r.recruitment_id, r.schema, MAKE_DATE(y.year, 1, 1) AS prediction_year, ROUND(r.k_count_all * y.year + r.b_count_all) AS count_all, ROUND(r.k_reaching_17_age * y.year + r.b_reaching_17_age) AS reaching_17_age, ROUND(r.k_women_military_specialty * y.year + r.b_women_military_specialty) AS women_military_specialty, y.prediction_type FROM years y CROSS JOIN regression_params r ORDER BY r.schema, r.recruitment_id, prediction_year; N 720 304 Table output TableOutput Y 1 none 1000 ervu-dashboard recruitment_id recruitment_id schema schema recording_date prediction_year count_all count_all reaching_17_age reaching_17_age women_military_specialty women_military_specialty N N N N Y N forecast Y registered_citizens
N Y N Y 928 304