We are running Greenplum 6.28.1, which is built on PostgreSQL 9.4.26. We have a master node and 6 server nodes, all running Oracle Linux release 8.10.
We have 5 queries that progressively build temp tables that are use to generate a final result set. Those queries, in the order they are run, are:
drop table if exists hourly_time_windows;
create temp table hourly_time_windows as (
WITH
time_windows as (
select
(generate_series(0, 23, 1)::text || ' Hours'::text)::interval as window_start_interval,
(generate_series(1, 24, 1)::text || ' Hours'::text)::interval as window_end_interval
),
dates_and_users as (
select distinct
activity_date,
career_account
from network_logs_processed.hourly_internet_activity_building log
where activity_date >= '2024-02-01'::date and activity_date <= '2024-02-28'::date
)
select
activity_date,
career_account,
activity_date + window_start_interval as window_start,
activity_date + window_end_interval as window_end
from
dates_and_users, time_windows
where activity_date is not null
) distributed by (career_account);
drop table if exists hourly_activity_with_building_lag;
create temp table hourly_activity_with_building_lag as (
select
net.career_account,
net.activity_date,
net.mac_addr,
net.mobile_device,
net.window_start,
net.window_end,
net.session_start_in_hour,
net.session_end_in_hour,
net.megabytes_transferred,
upper(net.building) as building,
bac.principal_use as building_principal_use,
lag(upper(building)) over (PARTITION BY net.career_account, net.mac_addr ORDER BY session_start_in_hour) as last_building
from
(select * from network_logs_processed.hourly_internet_activity_building where activity_date between '2024-02-01'::date and '2024-02-28'::date) net
left outer join
(select distinct building_abbreviation, principal_use from adhoc.space_building_addition_current_2023_10_26
where (building_abbreviation != 'HGR6' and principal_use != 'Administrative')) bac
on
bac.building_abbreviation = upper(net.building)
) distributed by (career_account);
drop table if exists hourly_activity_with_movement_flag;
create temp table hourly_activity_with_movement_flag as (
select
data.career_account,
data.activity_date,
data.mac_addr,
data.mobile_device,
data.window_start,
data.window_end,
data.building,
(CASE WHEN data.mobile_device = 1 and building_principal_use != 'Residential' then 1 else 0 end) as not_in_residential,
(CASE WHEN data.mobile_device = 1 and data.building = class.building then 1 else 0 end) as in_class,
(CASE WHEN
sum(case when (last_building is null or last_building != data.building) then 1 else 0 end) > 0
THEN 1
ELSE 0
END) as device_moved_buildings,
sum(megabytes_transferred) as megabytes_transferred
from
ip.hourly_activity_with_building_lag data
left outer join
(
select schedule.*, banner.career_account from utility_features.student_class_schedule_detail schedule
left outer join banner_oltp.banner_lookup banner
on schedule.person_uid = banner.person_uid::numeric
) class
on
data.activity_date = class.calendar_date
and data.career_account = class.career_account
and data.building = class.building
and (
(data.session_start_in_hour between class.begin_time and class.end_time)
OR
(data.session_end_in_hour between class.begin_time and class.end_time)
OR
(data.session_end_in_hour > class.end_time and data.session_start_in_hour < class.begin_time)
)
group by 1,2,3,4,5,6,7,8,9
) distributed by (career_account);
drop table if exists included_students;
create temp table included_students as (
with
registered_students as (
select distinct
academic_period,
person_uid
from
ods.student_course_pu cg
where
gradable_ind = 'Y' and
course_credits > 0 and
campus in ('PWL','CEC') and
schedule_type not in ('IND','RES','EX') and
substring(course_identification from '[a-zA-Z]+#"[0-9]#"%' for '#')::int <= 4 and --undergrad courses only
sub_academic_period in ('1','F8','FHS','M1','M2','M3','M12','M23','S8','SHS') and
registration_status like 'R%'
),
housed_students as (
select distinct academic_period, person_uid
from utility_features.resident_student_room_details
),
full_student_list as (
select academic_period, person_uid::numeric from registered_students
UNION
select academic_period, person_uid::numeric from housed_students
)
select
full_student_list.academic_period,
full_student_list.person_uid,
banner.career_account
from
full_student_list
left outer join banner_oltp.banner_lookup banner
on full_student_list.person_uid = banner.person_uid::numeric
) distributed by (career_account);
drop table if exists aggregated_hourly_data;
create temp table aggregated_hourly_data as (
select
hourly_time_windows.career_account,
banner.puid,
banner.person_uid,
ac.academic_period,
hourly_time_windows.activity_date,
hourly_time_windows.window_start,
hourly_time_windows.window_end,
sum(not_in_residential) as not_in_residential,
sum(in_class) as in_class,
sum(device_moved_buildings) as device_moved_buildings,
sum(megabytes_transferred) as megabytes_transferred
from
hourly_time_windows
left outer join hourly_activity_with_movement_flag
on
hourly_time_windows.career_account = hourly_activity_with_movement_flag.career_account
and hourly_time_windows.window_start = hourly_activity_with_movement_flag.window_start
left outer join banner_oltp.banner_lookup banner
on
hourly_time_windows.career_account = banner.career_account
left outer join utility.academic_calendar ac
on
hourly_time_windows.window_start between ac.last_term_end_date + INTERVAL '1 day' and ac.end_date
inner join included_students
on hourly_time_windows.career_account = included_students.career_account
and ac.academic_period = included_students.academic_period
group by 1,2,3,4,5,6,7
) distributed by (career_account);
Here is my problem:
If I run each query directly and count the rows in the temp table after it is created, all five queries complete in a minute or less. (That's 60 seconds to run all 5, not 60 seconds for each of them.)
I created a plpgsql function to run those five queries. The first four completed in about the same time it takes when I run them directly, and the final row counts for all four tables were exactly the same. But I've let the function run for 30+ minutes and the fifth query still never completes.
I also tried creating a plpgsql function to run just the first four queries and then running the fifth query directly. Again, the function completes very quickly, and the temp tables it creates have the same row counts as when I run the queries directly, but the fifth query still does not complete.
I know PostgreSQL optimizes things differently when run in a function rather than individually, but I really thought that running just the first four in a function and the fifth directly would give different results.
I am kind of at my wit's end here.
Has anyone run into anything like this before?