I recently ran into the same issue. We have an Availability group where the secondary was having TempDB blow up and the primary was fine.
I created three tables and ran three stored procedures every 1-5 minutes on the secondary node. They are pretty light loads, obviously you should test them beforehand.
Here are the tables:
USE [master]
GO
/****** Object: Table [dbo].[TempDBDataGrowth] Script Date: 4/12/2018 1:13:35 PM ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[TempDBDataGrowth](
[ID] [int] IDENTITY(1,1) NOT NULL,
[TimeStart] [datetime2](7) NOT NULL,
[name] [sysname] NOT NULL,
[database_id] [int] NOT NULL,
[Size] [decimal](38, 2) NULL,
[UsedSpace] [decimal](38, 2) NULL,
PRIMARY KEY CLUSTERED
(
[ID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
USE [master]
GO
/****** Object: Table [dbo].[TempDBDataGrowth] Script Date: 4/12/2018 1:13:35 PM ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[TempDBDataGrowth](
[ID] [int] IDENTITY(1,1) NOT NULL,
[TimeStart] [datetime2](7) NOT NULL,
[name] [sysname] NOT NULL,
[database_id] [int] NOT NULL,
[Size] [decimal](38, 2) NULL,
[UsedSpace] [decimal](38, 2) NULL,
PRIMARY KEY CLUSTERED
(
[ID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
USE [master]
GO
/****** Object: Table [dbo].[WhoIsActive] Script Date: 4/12/2018 1:15:12 PM ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_PADDING ON
GO
CREATE TABLE [dbo].[WhoIsActive](
[dd hh:mm:ss.mss] [varchar](8000) NULL,
[dd hh:mm:ss.mss (avg)] [varchar](15) NULL,
[session_id] [smallint] NOT NULL,
[sql_text] [xml] NULL,
[sql_command] [xml] NULL,
[login_name] [nvarchar](128) NOT NULL,
[wait_info] [nvarchar](4000) NULL,
[tran_log_writes] [nvarchar](4000) NULL,
[CPU] [varchar](30) NULL,
[CPU_delta] [varchar](30) NULL,
[tempdb_allocations] [varchar](30) NULL,
[tempdb_current] [varchar](30) NULL,
[tempdb_allocations_delta] [varchar](30) NULL,
[tempdb_current_delta] [varchar](30) NULL,
[blocking_session_id] [smallint] NULL,
[blocked_session_count] [varchar](30) NULL,
[reads] [varchar](30) NULL,
[reads_delta] [varchar](30) NULL,
[writes] [varchar](30) NULL,
[writes_delta] [varchar](30) NULL,
[physical_reads] [varchar](30) NULL,
[physical_reads_delta] [varchar](30) NULL,
[query_plan] [xml] NULL,
[locks] [xml] NULL,
[used_memory] [varchar](30) NULL,
[used_memory_delta] [varchar](30) NULL,
[status] [varchar](30) NOT NULL,
[tran_start_time] [datetime] NULL,
[open_tran_count] [varchar](30) NULL,
[percent_complete] [varchar](30) NULL,
[host_name] [nvarchar](128) NULL,
[database_name] [nvarchar](128) NULL,
[program_name] [nvarchar](128) NULL,
[additional_info] [xml] NULL,
[start_time] [datetime] NOT NULL,
[login_time] [datetime] NULL,
[request_id] [int] NULL,
[collection_time] [datetime] NOT NULL
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO
SET ANSI_PADDING OFF
GO
Here are the three scripts I'm using to collect information: (For sp_whoisactive, you will need to download that procedure online.)
http://whoisactive.com/
USE tempdb
insert into master.dbo.TempDBDataGrowth
(TimeStart, name, database_id, size,UsedSpace)
SELECT
getdate() as timestart,
SD.name,
MF.database_id,
SUM( CONVERT(decimal(10,2),(DF.size/128.0)) ) as Size,
SUM( CONVERT(decimal(10,2), (CAST(FILEPROPERTY(DF.name, 'SpaceUsed') AS INT)/128.0 ) ) ) AS UsedSpace
FROM sys.master_files MF JOIN sys.databases SD
ON SD.database_id = MF.database_id
JOIN sys.database_files DF
ON DF.physical_name collate DATABASE_DEFAULT = MF.physical_name collate DATABASE_DEFAULT
WHERE MF.type = 0
GROUP BY SD.name, MF.database_id;
insert into master.dbo.TempDBDataUsage
(
TimeStart, QueryExecutionContextDBID, QueryExecContextDBNAME, ModuleObjectId,
Query_Text, session_id, request_id, exec_context_id, OutStanding_user_objects_page_counts,
OutStanding_internal_objects_page_counts, start_time, command, open_transaction_count,
percent_complete, estimated_completion_time, cpu_time, total_elapsed_time, reads,
writes, logical_reads, granted_query_memory, HOST_NAME, login_name, program_name
)
SELECT
getdate() as Timestart,
st.dbid AS QueryExecutionContextDBID,
DB_NAME(st.dbid) AS QueryExecContextDBNAME,
st.objectid AS ModuleObjectId,
SUBSTRING(st.TEXT,
dmv_er.statement_start_offset/2 + 1,
(CASE WHEN dmv_er.statement_end_offset = -1
THEN LEN(CONVERT(NVARCHAR(MAX),st.TEXT)) * 2
ELSE dmv_er.statement_end_offset
END - dmv_er.statement_start_offset)/2) AS Query_Text,
dmv_tsu.session_id ,
dmv_tsu.request_id,
dmv_tsu.exec_context_id,
(dmv_tsu.user_objects_alloc_page_count - dmv_tsu.user_objects_dealloc_page_count) AS OutStanding_user_objects_page_counts,
(dmv_tsu.internal_objects_alloc_page_count - dmv_tsu.internal_objects_dealloc_page_count) AS OutStanding_internal_objects_page_counts,
dmv_er.start_time,
dmv_er.command,
dmv_er.open_transaction_count,
dmv_er.percent_complete,
dmv_er.estimated_completion_time,
dmv_er.cpu_time,
dmv_er.total_elapsed_time,
dmv_er.reads,dmv_er.writes,
dmv_er.logical_reads,
dmv_er.granted_query_memory,
dmv_es.HOST_NAME,
dmv_es.login_name,
dmv_es.program_name
FROM sys.dm_db_task_space_usage dmv_tsu
INNER JOIN sys.dm_exec_requests dmv_er
ON (dmv_tsu.session_id = dmv_er.session_id AND dmv_tsu.request_id = dmv_er.request_id)
INNER JOIN sys.dm_exec_sessions dmv_es
ON (dmv_tsu.session_id = dmv_es.session_id)
CROSS APPLY sys.dm_exec_sql_text(dmv_er.sql_handle) st
WHERE (dmv_tsu.internal_objects_alloc_page_count + dmv_tsu.user_objects_alloc_page_count) > 0
ORDER BY (dmv_tsu.user_objects_alloc_page_count - dmv_tsu.user_objects_dealloc_page_count) + (dmv_tsu.internal_objects_alloc_page_count - dmv_tsu.internal_objects_dealloc_page_count) DESC;
USE [master]
GO
DECLARE @return_value int,
@schema varchar(max)
EXEC @return_value = [dbo].[sp_WhoIsActive]
@get_full_inner_text = 1,
@get_plans = 1,
@get_outer_command = 1,
@get_transaction_info = 1,
@get_task_info = 1,
@get_locks = 1,
@get_avg_time = 1,
@get_additional_info = 1,
@find_block_leaders = 1,
@delta_interval = 1,
@destination_table = 'WhoIsActive',
@schema = @schema OUTPUT
SELECT 'Return Value' = @return_value
GO
This takes 1-2 seconds to run in my environment, I scheduled it for every 5 minutes. You should adjust as necessary, I left this as a job running for a week at 5 minute intervals and then I looked at the DB growth portions to see when my TempDB was filling. Then I looked at sp_whoisactive to see what query was running at that time.
Here are some queries I ran afterwards to help me find my culprits:
SELECT TOP 1000 *
FROM [master].[dbo].[TempDBDataGrowth]
order by UsedSpace desc
SELECT TOP 1000 *
FROM [master].[dbo].[TempDBDataUsage]
order by writes desc
SELECT TOP 1000 *
FROM [master].[dbo].[WhoIsActive]
order by tempdb_allocations desc