IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[vGetVisits]') AND type in (N'U'))
DROP TABLE [dbo].[vGetVisits]
GO
CREATE TABLE [dbo].[vGetVisits](
[id] [int] NOT NULL,
[mydate] [datetime] NOT NULL,
CONSTRAINT [PK_vGetVisits] PRIMARY KEY CLUSTERED
(
[id] ASC
)
)
GO
INSERT INTO [dbo].[vGetVisits]([id], [mydate])
VALUES
(1, '2014-01-01 11:00'),
(2, '2014-01-03 10:00'),
(3, '2014-01-04 09:30'),
(4, '2014-04-01 10:00'),
(5, '2014-05-01 11:00'),
(6, '2014-07-01 09:00'),
(7, '2014-07-31 08:00');
GO
-- Clean up
IF OBJECT_ID (N'dbo.udfLastHitRecursive', N'FN') IS NOT NULL
DROP FUNCTION udfLastHitRecursive;
GO
-- Actual Function
CREATE FUNCTION dbo.udfLastHitRecursive
( @MyDate datetime)
RETURNS TINYINT
AS
BEGIN
-- Your returned value 1 or 0
DECLARE @Returned_Value TINYINT;
SET @Returned_Value=0;
-- Prepare gaps table to be used.
WITH gaps AS
(
-- Select Date and MaxDiff from the original table
SELECT
CONVERT(Date,mydate) AS [date]
, DATEDIFF(day,ISNULL(LAG(mydate, 1) OVER (ORDER BY mydate), mydate) , mydate) AS [MaxDiff]
FROM dbo.vGetVisits
)
SELECT @Returned_Value=
(SELECT DISTINCT -- DISTINCT in case we have same date but different time
CASE WHEN
(
-- It is a first entry
[date]=(SELECT MIN(CONVERT(Date,mydate)) FROM dbo.vGetVisits))
OR
/*
--Gap between last qualifying date and entered is greater than 90
Calculate Running sum upto and including required date
and find a remainder of division by 91.
*/
((SELECT SUM(t1.MaxDiff)
FROM (SELECT [MaxDiff] FROM gaps WHERE [date]<=t2.[date]
) t1
)%91 -
/*
ISNULL added to include first value that always returns NULL
Calculate Running sum upto and NOT including required date
and find a remainder of division by 91
*/
ISNULL((SELECT SUM(t1.MaxDiff)
FROM (SELECT [MaxDiff] FROM gaps WHERE [date]<t2.[date]
) t1
)%91, 0) -- End ISNULL
<0 )
/* End Running sum upto and including required date */
OR
-- Gap between two nearest dates is greater than 90
((SELECT SUM(t1.MaxDiff)
FROM (SELECT [MaxDiff] FROM gaps WHERE [date]<=t2.[date]
) t1
) - ISNULL((SELECT SUM(t1.MaxDiff)
FROM (SELECT [MaxDiff] FROM gaps WHERE [date]<t2.[date]
) t1
), 0) > 90)
THEN 1
ELSE 0
END
AS [Qualifying]
FROM gaps t2
WHERE [date]=CONVERT(Date,@MyDate))
-- What is neccesary to return when entered date is not in dbo.vGetVisits?
RETURN @Returned_Value
END
GO
SELECT
dbo.udfLastHitRecursive(mydate) AS [Qualifying]
, [id]
, mydate
FROM dbo.vGetVisits
ORDER BY mydate
Result

Also have a look at How to Calculate Running Total in SQL Server
update: please see below the results of performance testing.
Because of the different logic used in finding "90 days gap" ypercube's and my solutions if left intact may return different results to Paul White's solution.
This is due to the use of DATEDIFF and DATEADD functions respectively.
For example:
SELECT DATEADD(DAY, 90, '2014-01-01 00:00:00.000')
returns '2014-04-01 00:00:00.000' meaning that '2014-04-01 01:00:00.000' is beyond 90 days gap
but
SELECT DATEDIFF(DAY, '2014-01-01 00:00:00.000', '2014-04-01 01:00:00.000')
Returns '90' meaning that it is still within the gap.
Consider an example of a retailer. In this case selling a perishable product that has sell by date
'2014-01-01' at '2014-01-01 23:59:59:999' is fine. So value DATEDIFF(DAY, ...) in this case is OK.
Another example is a patient waiting to be seen.
For someone who comes at '2014-01-01 00:00:00:000' and leaves at
'2014-01-01 23:59:59:999' it is 0(zero) days if DATEDIFF is used even though the actual wait was almost 24 hours.
Again patient that comes at '2014-01-01 23:59:59' and walks away at '2014-01-02 00:00:01' waited for a day if DATEDIFF is used.
But I digress.
I left DATEDIFF solutions and even performance tested those but they should really be in their own league.
Also it was noted that for the big datasets it is impossible to avoid same day values.
So if we have say 13 Million records spanning 2 years of data then we will end up
having more than one record for some days. Those records are being filtered out at
the earliest opportunity in my and ypercube's DATEDIFF solutions. Hope ypercube does not mind this.
Solutions were tested on the following table
CREATE TABLE [dbo].[vGetVisits](
[id] [int] NOT NULL,
[mydate] [datetime] NOT NULL,
)
with two different clustered indexes (mydate in this case):
CREATE CLUSTERED INDEX CI_mydate on vGetVisits(mydate)
GO
Table was populated in the following way
SET NOCOUNT ON
GO
INSERT INTO dbo.vGetVisits(id, mydate)
VALUES (1, '01/01/1800')
GO
DECLARE @i bigint
SET @i=2
DECLARE @MaxRows bigint
SET @MaxRows=13001
WHILE @i<@MaxRows
BEGIN
INSERT INTO dbo.vGetVisits(id, mydate)
VALUES (@i, DATEADD(day,FLOOR(RAND()*(3)),(SELECT MAX(mydate) FROM dbo.vGetVisits)))
SET @i=@i+1
END
For a multimillion row case INSERT was changed in a such way that 0-20 minutes entries were randomly added.
All solutions were carefully wrapped up in the following code
SET NOCOUNT ON
GO
DECLARE @StartDate DATETIME
SET @StartDate = GETDATE()
--- Code goes here
PRINT 'Total milliseconds: ' + CONVERT(varchar, DATEDIFF(ms, @StartDate, GETDATE()))
Actual codes tested (in no particular order):
Ypercube's DATEDIFF solution (YPC, DATEDIFF)
DECLARE @cd TABLE
( TheDate datetime PRIMARY KEY,
Qualify INT NOT NULL
);
DECLARE
@TheDate DATETIME,
@Qualify INT = 0,
@PreviousCheckDate DATETIME = '1799-01-01 00:00:00'
DECLARE c CURSOR
LOCAL STATIC FORWARD_ONLY READ_ONLY
FOR
SELECT
mydate
FROM
(SELECT
RowNum = ROW_NUMBER() OVER(PARTITION BY cast(mydate as date) ORDER BY mydate)
, mydate
FROM
dbo.vGetVisits) Actions
WHERE
RowNum = 1
ORDER BY
mydate;
OPEN c ;
FETCH NEXT FROM c INTO @TheDate ;
WHILE @@FETCH_STATUS = 0
BEGIN
SET @Qualify = CASE WHEN DATEDIFF(day, @PreviousCheckDate, @Thedate) > 90 THEN 1 ELSE 0 END ;
IF @Qualify=1
BEGIN
INSERT @cd (TheDate, Qualify)
SELECT @TheDate, @Qualify ;
SET @PreviousCheckDate=@TheDate
END
FETCH NEXT FROM c INTO @TheDate ;
END
CLOSE c;
DEALLOCATE c;
SELECT TheDate
FROM @cd
ORDER BY TheDate ;
Ypercube's DATEADD solution (YPC, DATEADD)
DECLARE @cd TABLE
( TheDate datetime PRIMARY KEY,
Qualify INT NOT NULL
);
DECLARE
@TheDate DATETIME,
@Next_Date DATETIME,
@Interesting_Date DATETIME,
@Qualify INT = 0
DECLARE c CURSOR
LOCAL STATIC FORWARD_ONLY READ_ONLY
FOR
SELECT
[mydate]
FROM [test].[dbo].[vGetVisits]
ORDER BY mydate
;
OPEN c ;
FETCH NEXT FROM c INTO @TheDate ;
SET @Interesting_Date=@TheDate
INSERT @cd (TheDate, Qualify)
SELECT @TheDate, @Qualify ;
WHILE @@FETCH_STATUS = 0
BEGIN
IF @TheDate>DATEADD(DAY, 90, @Interesting_Date)
BEGIN
INSERT @cd (TheDate, Qualify)
SELECT @TheDate, @Qualify ;
SET @Interesting_Date=@TheDate;
END
FETCH NEXT FROM c INTO @TheDate;
END
CLOSE c;
DEALLOCATE c;
SELECT TheDate
FROM @cd
ORDER BY TheDate ;
Paul White's solution (PW)
;WITH CTE AS
(
SELECT TOP (1)
T.[mydate]
FROM dbo.vGetVisits AS T
ORDER BY
T.[mydate]
UNION ALL
SELECT
SQ1.[mydate]
FROM
(
SELECT
T.[mydate],
rn = ROW_NUMBER() OVER (
ORDER BY T.[mydate])
FROM CTE
JOIN dbo.vGetVisits AS T
ON T.[mydate] > DATEADD(DAY, 90, CTE.[mydate])
) AS SQ1
WHERE
SQ1.rn = 1
)
SELECT
CTE.[mydate]
FROM CTE
OPTION (MAXRECURSION 0);
My DATEADD solution (PN, DATEADD)
DECLARE @cd TABLE
( TheDate datetime PRIMARY KEY
);
DECLARE @TheDate DATETIME
SET @TheDate=(SELECT MIN(mydate) as mydate FROM [dbo].[vGetVisits])
WHILE (@TheDate IS NOT NULL)
BEGIN
INSERT @cd (TheDate) SELECT @TheDate;
SET @TheDate=(
SELECT MIN(mydate) as mydate
FROM [dbo].[vGetVisits]
WHERE mydate>DATEADD(DAY, 90, @TheDate)
)
END
SELECT TheDate
FROM @cd
ORDER BY TheDate ;
My DATEDIFF Solution (PN, DATEDIFF)
DECLARE @MinDate DATETIME;
SET @MinDate=(SELECT MIN(mydate) FROM dbo.vGetVisits);
;WITH gaps AS
(
SELECT
t1.[date]
, t1.[MaxDiff]
, SUM(t1.[MaxDiff]) OVER (ORDER BY t1.[date]) AS [Running Total]
FROM
(
SELECT
mydate AS [date]
, DATEDIFF(day,LAG(mydate, 1, mydate) OVER (ORDER BY mydate) , mydate) AS [MaxDiff]
FROM
(SELECT
RowNum = ROW_NUMBER() OVER(PARTITION BY cast(mydate as date) ORDER BY mydate)
, mydate
FROM dbo.vGetVisits
) Actions
WHERE RowNum = 1
) t1
)
SELECT [date]
FROM gaps t2
WHERE
( ([Running Total])%91 - ([Running Total]- [MaxDiff])%91 <0 )
OR
( [MaxDiff] > 90)
OR
([date]=@MinDate)
ORDER BY [date]
I am using SQL Server 2012, so apologies to Mikael Eriksson, but his code won't be tested here. I would still expect his solutions with DATADIFF and DATEADD to return different values on some datasets.
And the actual results are:
