| Author |
Topic  |
|
|
SwePeso
Patron Saint of Lost Yaks
Sweden
29138 Posts |
Posted - 08/25/2007 : 17:57:30
|
Here is an algorithm that works for both SQL Server 2005 and SQL Server 2008 with full/highest compatibility mode for both.-- prepare sample data
create table #test(
a int,
d1 datetime,
d2 datetime)
insert into #test
select 1, '20070101', '20070301'
union all
select 1, '20070501', '20070901'
union all
select 2, '20070101', '20070601'
union all
select 2, '20070301', '20070801'
-- Stage the data
;WITH yak1 (a, d1, d2, recid)
as (
select a,
d1,
d2,
row_number() over (partition by a order by d1) as recid
from #test
), yak2 (a, start, stop, recid, grp)
as (
select a,
d1,
d2,
recid,
0
from yak1
where recid = 1
union all
select y1.a,
y1.d1,
CASE
WHEN y1.d2 < y2.stop THEN y2.stop
ELSE y1.d2
END,
y1.recid,
case
when y2.stop < y1.d1 then y2.grp + 1
else y2.grp
end
from yak2 as y2
inner join yak1 as y1 on y1.a = y2.a
where y1.recid = y2.recid + 1
)
-- Show the expected output
select a,
min(start),
max(stop)
from yak2
group by a,
grp
order by a,
2
OPTION (MAXRECURSION 0)
-- Clean up
drop table #test
E 12°55'05.25" N 56°04'39.16" |
Edited by - SwePeso on 08/30/2007 05:20:16
|
|
|
SwePeso
Patron Saint of Lost Yaks
Sweden
29138 Posts |
Posted - 08/26/2007 : 12:18:04
|
Get the "missing" date ranges?-- Prepare sample data
DECLARE @Sample TABLE (PlantID INT, FromDate DATETIME, ToDate DATETIME)
INSERT @Sample
SELECT 1, '20050830', '20060918' UNION ALL
SELECT 1, '20060918', '20061201' UNION ALL
SELECT 2, '20070101', '20070228' UNION ALL
SELECT 2, '20070301', '20070331' UNION ALL
SELECT 1, '20070601', '20071231' UNION ALL
SELECT 1, '20070201', '20070515'
select * from @sample
-- Stage the data
;WITH Yak1 (PlantID, FromDate, ToDate, RecID)
AS (
SELECT PlantID,
FromDate,
ToDate,
ROW_NUMBER() OVER (PARTITION BY PlantID ORDER BY FromDate) AS RecID
FROM @Sample
), Yak2 (PlantID, FromDate, ToDate, RecID, Grp)
AS (
SELECT PlantID,
FromDate,
ToDate,
RecID,
0
FROM Yak1
WHERE RecID = 1
UNION ALL
SELECT y1.PlantID,
y1.FromDate,
CASE
WHEN y1.ToDate < y2.ToDate THEN y2.ToDate
ELSE y1.ToDate
END,
y1.RecID,
CASE
WHEN y2.ToDate < y1.FromDate THEN y2.Grp + 1
ELSE y2.Grp
END
FROM Yak2 AS y2
INNER JOIN Yak1 AS y1 ON y1.PlantID = y2.PlantID
WHERE y1.RecID = y2.RecID + 1
), Yak3 (PlantID, FromDate, ToDate, RowID)
AS (
SELECT PlantID,
MIN(FromDate),
MAX(ToDate),
ROW_NUMBER() OVER (PARTITION BY PlantID ORDER BY MIN(FromDate))
FROM Yak2
GROUP BY PlantID,
Grp
), Yak4 (PlantID, FromDate, ToDate)
AS (
SELECT t1.PlantID,
t1.ToDate,
t2.FromDate
FROM Yak3 AS t1
INNER JOIN Yak3 AS t2 ON t2.PlantID = t1.PlantID AND t2.RowID - 1 = t1.RowID
)
-- Show the expected output
select * from yak4
E 12°55'05.25" N 56°04'39.16" |
Edited by - SwePeso on 08/30/2007 05:27:51 |
 |
|
|
jsmith8858
Dr. Cross Join
USA
7423 Posts |
Posted - 08/30/2007 : 15:03:38
|
Here's one for all versions of SQL, and it seems to be much more efficient than the SQL 2005/2008 versions. (Using Peso's sample DDL)
select StartDates.a, StartDates.date as d1, min(EndDates.Date) as d2
from
(
select t.a, t.d1 as Date
from #test t
left outer join #test t2 on t.a=t2.a and t.d1 > t2.d1 and t.d1 <= t2.d2
where t2.a is null
) StartDates
inner join
(
select t.a, t.d2 as Date
from #test t
left outer join #test t2 on t.a=t2.a and t.d2 >= t2.d1 and t.d2 < t2.d2
where t2.a is null
) EndDates
on
StartDates.a = EndDates.a and StartDates.Date <= EndDates.Date
group by
StartDates.a, StartDates.date
Hopefully it is somewhat easy to follow. The key is to eliminate dates that are contained within date ranges since they should not figure into a start or end of a range. That's really about it, and that's what the two derived tables are doing.
- Jeff http://weblogs.sqlteam.com/JeffS
|
Edited by - jsmith8858 on 08/30/2007 15:34:20 |
 |
|
|
jsmith8858
Dr. Cross Join
USA
7423 Posts |
Posted - 08/30/2007 : 15:16:26
|
Almost the same deal, this time for the "missing" date ranges, runs on all versions of SQL and seems to be quite more efficient that the sql 2005/2008 solution given:
select fromDates.plantID, fromDates.Date as FromDate, min(ToDates.Date) as ToDate
from
(
select s1.plantID, s1.ToDate as Date
from @sample s1
left outer join @sample s2 on s1.plantID = s2.plantID and
s1.ToDate >= s2.FromDate and
s1.ToDate < s2.ToDate
where s2.plantID is null
) fromDates
inner join
(
select s1.plantID, s1.FromDate as Date
from @sample s1
left outer join @sample s2 on s1.plantID = s2.plantID and
s1.FromDate > s2.FromDate and
s1.FromDate <= s2.ToDate
where s2.plantID is null
) ToDates
on
fromDates.PlantID = ToDates.PlantID and FromDates.Date < ToDates.Date
group by
fromDates.plantID, fromDates.Date
- Jeff http://weblogs.sqlteam.com/JeffS
|
 |
|
|
SwePeso
Patron Saint of Lost Yaks
Sweden
29138 Posts |
Posted - 08/30/2007 : 15:29:23
|
This testdata failsINSERT @Sample
SELECT 1, '20070101', '20070501' UNION ALL
SELECT 1, '20070501', '20070901' And thisINSERT @Sample
SELECT 1, '20070101', '20070501' UNION ALL
SELECT 1, '20070701', '20070901' UNION ALL
SELECT 1, '20070601', '20070801' UNION ALL
SELECT 1, '20070201', '20070301' UNION ALL
SELECT 1, '20070401', '20070601'
/*
1 2 3 4 5 6 7 8 9
|-------|
|-|
|---|
|---|
|---|
*/
E 12°55'05.25" N 56°04'39.16" |
Edited by - SwePeso on 08/30/2007 16:14:06 |
 |
|
|
jsmith8858
Dr. Cross Join
USA
7423 Posts |
Posted - 08/30/2007 : 15:34:45
|
Thanks, Peso!
Is that sample data for the first one or the second one?
For the first one -- fixed easily as noted in red.
As for the second one -- I am confused. based on your sample data, what should that return? It returns the same thing as yours -- no results, since there are no "missing date ranges".
Could you do me a favor and be a little more specific? Thanks!!
- Jeff
http://weblogs.sqlteam.com/JeffS
|
Edited by - jsmith8858 on 08/30/2007 15:43:05 |
 |
|
|
SwePeso
Patron Saint of Lost Yaks
Sweden
29138 Posts |
Posted - 08/30/2007 : 16:11:25
|
Still reverse engineering your "collapse" algorithm.
Now I understand it  Try this sample data, JeffINSERT #Test
SELECT 1, '20070401', '20070701' UNION ALL
SELECT 1, '20070501', '20070601' UNION ALL
SELECT 1, '20070101', '20070301' UNION ALL
SELECT 1, '20070201', '20070501' UNION ALL
SELECT 2, '20070101', '20070301' UNION ALL
SELECT 2, '20070501', '20070901' UNION ALL
SELECT 2, '20070101', '20070601' UNION ALL
SELECT 2, '20070301', '20070801'
/*
1 2 3 4 5 6 7 8 9
*****************
|-----|
|-|
|---|
|-----|
*****************
|---|
|-------|
|---------|
|---------|
*****************
*/
E 12°55'05.25" N 56°04'39.16" |
Edited by - SwePeso on 08/30/2007 16:19:00 |
 |
|
|
jsmith8858
Dr. Cross Join
USA
7423 Posts |
Posted - 08/30/2007 : 16:22:42
|
Same result as yours, one row returned ...
The algorithm is very simple:
Let X = all start dates NOT contained in a date range Let Y = all end dates NOT contained in a date range
So, X = all of our starting points.
For each X, find the Y with the lowest date greater than X's date that has the same ID
That's it!
I know that these days I mostly yell at posters and say "presentation layer!" and things like that, but I actually am not too shabby at writing T-SQL ... I just don't have as much time or patience to write out answers lately considering that we are never given DDL or sample data ... plus we have you around!
- Jeff http://weblogs.sqlteam.com/JeffS
|
Edited by - jsmith8858 on 08/30/2007 16:31:41 |
 |
|
|
SwePeso
Patron Saint of Lost Yaks
Sweden
29138 Posts |
Posted - 08/30/2007 : 16:31:50
|
Must been a copy and paste error. Now I get the correct result. Well done!
I like having these discussion forth and back and trying to find flaws in each other's algorithms.
E 12°55'05.25" N 56°04'39.16" |
Edited by - SwePeso on 08/30/2007 16:39:38 |
 |
|
|
SwePeso
Patron Saint of Lost Yaks
Sweden
29138 Posts |
Posted - 08/30/2007 : 16:47:47
|
Run some tests in SQL Profiler for test data posted last. CPU DURATION READS WRITES
Jeff 0 1 210 0
Peso 0 1 162 0
E 12°55'05.25" N 56°04'39.16" |
 |
|
|
jsmith8858
Dr. Cross Join
USA
7423 Posts |
Posted - 08/30/2007 : 16:57:30
|
quote:
I like having these discussion forth and back and trying to find flaws in each other's algorithms.
Agreed! That's pretty much why I enjoy coming to this site.
It might be interesting to try the different methods with a decent amount of indexed data to see how things look.
- Jeff http://weblogs.sqlteam.com/JeffS
|
Edited by - jsmith8858 on 08/30/2007 16:58:17 |
 |
|
|
SwePeso
Patron Saint of Lost Yaks
Sweden
29138 Posts |
Posted - 08/30/2007 : 17:18:25
|
Tests done!CREATE TABLE #Test
(
a INT,
d1 DATETIME,
d2 DATETIME
)
GO
INSERT #Test
(
a,
d1
)
SELECT TOP 1000 ABS(CHECKSUM(NEWID())) % 2,
25000 + ABS(CHECKSUM(NEWID())) % 25000
FROM syscolumns AS c1
CROSS JOIN syscolumns AS c2
GO
UPDATE #Test
SET d2 = DATEADD(DAY, ABS(CHECKSUM(NEWID())) % 30, d1)
GO
CREATE CLUSTERED INDEX ix_test on #Test (a, d1, d2)
GO Average result from profiler for 20 runs CPU DUR READS WRITES
Jeff 19406 19998 551549 0
Peso 844 916 19605 0
E 12°55'05.25" N 56°04'39.16" |
Edited by - SwePeso on 08/31/2007 01:16:12 |
 |
|
|
SwePeso
Patron Saint of Lost Yaks
Sweden
29138 Posts |
Posted - 08/30/2007 : 17:46:37
|
New tests!CREATE TABLE #Test
(
a INT,
d1 DATETIME,
d2 DATETIME
)
GO
INSERT #Test
(
a,
d1
)
SELECT TOP 2000 ABS(CHECKSUM(NEWID())) % 5,
25000 + ABS(CHECKSUM(NEWID())) % 25000
FROM syscolumns AS c1
CROSS JOIN syscolumns AS c2
GO
UPDATE #Test
SET d2 = DATEADD(DAY, ABS(CHECKSUM(NEWID())) % 30, d1)
GO
CREATE CLUSTERED INDEX ix_test ON #Test (a, d1, d2)
GO Average result from profiler for 10 runs CPU DUR READS WRITES
Jeff 32219 33920 905521 1
Peso 2266 2368 36630 0
E 12°55'05.25" N 56°04'39.16" |
Edited by - SwePeso on 08/31/2007 01:11:17 |
 |
|
|
jsmith8858
Dr. Cross Join
USA
7423 Posts |
Posted - 08/30/2007 : 19:01:27
|
Wow! That's a huge difference! So much for the initial execution plan. I'll have to investigate this when I get a chance.
- Jeff http://weblogs.sqlteam.com/JeffS
|
 |
|
|
SwePeso
Patron Saint of Lost Yaks
Sweden
29138 Posts |
Posted - 08/31/2007 : 01:28:32
|
To both our defences, the tests show that the time and reads scales linear, not exponential 
-- 1k CPU DURATION READS WRITES
Jeff 0 1 210 0
Peso 0 1 162 0
Peso 2 0 77 31 0
-- 2k CPU DURATION READS WRITES
Jeff 19,406 19,998 551,549 0
Peso 844 916 19,605 0
Peso 2 15 72 56 0
-- 3k CPU DURATION READS WRITES
Jeff 39,359 41,557 956,584 3
Peso 5,172 5,381 62,984 0
Peso 2 16 81 91 0
-- 4k CPU DURATION READS WRITES
Jeff 52,235 55,708 1,237,692 0
Peso 9,172 9,492 99,976 0
Peso 2 31 76 106 0
-- 5k CPU DURATION READS WRITES
Jeff 95,438 100,383 2,202,898 0
Peso 13,953 14,985 140,024 0
Peso 2 31 78 166 0
-- 6k CPU DURATION READS WRITES
Jeff 107,328 112,983 2,420,560 0
Peso 20,968 23,352 192,023 1
Peso 2 31 118 233 0
-- 10k CPU DURATION READS WRITES
Peso 2 48 97 290 0
-- 100k CPU DURATION READS WRITES
Peso 2 140 273 3,002 0
--1000k CPU DURATION READS WRITES
Peso 2 1,593 1,598 15,322 0
E 12°55'05.25" N 56°04'39.16" |
Edited by - SwePeso on 04/07/2011 13:06:59 |
 |
|
|
SwePeso
Patron Saint of Lost Yaks
Sweden
29138 Posts |
|
|
Michael Valentine Jones
Yak DBA Kernel (pronounced Colonel)
USA
6997 Posts |
Posted - 05/13/2008 : 11:55:41
|
Hate to be dense (stupid) here, but can you give a little more explanation about the problem you are trying to solve?
Before I read through all the code, it would be helpful to know what it is supposed to do.
CODO ERGO SUM |
 |
|
|
SwePeso
Patron Saint of Lost Yaks
Sweden
29138 Posts |
Posted - 05/13/2008 : 12:53:35
|
With collapsed date ranges, the purpose is the merge overlapping date ranges as a few possible.1 2 3 4 5 6 7 8 9
*****************
|-----|
|-|
|---|
|-----|
***************** See for example these four date ranges. They can be merged as one because they overlap. Result should be one date range 1-7.
In this example, the four ranges can only be merged into two since there is a gap.1 2 3 4 5 6 7 8 9
*****************
|----|
|--|
|---|
|---|
*****************Result is 1-4 and 5-8. This example above also produces a gap between 4-5.
This is what the blog post is about, how to efficiently and fast get the wanted results. Either the collapsed (merged) date ranges, or the missing (gaps) date ranges.
E 12°55'05.25" N 56°04'39.16" |
 |
|
|
lmontgomery74
Starting Member
1 Posts |
Posted - 12/22/2010 : 18:18:54
|
| Great post guys... Thanks! |
 |
|
|
SwePeso
Patron Saint of Lost Yaks
Sweden
29138 Posts |
Posted - 04/07/2011 : 11:32:32
|
Here is an algorithm that will work not only with days only, but also with millisecond precision and several years in the intervals.;WITH cteSource(UserName, theTime, theGrp)
AS (
SELECT u.UserName,
u.theTime,
(DENSE_RANK() OVER (PARTITION BY u.UserName ORDER BY u.theTime) - 1) / 2 AS theGrp -- Create artificial groups depending
-- on the times. Omit duplicates.
FROM (
-- Get all usernames and first start time and last endtime
SELECT a AS UserName,
MIN(d1) AS minStartTime,
MAX(d2) AS maxEndTIme
FROM #Test
GROUP BY a
) AS usr
-- This only get the intermediate gaps
OUTER APPLY (
SELECT s.StartTime,
e.EndTime
FROM (
-- Get all starttimes sorted
SELECT s.d1 AS StartTime,
ROW_NUMBER() OVER (ORDER BY s.d1) AS SeqID
FROM #Test AS s
WHERE s.a = usr.UserName
) AS s
INNER JOIN (
-- Get all endtimes sorted
SELECT s.d2 AS EndTime,
ROW_NUMBER() OVER (ORDER BY s.d2) + 1 AS SeqID
FROM #Test AS s
WHERE s.a = usr.UserName
) AS e ON e.SeqID = s.SeqID -- Match previous end time time against this starttime
WHERE e.EndTime < s.StartTime -- If EndTime is less than starttime, this is a gap
) AS bnd
UNPIVOT (
-- Since the bnd table only get the intermediate gaps,
-- add the first start time and last end time to the intervals
theTime
FOR theCol IN (usr.minStartTime, usr.maxEndTime, bnd.StartTime, bnd.EndTime)
) AS u
)
-- For each artifical group, display the earliest and latest time
SELECT UserName,
MIN(theTime) AS StartTime,
MAX(theTime) AS EndTime
FROM cteSource
GROUP BY UserName,
theGrp;
N 56°04'39.26" E 12°55'05.63" |
Edited by - SwePeso on 04/07/2011 12:52:00 |
 |
|
| |
Topic  |
|
|
|