SQL Server Forums
Profile | Register | Active Topics | Members | Search | Forum FAQ
 
Register Now and get your question answered!
Username:
Password:
Save Password
Forgot your Password?

 All Forums
 General SQL Server Forums
 Script Library
 Collapse date ranges
 New Topic  Reply to Topic
 Printer Friendly
Author Previous Topic Topic Next Topic  

SwePeso
Patron Saint of Lost Yaks

Sweden
30281 Posts

Posted - 08/25/2007 :  17:57:30  Show Profile  Visit SwePeso's Homepage  Reply with Quote
Here is an algorithm that works for both SQL Server 2005 and SQL Server 2008 with full/highest compatibility mode for both.
-- prepare sample data
create table #test(
   a int,
   d1 datetime,
   d2 datetime)

insert into #test
select 1, '20070101', '20070301'
union all
select 1, '20070501', '20070901'
union all
select 2, '20070101', '20070601'
union all
select 2, '20070301', '20070801'

-- Stage the data
;WITH yak1 (a, d1, d2, recid)
as (
	select	a,
		d1,
		d2,
		row_number() over (partition by a order by d1) as recid
	from	#test
), yak2 (a, start, stop, recid, grp)
as (
	select	a,
		d1,
		d2,
		recid,
		0
	from	yak1
	where	recid = 1

	union all

	select		y1.a,
			y1.d1,
			CASE
				WHEN y1.d2 < y2.stop THEN y2.stop
				ELSE y1.d2
			END,
			y1.recid,
			case
				when y2.stop < y1.d1 then y2.grp + 1
				else y2.grp
			end
	from		yak2 as y2
	inner join	yak1 as y1 on y1.a = y2.a
	where		y1.recid = y2.recid + 1
)

-- Show the expected output
select		a,
		min(start),
		max(stop)
from		yak2
group by	a,
		grp
order by	a,
		2
OPTION		(MAXRECURSION 0)

-- Clean up
drop table #test


E 12°55'05.25"
N 56°04'39.16"

Edited by - SwePeso on 08/30/2007 05:20:16

SwePeso
Patron Saint of Lost Yaks

Sweden
30281 Posts

Posted - 08/26/2007 :  12:18:04  Show Profile  Visit SwePeso's Homepage  Reply with Quote
Get the "missing" date ranges?
-- Prepare sample data
DECLARE	@Sample TABLE (PlantID INT, FromDate DATETIME, ToDate DATETIME)

INSERT	@Sample
SELECT	1, '20050830', '20060918' UNION ALL
SELECT	1, '20060918', '20061201' UNION ALL
SELECT	2, '20070101', '20070228' UNION ALL
SELECT	2, '20070301', '20070331' UNION ALL
SELECT	1, '20070601', '20071231' UNION ALL
SELECT	1, '20070201', '20070515'

select * from @sample

-- Stage the data
;WITH Yak1 (PlantID, FromDate, ToDate, RecID)
AS (
	SELECT	PlantID,
		FromDate,
		ToDate,
		ROW_NUMBER() OVER (PARTITION BY PlantID ORDER BY FromDate) AS RecID
	FROM	@Sample
), Yak2 (PlantID, FromDate, ToDate, RecID, Grp)
AS (
	SELECT	PlantID,
		FromDate,
		ToDate,
		RecID,
		0
	FROM	Yak1
	WHERE	RecID = 1

	UNION ALL

	SELECT		y1.PlantID,
			y1.FromDate,
			CASE
				WHEN y1.ToDate < y2.ToDate THEN y2.ToDate
				ELSE y1.ToDate
			END,
			y1.RecID,
			CASE
				WHEN y2.ToDate < y1.FromDate THEN y2.Grp + 1
				ELSE y2.Grp
			END
	FROM		Yak2 AS y2
	INNER JOIN	Yak1 AS y1 ON y1.PlantID = y2.PlantID
	WHERE		y1.RecID = y2.RecID + 1
), Yak3 (PlantID, FromDate, ToDate, RowID)
AS (
	SELECT		PlantID,
			MIN(FromDate),
			MAX(ToDate),
			ROW_NUMBER() OVER (PARTITION BY PlantID ORDER BY MIN(FromDate))
	FROM		Yak2
	GROUP BY	PlantID,
			Grp
), Yak4 (PlantID, FromDate, ToDate)
AS (
	SELECT		t1.PlantID,
			t1.ToDate,
			t2.FromDate
	FROM		Yak3 AS t1
	INNER JOIN	Yak3 AS t2 ON t2.PlantID = t1.PlantID AND t2.RowID - 1 = t1.RowID
)

-- Show the expected output
select * from yak4



E 12°55'05.25"
N 56°04'39.16"

Edited by - SwePeso on 08/30/2007 05:27:51
Go to Top of Page

jsmith8858
Dr. Cross Join

USA
7423 Posts

Posted - 08/30/2007 :  15:03:38  Show Profile  Visit jsmith8858's Homepage  Reply with Quote
Here's one for all versions of SQL, and it seems to be much more efficient than the SQL 2005/2008 versions. (Using Peso's sample DDL)


select StartDates.a, StartDates.date as d1, min(EndDates.Date) as d2
from
(	
	select t.a, t.d1 as Date
	from #test t
	left outer join #test t2 on t.a=t2.a and t.d1 > t2.d1 and t.d1 <= t2.d2
	where t2.a is null
) StartDates
inner join
(
	select t.a, t.d2 as Date
    from #test t
	left outer join #test t2 on t.a=t2.a and t.d2 >= t2.d1 and t.d2 < t2.d2
	where t2.a is null
) EndDates 
on
  StartDates.a = EndDates.a and StartDates.Date <= EndDates.Date
group by
	StartDates.a, StartDates.date


Hopefully it is somewhat easy to follow. The key is to eliminate dates that are contained within date ranges since they should not figure into a start or end of a range. That's really about it, and that's what the two derived tables are doing.

- Jeff
http://weblogs.sqlteam.com/JeffS

Edited by - jsmith8858 on 08/30/2007 15:34:20
Go to Top of Page

jsmith8858
Dr. Cross Join

USA
7423 Posts

Posted - 08/30/2007 :  15:16:26  Show Profile  Visit jsmith8858's Homepage  Reply with Quote
Almost the same deal, this time for the "missing" date ranges, runs on all versions of SQL and seems to be quite more efficient that the sql 2005/2008 solution given:


select fromDates.plantID, fromDates.Date as FromDate, min(ToDates.Date) as ToDate
from
(
	select s1.plantID, s1.ToDate as Date
	from @sample s1
	left outer join @sample s2 on s1.plantID = s2.plantID and 
                                      s1.ToDate >= s2.FromDate and 
                                      s1.ToDate < s2.ToDate
	where s2.plantID is null
) fromDates
inner join
(
	select s1.plantID, s1.FromDate as Date
	from @sample s1
	left outer join @sample s2 on s1.plantID = s2.plantID and 
                                      s1.FromDate > s2.FromDate and 
                                      s1.FromDate <= s2.ToDate 
	where s2.plantID is null
) ToDates
on
  fromDates.PlantID = ToDates.PlantID and FromDates.Date < ToDates.Date
group by 
  fromDates.plantID, fromDates.Date


- Jeff
http://weblogs.sqlteam.com/JeffS
Go to Top of Page

SwePeso
Patron Saint of Lost Yaks

Sweden
30281 Posts

Posted - 08/30/2007 :  15:29:23  Show Profile  Visit SwePeso's Homepage  Reply with Quote
This testdata fails
INSERT	@Sample
SELECT	1, '20070101', '20070501' UNION ALL
SELECT	1, '20070501', '20070901'
And this
INSERT	@Sample
SELECT	1, '20070101', '20070501' UNION ALL
SELECT	1, '20070701', '20070901' UNION ALL
SELECT	1, '20070601', '20070801' UNION ALL
SELECT	1, '20070201', '20070301' UNION ALL
SELECT	1, '20070401', '20070601'
/*
1 2 3 4 5 6 7 8 9
|-------|
  |-|
      |---|
          |---|
            |---|
*/


E 12°55'05.25"
N 56°04'39.16"

Edited by - SwePeso on 08/30/2007 16:14:06
Go to Top of Page

jsmith8858
Dr. Cross Join

USA
7423 Posts

Posted - 08/30/2007 :  15:34:45  Show Profile  Visit jsmith8858's Homepage  Reply with Quote
Thanks, Peso!

Is that sample data for the first one or the second one?

For the first one -- fixed easily as noted in red.

As for the second one -- I am confused. based on your sample data, what should that return? It returns the same thing as yours -- no results, since there are no "missing date ranges".

Could you do me a favor and be a little more specific? Thanks!!

- Jeff

http://weblogs.sqlteam.com/JeffS

Edited by - jsmith8858 on 08/30/2007 15:43:05
Go to Top of Page

SwePeso
Patron Saint of Lost Yaks

Sweden
30281 Posts

Posted - 08/30/2007 :  16:11:25  Show Profile  Visit SwePeso's Homepage  Reply with Quote
Still reverse engineering your "collapse" algorithm.

Now I understand it
Try this sample data, Jeff
INSERT	#Test
SELECT	1, '20070401', '20070701' UNION ALL
SELECT	1, '20070501', '20070601' UNION ALL
SELECT	1, '20070101', '20070301' UNION ALL
SELECT	1, '20070201', '20070501' UNION ALL
SELECT	2, '20070101', '20070301' UNION ALL
SELECT	2, '20070501', '20070901' UNION ALL
SELECT	2, '20070101', '20070601' UNION ALL
SELECT	2, '20070301', '20070801'
/*
1 2 3 4 5 6 7 8 9
*****************
      |-----|
        |-|
|---|          
  |-----|
*****************
|---|
        |-------|
|---------|
    |---------|
*****************
*/


E 12°55'05.25"
N 56°04'39.16"

Edited by - SwePeso on 08/30/2007 16:19:00
Go to Top of Page

jsmith8858
Dr. Cross Join

USA
7423 Posts

Posted - 08/30/2007 :  16:22:42  Show Profile  Visit jsmith8858's Homepage  Reply with Quote
Same result as yours, one row returned ...

The algorithm is very simple:

Let X = all start dates NOT contained in a date range
Let Y = all end dates NOT contained in a date range

So, X = all of our starting points.

For each X, find the Y with the lowest date greater than X's date that has the same ID

That's it!

I know that these days I mostly yell at posters and say "presentation layer!" and things like that, but I actually am not too shabby at writing T-SQL ... I just don't have as much time or patience to write out answers lately considering that we are never given DDL or sample data ... plus we have you around!


- Jeff
http://weblogs.sqlteam.com/JeffS

Edited by - jsmith8858 on 08/30/2007 16:31:41
Go to Top of Page

SwePeso
Patron Saint of Lost Yaks

Sweden
30281 Posts

Posted - 08/30/2007 :  16:31:50  Show Profile  Visit SwePeso's Homepage  Reply with Quote
Must been a copy and paste error. Now I get the correct result.
Well done!

I like having these discussion forth and back and trying to find flaws in each other's algorithms.


E 12°55'05.25"
N 56°04'39.16"

Edited by - SwePeso on 08/30/2007 16:39:38
Go to Top of Page

SwePeso
Patron Saint of Lost Yaks

Sweden
30281 Posts

Posted - 08/30/2007 :  16:47:47  Show Profile  Visit SwePeso's Homepage  Reply with Quote
Run some tests in SQL Profiler for test data posted last.
	CPU	DURATION	READS	WRITES
Jeff	0	1		210	0
Peso	0	1		162	0



E 12°55'05.25"
N 56°04'39.16"
Go to Top of Page

jsmith8858
Dr. Cross Join

USA
7423 Posts

Posted - 08/30/2007 :  16:57:30  Show Profile  Visit jsmith8858's Homepage  Reply with Quote
quote:

I like having these discussion forth and back and trying to find flaws in each other's algorithms.



Agreed! That's pretty much why I enjoy coming to this site.

It might be interesting to try the different methods with a decent amount of indexed data to see how things look.

- Jeff
http://weblogs.sqlteam.com/JeffS

Edited by - jsmith8858 on 08/30/2007 16:58:17
Go to Top of Page

SwePeso
Patron Saint of Lost Yaks

Sweden
30281 Posts

Posted - 08/30/2007 :  17:18:25  Show Profile  Visit SwePeso's Homepage  Reply with Quote
Tests done!
CREATE TABLE	#Test
		(
			a INT,
			d1 DATETIME,
			d2 DATETIME
		)
GO
INSERT		#Test
		(
			a,
			d1
		)
SELECT TOP 1000	ABS(CHECKSUM(NEWID())) % 2,
		25000 + ABS(CHECKSUM(NEWID())) % 25000
FROM		syscolumns AS c1
CROSS JOIN	syscolumns AS c2
GO
UPDATE	#Test
SET	d2 = DATEADD(DAY, ABS(CHECKSUM(NEWID())) % 30, d1)
GO
CREATE CLUSTERED INDEX ix_test on #Test (a, d1, d2)
GO
Average result from profiler for 20 runs
	CPU	DUR	READS	WRITES
Jeff	19406	19998	551549	0
Peso	  844	  916	 19605	0


E 12°55'05.25"
N 56°04'39.16"

Edited by - SwePeso on 08/31/2007 01:16:12
Go to Top of Page

SwePeso
Patron Saint of Lost Yaks

Sweden
30281 Posts

Posted - 08/30/2007 :  17:46:37  Show Profile  Visit SwePeso's Homepage  Reply with Quote
New tests!
CREATE TABLE	#Test
		(
			a INT,
			d1 DATETIME,
			d2 DATETIME
		)
GO
INSERT		#Test
		(
			a,
			d1
		)
SELECT TOP 2000	ABS(CHECKSUM(NEWID())) % 5,
		25000 + ABS(CHECKSUM(NEWID())) % 25000
FROM		syscolumns AS c1
CROSS JOIN	syscolumns AS c2
GO
UPDATE	#Test
SET	d2 = DATEADD(DAY, ABS(CHECKSUM(NEWID())) % 30, d1)
GO
CREATE CLUSTERED INDEX ix_test ON #Test (a, d1, d2)
GO
Average result from profiler for 10 runs
	CPU	DUR	READS	WRITES
Jeff	32219	33920	905521	1	
Peso	 2266	 2368	 36630	0


E 12°55'05.25"
N 56°04'39.16"

Edited by - SwePeso on 08/31/2007 01:11:17
Go to Top of Page

jsmith8858
Dr. Cross Join

USA
7423 Posts

Posted - 08/30/2007 :  19:01:27  Show Profile  Visit jsmith8858's Homepage  Reply with Quote
Wow! That's a huge difference! So much for the initial execution plan. I'll have to investigate this when I get a chance.

- Jeff
http://weblogs.sqlteam.com/JeffS
Go to Top of Page

SwePeso
Patron Saint of Lost Yaks

Sweden
30281 Posts

Posted - 08/31/2007 :  01:28:32  Show Profile  Visit SwePeso's Homepage  Reply with Quote
To both our defences, the tests show that the time and reads scales linear, not exponential

-- 1k	CPU	DURATION	READS		WRITES
Jeff	    0	     1		    210		0
Peso	    0	     1		    162		0
Peso 2	    0	    77		     31		0

-- 2k	CPU	DURATION	READS		WRITES
Jeff	19,406	19,998		551,549		0
Peso	   844	   916		 19,605		0
Peso 2	    15	    72		     56		0

-- 3k	CPU	DURATION	READS		WRITES
Jeff	39,359	41,557		956,584		3
Peso	 5,172	 5,381		 62,984		0
Peso 2	    16	    81		     91		0

-- 4k	CPU	DURATION	READS		WRITES
Jeff	52,235	55,708		1,237,692	0
Peso	 9,172	 9,492		   99,976	0
Peso 2	    31	    76		      106	0

-- 5k	CPU	DURATION	READS		WRITES
Jeff	95,438	100,383		2,202,898	0
Peso	13,953	 14,985		  140,024	0
Peso 2	    31	     78		      166	0

-- 6k	CPU	DURATION	READS		WRITES
Jeff	107,328	112,983		2,420,560	0
Peso	 20,968	 23,352		  192,023	1
Peso 2	     31	    118		      233	0

-- 10k	CPU	DURATION	READS		WRITES
Peso 2	     48	     97		      290	0

-- 100k	CPU	DURATION	READS		WRITES
Peso 2	    140	    273		    3,002	0

--1000k	CPU	DURATION	READS		WRITES
Peso 2	  1,593	  1,598		   15,322	0


E 12°55'05.25"
N 56°04'39.16"

Edited by - SwePeso on 04/07/2011 13:06:59
Go to Top of Page

SwePeso
Patron Saint of Lost Yaks

Sweden
30281 Posts

Posted - 05/13/2008 :  10:17:37  Show Profile  Visit SwePeso's Homepage  Reply with Quote
Also see
http://weblogs.sqlteam.com/peterl/archive/2008/05/13/Lightning-fast-collapsed-date-ranges-and-missing-date-ranges.aspx



E 12°55'05.25"
N 56°04'39.16"
Go to Top of Page

Michael Valentine Jones
Yak DBA Kernel (pronounced Colonel)

USA
7020 Posts

Posted - 05/13/2008 :  11:55:41  Show Profile  Reply with Quote
Hate to be dense (stupid) here, but can you give a little more explanation about the problem you are trying to solve?

Before I read through all the code, it would be helpful to know what it is supposed to do.






CODO ERGO SUM
Go to Top of Page

SwePeso
Patron Saint of Lost Yaks

Sweden
30281 Posts

Posted - 05/13/2008 :  12:53:35  Show Profile  Visit SwePeso's Homepage  Reply with Quote
With collapsed date ranges, the purpose is the merge overlapping date ranges as a few possible.
1 2 3 4 5 6 7 8 9
*****************
      |-----|
        |-|
|---|
  |-----|
*****************

See for example these four date ranges. They can be merged as one because they overlap.
Result should be one date range 1-7.

In this example, the four ranges can only be merged into two since there is a gap.
1 2 3 4 5 6 7 8 9
*****************
        |----|
           |--|
|---|
  |---|
*****************
Result is 1-4 and 5-8.
This example above also produces a gap between 4-5.

This is what the blog post is about, how to efficiently and fast get the wanted results.
Either the collapsed (merged) date ranges, or the missing (gaps) date ranges.



E 12°55'05.25"
N 56°04'39.16"
Go to Top of Page

lmontgomery74
Starting Member

1 Posts

Posted - 12/22/2010 :  18:18:54  Show Profile  Reply with Quote
Great post guys... Thanks!
Go to Top of Page

SwePeso
Patron Saint of Lost Yaks

Sweden
30281 Posts

Posted - 04/07/2011 :  11:32:32  Show Profile  Visit SwePeso's Homepage  Reply with Quote
Here is an algorithm that will work not only with days only, but also with millisecond precision and several years in the intervals.
;WITH cteSource(UserName, theTime, theGrp)
AS (
	SELECT		u.UserName,
			u.theTime,
			(DENSE_RANK() OVER (PARTITION BY u.UserName ORDER BY u.theTime) - 1) / 2 AS theGrp	-- Create artificial groups depending
														-- on the times. Omit duplicates.
	FROM		(
				-- Get all usernames and first start time and last endtime
				SELECT		a AS UserName,
						MIN(d1) AS minStartTime,
						MAX(d2) AS maxEndTIme
				FROM		#Test
				GROUP BY	a
			) AS usr
			-- This only get the intermediate gaps
	OUTER APPLY	(
				SELECT		s.StartTime,
						e.EndTime
				FROM		(
							-- Get all starttimes sorted
							SELECT	s.d1 AS StartTime,
								ROW_NUMBER() OVER (ORDER BY s.d1) AS SeqID
							FROM	#Test AS s
							WHERE	s.a = usr.UserName
						) AS s
				INNER JOIN	(
							-- Get all endtimes sorted
							SELECT	s.d2 AS EndTime,
								ROW_NUMBER() OVER (ORDER BY s.d2) + 1 AS SeqID
							FROM	#Test AS s
							WHERE	s.a = usr.UserName
						) AS e ON e.SeqID = s.SeqID	-- Match previous end time time against this starttime
				WHERE		e.EndTime < s.StartTime		-- If EndTime is less than starttime, this is a gap
			) AS bnd
	UNPIVOT		(
				-- Since the bnd table only get the intermediate gaps,
				-- add the first start time and last end time to the intervals
				theTime
				FOR theCol IN (usr.minStartTime, usr.maxEndTime, bnd.StartTime, bnd.EndTime)
			) AS u
)
-- For each artifical group, display the earliest and latest time
SELECT		UserName,
		MIN(theTime) AS StartTime,
		MAX(theTime) AS EndTime
FROM		cteSource
GROUP BY	UserName,
		theGrp;


N 56°04'39.26"
E 12°55'05.63"

Edited by - SwePeso on 04/07/2011 12:52:00
Go to Top of Page
  Previous Topic Topic Next Topic  
 New Topic  Reply to Topic
 Printer Friendly
Jump To:
SQL Server Forums © 2000-2009 SQLTeam Publishing, LLC Go To Top Of Page
This page was generated in 0.28 seconds. Powered By: Snitz Forums 2000