SQL Server Forums
Profile | Register | Active Topics | Members | Search | Forum FAQ
 
Register Now and get your question answered!
Username:
Password:
Save Password
Forgot your Password?

 All Forums
 SQL Server 2012 Forums
 Transact-SQL (2012)
 reduce unnecessary records
 New Topic  Reply to Topic
 Printer Friendly
Author Previous Topic Topic Next Topic  

sql_server_dba
Posting Yak Master

167 Posts

Posted - 08/21/2013 :  20:22:22  Show Profile  Reply with Quote
I have a history table which has the records that are not really necessary. Can anyone please help me how to reduce these records?

ItemNumber Start Date ItemValue End Date
111 12/15/2010 9:26 2 8/17/2011 14:06
111 8/17/2011 14:06 2 8/27/2011 3:36
111 8/27/2011 3:36 2 12/31/2099 0:00
222 12/15/2010 9:26 2 4/13/2011 18:36
222 4/13/2011 18:36 2 8/17/2011 14:06
222 8/17/2011 14:06 3 10/1/2011 19:40
222 10/1/2011 19:40 2 12/31/2099 0:00
333 12/15/2010 9:26 4 7/3/2011 1:37
333 7/2/2011 20:37 4 7/3/2011 1:37
333 7/3/2011 1:37 4 2/19/2012 1:36
333 2/19/2012 1:36 5 12/31/2099 0:00

Required output is below....
ItemNumber Start Date ItemValue End Date
111 12/15/2010 9:26 2 12/31/2099 0:00
222 12/15/2010 9:26 2 8/17/2011 14:06
222 8/17/2011 14:06 3 10/1/2011 19:40
222 10/1/2011 19:40 2 12/31/2099 0:00
333 12/15/2010 9:26 4 2/19/2012 1:36
333 2/19/2012 1:36 5 12/31/2099 0:00

bandi
Flowing Fount of Yak Knowledge

India
2224 Posts

Posted - 08/22/2013 :  00:38:01  Show Profile  Reply with Quote
On which logic those records should be reduced?
Explain the output with logic....

--
Chandu
Go to Top of Page

sql_server_dba
Posting Yak Master

167 Posts

Posted - 08/22/2013 :  01:14:03  Show Profile  Reply with Quote
Its the itemNumber and itemValue. For every change in ItemValue for an ItemNumber, a new record should be inserted. But in the current table,even when there was no change in the ItemValue, there is a new record. Hope this helps.
Go to Top of Page

bandi
Flowing Fount of Yak Knowledge

India
2224 Posts

Posted - 08/22/2013 :  02:14:59  Show Profile  Reply with Quote
Use case statement with LEAD() function
SELECT * 
FROM (SELECT *,CASE WHEN ItemValue = LEAD(ItemValue) OVER(PARTITION BY ItemNumber ORDER BY StartDate) THEN 0 ELSE 1 ENS Flag
         FROM TableNAme)T
WHERE T.Flag = 1

NOTE: LEAD() OVER() is available in MSSQL 2012
Refer this link for LAG() & LEAD() functions
http://www.itdeveloperzone.com/2012/04/lead-and-lag-functions-in-sql-server.html
--
Chandu

Edited by - bandi on 08/22/2013 02:16:56
Go to Top of Page

lazerath
Constraint Violating Yak Guru

USA
343 Posts

Posted - 08/22/2013 :  10:27:21  Show Profile  Reply with Quote
Bandi, I don't believe your solution solves the problem. sql_server_dba: you also want to collapse the date periods?
Go to Top of Page

lazerath
Constraint Violating Yak Guru

USA
343 Posts

Posted - 08/22/2013 :  11:11:15  Show Profile  Reply with Quote


DECLARE @StartingData TABLE (ItemNumber INT,StartDate DATETIME,ItemValue MONEY,EndDate DATETIME);
DECLARE @SimpleLEADResult TABLE (ItemNumber INT,StartDate DATETIME,ItemValue MONEY,EndDate DATETIME);
DECLARE @NewResult TABLE (ItemNumber INT,StartDate DATETIME,ItemValue MONEY,EndDate DATETIME);
DECLARE @DesiredResult TABLE (ItemNumber INT,StartDate DATETIME,ItemValue MONEY,EndDate DATETIME);

INSERT @StartingData
VALUES
(111,'12/15/2010 9:26',2,'8/17/2011 14:06'),
(111,'8/17/2011 14:06',2,'8/27/2011 3:36'),
(111,'8/27/2011 3:36',2,'12/31/2099 0:00'),
(222,'12/15/2010 9:26',2,'4/13/2011 18:36'),
(222,'4/13/2011 18:36',2,'8/17/2011 14:06'),
(222,'8/17/2011 14:06',3,'10/1/2011 19:40'),
(222,'10/1/2011 19:40',2,'12/31/2099 0:00'),
(333,'12/15/2010 9:26',4,'7/3/2011 1:37'),
(333,'7/2/2011 20:37',4,'7/3/2011 1:37'),
(333,'7/3/2011 1:37',4,'2/19/2012 1:36'),
(333,'2/19/2012 1:36',5,'12/31/2099 0:00');

INSERT @DesiredResult
VALUES
(111,'12/15/2010 9:26',2,'12/31/2099 0:00'),
(222,'12/15/2010 9:26',2,'8/17/2011 14:06'),
(222,'8/17/2011 14:06',3,'10/1/2011 19:40'),
(222,'10/1/2011 19:40',2,'12/31/2099 0:00'),
(333,'12/15/2010 9:26',4,'2/19/2012 1:36'),
(333,'2/19/2012 1:36',5,'12/31/2099 0:00');

INSERT @SimpleLEADResult
SELECT ItemNumber,StartDate,ItemValue,EndDate
FROM (SELECT *, CASE WHEN ItemValue = LEAD(ItemValue) OVER(PARTITION BY ItemNumber ORDER BY StartDate) THEN 0 ELSE 1 END AS Flag
		FROM @StartingData) T
WHERE T.Flag = 1;

/*
ItemNumber	StartDate	ItemValue	EndDate	Flag
111	2011-08-27 03:36:00.000	2.00	2099-12-31 00:00:00.000	1
222	2011-04-13 18:36:00.000	2.00	2011-08-17 14:06:00.000	1
222	2011-08-17 14:06:00.000	3.00	2011-10-01 19:40:00.000	1
222	2011-10-01 19:40:00.000	2.00	2099-12-31 00:00:00.000	1
333	2011-07-03 01:37:00.000	4.00	2012-02-19 01:36:00.000	1
333	2012-02-19 01:36:00.000	5.00	2099-12-31 00:00:00.000	1
*/

SELECT * FROM @DesiredResult EXCEPT SELECT * FROM @SimpleLEADResult;

/*  -- Rows from @DesiredResult that aren't the same as @SimpleLEADResult;
ItemNumber	StartDate	ItemValue	EndDate
111	2010-12-15 09:26:00.000	2.00	2099-12-31 00:00:00.000
222	2010-12-15 09:26:00.000	2.00	2011-08-17 14:06:00.000
333	2010-12-15 09:26:00.000	4.00	2012-02-19 01:36:00.000
*/

WITH cte1
AS
(
SELECT		*,
		ROW_NUMBER() OVER(PARTITION BY ItemNumber ORDER BY StartDate) AS SEQUENCE,
		CASE WHEN ItemValue = LEAD(ItemValue) OVER(PARTITION BY ItemNumber ORDER BY StartDate) THEN 0 ELSE 1 END AS ChangeFlag
FROM		@StartingData
)
,		cte2
AS
(
SELECT		*,
		MIN(CASE ChangeFlag WHEN 1 THEN SEQUENCE END) OVER(PARTITION BY ItemNumber ORDER BY SEQUENCE ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) SeqGroup
FROM		cte1
)
INSERT		@NewResult
SELECT		c.ItemNumber,	MIN(c.StartDate) AS StartDate, c.ItemValue,	MAX(c.EndDate) AS EndDate
FROM		cte2 AS c
GROUP BY	c.ItemNumber, c.ItemValue, c.SeqGroup
ORDER BY	c.ItemNumber, MIN(c.StartDate)

/*
ItemNumber	StartDate	ItemValue	EndDate
111	2010-12-15 09:26:00.000	2.00	2099-12-31 00:00:00.000
222	2010-12-15 09:26:00.000	2.00	2011-08-17 14:06:00.000
222	2011-08-17 14:06:00.000	3.00	2011-10-01 19:40:00.000
222	2011-10-01 19:40:00.000	2.00	2099-12-31 00:00:00.000
333	2010-12-15 09:26:00.000	4.00	2012-02-19 01:36:00.000
333	2012-02-19 01:36:00.000	5.00	2099-12-31 00:00:00.000
*/

SELECT * FROM @DesiredResult EXCEPT SELECT * FROM @NewResult

/*  -- Rows from @DesiredResult that aren't the same as @NewResult;
(0 row(s) affected)
*/


* Edit: I found a bug I introduced by putting aliases on cte2. Fixed.

Edited by - lazerath on 08/22/2013 11:30:36
Go to Top of Page

lazerath
Constraint Violating Yak Guru

USA
343 Posts

Posted - 08/22/2013 :  11:20:54  Show Profile  Reply with Quote
Also, while this solves your problem from the logical sense, it does not solve it from the logistical sense. How big is this history table? If you are working on systems with millions or billions of rows, you cannot simply run this query against it. You will likely need to batch this as well as a strategy to handle changes while the batch process is running. If your table is small and you can handle a brief outage, you might be able to get away with locking the table and doing a MERGE.
Go to Top of Page

sql_server_dba
Posting Yak Master

167 Posts

Posted - 08/22/2013 :  11:53:54  Show Profile  Reply with Quote
Lazerath...seems like Bandi solution solves the problem(Not sure if i missing anything there). Yeah, i want to collapse the periods too but only if the ItemValue changes. BTW..even you solution works with CTE's.

For example...for ItemNumber 111, item value didn't changed so i wanted to collapse all 3 records into one. But for ItemNumber 222...itemValue changed from 2 to 3 and then from 3 to 2. So i need 3 records for ItemNumber 222.

Edited by - sql_server_dba on 08/22/2013 12:06:13
Go to Top of Page

lazerath
Constraint Violating Yak Guru

USA
343 Posts

Posted - 08/22/2013 :  12:04:58  Show Profile  Reply with Quote
Take a look at the code I posted. You can run the whole thing and it walks you through the differences between Bandi's solution and the end state you were looking for. The major difference is that Bandi's solution did not collapse the records, all it did was identify the last item value record before a change. This is key to the final solution, but is not the solution in and of itself.

For instance, for item 111, you indicated this should be the result:
111 12/15/2010 9:26 2 12/31/2099 0:00

However, in Bandi's solution, it provides this:
111 8/27/2011 3:36 2 12/31/2099 0:00

Notice the Start Date. Bandi's solution does not collapse the 3 records into 1, it simply returns the last record. I had to add code to properly group sequential history records that did not change. This was accomplished with this code:

WITH cte1
AS
(
SELECT		*,
		ROW_NUMBER() OVER(PARTITION BY ItemNumber ORDER BY StartDate) AS SEQUENCE,
		CASE WHEN ItemValue = LEAD(ItemValue) OVER(PARTITION BY ItemNumber ORDER BY StartDate) THEN 0 ELSE 1 END AS ChangeFlag
FROM		@StartingData
)
,		cte2
AS
(
SELECT		*,
		MIN(CASE ChangeFlag WHEN 1 THEN SEQUENCE END) OVER(PARTITION BY ItemNumber ORDER BY SEQUENCE ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) SeqGroup
FROM		cte1
)
SELECT		c.ItemNumber,	MIN(c.StartDate) AS StartDate, c.ItemValue,	MAX(c.EndDate) AS EndDate
FROM		cte2 AS c
GROUP BY	c.ItemNumber, c.ItemValue, c.SeqGroup
ORDER BY	c.ItemNumber, MIN(c.StartDate)


cte1 gets the change boundaries as Bandi provided but adds in a Sequence. cte2 returns the MIN sequence with a change that is greater than or equal to the current record. This allows us to group sequential entries that do not change and produce the result you requested.
Go to Top of Page

sql_server_dba
Posting Yak Master

167 Posts

Posted - 08/22/2013 :  12:08:29  Show Profile  Reply with Quote
Good catch....Thanks Lazerath.
Go to Top of Page

lazerath
Constraint Violating Yak Guru

USA
343 Posts

Posted - 08/22/2013 :  12:23:10  Show Profile  Reply with Quote
No problem, glad to help.

As it just so happens, I found an optimization to eliminate the GROUP BY and replace the Sequence with a different boundary identifier:


WITH 		cte1
AS
(
SELECT		c.ItemNumber,c.StartDate,c.ItemValue,c.EndDate,
		CASE WHEN c.ItemValue = LAG(c.ItemValue) OVER(PARTITION BY c.ItemNumber ORDER BY c.StartDate) THEN 0 ELSE 1 END AS isChangeBoundaryBeg,
		CASE WHEN c.ItemValue = LEAD(c.ItemValue) OVER(PARTITION BY c.ItemNumber ORDER BY c.StartDate) THEN 0 ELSE 1 END AS isChangeBoundaryEnd
FROM		@StartingData AS c
)
/* --SELECT * FROM cte1
ItemNumber	StartDate	ItemValue	EndDate	isChangeBoundaryBeg	isChangeBoundaryEnd
111	2010-12-15 09:26:00.000	2.00	2011-08-17 14:06:00.000	1	0
111	2011-08-17 14:06:00.000	2.00	2011-08-27 03:36:00.000	0	0
111	2011-08-27 03:36:00.000	2.00	2099-12-31 00:00:00.000	0	1
222	2010-12-15 09:26:00.000	2.00	2011-04-13 18:36:00.000	1	0
222	2011-04-13 18:36:00.000	2.00	2011-08-17 14:06:00.000	0	1
222	2011-08-17 14:06:00.000	3.00	2011-10-01 19:40:00.000	1	1
222	2011-10-01 19:40:00.000	2.00	2099-12-31 00:00:00.000	1	1
333	2010-12-15 09:26:00.000	4.00	2011-07-03 01:37:00.000	1	0
333	2011-07-02 20:37:00.000	4.00	2011-07-03 01:37:00.000	0	0
333	2011-07-03 01:37:00.000	4.00	2012-02-19 01:36:00.000	0	1
333	2012-02-19 01:36:00.000	5.00	2099-12-31 00:00:00.000	1	1
*/
,		cte2
AS
(
SELECT		c.ItemNumber,
		MAX(CASE c.isChangeBoundaryBeg WHEN 1 THEN c.StartDate END) OVER(PARTITION BY c.ItemNumber ORDER BY c.StartDate ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS StartDate,
		c.ItemValue,
		c.EndDate,
		c.isChangeBoundaryEnd
FROM		cte1 as c
)
/* --SELECT * FROM cte2
ItemNumber	StartDate	ItemValue	EndDate	isChangeBoundaryEnd
111	2010-12-15 09:26:00.000	2.00	2011-08-17 14:06:00.000	0
111	2010-12-15 09:26:00.000	2.00	2011-08-27 03:36:00.000	0
111	2010-12-15 09:26:00.000	2.00	2099-12-31 00:00:00.000	1
222	2010-12-15 09:26:00.000	2.00	2011-04-13 18:36:00.000	0
222	2010-12-15 09:26:00.000	2.00	2011-08-17 14:06:00.000	1
222	2011-08-17 14:06:00.000	3.00	2011-10-01 19:40:00.000	1
222	2011-10-01 19:40:00.000	2.00	2099-12-31 00:00:00.000	1
333	2010-12-15 09:26:00.000	4.00	2011-07-03 01:37:00.000	0
333	2010-12-15 09:26:00.000	4.00	2011-07-03 01:37:00.000	0
333	2010-12-15 09:26:00.000	4.00	2012-02-19 01:36:00.000	1
333	2012-02-19 01:36:00.000	5.00	2099-12-31 00:00:00.000	1
*/
INSERT		@NewResult
SELECT		c.ItemNumber,c.StartDate,c.ItemValue,c.EndDate
FROM		cte2 AS c
WHERE		c.isChangeBoundaryEnd = 1
ORDER BY	c.ItemNumber, c.StartDate

/*
ItemNumber	StartDate	ItemValue	EndDate
111	2010-12-15 09:26:00.000	2.00	2099-12-31 00:00:00.000
222	2010-12-15 09:26:00.000	2.00	2011-08-17 14:06:00.000
222	2011-08-17 14:06:00.000	3.00	2011-10-01 19:40:00.000
222	2011-10-01 19:40:00.000	2.00	2099-12-31 00:00:00.000
333	2010-12-15 09:26:00.000	4.00	2012-02-19 01:36:00.000
333	2012-02-19 01:36:00.000	5.00	2099-12-31 00:00:00.000
*/


* Edit: Added data to show internal CTE results

Edited by - lazerath on 08/22/2013 12:45:29
Go to Top of Page

sql_server_dba
Posting Yak Master

167 Posts

Posted - 08/22/2013 :  17:10:00  Show Profile  Reply with Quote
Awesome...this is great!!! Thanks.
Go to Top of Page
  Previous Topic Topic Next Topic  
 New Topic  Reply to Topic
 Printer Friendly
Jump To:
SQL Server Forums © 2000-2009 SQLTeam Publishing, LLC Go To Top Of Page
This page was generated in 0.08 seconds. Powered By: Snitz Forums 2000