JOIN to a row that does not yet exist

Please start any new threads on our new site at https://forums.sqlteam.com. We've got lots of great SQL Server experts to answer whatever question you can come up with.

All Forums

SQL Server 2000 Forums

SQL Server Development (2000)

JOIN to a row that does not yet exist

Author

Topic

jmottle
Starting Member

2 Posts

Posted - 2007-03-18 : 05:04:11

I'm working on a voting script for an image competition, but have run into a bit of a snag.

For simplicity of the description there are two tables:

Table1
ID
NID
ImageName

Table2
ID
NID
VoterID
Score

The application will call Table 1 to list all of the records and allow the user to submit a score for each record one at a time, which will then be stored in Table 2.

This is easy if I just display each record in Table 1 in order, but what I want to do it display them randomly, so that every image has a chance of being scored should the voter not score all images.

My question is how do I create a recordset that displays the images from Table 1 AND have the query check Table 2 to see that the image has not yet been voted upon by that voterID?

The only thing I could think of is create a loop that is run the first time each user acceses the voting page, which would write every NID from Table 1 into Table2 along with thier VoterID. This way I can JOIN the two tables.

Given that there will be around 200-300 images and potentially 500-1000 voters, it strikes me that this is not a very eloquent way to do this as it writes rows that may never have a vote assigned to them.

Any help would be greatly appreciated.

Cheers,
Jeff

SwePeso
Patron Saint of Lost Yaks

30421 Posts

Posted - 2007-03-18 : 05:07:38

SELECT Table1.*
FROM Table1
WHERE NOT EXISTS (SELECT NULL FROM Table2 WHERE Table2.NID = Table1.NID AND Table2.VoterID = @VoterID)

Peter Larsson
Helsingborg, Sweden

jmottle
Starting Member

2 Posts

Posted - 2007-03-18 : 05:17:20

Excellent Thank you!

rockmoose
SQL Natt Alfen

3279 Posts

Posted - 2007-03-18 : 06:57:48

Is there one syntax per developer for the NOT EXISTS?

WHERE NOT EXISTS(SELECT NULL ....)
WHERE NOT EXISTS(SELECT 0 ....)
WHERE NOT EXISTS(SELECT 1 ....)
WHERE NOT EXISTS(SELECT table1.ID ....)

Seen them all!
Come on, consensus!
This is one case where * is the way to go:
WHERE NOT EXISTS(SELECT * ....)

rockmoose

SwePeso
Patron Saint of Lost Yaks

30421 Posts

Posted - 2007-03-18 : 07:25:49

Is * faster? Do the SQL Server do a column lookup?

Peter Larsson
Helsingborg, Sweden

rockmoose
SQL Natt Alfen

3279 Posts

Posted - 2007-03-18 : 08:11:53

quote:
Originally posted by Peso

Is * faster? Do the SQL Server do a column lookup?

Peter Larsson
Helsingborg, Sweden

Mainly a syntactical issue.

It is possible that column lookup is done in the parse step, but I don't know. But any column lookups would be unnecessary, and if they are done I think it would be an implementation flaw.

As for empirical tests, I have never seen any performance difference.
And no difference in the plans.

rockmoose

SwePeso
Patron Saint of Lost Yaks

30421 Posts

Posted - 2007-03-18 : 10:35:07

I created this test code

SET NOCOUNT ON

-- Prepare sample data
CREATE TABLE	#Table1
		(
			NID INT PRIMARY KEY CLUSTERED
		)

INSERT	#Table1
SELECT	Number
FROM	F_TABLE_NUMBER_RANGE(0, 99999)

CREATE TABLE	#Table2
		(
			ID INT PRIMARY KEY CLUSTERED,
			NID INT,
			VoterID INT
		)

INSERT	#Table2
SELECT	Number,
	ABS(CAST(CAST(NEWID() AS VARBINARY(32)) AS BIGINT)) % 100000,
	ABS(CAST(CAST(NEWID() AS VARBINARY(32)) AS BIGINT)) % 200
FROM	F_TABLE_NUMBER_RANGE(0, 999999)

CREATE TABLE	#Times
		(
			TryName VARCHAR(20),
			ms INT
		)
GO

-- Start time testing
DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

DECLARE	@i INT,
	@dt DATETIME,
	@d INT

SET	@i = 101

WHILE @i > 1
	BEGIN
		SET	@dt = CURRENT_TIMESTAMP

		SELECT	@d = COUNT(*)
		FROM	#Table1 AS t1
		WHERE	NOT EXISTS (SELECT t2.NID FROM #Table2 AS t2 WHERE t2.NID = t1.NID AND t2.VoterID = 5)

		INSERT	#Times
		VALUES	('Try 1 with t2.NID', DATEDIFF(MILLISECOND, @dt, CURRENT_TIMESTAMP))

		SET	@i = @i - 1
	END
GO

DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

DECLARE	@i INT,
	@dt DATETIME,
	@d INT

SET	@i = 101

WHILE @i > 1
	BEGIN
		SET	@dt = CURRENT_TIMESTAMP

		SELECT	@d = COUNT(*)
		FROM	#Table1 AS t1
		WHERE	NOT EXISTS (SELECT 0 FROM #Table2 AS t2 WHERE t2.NID = t1.NID AND t2.VoterID = 5)

		INSERT	#Times
		VALUES	('Try 2 with 0', DATEDIFF(MILLISECOND, @dt, CURRENT_TIMESTAMP))

		SET	@i = @i - 1
	END
GO

DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

DECLARE	@i INT,
	@dt DATETIME,
	@d INT

SET	@i = 101

WHILE @i > 1
	BEGIN
		SET	@dt = CURRENT_TIMESTAMP

		SELECT	@d = COUNT(*)
		FROM	#Table1 AS t1
		WHERE	NOT EXISTS (SELECT 1 FROM #Table2 AS t2 WHERE t2.NID = t1.NID AND t2.VoterID = 5)

		INSERT	#Times
		VALUES	('Try 3 with 1', DATEDIFF(MILLISECOND, @dt, CURRENT_TIMESTAMP))

		SET	@i = @i - 1
	END
GO

DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

DECLARE	@i INT,
	@dt DATETIME,
	@d INT

SET	@i = 101

WHILE @i > 1
	BEGIN
		SET	@dt = CURRENT_TIMESTAMP

		SELECT	@d = COUNT(*)
		FROM	#Table1 AS t1
		WHERE	NOT EXISTS (SELECT NULL FROM #Table2 AS t2 WHERE t2.NID = t1.NID AND t2.VoterID = 5)

		INSERT	#Times
		VALUES	('Try 4 with NULL', DATEDIFF(MILLISECOND, @dt, CURRENT_TIMESTAMP))

		SET	@i = @i - 1
	END
GO

DBCC FREEPROCCACHE
DBCC DROPCLEANBUFFERS

DECLARE	@i INT,
	@dt DATETIME,
	@d INT

SET	@i = 101

WHILE @i > 1
	BEGIN
		SET	@dt = CURRENT_TIMESTAMP

		SELECT	@d = COUNT(*)
		FROM	#Table1 AS t1
		WHERE	NOT EXISTS (SELECT * FROM #Table2 AS t2 WHERE t2.NID = t1.NID AND t2.VoterID = 5)

		INSERT	#Times
		VALUES	('Try 5 with *', DATEDIFF(MILLISECOND, @dt, CURRENT_TIMESTAMP))

		SET	@i = @i - 1
	END
GO

And then wrote some code to see the statistics like this

-- Show normal statistics
SELECT		TryName,
		COUNT(*) AS [COUNT],
		MIN(ms) AS [MIN],
		MAX(ms) AS [MAX],
		AVG(ms) AS [AVG],
		STDEV(ms) AS [STDEV]
FROM		#Times
GROUP BY	TryName
ORDER BY	1

-- Show distribution
SELECT		ms,
		SUM(CASE WHEN TryName = 'Try 1 with t2.NID' THEN 1 ELSE 0 END) AS [Try 1 with t2.NID],
		SUM(CASE WHEN TryName = 'Try 2 with 0' THEN 1 ELSE 0 END) AS [Try 2 with 0],
		SUM(CASE WHEN TryName = 'Try 3 with 1' THEN 1 ELSE 0 END) AS [Try 3 with 1],
		SUM(CASE WHEN TryName = 'Try 4 with NULL' THEN 1 ELSE 0 END) AS [Try 4 with NULL],
		SUM(CASE WHEN TryName = 'Try 5 with *' THEN 1 ELSE 0 END) AS [Try 5 with *]
FROM		#Times
GROUP BY	ms
ORDER BY	1

These are the two outputs I got. You may get other values, but they should be somewhat consistent.
BTW, I am running on SQL Server 2005 Developer Edition 9.00.3152.00

TryName			COUNT	MIN	MAX	AVG	STDEV
-----------------	-----	---	---	---	----------------
Try 1 with t2.NID	100	123	160	138	8.39213195724899
Try 2 with 0		100	123	170	140	9.45216559014041
Try 3 with 1		100	123	156	138	6.94579300037365
Try 4 with NULL		100	123	156	138	6.89806736219163	-- Smallest spread of time
Try 5 with *		100	123	156	141	7.23359167252670


ms	Try 1 with t2.NID	Try 2 with 0	Try 3 with 1	Try 4 with NULL		Try 5 with *
---	-----------------	------------	------------	---------------		------------
123	11			9		9		9			4
126	9			6		8		6			3
140	58			54		65		64			61
143	14			14		15		17			17
153	1			3		0		1			3
156	6			11		3		3			12
160	1			2		0		0			0
170	0			1		0		0			0

The second output is interpreted like this

Try 1 has 20% of the total times at the two fastest times
Try 2 has 15% of the total times at the two fastest times
Try 3 has 17% of the total times at the two fastest times
Try 4 has 15% of the total times at the two fastest times
Try 5 has 7% of the total times at the two fastest times

and

Try 1 has 8% of the total times at the four slowest times
Try 2 has 17% of the total times at the four slowest times
Try 3 has 3% of the total times at the four slowest times
Try 4 has 4% of the total times at the four slowest times
Try 5 has 15% of the total times at the four slowest times

Peter Larsson
Helsingborg, Sweden

EDIT: Typo spotted by Rockmoose

rockmoose
SQL Natt Alfen

3279 Posts

Posted - 2007-03-18 : 11:46:17

Hi Peso,

In you sample code, the last test *, actually used NULL.

But I did do run your code both on SQL2000 + SQL2005 and the differences are just statistical noise.
Depending on which run I choose, I can claim that either method is faster than the other..

It is still a syntactical issue. * is the only method mentioned in the SQL standard (afaik).
NULL, 0 and any other construction would just be a hack to come around a particular DBMS implementation flaw.
Ok, if there really is a flaw, then a workaround might be warranted, but in the case of SQL2000-> it is not.

I can't make any assertions for any other dbms versions.

rockmoose

SwePeso
Patron Saint of Lost Yaks

30421 Posts

Posted - 2007-03-18 : 12:44:13

I run the test again with 1000 iterations per method.

TryName			COUNT	MIN	MAX	AVG	STDEV
-----------------	-----	---	---	---	----------------
Try 1 with t2.NID	1000	123	343	145	16.3989183949590
Try 2 with 0		1000	123	576	177	46.0588051574564
Try 3 with 1		1000	123	500	154	35.1403417544368
Try 4 with NULL		1000	123	173	137	9.12899965136568	-- Smallest spred of time
Try 5 with *		1000	123	466	141	17.6453881715230

ms	Try 1 with t2.NID	Try 2 with 0	Try 3 with 1	Try 4 with NULL		Try 5 with *
---	-----------------	------------	------------	---------------		------------
123	55			5		56		134			107
126	42			8		65		132			109
136	0			0		0		1			0
140	489			104		409		539			509
143	122			23		89		128			108
153	38			44		36		12			17
156	171			201		111		40			105
160	9			13		12		4			8
170	20			133		38		4			8
173	33			181		45		6			12
186	8			104		39		0			4
190	1			32		12		0			1
200	0			4		5		0			0
203	3			46		24		0			3
206	0			0		1		0			0
216	2			12		4		0			0
220	3			17		12		0			3
233	0			12		2		0			3
236	0			6		3		0			0
246	0			1		0		0			0
250	0			13		11		0			1
263	0			2		1		0			0
266	0			3		8		0			0
280	2			3		4		0			1
283	0			4		4		0			0
296	1			4		2		0			0
300	0			1		1		0			0
310	0			1		2		0			0
313	0			2		0		0			0
326	0			3		0		0			0
330	0			0		1		0			0
343	1			1		0		0			0
356	0			1		0		0			0
360	0			2		0		0			0
373	0			1		0		0			0
376	0			1		0		0			0
390	0			2		0		0			0
420	0			1		0		0			0
423	0			1		1		0			0
436	0			3		0		0			0
466	0			1		0		0			1
483	0			1		1		0			0
500	0			0		1		0			0
563	0			2		0		0			0
576	0			1		0		0			0

Even if I delete the "abnormal" times for each method, I still get this

TryName			COUNT	MIN	MAX	AVG	STDEV
-----------------	-----	---	---	---	----------------
Try 1 with t2.NID	987	123	186	144	11.5294503399708
Try 2 with 0		898	123	203	166	17.3861444199841
Try 3 with 1		941	123	203	148	18,2964600882312
Try 4 with NULL		1000	123	173	137	9.12899965136568	-- Smallest spred of time
Try 5 with *		987	123	186	139	10.7961069275478


ms	Try 1 with t2.NID	Try 2 with 0	Try 3 with 1	Try 4 with NULL		Try 5 with *
---	-----------------	------------	------------	---------------		------------
123	55			5		56		134			107
126	42			8		65		132			109
136	0			0		0		1			0
140	489			104		409		539			509
143	122			23		89		128			108
153	38			44		36		12			17
156	171			201		111		40			105
160	9			13		12		4			8
170	20			133		38		4			8
173	33			181		45		6			12
186	8			104		39		0			4
190	0			32		12		0			0
200	0			4		5		0			0
203	0			46		24		0			0

If asterisk (*) is standard, then it is right thing to use.

Peter Larsson
Helsingborg, Sweden

rockmoose
SQL Natt Alfen

3279 Posts

Posted - 2007-03-18 : 19:25:49

I still think it is statistical noise

And that (*) is the right way to go.

rockmoose

Subscribe to SQLTeam.com

SQLTeam.com Articles via RSS

SQLTeam.com Weblog via RSS

- Advertisement -

Resources