According to this SO answer, we have:
SELECT DISTINCT s.id
FROM students s
LEFT JOIN grades g ON g.student_id = s.id
WHERE g.student_id IS NOT NULL
Which uses a LEFT JOIN. But it can be implemented with a "semijoin" like this:
SELECT s.id
FROM students s
WHERE EXISTS (SELECT 1 FROM grades g
WHERE g.student_id = s.id)
How do you write these two queries in JavaScript, given two collections students and grades? The first one I think is implemented with a "nested loop join" (or could be optimized with an index join data structure, but I don't know how to implement that just yet).
const query1Results = nestedJoin(students, grades, (s, g) => {
return Boolean(g.student_id && g.student_id === s.id)
}).map(([s]) => s.id)
function nestedJoin(R, S, compare) {
const out = []
for (const r of R) {
for (const s of S) {
if (compare(r, s)) {
out.push([ r, s ])
}
}
}
return out
}
How would you do the second one though?
const query2Results = semiJoin(students, grades, (s, g) => {
return g.student_id === s.id
}).map(([s]) => s.id)
function semiJoin(R, S) {
const out = []
for (const r of R) {
for (const s of S) {
if (compare(r, s)) {
out.push([ r, s ])
}
}
}
return out
}
Basically, I implemented it the same way.
const students = [
{ id: 1 },
{ id: 2 },
{ id: 3 },
{ id: 4 },
{ id: 5 },
{ id: 6 },
]
const grades = [
{ id: 1, student_id: 1 },
{ id: 2, student_id: 1 },
{ id: 3, student_id: 1 },
{ id: 4, student_id: 1 },
{ id: 5, student_id: 1 },
{ id: 6, student_id: 1 },
{ id: 10, student_id: 2 },
{ id: 20, student_id: 2 },
{ id: 30, student_id: 2 },
{ id: 40, student_id: 2 },
{ id: 50, student_id: 2 },
{ id: 60, student_id: 2 },
{ id: 100, student_id: 3 },
{ id: 200, student_id: 3 },
{ id: 300, student_id: 3 },
{ id: 400, student_id: 3 },
{ id: 500, student_id: 3 },
{ id: 600, student_id: 3 },
]
const expectedOutput = [
1, 2, 3
]
CodePudding user response:
This really does depend on your table design and indexes but assuming you have one on grades.student_id, the JS equivalent might look like
const students = [{"id":1},{"id":2},{"id":3},{"id":4},{"id":5},{"id":6}]
const grades = [{"id":1,"student_id":1},{"id":2,"student_id":1},{"id":3,"student_id":1},{"id":4,"student_id":1},{"id":5,"student_id":1},{"id":6,"student_id":1},{"id":10,"student_id":2},{"id":20,"student_id":2},{"id":30,"student_id":2},{"id":40,"student_id":2},{"id":50,"student_id":2},{"id":60,"student_id":2},{"id":100,"student_id":3},{"id":200,"student_id":3},{"id":300,"student_id":3},{"id":400,"student_id":3},{"id":500,"student_id":3},{"id":600,"student_id":3}]
// index
const idxGradesStudentId = new Set(grades.map(({ student_id }) => student_id))
function semiJoin(R, S) {
// WHERE EXISTS
return R.filter(({ id }) => idxGradesStudentId.has(id))
.map(({ id }) => id) // SELECT
}
console.log(semiJoin(students, grades))
So rather than a nested loop, this uses the index on grades.student_id (represented by a Set) to evaluate the EXISTS clause (Set.prototype.has()) which has the benefit of O(1) lookup time complexity.
