Interview sql

🚀 Find Duplicate Records

SELECT col1, col2, COUNT(*)
FROM table_name
GROUP BY col1, col2
HAVING COUNT(*) > 1;

🚀 Second Highest Salary

SELECT MAX(salary)
FROM employees
WHERE salary < (SELECT MAX(salary) FROM employees);

🚀 Nth Highest Salary

SELECT salary
FROM (
SELECT salary, DENSE_RANK() OVER (ORDER BY salary DESC) rnk
FROM employees
) t
WHERE rnk = N;

🚀 Top 3 Sales per Region

SELECT *
FROM (
SELECT *, ROW_NUMBER() OVER (PARTITION BY region ORDER BY sales DESC) rn
FROM sales
) t
WHERE rn <= 3;

🚀 Consecutive Purchases

SELECT customer_id
FROM (
SELECT customer_id, order_date,
LAG(order_date) OVER (PARTITION BY customer_id ORDER BY order_date) prev_date
FROM orders
) t
WHERE order_date = prev_date + INTERVAL '1' DAY;

🚀Remove Duplicates

DELETE FROM table_name
WHERE id NOT IN (
SELECT MIN(id)
FROM table_name
GROUP BY col1, col2
);

🚀 Write a query to find the department with the highest average salary for employees who have been with the company for more than 2 years.

-emp : name , id , sal , hiredt , deptid

-dept : deptid , deptname

SELECT d.deptid, d.deptname FROM emp e JOIN dept d ON e.deptid = d.deptid WHERE e.hiredt <= ADD_MONTHS(CURRENT_DATE, -24) GROUP BY d.deptid, d.deptname QUALIFY RANK() OVER (ORDER BY AVG(e.sal) DESC) = 1;


🔁 Alternative (Without QUALIFY)

SELECT deptid, deptname FROM ( SELECT d.deptid, d.deptname, AVG(e.sal) AS avg_sal, RANK() OVER (ORDER BY AVG(e.sal) DESC) AS rnk FROM emp e JOIN dept d ON e.deptid = d.deptid WHERE e.hiredt <= ADD_MONTHS(CURRENT_DATE, -24) GROUP BY d.deptid, d.deptname ) t WHERE rnk = 1;

🚀 JOIN Tables

Table: A

id
1
1
2
NULL


Table: B

id
1
1
3
NULL

🧠 Quick Memory

👉 Duplicates → Multiply
👉 NULL → No match


JOIN TYPERESULT COUNTWHY
INNER JOIN4Only matching → (1×1 duplicates → 2×2 = 4)
LEFT JOIN6All A rows + matches → 4 (for 1s) + 2 (2, NULL)
RIGHT JOIN6All B rows + matches → 4 (for 1s) + 2 (3, NULL)
FULL JOIN8All rows from both → 4 matches + 2 (A unmatched) + 2 (B unmatched)




🧾 Input Tables

Table A

id
1
1
1
1

👉 4 rows (all 1)

Table B

id
1
1
1

👉 3 rows (all 1)

🔗 Join Logic

👉 Every row in A matches with every row in B
👉 So:

👉 Total rows = 4 × 3 = 12





🚀 Second highest salary in each dept


SELECT deptid, sal
FROM (
    SELECT deptid, sal,
           DENSE_RANK() OVER (PARTITION BY deptid ORDER BY sal DESC) AS rnk
    FROM emp
) t
WHERE rnk = 2;


🚀 ✅ Swap Gender Using UPDATE

UPDATE emp
SET gender = CASE 
                WHEN gender = 'M' THEN 'F'
                WHEN gender = 'F' THEN 'M'
             END;


🚀 Find Customers Who Appeared More Than Once

SELECT customer_id, COUNT(*) AS cnt
FROM orders
GROUP BY customer_id
HAVING COUNT(*) > 1;

👉 Customers with multiple orders


🚀  Find Customers Who Purchased More Than Once on Different Days

SELECT customer_id
FROM orders
GROUP BY customer_id
HAVING COUNT(DISTINCT order_date) > 1;

🚀  Find First-Time vs Repeat Customers

SELECT customer_id,
CASE
WHEN COUNT(*) = 1 THEN 'New'
ELSE 'Repeat'
END AS customer_type
FROM orders
GROUP BY customer_id;

🚀  Find Customers Who Purchased Every Month

SELECT customer_id
FROM orders
GROUP BY customer_id
HAVING COUNT(DISTINCT EXTRACT(MONTH FROM order_date)) = 12;

🚀  Find Customers Who Purchased Again Within 7 Days

SELECT customer_id
FROM (
SELECT customer_id, order_date,
LAG(order_date) OVER (PARTITION BY customer_id ORDER BY order_date) prev_date
FROM orders
) t
WHERE order_date <= prev_date + INTERVAL '7' DAY;

🚀  Find Customers Who Did NOT Repeat

SELECT customer_id
FROM orders
GROUP BY customer_id
HAVING COUNT(*) = 1;

🚀  Find Most Frequent Customer

SELECT customer_id
FROM orders
GROUP BY customer_id
ORDER BY COUNT(*) DESC
FETCH FIRST 1 ROW ONLY;

🚀  Find Customers Who Bought Same Product Multiple Times

SELECT customer_id, product_id
FROM orders
GROUP BY customer_id, product_id
HAVING COUNT(*) > 1;

🚀 Find Customers With Increasing Purchases

SELECT customer_id
FROM (
SELECT customer_id, amount,
LAG(amount) OVER (PARTITION BY customer_id ORDER BY order_date) prev_amt
FROM orders
) t
WHERE amount > prev_amt;



🚀 Pivot vs CASE WHEN + SUM()

👉 Both are used to convert rows → columns (pivoting)

🧾 Sample Table: sales

regionamount
East100
West200
East150
West300

🔹 1️⃣ Using CASE WHEN + SUM()

SELECT
SUM(CASE WHEN region = 'East' THEN amount END) AS East,
SUM(CASE WHEN region = 'West' THEN amount END) AS West
FROM sales;

✅ Output

EastWest
250500

🔹 2️⃣ Using PIVOT

SELECT *
FROM sales
PIVOT (
SUM(amount)
FOR region IN ('East', 'West')
);

⚖️ Key Differences

FeaturePIVOTCASE + SUM
SyntaxSimpleMore verbose
FlexibilityLimitedVery flexible
Dynamic columnsHardEasier
DB supportNot all DBsWorks everywhere
ReadabilityCleanerSlightly complex

🔥 When to Use What?

✅ Use PIVOT when:

  • Few known columns
  • Need cleaner query

✅ Use CASE WHEN when:

  • Dynamic values
  • Complex conditions
  • Better control

Employee visiting office floor

🧾 Sample Table: visits

emp_idfloorvisit_time
1110:00
1210:10
1110:20
2311:00
2311:10

🚀 1️⃣ Most Visited Floor per Employee
SELECT emp_id, floor
FROM (
SELECT emp_id, floor, COUNT(*) cnt,
RANK() OVER (PARTITION BY emp_id ORDER BY COUNT(*) DESC) rnk
FROM visits
GROUP BY emp_id, floor
) t
WHERE rnk = 1;

🚀 2️⃣ First Floor Visited

SELECT emp_id,
FIRST_VALUE(floor) OVER (PARTITION BY emp_id ORDER BY visit_time) AS first_floor
FROM visits;

🚀 3️⃣ Last Floor Visited

SELECT emp_id,
LAST_VALUE(floor) OVER (
PARTITION BY emp_id
ORDER BY visit_time
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_floor
FROM visits;

🚀 4️⃣ Consecutive Same Floor Visits

SELECT emp_id, floor
FROM (
SELECT emp_id, floor,
LAG(floor) OVER (PARTITION BY emp_id ORDER BY visit_time) prev_floor
FROM visits
) t
WHERE floor = prev_floor;

🚀 5️⃣ Floor Change Detection

SELECT emp_id, floor, visit_time
FROM (
SELECT emp_id, floor, visit_time,
LAG(floor) OVER (PARTITION BY emp_id ORDER BY visit_time) prev_floor
FROM visits
) t
WHERE floor != prev_floor OR prev_floor IS NULL;

🚀 6️⃣ Total Floors Visited per Employee

SELECT emp_id, COUNT(DISTINCT floor) AS total_floors
FROM visits
GROUP BY emp_id;

👉 “Find FIRST total floor visits and resources used per employee”
(usually means: first floor visited + total visits + total distinct floors)




"new vs. repeat customer" problem













Comments

Popular posts from this blog

work

Git

Docker/Airflow