跳到主要内容

SQL 面试高频题精选

问题

数据分析面试中常考的 SQL 题有哪些?怎么快速解题?

答案

本文精选了 15 道数据分析岗位高频 SQL 面试题,覆盖聚合、窗口函数、CTE、子查询等核心知识点。每道题都提供题意分析、解题思路和完整 SQL

面试技巧
  1. 先理解题意:明确输入/输出,确认边界条件
  2. 说出思路:先口述再写 SQL,展示分析能力
  3. 从简到繁:先写核心逻辑,再优化
  4. 测试边缘情况:NULL、空表、重复数据

题目 1:第 N 高的薪水

题意:编写 SQL 查询第 N 高的薪水。如果不存在第 N 高,返回 NULL。

-- 方法 1:DENSE_RANK(推荐,处理并列情况)
SELECT DISTINCT salary AS nth_salary
FROM (
SELECT salary, DENSE_RANK() OVER (ORDER BY salary DESC) AS rk
FROM employees
) t
WHERE rk = 3; -- N=3

-- 方法 2:子查询
-- "比我高的去重薪资恰好有 N-1 个"
SELECT DISTINCT salary
FROM employees e1
WHERE 2 = ( -- N-1 = 2
SELECT COUNT(DISTINCT salary)
FROM employees e2
WHERE e2.salary > e1.salary
);

-- 方法 3:LIMIT + OFFSET
SELECT DISTINCT salary
FROM employees
ORDER BY salary DESC
LIMIT 1 OFFSET 2; -- OFFSET = N-1
-- 注意:这种方式在不存在第 N 高时返回空而非 NULL
为什么用 DENSE_RANK 而非 ROW_NUMBER?

如果两个人薪资相同都是第 2 高,ROW_NUMBER 会给他们不同的排名(2 和 3),而 DENSE_RANK 会给他们相同的排名(都是 2),下一个人的排名是 3。薪水排名通常希望并列不跳号,所以用 DENSE_RANK


题目 2:连续 N 天登录

题意:找出连续登录 3 天及以上的用户 ID。

-- 差值分组法
WITH ranked AS (
SELECT user_id, login_date,
login_date - INTERVAL ROW_NUMBER() OVER (
PARTITION BY user_id ORDER BY login_date
) DAY AS group_date
FROM (SELECT DISTINCT user_id, DATE(login_time) AS login_date FROM logins) t
)
SELECT DISTINCT user_id
FROM ranked
GROUP BY user_id, group_date
HAVING COUNT(*) >= 3;

解题要点:先去重(同一天多次登录算一天),然后用日期减行号分组。


题目 3:每个部门薪资前 3 名

题意:查询每个部门中薪资排名前 3 的员工。

SELECT department, name, salary, rk
FROM (
SELECT
department, name, salary,
DENSE_RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS rk
FROM employees
) t
WHERE rk <= 3;

变体:如果要求严格前 3 名(不含并列),用 ROW_NUMBER 替代 DENSE_RANK


题目 4:同比环比计算

题意:计算每月销售额的环比增长率和同比增长率。

WITH monthly AS (
SELECT
DATE_FORMAT(order_date, '%Y-%m') AS month,
SUM(amount) AS revenue
FROM orders
GROUP BY DATE_FORMAT(order_date, '%Y-%m')
)
SELECT
month,
revenue,
-- 环比:与上月对比
LAG(revenue) OVER (ORDER BY month) AS prev_month,
ROUND((revenue - LAG(revenue) OVER (ORDER BY month))
/ NULLIF(LAG(revenue) OVER (ORDER BY month), 0) * 100, 2) AS mom_pct,
-- 同比:与去年同月对比
LAG(revenue, 12) OVER (ORDER BY month) AS prev_year,
ROUND((revenue - LAG(revenue, 12) OVER (ORDER BY month))
/ NULLIF(LAG(revenue, 12) OVER (ORDER BY month), 0) * 100, 2) AS yoy_pct
FROM monthly;

题目 5:找出从未下过单的用户

题意:查找注册了但从未下过单的用户。

-- 方法 1:LEFT JOIN + IS NULL(推荐,通常最快)
SELECT u.id, u.name
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE o.id IS NULL;

-- 方法 2:NOT EXISTS
SELECT u.id, u.name
FROM users u
WHERE NOT EXISTS (SELECT 1 FROM orders o WHERE o.user_id = u.id);

-- 方法 3:NOT IN(注意 NULL 陷阱!)
SELECT id, name FROM users
WHERE id NOT IN (SELECT user_id FROM orders WHERE user_id IS NOT NULL);
NOT IN 的 NULL 陷阱

如果子查询结果包含 NULL,NOT IN 会返回空集!

-- 假设 orders 中有 user_id = NULL 的记录
SELECT * FROM users WHERE id NOT IN (SELECT user_id FROM orders);
-- 返回空集!因为 id NOT IN (1, 2, NULL) 对任何 id 都不为 TRUE

题目 6:累计百分比(帕累托分析)

题意:计算每个产品的销售额占比和累计占比,找出贡献 80% 销售额的产品。

WITH product_sales AS (
SELECT product_id, SUM(amount) AS total_sales
FROM orders GROUP BY product_id
),
ranked AS (
SELECT
product_id,
total_sales,
ROUND(total_sales / SUM(total_sales) OVER () * 100, 2) AS pct,
ROUND(SUM(total_sales) OVER (ORDER BY total_sales DESC)
/ SUM(total_sales) OVER () * 100, 2) AS cumulative_pct
FROM product_sales
)
SELECT *,
CASE WHEN cumulative_pct <= 80 THEN '核心产品' ELSE '长尾产品' END AS category
FROM ranked
ORDER BY total_sales DESC;

题目 7:行转列

题意:原始数据是每行一个学生一门课的成绩,转为一行显示所有科目。

原始数据:

studentsubjectscore
张三语文90
张三数学85
张三英语92

期望结果:

student语文数学英语
张三908592
SELECT 
student,
MAX(CASE WHEN subject = '语文' THEN score END) AS '语文',
MAX(CASE WHEN subject = '数学' THEN score END) AS '数学',
MAX(CASE WHEN subject = '英语' THEN score END) AS '英语'
FROM scores
GROUP BY student;

题目 8:连续增长

题意:找出连续 3 个月营收增长的月份。

WITH monthly AS (
SELECT
DATE_FORMAT(order_date, '%Y-%m') AS month,
SUM(amount) AS revenue
FROM orders GROUP BY month
),
with_growth AS (
SELECT month, revenue,
revenue > LAG(revenue) OVER (ORDER BY month) AS is_growth
FROM monthly
),
grouped AS (
SELECT month, revenue, is_growth,
-- 对"是否增长"做差值分组
ROW_NUMBER() OVER (ORDER BY month)
- ROW_NUMBER() OVER (PARTITION BY is_growth ORDER BY month) AS grp
FROM with_growth
)
SELECT month, revenue
FROM grouped
WHERE is_growth = TRUE
GROUP BY grp, month, revenue
HAVING COUNT(*) OVER (PARTITION BY grp) >= 3;

-- 更简洁的写法:用 LAG 检查前 2 个月
WITH monthly AS (
SELECT DATE_FORMAT(order_date, '%Y-%m') AS month, SUM(amount) AS revenue
FROM orders GROUP BY month
)
SELECT month, revenue
FROM (
SELECT month, revenue,
LAG(revenue, 1) OVER (ORDER BY month) AS prev1,
LAG(revenue, 2) OVER (ORDER BY month) AS prev2
FROM monthly
) t
WHERE revenue > prev1 AND prev1 > prev2;

题目 9:中位数

题意:计算员工薪资的中位数。

-- 方法 1:ROW_NUMBER + 总行数(适用于所有数据库)
WITH ranked AS (
SELECT salary,
ROW_NUMBER() OVER (ORDER BY salary) AS rn,
COUNT(*) OVER () AS total
FROM employees
)
SELECT AVG(salary) AS median_salary
FROM ranked
WHERE rn IN (FLOOR((total + 1) / 2), CEIL((total + 1) / 2));

-- 方法 2:PERCENTILE_CONT(PostgreSQL)
-- SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY salary) FROM employees;

解题要点

  • 奇数行:中间那个值
  • 偶数行:中间两个值的平均
  • FLOOR((n+1)/2)CEIL((n+1)/2) 恰好覆盖两种情况

题目 10:树形结构查询

题意:给定员工表(有 manager_id 字段),查询每个员工的完整管理链。

-- 递归 CTE
WITH RECURSIVE hierarchy AS (
-- 基础条件:顶级管理者(没有上级)
SELECT id, name, manager_id, name AS chain, 1 AS level
FROM employees WHERE manager_id IS NULL

UNION ALL

-- 递归:找到当前层的下级
SELECT e.id, e.name, e.manager_id,
CONCAT(h.chain, ' → ', e.name),
h.level + 1
FROM employees e
JOIN hierarchy h ON e.manager_id = h.id
)
SELECT * FROM hierarchy ORDER BY chain;

题目 11:最大连续子序列和

题意:表中存储每天的利润(可能为负),找出最大连续天数的利润总和。

-- Kadane 算法的 SQL 版本
WITH daily AS (
SELECT date, profit,
SUM(profit) OVER (ORDER BY date) AS cumsum,
MIN(SUM(profit) OVER (ORDER BY date)) OVER (
ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
) AS min_cumsum_before
FROM daily_profit
)
SELECT MAX(cumsum - COALESCE(min_cumsum_before, 0)) AS max_subarray_sum
FROM daily;

题目 12:相邻行比较

题意:找出温度比前一天高的日期。

-- 方法 1:LAG(推荐)
SELECT date, temperature
FROM (
SELECT date, temperature,
LAG(temperature) OVER (ORDER BY date) AS prev_temp
FROM weather
) t
WHERE temperature > prev_temp;

-- 方法 2:自连接
SELECT w1.date, w1.temperature
FROM weather w1
JOIN weather w2 ON w1.date = DATE_ADD(w2.date, INTERVAL 1 DAY)
WHERE w1.temperature > w2.temperature;

题目 13:互相关注的用户

题意:关注表 follows(follower_id, followee_id),找出互相关注的用户对。

-- 方法 1:自连接
SELECT a.follower_id AS user1, a.followee_id AS user2
FROM follows a
JOIN follows b ON a.follower_id = b.followee_id AND a.followee_id = b.follower_id
WHERE a.follower_id < a.followee_id; -- 去重:只取一对

-- 方法 2:EXISTS
SELECT DISTINCT follower_id AS user1, followee_id AS user2
FROM follows f1
WHERE EXISTS (
SELECT 1 FROM follows f2
WHERE f2.follower_id = f1.followee_id AND f2.followee_id = f1.follower_id
)
AND follower_id < followee_id;

题目 14:分组后取最新记录

题意:取每个用户最新的一条订单。

-- 方法 1:ROW_NUMBER(推荐)
SELECT user_id, order_id, amount, order_date
FROM (
SELECT *, ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY order_date DESC) AS rn
FROM orders
) t
WHERE rn = 1;

-- 方法 2:相关子查询
SELECT * FROM orders o1
WHERE order_date = (
SELECT MAX(order_date) FROM orders o2 WHERE o2.user_id = o1.user_id
);
-- 注意:如果同一天有多条订单,方法 2 会返回多条

题目 15:留存率计算

题意:计算每天新注册用户的次日留存率。

WITH new_users AS (
SELECT user_id, MIN(DATE(login_time)) AS register_date
FROM logins GROUP BY user_id
),
day1_active AS (
SELECT n.register_date,
COUNT(DISTINCT n.user_id) AS new_count,
COUNT(DISTINCT l.user_id) AS retained_count
FROM new_users n
LEFT JOIN logins l ON n.user_id = l.user_id
AND DATE(l.login_time) = DATE_ADD(n.register_date, INTERVAL 1 DAY)
GROUP BY n.register_date
)
SELECT
register_date,
new_count AS '新增用户',
retained_count AS '次日活跃',
ROUND(retained_count / NULLIF(new_count, 0) * 100, 2) AS '次日留存率%'
FROM day1_active
ORDER BY register_date;

解题模式总结

题型核心技巧关键函数
排名/TopN窗口函数分区排序ROW_NUMBER/DENSE_RANK + PARTITION BY
连续/间断差值分组法ROW_NUMBER + DATE_SUB
同比环比偏移取值LAG/LEAD
行转列条件聚合CASE WHEN + MAX/SUM
累计计算窗口聚合SUM() OVER (ORDER BY ...)
不存在/反向查找LEFT JOIN + NULL / NOT EXISTSLEFT JOIN ... WHERE IS NULL
树形/层次递归 CTEWITH RECURSIVE
分组取最值窗口函数 + 过滤ROW_NUMBER() OVER (PARTITION BY ...) = 1

相关链接