Hive Query操作示例
记录Hive CLI中常见的Query操作示例, 备用!
1.SELECT … FROM Clauses
? SELECT name, salary FROM employees;
? SELECT e.name, e.salary FROM employees e;
? SELECT name, subordinates[0] FROM employees; //For array
? SELECT name, deductions["State Taxes"] FROM employees; //For map
? SELECT name, address.city FROM employees; //For struct
2.Specify Columns with Regular Expressions
? SELECT symbol, `price.*` FROM stocks;
?
? SELECT upper(name), salary, deductions["Federal Taxes"],
? round(salary * (1 - deductions["Federal Taxes"])) FROM employees;
?
? SELECT count(*), avg(salary) FROM employees;
? SELECT count(DISTINCT symbol) FROM stocks;
? SELECT count(DISTINCT ymd), count(DISTINCT volume) FROM stocks;
3.LIMIT Clause
? SELECT upper(name), salary, deductions["Federal Taxes"],
? round(salary * (1 - deductions["Federal Taxes"])) FROM employees LIMIT 2;
4.Column Aliases
? SELECT upper(name), salary, deductions["Federal Taxes"] as fed_taxes,
? round(salary * (1 - deductions["Federal Taxes"])) as salary_minus_fed_taxes
? FROM employees LIMIT 2;
5.Nested SELECT Statements
? FROM (
? SELECT upper(name), salary, deductions["Federal Taxes"] as fed_taxes,
? round(salary * (1 - deductions["Federal Taxes"])) as salary_minus_fed_taxes
? FROM employees
? ) e
? SELECT e.name, e.salary_minus_fed_taxes
? WHERE e.salary_minus_fed_taxes > 70000;
6.CASE … WHEN … THEN Statements
? SELECT name, salary,
??? CASE
??? WHEN salary < 50000.0 THEN 'low'
??? WHEN salary >= 50000.0 AND salary < 70000.0 THEN 'middle'
??? WHEN salary >= 70000.0 AND salary < 100000.0 THEN 'high'
??? ELSE 'very high'
? END AS bracket FROM employees;
7.WHERE Clauses
? SELECT * FROM employees WHERE country = 'US' AND state = 'CA';
?
? SELECT name, salary, deductions["Federal Taxes"],
? salary * (1 - deductions["Federal Taxes"])
? FROM employees
? WHERE round(salary * (1 - deductions["Federal Taxes"])) > 70000;
?
? SELECT e.* FROM
? (SELECT name, salary, deductions["Federal Taxes"] as ded,
? salary * (1 - deductions["Federal Taxes"]) as salary_minus_fed_taxes
? FROM employees) e
? WHERE round(e.salary_minus_fed_taxes) > 70000;
?
? SELECT name, salary, deductions['Federal Taxes'] FROM employees
? WHERE deductions['Federal Taxes'] > cast(0.2 AS FLOAT);
8.LIKE and RLIKE
? SELECT name, address.street FROM employees WHERE address.street LIKE '%Ave.';
? SELECT name, address.city FROM employees WHERE address.city LIKE 'O%';
? SELECT name, address.street FROM employees WHERE address.street LIKE '%Chi%';
?
? SELECT name, address.street FROM employees
? WHERE address.street RLIKE '.*(Chicago|Ontario).*';
?
? SELECT name, address FROM employees
? WHERE address.street LIKE '%Chicago%' OR address.street LIKE '%Ontario%';
9.GROUP BY Clauses
? SELECT year(ymd), avg(price_close) FROM stocks
? WHERE exchange = 'NASDAQ' AND symbol = 'AAPL'
? GROUP BY year(ymd);
10.HAVING Clauses
? SELECT year(ymd), avg(price_close) FROM stocks
? WHERE exchange = 'NASDAQ' AND symbol = 'AAPL'
? GROUP BY year(ymd)
? HAVING avg(price_close) > 50.0;
?
? //下面的嵌套查询效果等价于上面的SQL
? SELECT s2.year, s2.avg FROM
? (SELECT year(ymd) AS year, avg(price_close) AS avg FROM stocks
? WHERE exchange = 'NASDAQ' AND symbol = 'AAPL'
? GROUP BY year(ymd)) s2
? WHERE s2.avg > 50.0;
11.JOIN Statements
? (1) Inner Join
? SELECT a.ymd, a.price_close, b.price_close
? FROM stocks a JOIN stocks b ON a.ymd = b.ymd
? WHERE a.symbol = 'AAPL' AND b.symbol = 'IBM';
?
? SELECT s.ymd, s.symbol, s.price_close, d.dividend
? FROM stocks s JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol
? WHERE s.symbol = 'AAPL';
?
? SELECT a.ymd, a.price_close, b.price_close , c.price_close
? FROM stocks a JOIN stocks b ON a.ymd = b.ymd
? JOIN stocks c ON a.ymd = c.ymd
? WHERE a.symbol = 'AAPL' AND b.symbol = 'IBM' AND c.symbol = 'GE';
? (2) LEFT OUTER JOIN
? SELECT s.ymd, s.symbol, s.price_close, d.dividend
? FROM stocks s LEFT OUTER JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol
? WHERE s.symbol = 'AAPL';
? (3) RIGHT OUTER JOIN
? SELECT s.ymd, s.symbol, s.price_close, d.dividend
? FROM dividends d RIGHT OUTER JOIN stocks s ON d.ymd = s.ymd AND d.symbol = s.symbol
? WHERE s.symbol = 'AAPL';
? (4) FULL OUTER JOIN
? SELECT s.ymd, s.symbol, s.price_close, d.dividend
? FROM dividends d FULL OUTER JOIN stocks s ON d.ymd = s.ymd AND d.symbol = s.symbol
? WHERE s.symbol = 'AAPL';
? (5) LEFT SEMI-JOIN
? SELECT s.ymd, s.symbol, s.price_close
? FROM stocks s LEFT SEMI JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol;
? (6) Cartesian Product JOINs
? SELECTS * FROM stocks JOIN dividends;
12.ORDER BY and SORT BY
? SELECT s.ymd, s.symbol, s.price_close
? FROM stocks s
? ORDER BY s.ymd ASC, s.symbol DESC;
?
? SELECT s.ymd, s.symbol, s.price_close
? FROM stocks s
? SORT BY s.ymd ASC, s.symbol DESC;
13.DISTRIBUTE BY with SORT BY
? SELECT s.ymd, s.symbol, s.price_close
? FROM stocks s
? DISTRIBUTE BY s.symbol
? SORT BY s.symbol ASC, s.ymd ASC;
14.CLUSTER BY
? SELECT s.ymd, s.symbol, s.price_close
? FROM stocks s
? CLUSTER BY s.symbol;
15.UNION ALL
? SELECT log.ymd, log.level, log.message
? FROM (
??? SELECT l1.ymd, l1.level,
??? l1.message, 'Log1' AS source
??? FROM log1 l1
??? UNION ALL
??? SELECT l2.ymd, l2.level,
??? l2.message, 'Log2' AS source
??? FROM log1 l2
? ) log
? SORT BY log.ymd ASC;
? FROM (
??? FROM src SELECT src.key, src.value WHERE src.key < 100
??? UNION ALL
??? FROM src SELECT src.* WHERE src.key > 110
? ) unioninput
? INSERT OVERWRITE DIRECTORY '/tmp/union.out' SELECT unioninput.*