내용 |
-- Raw tweet records exposed as a single STRING column named line.
-- EXTERNAL means Hive only registers metadata over the files already
-- stored under LOCATION and dropping the table leaves those files in place.
-- No ROW FORMAT is declared, so the Hive default text format applies.
-- NOTE(review): the path suggests the files are delivered by Flume - confirm.
-- IF NOT EXISTS keeps the script re-runnable once the table is registered.
CREATE EXTERNAL TABLE IF NOT EXISTS tweets (
    line STRING
)
LOCATION '/user/flume/tweets';
-- Sanity check: show one raw row to confirm the external table is readable.
SELECT
    *
FROM tweets
LIMIT 1;
-- Extraction step 1: isolate the span of the raw line that starts at the
-- text key and runs through the place key. Group index 0 returns the whole
-- match, so both key names are still included in the output.
SELECT
    REGEXP_EXTRACT(
        t.line,
        "\"text\":.+,\"place\"",
        0
    )
FROM tweets t
LIMIT 1;
-- Extraction step 2: take the step 1 match and keep everything from
-- character 9 onward. The first 8 characters are the text key, its colon
-- and one more character, presumably the opening quote of the value.
SELECT
    SUBSTRING(m.matched, 9)
FROM (
    SELECT
        REGEXP_EXTRACT(line, "\"text\":.+,\"place\"", 0) AS matched
    FROM tweets
) m
LIMIT 1;
-- Extraction step 3: after trimming the leading key (step 2), also strip
-- the trailing part that begins at the quote-comma-place sequence, which
-- leaves only the tweet text. Previews ten rows.
SELECT
    REGEXP_REPLACE(s.from_value, "\",\"place\".*", "")
FROM (
    SELECT
        SUBSTRING(m.matched, 9) AS from_value
    FROM (
        SELECT
            REGEXP_EXTRACT(line, "\"text\":.+,\"place\"", 0) AS matched
        FROM tweets
    ) m
) s
LIMIT 10;
-- Materialize the extracted tweet text into tweets_text (single column text).
-- The pipeline is: match from the text key through the place key, drop the
-- first 8 characters, then cut off everything from quote-comma-place onward.
-- NOTE(review): LIMIT 10 without ORDER BY stores an arbitrary ten rows only.
-- Confirm that a ten-row sample is what downstream queries expect.
CREATE TABLE tweets_text AS
SELECT
    REGEXP_REPLACE(s.from_value, "\",\"place\".*", "") AS text
FROM (
    SELECT
        SUBSTRING(m.matched, 9) AS from_value
    FROM (
        SELECT
            REGEXP_EXTRACT(line, "\"text\":.+,\"place\"", 0) AS matched
        FROM tweets
    ) m
) s
LIMIT 10;
-- Top 100 most frequent words (longer than 2 characters) in tweets_text.
--
-- Inner query: remove URLs, split the remaining text on the delimiter set
-- and emit one row per token via EXPLODE.
--   * URL removal uses http followed by non-whitespace characters, so only
--     the URL itself is removed and the words after it are still counted.
--   * The hyphen sits at the END of the character class so it is a literal
--     hyphen. Placed between the comma and the colon it formed a range that
--     also treated the slash and the digits 0-9 as delimiters.
-- Outer query: the length check is a per-row filter, so it is applied in
-- WHERE before grouping rather than in HAVING. Ties on cnt are broken by
-- word to keep the result deterministic.
SELECT
    w.word,
    COUNT(1) AS cnt
FROM (
    SELECT
        EXPLODE(
            SPLIT(
                REGEXP_REPLACE(text, "http\\S*", ""),
                '[ .,:\"\'\t\n-]'
            )
        ) AS word
    FROM tweets_text
) w
WHERE LENGTH(w.word) > 2
GROUP BY w.word
ORDER BY cnt DESC, word
LIMIT 100;
|