내용

글번호 163
작성자 heojk
작성일 2016-10-26 16:16:42
제목 트위터 데이터 분석하기 쿼리문
내용
-- Word-frequency analysis of tweet text, written for Hive.
-- NOTE(review): the regexes below assume every row holds one raw tweet JSON
-- record in which a place key appears after the text key - confirm against
-- the actual files under /user/flume/tweets.

-- Expose the files under /user/flume/tweets as a table with one STRING column.
CREATE EXTERNAL TABLE tweets (line STRING)
LOCATION '/user/flume/tweets';

-- Step 1: inspect one raw row.
SELECT line
FROM tweets
LIMIT 1;

-- Step 2: cut out the span that starts at the text key and ends at the place key.
SELECT REGEXP_EXTRACT(line, "\"text\":.+,\"place\"", 0)
FROM tweets
LIMIT 1;

-- Step 3: drop the first 8 characters of that span (the text key, the colon
-- and, presumably, the opening quote of the value) by starting at position 9.
SELECT SUBSTRING(REGEXP_EXTRACT(line, "\"text\":.+,\"place\"", 0), 9)
FROM tweets
LIMIT 1;

-- Step 4: strip everything from the closing quote that precedes the place key
-- onwards, leaving only the tweet text.
SELECT
    REGEXP_REPLACE(
        SUBSTRING(REGEXP_EXTRACT(line, "\"text\":.+,\"place\"", 0), 9),
        "\",\"place\".*",
        ""
    )
FROM tweets
LIMIT 10;

-- Step 5: materialize the extracted text into its own table.
-- NOTE(review): LIMIT 10 keeps only ten (arbitrary, unordered) tweets, so the
-- word count below runs on a tiny sample. Remove the LIMIT to analyse every
-- collected tweet - confirm whether the small sample is intentional.
CREATE TABLE tweets_text AS
SELECT
    REGEXP_REPLACE(
        SUBSTRING(REGEXP_EXTRACT(line, "\"text\":.+,\"place\"", 0), 9),
        "\",\"place\".*",
        ""
    ) AS text
FROM tweets
LIMIT 10;

-- Step 6: top 100 words longer than two characters.
-- Everything from the first http onwards is removed before splitting.
-- The hyphen sits at the END of the character class on purpose: written
-- between the comma and the colon it would be read as a range that also
-- covers the slash and the digits 0-9, silently splitting on them.
-- The length filter is a row-level condition, so it lives in WHERE and short
-- tokens are discarded before grouping rather than after.
SELECT
    word,
    COUNT(*) AS cnt
FROM (
    SELECT
        EXPLODE(SPLIT(REGEXP_REPLACE(text, "http.*", ""), '[ .,:\"\'\t\n-]')) AS word
    FROM tweets_text
) w
WHERE LENGTH(word) > 2
GROUP BY word
ORDER BY cnt DESC
LIMIT 100;