hive를 통한 hadoop 데이터 등록 클라우드/빅데이터/가상화

[ hive 접속 및 쿼리 실행 ]
$ beeline
Beeline version 1.2.1000.2.5.0.0-1245 by Apache Hive
beeline> !connect jdbc:hive2://localhost:10000
Connecting to jdbc:hive2://localhost:10000
Enter username for jdbc:hive2://localhost:10000: hdfs
Enter password for jdbc:hive2://localhost:10000: 
Connected to: Spark SQL (version 1.6.2)
Driver: Hive JDBC (version 1.2.1000.2.5.0.0-1245)
Transaction isolation: TRANSACTION_REPEATABLE_READ
0: jdbc:hive2://localhost:10000> show tables;

[ thrift 모듈 설치(1) ]
pip install hive_utils

[ thrift 클라이언트(2) ]
#!/usr/bin/env python
# -*-coding:utf-8-*-

import sys

from hive_service import ThriftHive
from hive_service.ttypes import HiveServerException
from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol

try:
    transport = TSocket.TSocket('localhost', 10000)
    transport = TTransport.TBufferedTransport(transport)
    protocol = TBinaryProtocol.TBinaryProtocol(transport)

    client = ThriftHive.Client(protocol)
    transport.open()

    client.execute("SELECT * FROM table_name")
    while (1):
      row = client.fetchOne()
      if (row == None):
        break
      print row

    transport.close()

except Thrift.TException, tx:
    print '%s' % (tx.message)

[ hive 외부 명령어 실행 ]
$ hive -S -e 'SELECT * FROM tb_test LIMIT 1;'

[ beeline 외부 명령을 통한 파일 등록 ]
- 명령어 실행 방법
  아래 명령을 실행하면 로그인 후 file.hql을 호출하여 test.txt의 내용을 등록한다.(터미널에서 확인 가능)
$ beeline -u "jdbc:hive2://localhost:10000/" -f file.hql HDFSDIR=/tmp
- file.hql 데이터
$ cat file.hql
LOAD DATA LOCAL INPATH '/home/hdfs/test/test.txt' INTO TABLE tb_test;

$ head test.txt => 각 행의 필드 수가 테이블 컬럼 수와 동일해야 함
honggildong
leesunmee
jangjang
Jack kim

=============================================

hive-site.xml에 등록
하위 디렉토리 모든 파일 검색 가능하게 조치
set hive.input.dir.recursive=true;
set hive.mapred.supports.subdirectories=true;
set hive.supports.subdirectories=true;
set mapred.input.dir.recursive=true;

중요) Hive는 테이블 이름과 디렉토리 경로의 대소문자를 구분하지 않으므로 소문자 사용을 권장한다.
-- Tweet data table: comma-delimited text files, one record per line.
-- Hive identifiers are case-insensitive, so snake_case names are used.
-- NOTE: LOCATION must be a directory — Hive reads every file under it;
-- a '*' glob in LOCATION is invalid and has been removed.
CREATE TABLE tweet (
    update_date   STRING,
    tweet_id      STRING,
    screen_nm     STRING,
    text          STRING,
    create_dt_hms STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/dataset/data/tweet';
------------------
-- BUG FIX: the original created table `user` but loaded into `users`.
-- One consistent name is used; `users` also avoids the reserved word `user`.
CREATE EXTERNAL TABLE users (id INT, name STRING);
LOAD DATA INPATH "/data/user/*/*" INTO TABLE users;
----------------------
-- Managed table for comma-delimited data: two INT and two STRING columns.
CREATE TABLE mytable (
    num1  INT,
    text1 STRING,
    num2  INT,
    text2 STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ",";

-- Replace any existing contents of mytable with rows from the local CSV.
LOAD DATA LOCAL INPATH '/data/data.csv' OVERWRITE INTO TABLE mytable;
-----------------

덧글

댓글 입력 영역