特別注意 thrift 與 thrift2 的區別:新版本的 HBase 預設使用 thrift2,而 thrift2 相比 thrift 去掉了很多對 HBase 命令的支援。如果你要換用 thrift,只要停止 thrift2 服務、啟動 thrift 服務即可。
啟動、停止命令:
/hbase/bin/hbase-daemon.sh stop thrift2
/hbase/bin/hbase-daemon.sh start thrift
# -*- coding:utf-8 -*-
import os
import sys

try:
    from collections.abc import Iterable
except ImportError:  # Python 2 has no collections.abc
    from collections import Iterable
# HBase connection settings -- presumably the host and port of the Thrift
# server this script talks to (TODO confirm: no visible line reads them).
hbase_host = '127.0.0.1'
hbase_port = 9090
class hbase(object):
    """Convenience wrapper around a single HBase table.

    Every method addresses columns inside one column family whose name is
    the table name, so callers pass and receive bare column qualifiers
    (``'name'``) instead of full column names (``'<table>:name'``).
    """

    def __init__(self, _tablename, _families=None):
        """Open the connection and make sure the table exists.

        :param _tablename: table name; also the column family used by
            every other method.
        :param _families: optional dict ``{family_name: options}`` of extra
            column families to create together with the table.
        """
        # Imported lazily so the module (and pure helpers such as
        # ``dumps``) can be loaded without the Thrift client installed.
        # NOTE(review): the original never assigned ``self.connection``;
        # the method calls used throughout match happybase's Connection
        # and Table API -- confirm this is the intended client.
        import happybase

        self.connection = happybase.Connection(
            hbase_host, hbase_port, autoconnect=False)
        self.connection.open()
        self.tablename = _tablename
        self.families = _families
        self.create_table()

    def _column(self, _qualifier):
        """Return the full ``family:qualifier`` column name."""
        return '%s:%s' % (self.tablename, _qualifier)

    @staticmethod
    def _qualifier(_column):
        """Strip the ``family:`` prefix from a full column name.

        Only the first colon separates family and qualifier, so a
        qualifier that itself contains ``:`` is returned intact.
        """
        return _column.split(':', 1)[-1]

    def create_table(self):
        """Create the table if it does not exist yet.

        :return: None
        """
        families = dict(getattr(self, 'families', None) or {})
        # put/scan/count/row hard-code the table-named family, so it must
        # always be part of the schema.
        families.setdefault(self.tablename, dict())
        if self.tablename not in self.connection.tables():
            self.connection.create_table(self.tablename, families)

    def put(self, _rowkey, _fields):
        """Insert or update one row.

        :param _rowkey: string row key
        :param _fields: dict mapping column qualifier to value
        :return: None
        """
        # Debug trace kept from the original; formatted so the output is
        # the same on Python 2 and Python 3.
        print('%s %s' % (_rowkey, _fields))
        fields = {self._column(k): v for k, v in _fields.items()}
        table = self.connection.table(self.tablename)
        print('%s %s' % (_rowkey, fields))
        table.put(_rowkey, fields)

    def scan(self, _rowkey=None, _filters=None):
        """Scan the table with optional row-key and column-value conditions.

        All conditions are combined with AND.

        :param _rowkey: regular expression matched against the row key,
            e.g. ``'^138$'``
        :param _filters: dict mapping column qualifier to value. Each value
            is placed between ``^`` and ``$`` inside a regular expression
            without escaping, so it may itself contain regex syntax.
        :return: list of dicts, one per row, each with an ``'_id'`` key
        """
        clauses = []
        if _rowkey:
            clauses.append("RowFilter(=,'regexstring:%s')" % _rowkey)
        if _filters:
            for k, v in _filters.items():
                clauses.append(
                    "SingleColumnValueFilter('%s','%s',=,'regexstring:^%s$')"
                    % (self.tablename, k, v))
        # No condition at all means an unfiltered scan.
        filters = ' AND '.join(clauses) if clauses else None
        table = self.connection.table(self.tablename)
        rows = table.scan(filter=filters)
        return self.dumps(rows)

    def count(self, _field):
        """Count the rows returned when scanning a single column.

        :param _field: column qualifier to scan
        :return: int
        """
        field = self._column(_field)
        table = self.connection.table(self.tablename)
        return sum(1 for _ in table.scan(columns=[field]))

    def row(self, _rowkey, _columns=None):
        """Fetch one row by row key.

        :param _rowkey: string, e.g. ``'123'``
        :param _columns: list of column qualifiers, e.g.
            ``['name', 'domain']``; omit to fetch every column
        :return: dict of qualifier to value, plus ``'_id'`` set to the
            row key
        """
        columns = None
        if _columns:
            columns = [self._column(i) for i in _columns]
        table = self.connection.table(self.tablename)
        row = table.row(_rowkey, columns=columns)
        rtn = self.dumps(row)
        rtn['_id'] = _rowkey
        return rtn

    def rows(self, _rowkey):
        """Fetch several rows by row key.

        :param _rowkey: list of row keys, e.g. ``['123', '234']``
        :return: list of dicts, each with an ``'_id'`` key
        """
        table = self.connection.table(self.tablename)
        rows = table.rows(_rowkey)
        return self.dumps(rows)

    def dumps(self, _data):
        """Convert raw results into plain dicts keyed by column qualifier.

        :param _data: a dict of full column name to value (one row), or a
            list / generator of ``(row_key, dict)`` pairs (many rows)
        :return: a dict for dict input; otherwise a list of dicts, each
            with the row key under ``'_id'``. Anything that is not
            iterable yields an empty list.
        """
        if isinstance(_data, dict):
            return {self._qualifier(k): v for k, v in _data.items()}

        rtn = []
        # Lists and generators share one path: both yield (key, data) pairs.
        if isinstance(_data, Iterable):
            for key, data in _data:
                tmp = {'_id': key}
                tmp.update(
                    (self._qualifier(k), v) for k, v in data.items())
                rtn.append(tmp)
        return rtn

    def __del__(self):
        """Close the connection, if one was ever created."""
        # __init__ may have failed before the attribute was set.
        connection = getattr(self, 'connection', None)
        if connection is not None:
            connection.close()
if __name__ == '__main__':
    # Demo run against the table 'test1'. Each message is printed with a
    # single-argument print(...) so the output is the same on Python 2
    # and Python 3.
    print('start----------------')
    # Bound to ``client`` so the instance does not shadow the class name.
    client = hbase('test1')
    # NOTE(review): the put() call this message refers to is missing from
    # the source as received.
    print('insert data finshed')
    print(client.scan())
    print('scan data finshed')
    print(client.scan(_rowkey='1'))
    print('scan data finshed')
    # NOTE(review): the scan calls that preceded the next two messages are
    # missing from the source as received.
    print('scan data finshed')
    print('scan data finshed')
    print(client.row('111'))
    print('row data finshed')
    print(client.row('111', ['name']))
    print('row data finshed')
    print(client.rows(['111']))
    print('rows data finshed')
    print(client.count('name'))
    print('count data finshed')
    print('end-----------------------')
python操作 hbase 資料
Python 使用的套件是 thrift。個人使用的 Python IDE 是 PyCharm Community Edition。在工程設定中找到 Project Interpreter,在相應的工程下找到 Package,然後選擇新增,搜尋 hbase thrift python client for ...
用Python操作HBase之happybase
安裝thrift的具體操作,鏈結 port 埠 timeout 超時時間 autoconnect 連線是否直接開啟 table prefix 用於構造表名的字首 table prefix separator 用於table prefix的分隔符 compat 相容模式 transport 運輸模式 ...
Hbase 二 Hbase常用操作
常用shell命令 hbase shell命令 描述alter 修改列族 column family 模式 count 統計表中行的數量 create 建立表describe 顯示表相關的詳細資訊 delete 刪除指定物件的值 可以為表,行,列對應的值,另外也可以指定時間戳的值 deleteall...