python操作hbase 基於thrift服務

2021-08-25 19:26:41 字數 4537 閱讀 5726

特別注意

HBase 提供 thrift 與 thrift2 兩種 Thrift 服務。新版本的 HBase 預設使用 thrift2,而 thrift2 相比 thrift 去掉了很多對 HBase 命令的支援。如果你要改用 thrift,只要先停止 thrift2 服務,再啟動 thrift 服務即可。

啟動、停止命令:

/hbase/bin/hbase-daemon.sh stop thrift2

/hbase/bin/hbase-daemon.sh start thrift

# -*- coding:utf-8 -*-

import os
import sys

try:
    from collections.abc import Iterable
except ImportError:  # Python 2 has no collections.abc
    from collections import Iterable

# NOTE(review): neither constant is referenced anywhere in the visible listing;
# presumably they are the address of the HBase Thrift server started with
# `hbase-daemon.sh start thrift` -- confirm against your deployment.
# Loopback address, i.e. a server running on this machine.
hbase_host = '127.0.0.1'

# TCP port to connect to on that host.
hbase_port = 9090

class hbase(object):
    """Convenience wrapper around a single HBase table reached over Thrift.

    All columns live in one column family whose name equals the table name,
    so callers pass and receive bare field names (``name``) while the wire
    format uses ``<tablename>:<field>``.

    The calls made on the connection (``open``, ``close``, ``tables``,
    ``create_table``, ``table``) and on the table (``put``, ``scan``, ``row``,
    ``rows``) follow the happybase ``Connection`` / ``Table`` API.
    """

    def __init__(self, _tablename, _families=None, _connection=None):
        """Open the connection and make sure the table exists.

        :param _tablename: table name; also used as the column family name
        :param _families: accepted for backward compatibility; not used
        :param _connection: optional ready-made connection object exposing the
            happybase ``Connection`` interface. When omitted, a
            ``happybase.Connection`` to ``hbase_host:hbase_port`` is created.
        """
        if _connection is None:
            # The listing this class was recovered from never assigned
            # ``self.connection``. The import is local so the class can still
            # be imported (and used with an injected connection) when
            # happybase is not installed.
            import happybase
            _connection = happybase.Connection(
                host=hbase_host, port=hbase_port, autoconnect=False)
        self.connection = _connection
        self.connection.open()
        self.tablename = _tablename
        self.create_table()

    def create_table(self):
        """Create the table, with its single column family, if it is missing.

        :return: None
        """
        existing = set()
        for name in self.connection.tables():
            # Names may come back as bytes; normalise so the membership test
            # against the (text) table name cannot silently miss.
            if isinstance(name, bytes):
                name = name.decode('utf-8')
            existing.add(name)
        if self.tablename not in existing:
            families = {self.tablename: dict()}
            self.connection.create_table(self.tablename, families)

    def put(self, _rowkey, _fields):
        """Insert or update one row.

        :param _rowkey: string row key
        :param _fields: dict mapping bare field name -> value
        :return: None
        """
        columns = dict(
            (self._column(field), value) for field, value in _fields.items())
        self.connection.table(self.tablename).put(_rowkey, columns)

    def scan(self, _rowkey=None, _filters=None):
        """Scan the table; all supplied conditions are combined with AND.

        :param _rowkey: regular expression matched against the row key,
            e.g. ``^138$``
        :param _filters: dict mapping bare field name -> regular expression
            body; each value is anchored as ``^value$``
        :return: list of dicts, one per row, each carrying ``_id``
        """
        table = self.connection.table(self.tablename)
        rows = table.scan(filter=self._build_filter(_rowkey, _filters))
        return self.dumps(rows)

    def count(self, _field):
        """Count the rows returned by a scan restricted to one column.

        :param _field: bare field name to scan on
        :return: int
        """
        table = self.connection.table(self.tablename)
        return sum(1 for _ in table.scan(columns=[self._column(_field)]))

    def row(self, _rowkey, _columns=None):
        """Fetch one row by row key.

        :param _rowkey: string, e.g. ``'123'``
        :param _columns: optional list of bare field names,
            e.g. ``['name', 'domain']``; all columns when omitted
        :return: dict of field -> value plus ``_id`` set to ``_rowkey``
        """
        columns = None
        if _columns:
            columns = [self._column(field) for field in _columns]
        table = self.connection.table(self.tablename)
        rtn = self.dumps(table.row(_rowkey, columns=columns))
        rtn['_id'] = _rowkey
        return rtn

    def rows(self, _rowkey):
        """Fetch several rows by row key.

        :param _rowkey: list of row keys, e.g. ``['123', '234']``
        :return: list of dicts, one per row found, each carrying ``_id``
        """
        table = self.connection.table(self.tablename)
        return self.dumps(table.rows(_rowkey))

    def dumps(self, _data):
        """Strip the column-family prefix from raw results.

        :param _data: a ``{column: value}`` dict, or a list / generator of
            ``(rowkey, {column: value})`` pairs
        :return: dict for dict input; otherwise a list of dicts with the row
            key stored under ``_id`` (empty list for non-iterable input)
        """
        if isinstance(_data, dict):
            return dict(
                (self._strip_family(col), val) for col, val in _data.items())
        rtn = []
        if isinstance(_data, Iterable):
            for key, data in _data:
                item = {'_id': key}
                for col, val in data.items():
                    item[self._strip_family(col)] = val
                rtn.append(item)
        return rtn

    def close(self):
        """Close the connection if one was ever established."""
        connection = getattr(self, 'connection', None)
        if connection is not None:
            connection.close()

    def __del__(self):
        # ``connection`` is missing when __init__ failed early; close() copes.
        self.close()

    def _column(self, _field):
        """Return the fully qualified ``family:qualifier`` column name."""
        return '%s:%s' % (self.tablename, _field)

    def _build_filter(self, _rowkey, _filters):
        """Build the HBase filter-language string, or None for no filter."""
        clauses = []
        if _rowkey:
            clauses.append(
                "RowFilter(=,'regexstring:%s')" % self._quote(_rowkey))
        for field, pattern in (_filters or {}).items():
            clauses.append(
                "SingleColumnValueFilter('%s','%s',=,'regexstring:^%s$')"
                % (self._quote(self.tablename), self._quote(field),
                   self._quote(pattern)))
        # Filter names and the AND keyword are case-sensitive in the HBase
        # filter language.
        return ' AND '.join(clauses) if clauses else None

    @staticmethod
    def _quote(_value):
        """Escape single quotes for a filter-language string argument."""
        return ('%s' % (_value,)).replace("'", "''")

    @staticmethod
    def _strip_family(_column):
        """Drop the ``family:`` prefix; works for both text and bytes."""
        sep = b':' if isinstance(_column, bytes) else u':'
        # Split once only: a qualifier may itself contain ':'.
        return _column.split(sep, 1)[-1]

if __name__ == '__main__':
    # Demo run; needs a reachable HBase Thrift server.
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    # Some progress messages have no call in front of them: those calls are
    # absent from the listing and are deliberately not invented here.
    print('start----------------')

    # Use a separate name so the instance does not shadow the class.
    client = hbase('test1')
    print('insert data finshed')

    print(client.scan())
    print('scan data finshed')

    print(client.scan(_rowkey='1'))
    print('scan data finshed')
    print('scan data finshed')
    print('scan data finshed')

    print(client.row('111'))
    print('row data finshed')

    print(client.row('111', ['name']))
    print('row data finshed')

    print(client.rows(['111']))
    print('rows data finshed')

    print(client.count('name'))
    print('count data finshed')

    print('end-----------------------')

python操作 hbase 資料

python使用的包 thrift 個人使用的python 編譯器是pycharm community edition.在工程中設定中,找到project interpreter,在相應的工程下,找到package,然後選擇 新增,搜尋 hbase thrift python client for ...

用Python操作HBase之happybase

安裝thrift的具體操作,鏈結 port 埠 timeout 超時時間 autoconnect 連線是否直接開啟 table prefix 用於構造表名的字首 table prefix separator 用於table prefix的分隔符 compat 相容模式 transport 運輸模式 ...

Hbase 二 Hbase常用操作

常用shell命令 hbase shell命令 描述alter 修改列族 column family 模式 count 統計表中行的數量 create 建立表describe 顯示表相關的詳細資訊 delete 刪除指定物件的值 可以為表,行,列對應的值,另外也可以指定時間戳的值 deleteall...