DataX資料同步

2021-09-11 07:01:51 字數 3973 閱讀 8160

wget http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz

tar -xvf datax.tar.gz

vi ~/.bash_profile

source ~/.bash_profile

在$datax_home/job目錄下,編寫同步指令碼:

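(原文的 JSON 在擷取時遺失了部分內容,以下依 DataX mysqlreader / mysqlwriter 的標準結構重建;尖括號內的值請依實際環境填寫,${table} 由執行時的 -Dtable 參數傳入。)

{
    "job": {
        "content": [{
            "reader": {
                "name": "mysqlreader",
                "parameter": {
                    "column": ["*"],
                    "connection": [{
                        "jdbcUrl": ["jdbc:mysql://<來源主機>:3306/<來源資料庫>"],
                        "table": ["${table}"]
                    }],
                    "username": "root",
                    "password": "123456",
                    "splitPk": "id"
                }
            },
            "writer": {
                "name": "mysqlwriter",
                "parameter": {
                    "column": ["*"],
                    "connection": [{
                        "jdbcUrl": "jdbc:mysql://<目標主機>:3306/<目標資料庫>",
                        "table": ["${table}"]
                    }],
                    "username": "root",
                    "password": "123456"
                }
            }
        }],
        "setting": {
            "speed": {
                "channel": 1
            }
        }
    }
}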

指令碼配置說明:

1、確保已經安裝python3,如果沒有按如下命令安裝:

wget https://www.python.org/ftp/python/3.6.5/Python-3.6.5.tar.xz

tar -xvJf Python-3.6.5.tar.xz

cd Python-3.6.5

yum -y install zlib zlib-devel openssl-devel

mkdir /usr/local/python3

./configure --prefix=/usr/local/python3 --with-ssl

make && make install

ln -s /usr/local/python3/bin/python3 /usr/bin/python3

ln -s /usr/local/python3/bin/pip3 /usr/bin/pip3

驗證是否安裝成功:python3 -V

2、安裝pymysql和apscheduler

pip3 install pymysql

pip3 install apscheduler

3、定時指令碼

#!/usr/bin/python3

# -*- coding: utf-8 -*-

import datetime
import os
import threading

import pymysql
from apscheduler.schedulers.blocking import BlockingScheduler

# Get yesterday's date
def getlastdate():
    """Return yesterday's date (local time) formatted with ``'%y%m%d'``.

    ad_syncer appends this string to a table-name prefix to build the name
    of the table to synchronise.

    Returns:
        str: Yesterday's date, e.g. ``'210910'`` for 2021-09-10.
    """
    yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
    # NOTE(review): '%y' produces a two-digit year. If the dated tables use a
    # four-digit year suffix (e.g. 20210910) this must be '%Y%m%d' — confirm
    # against the real table names.
    return yesterday.strftime('%y%m%d')


# Sync thread

class syncerthread(threading.Thread):
    """Worker thread that runs one ad_syncer pass for a table-name prefix."""

    def __init__(self, threadid, name):
        """Store the thread's identifier and the table-name prefix.

        Args:
            threadid: Caller-chosen identifier, kept on ``self.threadid``.
            name: Table-name prefix. It is stored on ``self.name`` (which is
                also the thread's name) and passed to ad_syncer by ``run``.
        """
        threading.Thread.__init__(self)
        self.threadid = threadid
        self.name = name

    def run(self):
        """Synchronise the table identified by ``self.name``."""
        ad_syncer(self.name)

# Data sync
def ad_syncer(name):
    """Run the DataX job for yesterday's dated table, if that table exists.

    The table name is ``name`` followed by the string from getlastdate().
    The function asks the ``scene`` database whether that table exists and,
    if so, launches ``datax.py`` with the job file ``<name>.json`` and the
    table name passed as the ``table`` job parameter.

    Args:
        name (str): Table-name prefix; also the base name of the DataX job
            file under ``$datax_home/job``.
    """
    # Read the date once so the log line and the table name always agree,
    # even if the call straddles midnight.
    last_date = getlastdate()
    print('同步日期:{}'.format(last_date))
    table = name + last_date

    # PyMySQL 1.0+ accepts connection settings as keyword arguments only.
    db = pymysql.connect(
        host="10.10.4.202",
        user="root",
        password="123456",
        database="scene",
    )
    try:
        with db.cursor() as cursor:
            # Let the driver quote the pattern instead of concatenating SQL.
            cursor.execute("show tables like %s", (table,))
            data = cursor.fetchone()
    finally:
        # The connection is only needed for the existence check; release it
        # before the DataX run, and also when the query fails.
        db.close()

    if data is None:
        print('{}不存在'.format(table))
        return

    print("開始同步{}".format(table))
    # JVM options are case-sensitive: -Xms/-Xmx set the heap size and -D
    # defines the ${table} job parameter. The environment variable name must
    # match the one exported in ~/.bash_profile.
    command = (
        'python $datax_home/bin/datax.py --jvm="-Xms2g -Xmx2g" '
        '-p "-Dtable={table}" $datax_home/job/{name}.json'
    ).format(table=table, name=name)
    status = os.system(command)
    if status != 0:
        # Do not report success when DataX exited with an error.
        print("同步失敗{} (exit status {})".format(table, status))
        return
    print("同步完成{}".format(table))


# Start

def start_syncer():
    """Run one synchronisation pass and wait for it to finish.

    Starts a single syncerthread for the request-event-log table prefix and
    blocks until it completes.
    """
    # Create the thread
    syncer1 = syncerthread(1, "tb_scene_site_request_event_log")  # Plan A request log
    # Start the thread
    syncer1.start()
    syncer1.join()
    print('同步結束')


if __name__ == '__main__':
    scheduler = BlockingScheduler()
    # Cron trigger with hour='2': run start_syncer on the 02:00 schedule.
    scheduler.add_job(start_syncer, 'cron', hour='2')
    print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C'))

    try:
        # BlockingScheduler.start() blocks until the scheduler is stopped.
        scheduler.start()
    except (KeyboardInterrupt, SystemExit):
        scheduler.shutdown()

4、相關命令

1)啟動

nohup python3 scene_site_syncer.py &
2)停止

#!/usr/bin/python

#coding=utf-8

import sys,os

def kill_crawler(id):
    """Kill running ``scene_site_syncer*`` Python processes.

    Lists processes with ``ps -ef | grep python``, takes the last column of
    each line as the script name and the second column as the PID, and sends
    ``kill -9`` to every match.

    Args:
        id (str): Task id to stop, or ``'0'`` to stop every matching process.
    """
    cmd = 'ps -ef | grep python'
    f = os.popen(cmd)
    try:
        txt = f.readlines()
    finally:
        # Close the pipe even if reading fails.
        f.close()

    for line in txt:
        colum = line.split()
        if len(colum) < 2:
            # Blank or truncated line: no PID column to read.
            continue
        pid = colum[1]
        name = colum[-1]
        if not name.startswith('scene_site_syncer'):
            continue
        # NOTE(review): for 'scene_site_syncer.py' this slice yields
        # 'e_syncer', so in practice only id == '0' matches — confirm the
        # intended script-naming scheme before relying on per-task ids.
        task_id = name[9:-3]
        if task_id == id or id == '0':
            cmd = "kill -9 %d" % int(pid)
            rc = os.system(cmd)
            # Single-argument print() behaves the same on Python 2 and 3.
            if rc == 0:
                print("stop \"%s\" success!!" % name)
            else:
                print("stop \"%s\" failed!!" % name)

if __name__ == '__main__':
    # Exactly one command-line argument is expected: the task id to stop.
    if len(sys.argv) != 2:
        print(u'輸入要結束的任務編號,0代表停止所有')
        sys.exit()

    task_id = sys.argv[1]
    kill_crawler(task_id)

同步資料庫神器DataX

DataX 同步資料的步驟很簡單,但不少人寫得花裡胡哨,所以自己總結一篇。在 Windows 上試過,估計 Linux 也一樣,步驟如下:安裝 DataX 要求 Python 2 或 3 以及 JDK;解壓出來是這樣;然後開啟 DataX 的 bin 資料夾,在這個目錄下面用 cmd 執行指令碼命令;中文亂碼處理:回車後就不是中文亂碼...

DataX的使用 大資料同步技術

準備工作 2.datax的安裝部署 3.datax的使用python版本要求 2.7.x,datax未更新至python3win10下python 2.7與python 3.6雙環境安裝 教程 設計json文件 sqlserver to mysql errorlimit content writer...

DataX增量同步到ADB ADS時報錯

2020 07 24 11 23 12.145 0 0 0 writer warn commonrdbmswriter task 回滾此次寫入,採用每次寫入一行方式提交.因為 9001,2020072411231201011105516903453695186 unsupport packet 03...