1 大資料跑數shell學習

#!/bin/sh
# Batch job: download a packaged Python environment plus inputs from HDFS,
# extract data from Hive, run a Python script with the bundled interpreter,
# and upload the result file to an FTP server.
#
# External tools used: hdfs, unzip, hive, sed, ftp.

echo "清空同名檔案"
# NOTE(review): this removes bdp_venv.zip, but the archive downloaded and
# unpacked below is env.zip -- confirm which file name is intended.
rm -rf bdp_venv.zip

echo "獲取環境、指令碼、資料等檔案"
hdfs dfs -get "/user/0/upload/env.zip"
hdfs dfs -get "/user/0/upload/a.csv"
hdfs dfs -get "/user/0/upload/speed.py"

echo "開始解壓,不列印日誌"
unzip env.zip > /dev/null

echo "當前路徑"
cur_dir=$(pwd)

echo "hive取數檔案存放位置"
t_path="env/data/t.csv"
s_path="env/data/s.csv"

echo "hivesql"
# The date pattern must use MM (month) and HH (24-hour clock); lowercase
# mm means minutes and hh means 12-hour clock in Hive/Java date patterns.
sql_s="
select d_code,
       id,
       start_time,
       from_unixtime(cast(start_time as int), 'yyyy/MM/dd HH:mm:ss') s_time,
       end_time,
       from_unixtime(cast(end_time as int), 'yyyy/MM/dd HH:mm:ss') e_time,
       inc_day
from dm.data p_test
where inc_day between '20210101' and '20210131'
  and d_code like '5%'
order by start_time
"

echo "hive建表推資料，適用於資料量超大時，hive按行讀取慢"

echo "執行sql語句取數"
hive --incremental=true --outputformat=tsv2 -e "$sql_s" > "$s_path"

echo "getmerge的形式取出資料"
# The "**" in the URI is a redacted placeholder in the original; it is quoted
# here so the shell never attempts to glob-expand it.
hdfs dfs -getmerge "hdfs://**bd/user/hive/warehouse/d.db/exp_new/c_code=0" "$t_path"

echo "插入第一行表頭"
# NOTE(review): the sed expression below is kept exactly as found; it looks
# garbled by text extraction (probably a tab-separated header line) --
# confirm the intended column names before relying on it.
sed -i 1i\un\\id\\tspeed "$t_path"

echo "開始跑數"
echo "設定環境變數"
# The original read "$/env" (a literal string) and exported a lowercase
# "path", which does not affect command lookup. cur_dir was otherwise unused,
# so the environment root is taken to be ${cur_dir}/env.
export conda_home="${cur_dir}/env"
export PATH="${conda_home}/bin:${PATH}"

echo "開始執行python"
chmod 755 env/bin/python3.7
# NOTE(review): the file fetched above is speed.py but s.py is executed here;
# confirm the script name.
env/bin/python3.7 s.py
echo "結束執行python"

echo "結果推送至伺服器"
host='10.118.48.128'
user='test'
# NOTE(review): credential is hardcoded (redacted in the original); prefer
# reading it from the environment or a protected file.
passwd='******'

# Abort rather than upload from the wrong directory if the output is missing.
cd env/data/output || exit 1

echo "要推送的檔案"
file='res.csv'

echo "伺服器目標檔案路徑，建立以日期命名的資料夾"
dest=/000/bg_data/$(date +%m%d)

# -i: no per-file prompts for mdelete/mput; -n: no auto-login, so the
# credentials are sent explicitly with "quote"; -v: verbose.
# The commands are fed to ftp through a here-document terminated by end_script.
ftp -inv "$host" <<end_script
quote user $user
quote pass $passwd
binary
mkdir $dest
cd $dest
mdelete $file
mput $file
quit
end_script

exit 0

-- Default mode is strict, which requires at least one partition column to
-- carry a static value; nonstrict allows every partition column to be dynamic.
set hive.exec.dynamic.partition.mode=nonstrict;
-- Sorts rows by the dynamic partition columns so each reducer keeps only one
-- record writer open at a time. (This does not itself enable dynamic
-- partitioning; that is controlled by hive.exec.dynamic.partition.)
set hive.optimize.sort.dynamic.partition=true;
-- Upper bound on the dynamic partitions one statement may create in total,
-- and per mapper/reducer node; exceeding either limit raises an error.
set hive.exec.max.dynamic.partitions=1000;
set hive.exec.max.dynamic.partitions.pernode=1000;

-- Overwrite the existing data of partition c_code='0' in table exp.
insert overwrite table exp partition (c_code='0')
select id, speed
from dm.new
where inc_day between 20201222 and 20210224
  and c_code = '0';

1 大資料跑數shell學習

大數運算 1 大數儲存

1 大資料概述

1 大資料概述

1 大資料跑數shell學習

大數運算 1 大數儲存

1 大資料概述

1 大資料概述

相關推薦