import numpy as np
import pandas as pd
# Read the weight-loss dataset and inspect the January rows
weight_loss = pd.read_csv('data/weight_loss.csv')
weight_loss.query('month == "jan"')
#   name month    week  weight
# 0  bob   jan  week 1     291
# 1  amy   jan  week 1     197
# 2  bob   jan  week 2     288
# 3  amy   jan  week 2     189
# 4  bob   jan  week 3     283
# 5  amy   jan  week 3     189
# 6  bob   jan  week 4     283
# 7  amy   jan  week 4     190
# Define a function that computes the fractional weight change within a series
def find_perc_loss(s):
    """Return each value's fractional change relative to the first value.

    Args:
        s: A pandas Series of numeric values (here, the weekly weights of
            one person within one month). The first element by position
            is used as the baseline.

    Returns:
        A Series with the same index as ``s``. The first element is 0 and
        negative values mean the value dropped below the baseline.
    """
    # .iloc[0] is positional, so the baseline is the first row of the
    # series regardless of its index labels.
    return (s - s.iloc[0]) / s.iloc[0]
# Check bob's January result: select his January rows, then pass the
# weight column to find_perc_loss
bob_jan = weight_loss.query('name=="bob" and month=="jan"')
find_perc_loss(bob_jan['weight'])
'''0 0.000000
2 -0.010309
4 -0.027491
6 -0.027491
name: weight, dtype: float64
'''
# Group by name and month, then call transform with the function so that
# each group's weight values are converted independently
pcnt_loss = weight_loss.groupby(['name', 'month'])['weight'].transform(find_perc_loss)
pcnt_loss.head(8)
0 0.000000
1 0.000000
2 -0.010309
3 -0.040609
4 -0.027491
5 -0.040609
6 -0.027491
7 -0.035533
name: weight, dtype: float64
# The transform result has the same number of rows as the input, so it can
# be assigned back to the original DataFrame as a new column (rounded to 3
# decimals here);
# to keep the output short, show only bob's first two months
weight_loss['perc weight loss'] = pcnt_loss.round(3)
weight_loss.query('name=="bob" and month in ["jan", "feb"]')
#    name month    week  weight  perc weight loss
# 0   bob   jan  week 1     291             0.000
# 2   bob   jan  week 2     288            -0.010
# 4   bob   jan  week 3     283            -0.027
# 6   bob   jan  week 4     283            -0.027
# 8   bob   feb  week 1     283             0.000
# 10  bob   feb  week 2     275            -0.028
# 12  bob   feb  week 3     268            -0.053
# 14  bob   feb  week 4     268            -0.053
# Week 4 of each month is what matters most, so keep only the week 4 rows
week4 = weight_loss.query('week == "week 4"')
week4
#    name month    week  weight  perc weight loss
# 6   bob   jan  week 4     283            -0.027
# 7   amy   jan  week 4     190            -0.036
# 14  bob   feb  week 4     268            -0.053
# 15  amy   feb  week 4     173            -0.089
# 22  bob   mar  week 4     261            -0.026
# 23  amy   mar  week 4     170            -0.017
# 30  bob   apr  week 4     250            -0.042
# 31  amy   apr  week 4     161            -0.053
# Reshape the DataFrame with pivot so that amy's and bob's values sit side
# by side: one row per month, one column per name
winner = week4.pivot(index='month', columns='name', values='perc weight loss')
winner
# name     amy    bob
# month
# apr   -0.053 -0.042
# feb   -0.089 -0.053
# jan   -0.036 -0.027
# mar   -0.017 -0.026
# Use np.where to pick each month's winner: the person with the lower (more
# negative) percentage wins; a tie falls through to 'bob'
winner['winner'] = np.where(winner['amy'] < winner['bob'], 'amy', 'bob')
# Highlight the row-wise minimum of the two numeric columns only; the
# 'winner' column holds strings and cannot be compared with the floats
winner.style.highlight_min(axis=1, subset=['amy', 'bob'])
# name        amy        bob winner
# month
# apr   -0.053000  -0.042000    amy
# feb   -0.089000  -0.053000    amy
# jan   -0.036000  -0.027000    amy
# mar   -0.017000  -0.026000    bob
# Use value_counts() on the winner column to get the final score
winner.winner.value_counts()
'''amy 3
bob 1
name: winner, dtype: int64
'''
# By default pandas sorts the pivoted index alphabetically, so the months
# come out as apr, feb, jan, mar. Work on a copy so week4 is left untouched.
week4a = week4.copy()
# unique() keeps order of first appearance, which here is chronological
month_chron = week4a['month'].unique()
month_chron
#array(['jan', 'feb', 'mar', 'apr'], dtype=object)
# Convert month to an ordered Categorical so the pivot sorts it in
# chronological order instead of alphabetically
week4a['month'] = pd.Categorical(week4a['month'], categories=month_chron, ordered=True)
week4a.pivot(index='month', columns='name', values='perc weight loss')
# name     amy    bob
# month
# jan   -0.036 -0.027
# feb   -0.089 -0.053
# mar   -0.017 -0.026
# apr   -0.053 -0.042
排除表和query查詢條件的expdp impdp
排除某些表的expdp impdp expdp system 123456 directory data pump dir dumpfile wf.dmp logfile wf.log schemas winwf exclude table in table name1 expdp system 1...
高階查詢query
子條查詢 特定字段查詢所有特定值 子條件查詢 query context filter context 兩種查詢方式 1 query context 在查詢過程中,除了判斷文件是否滿足查詢條件之外,es還會計算乙個 score來標識匹配的程度,旨在判斷目標文件和查詢條件匹配的有多好 吻合度 常用查詢...
SQLAlchemy的查詢操作Query
查詢操作 查詢子句使用session的.query 方法來獲取query查詢物件。查詢物件能夠使用一些方法來對應一些查詢子句,比如.order by limit filter 等。查詢物件有這麼幾種方法.one all scalar one or none get 以及.first 等。下面對這幾個...