import numpy as np
import pandas as pd

# Load the Titanic training data; the path is relative to the current
# working directory.
titanic_survival = pd.read_csv("titanic_train.csv")
# ---------------------------------------------------------------
# Count how many values in the "age" column are missing
# ---------------------------------------------------------------
age = titanic_survival["age"]
age_is_null = pd.isnull(age)  # boolean mask: True where the age is missing
age_null_true = age[age_is_null]  # keep only the missing entries
age_null_count = len(age_null_true)
print(age_null_count)
# ---------------------------------------------------------------
# Mean age, method one: filter out the missing values by hand
# ---------------------------------------------------------------
# Assign the non-null values of the "age" column to good_ages. A NaN left
# in the data would propagate through sum() and turn the mean into NaN.
good_ages = titanic_survival["age"][~age_is_null]
correct_mean_age = sum(good_ages) / len(good_ages)
print(correct_mean_age)
# ---------------------------------------------------------------
# Mean age, method two: let pandas handle the missing values
# ---------------------------------------------------------------
# Series.mean() excludes null values by default (skipna=True), so the
# result can be assigned to correct_mean_age without manual filtering.
correct_mean_age = titanic_survival["age"].mean()
print(correct_mean_age)
# ---------------------------------------------------------------
# Average ticket fare for each passenger class
# ---------------------------------------------------------------
passenger_classes = [1, 2, 3]
fares_by_class = {}
for this_class in passenger_classes:
    # All rows whose "pclass" equals the class being processed.
    pclass_rows = titanic_survival[titanic_survival["pclass"] == this_class]
    # The "fare" column of those rows.
    pclass_fares = pclass_rows["fare"]
    fare_for_class = pclass_fares.mean()
    fares_by_class[this_class] = fare_for_class
# NOTE(review): the original indentation was lost; printing the finished
# mapping once after the loop is assumed here — confirm.
print(fares_by_class)
# ---------------------------------------------------------------
# pivot_table: aggregate one or more columns grouped by an index column
# ---------------------------------------------------------------
# Aggregations are named by string ("mean", "sum") rather than passed as
# np.mean / np.sum: recent pandas releases emit a FutureWarning for NumPy
# callables here, and the string form keeps the current behavior.

# Mean of "survived" for each "pclass".
passenger_survival = titanic_survival.pivot_table(
    index="pclass",
    values="survived",
    aggfunc="mean",
)
print(passenger_survival)

# Mean of "age" for each "pclass".
passenger_age = titanic_survival.pivot_table(
    index="pclass",
    values="age",
    aggfunc="mean",
)
print(passenger_age)

# Sum of "fare" and "survived" for each "embarked" value.
port_stats = titanic_survival.pivot_table(
    index="embarked",
    values=["fare", "survived"],
    aggfunc="sum",
)
print(port_stats)
pandas 資料透視表
import numpy as np import pandas as pd df pd.dataframe dfout 38 a b c d e 0 foo one small 1 2 1 foo one large 2 4 2 foo one large 2 5 3 foo two small ...
pandas 交叉表與透視表
利用 pivot_table 函式可以實現透視表,pivot_table 函式的常用引數及其使用格式如下。pandas.pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, ...)
pandas資料預處理 缺失值
缺失值的分類:按照資料缺失機制可分為不可忽略的缺失(non-ignorable missing, NIM)或非隨機缺失(not missing at random, NMAR;或稱 missing not at random, MNAR):如果不完全變數中資料的缺失既依賴於完全變數又依賴於不完全變數本身,...