# _*_coding:utf-8_*_
# author: 稻田工作者
# date: 2020-06-13
"""根據原始樣本對應的突變資訊從資料分析檔案中提取檢出結果,如:
原始樣本lc-br3對應的突變資訊如下:
nm_000245.2:exon14_intron14:c.3028_3028+16del17:p.?
nm_005228.3:exon20:c.2290_2291ins12:p.a763_y764insfqea
nm_000245.2:intron13:c.2888-41_2888-2deltagccgtctttaacaagctctttctttctctctgttttaa:p.?
即:需要從分析檔案中提取該突變資訊對應的sample_name,depth,frequency,cds_change,var_ss對應的檔案內index(0,9,10,14,28)
"""
import csv
import os

import pandas as pd
import xlrd
# 根據樣本對應突變list生成樣本和突變的字典
def generate_sample_dict(work_path, mutation_dict):
    """Fill *mutation_dict* with the sample -> mutations mapping from ``list.txt``.

    ``list.txt`` is read from *work_path*. Each line is split on tabs: the
    first field is used as the sample key and the second field as one
    mutation string for that sample. A sample that appears on several
    lines accumulates all of its mutations in a list.

    Args:
        work_path: Directory that contains ``list.txt``.
        mutation_dict: Dictionary updated in place; maps each sample key to
            a list of mutation strings.

    Returns:
        The same *mutation_dict* object that was passed in.

    Raises:
        OSError: If ``list.txt`` cannot be opened.
    """
    list_path = os.path.join(work_path, "list.txt")
    with open(list_path, "r") as list_file:
        for line in list_file:
            fields = line.strip().split("\t")
            # Skip blank lines and lines with a missing or empty mutation
            # field instead of failing with IndexError.
            if len(fields) < 2 or fields[1] == "":
                continue
            s_key, s_value = fields[0], fields[1]
            # NOTE(review): values are kept as a list per sample because
            # extract_info tests membership with ``in``; confirm this matches
            # the intended list.txt layout.
            mutation_dict.setdefault(s_key, []).append(s_value)
    return mutation_dict
def extract_info(work_path, data_xlxs_name, mutation_dict):
    """Copy the workbook rows that match *mutation_dict* into a CSV file.

    Reads sheet ``snvindelhotspot`` of ``<data_xlxs_name>.xlsx`` in
    *work_path* and writes ``<data_xlxs_name>_filter_data.csv`` to the same
    directory. A data row is kept when some key of *mutation_dict* is
    contained in the row's sample name (column 0) and the row's cds_change
    (column 14) is contained in that key's value. Each kept row is written
    once, even if several keys match it.

    Args:
        work_path: Directory holding the workbook; the CSV is written here.
        data_xlxs_name: Workbook file name without the ``.xlsx`` extension.
        mutation_dict: Mapping of sample key to its mutation strings.

    Returns:
        None. The result is the CSV file written to disk.
    """
    # Output column name -> column index in the source sheet.
    columns = (
        ("#sample_name", 0),
        ("depth", 9),
        ("frequency", 10),
        ("cds_change", 14),
        ("var_ss", 28),
    )
    header = [name for name, _ in columns]
    workbook_path = os.path.join(work_path, "%s.xlsx" % data_xlxs_name)
    csv_path = os.path.join(work_path, "%s_filter_data.csv" % data_xlxs_name)

    # NOTE(review): xlrd 2.x reads only .xls files; opening an .xlsx needs
    # xlrd < 2.0 -- confirm the installed version.
    data_xlxs = xlrd.open_workbook(workbook_path)
    data_sheet = data_xlxs.sheet_by_name("snvindelhotspot")

    # Written as UTF-8 because csv2xls reads this file back as UTF-8.
    with open(csv_path, "w", newline="", encoding="utf-8") as outfile:
        write_csv = csv.DictWriter(outfile, fieldnames=header)
        write_csv.writeheader()
        # Row 0 is skipped (treated as the sheet's header row).
        for n in range(1, data_sheet.nrows):
            sample_name = data_sheet.cell(n, 0).value
            cds_change = data_sheet.cell(n, 14).value
            matched = any(
                k in sample_name and cds_change in v
                for k, v in mutation_dict.items()
            )
            if matched:
                write_csv.writerow(
                    {name: data_sheet.cell(n, idx).value for name, idx in columns}
                )
def csv2xls(work_path, data_xlxs_name):
    """Convert ``<data_xlxs_name>_filter_data.csv`` into an Excel workbook.

    The CSV is read from *work_path* as UTF-8 with its first column used as
    the index, then written to ``<data_xlxs_name>_filter_data.xlsx`` in the
    same directory, on a sheet named ``filter_data``.

    Args:
        work_path: Directory holding the CSV; the workbook is written here.
        data_xlxs_name: Base name shared by the CSV and the workbook.

    Returns:
        None. The result is the workbook written to disk.
    """
    # os.path.join keeps the path valid on any platform, not only Windows.
    base_path = os.path.join(work_path, "%s_filter_data" % data_xlxs_name)
    csv_file = pd.read_csv(
        base_path + ".csv", encoding="utf-8", index_col=0, engine="python"
    )
    csv_file.to_excel(base_path + ".xlsx", sheet_name="filter_data")
if __name__ == '__main__':
    # Script entry point: ask for the working directory, load the
    # sample/mutation list, ask for the workbook name, then filter the
    # workbook to CSV and convert that CSV to an Excel file.
    work_path = input("輸入list及源分析檔案路徑: ")
    mutation_dict = {}
    generate_sample_dict(work_path, mutation_dict)

    data_xlxs_name = input("輸入需要提取資料的原始檔名稱:")
    extract_info(work_path, data_xlxs_name, mutation_dict)
    csv2xls(work_path, data_xlxs_name)
java 根據條件從List中篩選出符合條件的集合
1 list 你要在裡面篩選的物件集合 存放格式例如 list.add user1 list.add user2 list.add user3 2 tablecolumnname user 裡面的屬性字段 例如 name 3 agers name欄位的屬性值,也就是你要篩選出 name 的名稱為 張...
python 中從list表中找出質數
很抱歉之前上傳了一個錯誤的方法,後來自己檢查發現是錯的。以下是我花了半個小時寫的,真的對不起之前的那些看了我的博文的同行,def zhishu list for i in list 遍歷列表 for j in range 2,int i 2 這裡是判斷這個數能不能給除了1和本身以外的數整除,剛開始的...
List中根據class實體的某個屬性去重或排序
去重 根據alarmentity.class的一個屬性sourceproblem去重,並保留最新 alarms是按照時間順序儲存的 private static listremoveduplicatealarms listalarms 為了保留最新,所以list需要使用reverse從後往前讀取 c...