import pickle
from textrank4zh import textrank4keyword, textrank4sentence
import os
from snownlp import snownlp
import jieba
import jieba.analyse
from bosonnlp import bosonnlp
def main():
    """Script entry point: run the TextRank key-sentence extraction demo."""
    # The alternative textrank2() pipeline is currently commented out below.
    textranktest1()
## def textrank2():
# """"載入50萬資料"""
## path = "d:\\150w"
# file = open(path, 'rb')
# a = pickle.load(file)
# # print(a)
# # tr4s = textrank4sentence()
# ll =
# i = 0
# for item in a:
# print(item[0])
# print("***************=標題**********===")
# print(item[1])
# rowitem = item[2]
# s = snownlp(rowitem)
# print("***************====原文***************====")
# print(rowitem)
# # tr4s.analyze(text=rowitem, lower=true, source='all_filters')
# # print(rowitem)
# # print("\033[1;31m%s\033[43m" %rowitem)
# ceshi =
# nlp = bosonnlp('lsfw0zxs.17321.5fbmjszhbwev')
# print("***************===情感分析********************")
# print(nlp.sentiment(rowitem)) # 情感分析結果分別為 「非負面」 和 「負面」 概率組成的列表。
# print('***************===摘要********************=')
# tags_output = jieba.analyse.extract_tags(rowitem, topk=20, withweight=true)
# print(tags_output)
# print(s.summary(5))
# i = i + 1
# if i > 100:
# break
# # print(i)
## current_dir = os.path.abspath('.')
# file_name2 = os.path.join(current_dir, 'abstract.csv')
# f2 = open(file_name2, 'w+', encoding='utf8')
## for item in ll:
# f2.write("\n")
# f2.write("*****=測試(原文)====")
# f2.write("\n")
# f2.write(str(item[0]))
# f2.write("\n")
# f2.write("*****=摘要====")
# f2.write("\n")
# f2.write(str(item[1]))
# f2.write("\n")
## f2.close()
def textranktest1():
    """Extract TextRank key sentences from pickled records and save them.

    Loads the pickled sequence stored at the hard-coded ``path``, and for each
    record prints element 0, runs TextRank on element 2, and prints the three
    top-ranked sentences. The counter is checked after a record has been
    processed, so the loop stops once the 101st record is done. Each
    (analysed text, key sentences) pair is then written to ``abstract.csv``
    in the current working directory.

    NOTE(review): records are presumably ``(id/title, ..., body_text)``
    sequences -- confirm against whatever produced the pickle.
    """
    # Imported locally under the class's real CamelCase name, which is what
    # the textrank4zh package exports.
    from textrank4zh import TextRank4Sentence

    path = "d:\\100w"
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only run this against a dump from a trusted source.
    with open(path, 'rb') as file:
        a = pickle.load(file)

    tr4s = TextRank4Sentence()
    ll = []  # collected [analysed text, key sentences] pairs for the report
    for i, item in enumerate(a, start=1):
        print(item[0])
        rowitem = item[2]
        tr4s.analyze(text=rowitem, lower=True, source='all_filters')
        print("***************====原文***************====")
        ceshi = []  # key sentences extracted for this record
        print()
        print('***************===摘要********************=')
        for key_sentence in tr4s.get_key_sentences(num=3):
            # index is the sentence's position in the text, weight its score.
            print(key_sentence.index, key_sentence.weight, key_sentence.sentence)
            ceshi.append(key_sentence.sentence)
        ll.append([rowitem, ceshi])
        if i > 100:
            break
        print(i)  # progress counter

    current_dir = os.path.abspath('.')
    file_name2 = os.path.join(current_dir, 'abstract.csv')
    # The context manager closes the report even if a write fails part-way.
    with open(file_name2, 'w+', encoding='utf8') as f2:
        for original_text, summary in ll:
            f2.write("\033[0;31m%s\033[0m" % "*****=測試(原文)====")
            f2.write("\n")
            f2.write(str(original_text))
            f2.write("\n")
            f2.write("\033[0;31m%s\033[0m" % "*****=摘要====")
            f2.write("\n")
            f2.write(str(summary))
            f2.write("\n")
# print("\033[0;31m%s\033[0m" % "*****=測試====")
if __name__ == '__main__':
    # Run the demo only when executed as a script, not when imported.
    main()
基於NPOI開源框架寫的ExcelHelper
namespace exceltest datatable匯出到excel的memorystream 源datatable 表頭文字 public static memorystream export datatable dtsource,string strheadertext endregion...
基於wsgiref模組寫的web框架
基於wsgiref web框架 from wsgiref.server import make server from urls import urls from views import def run env,response param env 請求相關的所有資料型別 param respon...
基於NPOI開源框架寫的ExcelHelper
namespace exceltest datatable匯出到excel的memorystream 源datatable 表頭文字 public static memorystream export datatable dtsource,string strheadertext endregion...