联系方式

  • QQ:99515681
  • 邮箱:99515681@qq.com
  • 工作时间:8:00-21:00
  • 微信:codinghelp

您当前位置:首页 >> Python编程

Python编程

日期:2018-05-23 01:58

# -*- coding: utf-8 -*-

from collections import Counter

import jieba
import requests
from bs4 import BeautifulSoup

def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded response body (``r.text``), or ``""`` when the request
        fails: connection problem, no answer within the 30 second timeout,
        malformed URL, or an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=30)
        # Convert 4xx/5xx answers into an exception so they also yield "".
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: only request/HTTP failures are swallowed.
        # Programming errors and KeyboardInterrupt still propagate instead
        # of being hidden behind an empty string.
        return ""
    return r.text

def getContent(url):
    """Scrape one news article, print it and save it to ``news_text.txt``.

    The page is fetched with ``getHTMLText`` and parsed with BeautifulSoup's
    ``html.parser``. The title, publication time, editor line and body
    paragraphs are printed to stdout, then written GBK-encoded to
    ``news_text.txt`` in the current directory in the order: title, time,
    paragraphs (each followed by a blank line), editor line.

    Args:
        url: Address of the article page.

    Raises:
        IndexError: If the title, time or editor element is not present in
            the page, e.g. because the download failed and the HTML is empty.
        UnicodeEncodeError: If the extracted text contains characters that
            cannot be encoded as GBK.
    """
    soup = BeautifulSoup(getHTMLText(url), "html.parser")

    def first_text(selector):
        # Text of the first node matching ``selector``. The error names the
        # selector and URL so a failed download or a changed page layout is
        # easy to diagnose.
        matches = soup.select(selector)
        if not matches:
            raise IndexError(
                "no element matches selector {!r} in {}".format(selector, url)
            )
        return matches[0].get_text()

    title = first_text("div.hd > h1")
    # get_text() is used for both the printed and the saved value, so the
    # console output and the file always agree.
    time_text = first_text("div.a_Info > span.a_time")
    author = first_text("div.qq_articleFt > div.qq_toolWrap > div.qq_editor")

    # len(tag) is the number of child nodes; <p> tags without children
    # are skipped.
    paragraphs = [
        para.get_text()
        for para in soup.select("div.Cnt-Main-Article-QQ > p.text")
        if len(para) > 0
    ]

    print(title)
    print(time_text)
    print(author)
    for text in paragraphs:
        print(text)
        print()

    # The context manager closes the file even if a write fails.
    with open("news_text.txt", "w", encoding="GBK") as fo:
        fo.write(title + "\n")
        fo.write(time_text + "\n")
        for text in paragraphs:
            fo.write(text + "\n\n")
        fo.write(author + "\n")


def fenci(top_n=15):
    """Count word frequencies in ``news_text.txt`` and report the top words.

    Reads ``./news_text.txt`` (GBK), segments it with ``jieba.lcut`` and
    counts every token whose length is not exactly one character. The most
    frequent words are written to ``result.txt`` (GBK) as ``word:count``
    lines and printed to stdout as an aligned two-column table. Words with
    equal counts keep the order in which they first appear in the text.

    Args:
        top_n: Maximum number of words to report. Fewer are reported when
            the text contains fewer distinct countable words.
    """
    with open("./news_text.txt", "r", encoding="GBK") as src:
        txt = src.read()

    # Single-character tokens are left out of the count.
    counts = Counter(word for word in jieba.lcut(txt) if len(word) != 1)

    with open("result.txt", "w", encoding="GBK") as lk:
        # most_common() returns at most top_n entries, so a short text no
        # longer raises IndexError.
        for word, count in counts.most_common(top_n):
            lk.write("{}:{}\n".format(word, count))
            print("{:<10}{:>5}".format(word, count))


def main():
    """Scrape the hard-coded news article, then run the word count on it."""
    getContent("https://news.qq.com/a/20180515/023424.htm")
    fenci()


# Run the pipeline only when the file is executed as a script, so importing
# this module does not trigger a network fetch and file writes.
if __name__ == "__main__":
    main()


版权所有:编程辅导网 2021 All Rights Reserved 联系方式:QQ:99515681 微信:codinghelp 电子信箱:99515681@qq.com
免责声明:本站部分内容从网络整理而来,只供参考!如有版权问题可联系本站删除。 站长地图

python代写
微信客服:codinghelp