代写Python getHTMLText 、代做BeautifulSoup python-代写Python编程

联系方式

QQ：99515681
邮箱：99515681@qq.com
工作时间：8:00-21:00
微信：codinghelp

您当前位置：首页 >> Python编程

Python编程

代写Python getHTMLText 、代做BeautifulSoup python

日期：2018-05-23 01:58

# -*- coding: utf-8 -*-

import requests

from bs4 import BeautifulSoup

import jieba

def getHTMLText(url):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as decoded by ``requests``, or an empty string
        when the request fails (connection error, timeout after 30 seconds,
        or a 4xx/5xx status code).
    """
    try:
        r = requests.get(url, timeout=30)
        # Turn 4xx/5xx responses into exceptions so they take the same
        # fallback path as network failures.
        r.raise_for_status()
        return r.text
    except requests.RequestException:
        # Only request-level failures are swallowed. Programming errors and
        # KeyboardInterrupt propagate instead of being silently hidden.
        return ""

def _first_match(soup, selector):
    """Return the first element matching the CSS *selector*.

    Raises:
        IndexError: If nothing in *soup* matches *selector*.
    """
    matches = soup.select(selector)
    if not matches:
        raise IndexError("no element matches selector %r" % selector)
    return matches[0]


def getContent(url):
    """Download a news article, print it, and save it to ``news_text.txt``.

    The page is fetched with ``getHTMLText`` and parsed with BeautifulSoup.
    Title, publication time, editor line and body paragraphs are extracted
    with fixed CSS selectors, printed to stdout, and written to
    ``news_text.txt`` (GBK encoded) in the order: title, time, paragraphs
    (each followed by a blank line), editor line.

    All required elements are located before anything is printed or written,
    so a failed download or an unexpected page layout never leaves partial
    output or truncates an existing ``news_text.txt``.

    Args:
        url: Address of the article page.

    Raises:
        IndexError: If the title, time or editor element is missing from
            the page (including the case where the download failed and the
            HTML is empty).
    """
    html = getHTMLText(url)
    soup = BeautifulSoup(html, "html.parser")

    title = _first_match(soup, "div.hd > h1").get_text()
    # get_text() is used for both the console and the file so the two always
    # agree; ``.string`` can be None when the element has several children.
    published = _first_match(soup, "div.a_Info > span.a_time").get_text()
    author = _first_match(
        soup, "div.qq_articleFt > div.qq_toolWrap > div.qq_editor"
    ).get_text()

    # len() of a Tag is its number of direct children, so this skips
    # paragraphs that have no content at all.
    paragraphs = [
        para.get_text()
        for para in soup.select("div.Cnt-Main-Article-QQ > p.text")
        if len(para) > 0
    ]

    print(title)
    print(published)
    print(author)
    for text in paragraphs:
        print(text)
        print()

    # The context manager closes the file even if a write fails, e.g. on a
    # character that GBK cannot encode.
    with open("news_text.txt", "w", encoding='GBK') as fo:
        fo.write(title + "\n")
        fo.write(published + "\n")
        for text in paragraphs:
            fo.write(text + "\n\n")
        fo.write(author + '\n')

def fenci(top_n=15):
    """Segment the saved article with jieba and report the most frequent words.

    Reads ``./news_text.txt`` (GBK encoded), splits it into words with
    ``jieba.lcut``, and counts every token except those exactly one
    character long. The most frequent words are printed to stdout and
    written to ``result.txt`` (GBK encoded) as ``word:count`` lines, highest
    count first. Words with equal counts keep their order of first
    appearance.

    Args:
        top_n: Maximum number of words to report. Fewer are reported when
            the text contains fewer distinct words. Defaults to 15.
    """
    with open("./news_text.txt", "r", encoding='GBK') as src:
        txt = src.read()

    counts = {}
    for word in jieba.lcut(txt):
        # Single-character tokens are skipped (in practice punctuation,
        # whitespace and one-character words).
        if len(word) == 1:
            continue
        counts[word] = counts.get(word, 0) + 1

    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)

    with open('result.txt', 'w', encoding='GBK') as out:
        # Slicing, unlike indexing, is safe when fewer than top_n distinct
        # words were found.
        for word, count in ranked[:max(top_n, 0)]:
            out.write(str(word) + ':' + str(count) + "\n")
            print("{:<10}{:>5}".format(word, count))

def main():
    """Run the pipeline on one fixed news article.

    Calls ``getContent`` with the hard-coded article address, then calls
    ``fenci`` to produce the word-frequency report.
    """
    article_url = "https://news.qq.com/a/20180515/023424.htm"
    getContent(article_url)
    fenci()


# Script entry point: the pipeline runs as soon as the file is executed.
main()

【返回顶部】【打印本稿】【关闭本页】

【上一篇】：代写python pandas 国外、代做pandas python程序

【下一篇】：代写python pandas 国外、代做pandas python程序

联系方式

最新辅导

热门辅导

您当前位置：首页 >> Python编程

Python编程

代写Python getHTMLText 、代做BeautifulSoup python

日期：2018-05-23 01:58

相关文章