From 44df96ee2f734384a89bc4021a8812ce5d1e4273 Mon Sep 17 00:00:00 2001 From: kevin Date: Wed, 31 Jan 2024 23:20:09 +0800 Subject: [PATCH] Signed-off-by: kevin --- reptile.py | 52 ++++------------------------------------------------ 1 file changed, 4 insertions(+), 48 deletions(-) diff --git a/reptile.py b/reptile.py index 93b3fea..0716b88 100644 --- a/reptile.py +++ b/reptile.py @@ -12,7 +12,7 @@ import re import jieba import html import json -from myfunsion import myfunsion + #from bs4 import BeautifulSoup as bs #from lxml import etree @@ -323,52 +323,8 @@ def getoneurl(): -test=1 - -if test==0: - while 1: - theurl=getoneurl() - if theurl!='-1': - getandpost(theurl) - else : - print("获取url失败,自动重试") - -if test==1: - theurl="https://lmve.net" - getandpost(theurl) - -if test==2: - theurl=getoneurl() - print(theurl) - getandpost(theurl) - -if test==3: - headers = { - 'user-agent': 'my-app/0.0.1', - 'Content-Type': 'text/html; charset=utf-8' - } - - #print(begin_url) - #获取整个页面 - url1="https://git.lmve.net/kevin/um-all-index-web/-/avatar" - url2="https://yyyyyyounger.com/" - try : - r = requests.get(url2,headers=headers,verify=False) - htmlx_byt=r.content - print(r.headers['Content-Type']) - except: - print("11") -if test==4: - #js=get_page_words("https://lmve.net") - #print(js) - myfunsion.a() - - - - - - - - +theurl="https://lmve.net" +redata=get_page_words(theurl) +print(redata['words'])