Signed-off-by: fong <wuwenfengmi@outlook.com>
This commit is contained in:
fong
2024-04-29 10:28:06 +08:00
parent 8666d52f90
commit f6d0da4ff1
5 changed files with 684 additions and 4 deletions
+30
View File
@@ -0,0 +1,30 @@
<?php
/**
 * Crawl-queue dispatcher: prints one URL from `lmve_newurls` for a crawler to fetch.
 *
 * Selection rules (flag values as used by this script):
 *   - flag "1" = URL already handed out to a crawler, flag "0" = URL not handed out yet.
 *   - If the oldest handed-out URL has not been refreshed for more than 10 minutes,
 *     it is handed out again and its `lastupdata` is bumped.
 *   - Otherwise the next not-yet-handed-out URL is claimed (flag set to "1") and printed.
 *   - If there is nothing to hand out, nothing is printed.
 *
 * Request: POST `password` must equal the shared secret, otherwise "ERROR -1" is returned.
 */

// NOTE(review): the shared secret is hard-coded here; consider moving it into config.php.
$lmve_password = (isset($_POST['password']) && is_string($_POST['password'])) ? $_POST['password'] : '';
// hash_equals() compares in constant time and never reads an undefined index.
if (!hash_equals("(*&RV^*(&VRH*(V)))", $lmve_password)) {
    die("ERROR -1");
}

define('IN_LM', true);
include "./sql.php";
include "./config.php";

$db1 = new mysqldb;
$db1->conetdb(MYSQL_USERNAME, MYSQL_PASSWORD, MYSQL_DBNAME);
$nowtime = date('Y-m-d H:i:s');

// Oldest `lastupdata` first, highest score first; only the first row is requested.
$lmve_order = array("lastupdata" => "ASC", "score" => "DESC");

$re = $db1->read_data_on_tab("lmve_newurls", array("flag" => "1"), $lmve_order, "0,1");

if (!empty($re) && strtotime($nowtime) - strtotime($re[0]['lastupdata']) > 600) {
    // This URL was handed out more than 10 minutes ago and has not been updated: hand it out again.
    $db1->updata_on_tab("lmve_newurls", array("id" => $re[0]['id']), array("lastupdata" => $nowtime));
    print_r($re[0]['url']);
} else {
    // Nothing stale is pending: claim the next URL that has not been handed out yet.
    $re = $db1->read_data_on_tab("lmve_newurls", array("flag" => "0"), $lmve_order, "0,1");
    if (!empty($re)) {
        $db1->updata_on_tab("lmve_newurls", array("id" => $re[0]['id']), array("flag" => "1", "lastupdata" => $nowtime));
        print_r($re[0]['url']);
    }
    // Empty queue: print nothing and, unlike before, do not run an UPDATE with an undefined id.
}
?>
+339
View File
@@ -0,0 +1,339 @@
-- phpMyAdmin SQL Dump
-- version 5.2.0
-- https://www.phpmyadmin.net/
--
-- Host: localhost
-- Generation time: 2022-09-11 16:46:13
-- Server version: 5.6.50-log
-- PHP version: 7.4.28
SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
START TRANSACTION;
SET time_zone = "+00:00";
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8mb4 */;
--
-- Database: `lmve_net`
--
-- --------------------------------------------------------
--
-- Table structure for table `lmve_domainlinksents`
-- Link table: one row per (domain, sentence) pair.
--
CREATE TABLE `lmve_domainlinksents` (
`id` int(11) NOT NULL,
`domainid` int(11) NOT NULL,
`sentid` int(11) NOT NULL,
`time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- --------------------------------------------------------
--
-- Table structure for table `lmve_domains`
-- One row per domain name together with its score.
--
CREATE TABLE `lmve_domains` (
`id` int(11) NOT NULL,
`domain` varchar(64) NOT NULL,
`score` int(8) NOT NULL DEFAULT '0',
`time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- --------------------------------------------------------
--
-- Table structure for table `lmve_imgs`
-- One row per image URL.
--
CREATE TABLE `lmve_imgs` (
`id` int(11) NOT NULL,
`imgurl` varchar(256) NOT NULL,
`time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- --------------------------------------------------------
--
-- Table structure for table `lmve_imgslinkurls`
-- Link table: one row per (image, page URL) pair.
--
CREATE TABLE `lmve_imgslinkurls` (
`id` int(11) NOT NULL,
`imgurlid` int(11) NOT NULL,
`urlid` int(11) NOT NULL,
`time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- --------------------------------------------------------
--
-- Table structure for table `lmve_newurls`
-- Queue of URLs to crawl. As used by the PHP scripts in this commit, `flag`
-- is 0 for a URL not handed out yet, 1 once it has been handed to a crawler,
-- and 3 after the crawl result for that URL has been stored.
--
CREATE TABLE `lmve_newurls` (
`id` int(11) NOT NULL,
`domainid` int(11) NOT NULL,
`url` varchar(256) NOT NULL,
`score` int(4) NOT NULL DEFAULT '100',
`flag` int(1) DEFAULT '0',
`lastupdata` datetime NOT NULL DEFAULT '1998-09-12 00:00:00',
`time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- --------------------------------------------------------
--
-- Table structure for table `lmve_sents`
-- One row per distinct sentence ("sent") extracted from a page.
--
CREATE TABLE `lmve_sents` (
`id` int(11) NOT NULL,
`sent` varchar(256) NOT NULL,
`creatdata` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- --------------------------------------------------------
--
-- Table structure for table `lmve_sentslinkurls`
-- Link table: one row per (sentence, page URL) pair.
--
CREATE TABLE `lmve_sentslinkurls` (
`id` int(11) NOT NULL,
`sentid` int(11) NOT NULL,
`urlid` int(11) NOT NULL,
`time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- --------------------------------------------------------
--
-- Table structure for table `lmve_snapshot`
-- Stored page content ("snapshot") for a crawled URL.
--
CREATE TABLE `lmve_snapshot` (
`id` int(11) NOT NULL,
`urlid` int(11) NOT NULL,
`url` varchar(256) NOT NULL,
`snapshot` text NOT NULL,
`time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- --------------------------------------------------------
--
-- Table structure for table `lmve_urls`
-- One row per crawled URL with its title, meta text, score, last HTTP status
-- and content type.
--
CREATE TABLE `lmve_urls` (
`id` int(8) NOT NULL COMMENT '序号',
`domainid` int(11) NOT NULL,
`url` varchar(256) NOT NULL COMMENT '链接',
`title` varchar(64) DEFAULT NULL COMMENT '标题',
`mate` varchar(512) DEFAULT NULL COMMENT '标记',
`score` int(4) NOT NULL DEFAULT '100' COMMENT '分值',
`laststatus` int(4) DEFAULT NULL,
`contenttype` varchar(32) DEFAULT NULL,
`lastupdata` datetime NOT NULL DEFAULT '1998-09-12 00:00:00' COMMENT '最后更新时间',
`creatdata` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间'
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- --------------------------------------------------------
--
-- Table structure for table `lmve_words`
-- One row per distinct word (at most 8 characters).
--
CREATE TABLE `lmve_words` (
`id` int(11) NOT NULL,
`word` varchar(8) NOT NULL,
`creatdata` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- --------------------------------------------------------
--
-- Table structure for table `lmve_wordslinksents`
-- Link table: one row per (word, sentence) pair.
--
CREATE TABLE `lmve_wordslinksents` (
`id` int(11) NOT NULL,
`wordid` int(11) NOT NULL,
`sentid` int(11) NOT NULL,
`time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
--
-- Indexes for dumped tables
-- Every table gets a primary key on `id`; secondary keys are plain
-- (non-unique) indexes. varchar(256) columns are indexed on a 255-character
-- prefix.
--
--
-- Indexes for table `lmve_domainlinksents`
--
ALTER TABLE `lmve_domainlinksents`
ADD PRIMARY KEY (`id`),
ADD KEY `domainid` (`domainid`),
ADD KEY `sentid` (`sentid`);
--
-- Indexes for table `lmve_domains`
--
ALTER TABLE `lmve_domains`
ADD PRIMARY KEY (`id`),
ADD KEY `domain` (`domain`),
ADD KEY `score` (`score`);
--
-- Indexes for table `lmve_imgs`
--
ALTER TABLE `lmve_imgs`
ADD PRIMARY KEY (`id`),
ADD KEY `imgurl` (`imgurl`(255));
--
-- Indexes for table `lmve_imgslinkurls`
--
ALTER TABLE `lmve_imgslinkurls`
ADD PRIMARY KEY (`id`),
ADD KEY `imgurlid` (`imgurlid`),
ADD KEY `urlid` (`urlid`);
--
-- Indexes for table `lmve_newurls`
--
ALTER TABLE `lmve_newurls`
ADD PRIMARY KEY (`id`),
ADD KEY `url` (`url`(255)),
ADD KEY `score` (`score`),
ADD KEY `flag` (`flag`),
ADD KEY `lastupdata` (`lastupdata`),
ADD KEY `domainid` (`domainid`);
--
-- Indexes for table `lmve_sents`
--
ALTER TABLE `lmve_sents`
ADD PRIMARY KEY (`id`),
ADD KEY `sent` (`sent`(255));
--
-- Indexes for table `lmve_sentslinkurls`
--
ALTER TABLE `lmve_sentslinkurls`
ADD PRIMARY KEY (`id`),
ADD KEY `sentid` (`sentid`),
ADD KEY `urlid` (`urlid`);
--
-- Indexes for table `lmve_snapshot`
-- NOTE(review): only the primary key is defined; if snapshots are looked up
-- by `urlid`, an index on that column may be wanted -- confirm against the
-- queries the application issues.
--
ALTER TABLE `lmve_snapshot`
ADD PRIMARY KEY (`id`);
--
-- Indexes for table `lmve_urls`
--
ALTER TABLE `lmve_urls`
ADD PRIMARY KEY (`id`),
ADD KEY `url` (`url`(255)),
ADD KEY `lastupdata` (`lastupdata`),
ADD KEY `score` (`score`),
ADD KEY `domainid` (`domainid`);
--
-- Indexes for table `lmve_words`
--
ALTER TABLE `lmve_words`
ADD PRIMARY KEY (`id`),
ADD KEY `word` (`word`);
--
-- Indexes for table `lmve_wordslinksents`
--
ALTER TABLE `lmve_wordslinksents`
ADD PRIMARY KEY (`id`),
ADD KEY `wordid` (`wordid`),
ADD KEY `sentid` (`sentid`);
--
-- AUTO_INCREMENT for dumped tables
-- Each table's `id` column is redefined as AUTO_INCREMENT after its primary
-- key has been added.
--
--
-- AUTO_INCREMENT for table `lmve_domainlinksents`
--
ALTER TABLE `lmve_domainlinksents`
MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
--
-- AUTO_INCREMENT for table `lmve_domains`
--
ALTER TABLE `lmve_domains`
MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
--
-- AUTO_INCREMENT for table `lmve_imgs`
--
ALTER TABLE `lmve_imgs`
MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
--
-- AUTO_INCREMENT for table `lmve_imgslinkurls`
--
ALTER TABLE `lmve_imgslinkurls`
MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
--
-- AUTO_INCREMENT for table `lmve_newurls`
--
ALTER TABLE `lmve_newurls`
MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
--
-- AUTO_INCREMENT for table `lmve_sents`
--
ALTER TABLE `lmve_sents`
MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
--
-- AUTO_INCREMENT for table `lmve_sentslinkurls`
--
ALTER TABLE `lmve_sentslinkurls`
MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
--
-- AUTO_INCREMENT for table `lmve_snapshot`
--
ALTER TABLE `lmve_snapshot`
MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
--
-- AUTO_INCREMENT for table `lmve_urls`
--
ALTER TABLE `lmve_urls`
MODIFY `id` int(8) NOT NULL AUTO_INCREMENT COMMENT '序号';
--
-- AUTO_INCREMENT for table `lmve_words`
--
ALTER TABLE `lmve_words`
MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
--
-- AUTO_INCREMENT for table `lmve_wordslinksents`
--
ALTER TABLE `lmve_wordslinksents`
MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;
-- Ends the transaction opened at the top of the dump.
COMMIT;
-- Restore the client character-set settings saved at the top of the dump.
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
+236
View File
@@ -0,0 +1,236 @@
<?php
/**
 * Crawl-result ingest endpoint: bootstrap.
 *
 * Authenticates the request with the shared POST `password`, records the start
 * time used for the response-time report at the end of the script, and opens
 * the database connection in the global `$db1`.
 */

// NOTE(review): the shared secret is hard-coded here; consider moving it into config.php.
$lmve_password = (isset($_POST['password']) && is_string($_POST['password'])) ? $_POST['password'] : '';
// hash_equals() compares in constant time and never reads an undefined index.
if (!hash_equals("(*&RV^*(&VRH*(V)))", $lmve_password)) {
    die("ERROR -1");
}

$starttime = time();
define('IN_LM', true);
include "./sql.php";
include "./config.php";

$db1 = new mysqldb;
$db1->conetdb(MYSQL_USERNAME, MYSQL_PASSWORD, MYSQL_DBNAME);
$nowtime = date('Y-m-d H:i:s');
/**
 * Find the row of $tab whose columns match $da, inserting it when no row matches.
 *
 * Uses the global `$db1` connection.
 *
 * @param string $tab Table name.
 * @param array  $da  Column => value pairs used both as the lookup filter and,
 *                    when nothing matches, as the row to insert.
 * @return array $da extended with "id" (id of the matching or newly inserted row)
 *               and "isnew" (true when the row had to be inserted).
 */
function lmve_get_dataid_at_tab($tab,$da)
{
    global $db1;

    $matches = $db1->read_data_on_tab($tab, $da);
    $inserted = empty($matches);

    if ($inserted) {
        // Insert with the caller's columns only; "id"/"isnew" are added afterwards.
        $rowId = $db1->addend_data_on_tab($tab, $da);
    } else {
        $rowId = $matches[0]['id'];
    }

    $da["id"] = $rowId;
    $da["isnew"] = $inserted;
    return $da;
}
/**
 * Look up a domain in `lmve_domains`, creating the row when it does not exist yet.
 *
 * Uses the global `$db1` connection.
 *
 * @param string $domain Domain name to look up.
 * @return array Keys "domain", "id", "isnew" and "score". For a newly inserted
 *               domain "score" is the string "0"; otherwise it is the stored score.
 */
function lmve_get_domainscore_at_tab($domain)
{
    global $db1;

    $da = array("domain" => $domain);
    $found = $db1->read_data_on_tab("lmve_domains", $da);

    if (!empty($found)) {
        // Known domain: report its stored id and score.
        $da["id"] = $found[0]['id'];
        $da["isnew"] = false;
        $da["score"] = $found[0]['score'];
        return $da;
    }

    // Unknown domain: insert it (only the "domain" column is passed) and start from score "0".
    $newId = $db1->addend_data_on_tab("lmve_domains", $da);
    $da["id"] = $newId;
    $da["isnew"] = true;
    $da["score"] = "0";
    return $da;
}
/**
 * Return POST field $key as a string.
 *
 * @param string $key POST field name.
 * @return string The posted value, or '' when the field is missing or not scalar (e.g. an array).
 */
function lmve_post_text($key)
{
    return (isset($_POST[$key]) && is_scalar($_POST[$key])) ? (string)$_POST[$key] : '';
}

/**
 * Decode POST field $key as JSON and return its values as a 0-based list.
 *
 * @param string $key POST field name.
 * @return array The decoded values, or an empty list when the field is missing
 *               or does not decode to a JSON array/object.
 */
function lmve_json_list($key)
{
    $decoded = json_decode(lmve_post_text($key), true);
    return is_array($decoded) ? array_values($decoded) : array();
}

// ---------------------------------------------------------------------------
// Pre-process the posted fields.
// Every value that reaches the database is passed through addslashes(), the
// escaping convention this script already used for url/title/mate; domain,
// status and contenttype were previously passed through unescaped.
// NOTE(review): this suggests the mysqldb wrapper interpolates values into
// SQL; parameterised queries in sql.php would be the robust fix -- confirm.
// ---------------------------------------------------------------------------
$post_url         = addslashes(lmve_post_text('url'));
$post_title       = addslashes(lmve_post_text('title'));
$post_mate        = addslashes(lmve_post_text('mate'));
$post_domain      = addslashes(lmve_post_text('domain'));
$post_status      = addslashes(lmve_post_text('status'));
$post_contenttype = addslashes(lmve_post_text('contenttype'));

echo "**远程消息**\n";
echo "url:".$post_url."\n";

// Id of the URL in the database (the row is created if missing).
$url_id = lmve_get_dataid_at_tab("lmve_urls", array("url" => $post_url));
// Id and score of the domain in the database (the row is created if missing).
$url_domain = lmve_get_domainscore_at_tab($post_domain);

if (lmve_post_text('contenttype') === 'text/html') { // HTML page
    // Store the snapshot.
    $post_htmlx = addslashes(lmve_post_text('htmlx'));
    lmve_get_dataid_at_tab("lmve_snapshot", array("urlid" => $url_id['id'], "url" => $post_url, "snapshot" => $post_htmlx));

    // Queue the newly discovered links. newurls, newurlscore and newurldomain
    // are parallel lists: entry i of each describes the same link.
    $new_urls_score  = lmve_json_list('newurlscore');
    $new_urls_domain = lmve_json_list('newurldomain');
    foreach (lmve_json_list('newurls') as $i => $val) {
        // Skip malformed entries instead of reading undefined offsets.
        if (!is_scalar($val) || !isset($new_urls_domain[$i]) || !is_scalar($new_urls_domain[$i])) {
            continue;
        }
        $new_url = addslashes((string)$val);
        $new_domain_info = lmve_get_domainscore_at_tab(addslashes((string)$new_urls_domain[$i]));
        $link_score = isset($new_urls_score[$i]) ? intval($new_urls_score[$i]) : 0;
        $score = $link_score + intval($new_domain_info['score']);

        // Insert the link if it is not queued yet, otherwise refresh its score.
        $new_url_db = $db1->read_data_on_tab("lmve_newurls", array("url" => $new_url));
        if (empty($new_url_db)) {
            lmve_get_dataid_at_tab("lmve_newurls", array("domainid" => $new_domain_info['id'], "url" => $new_url, "score" => $score));
        } else {
            $db1->updata_on_tab("lmve_newurls", array("id" => $new_url_db[0]['id']), array("score" => $score));
        }
    }

    // Record the page's images: drop the old links, then link the current ones.
    $db1->deldata_on_tab("lmve_imgslinkurls", array("urlid" => $url_id['id']));
    foreach (lmve_json_list('images') as $val) {
        if (!is_scalar($val)) {
            continue;
        }
        $imgs_id = lmve_get_dataid_at_tab("lmve_imgs", array("imgurl" => addslashes((string)$val)));
        lmve_get_dataid_at_tab("lmve_imgslinkurls", array("imgurlid" => $imgs_id['id'], "urlid" => $url_id['id']));
    }

    // Resolve every posted word to its database id, keyed by its position in the posted list.
    $words_id_at_db = array();
    $wordio_int = 0;
    foreach (lmve_json_list('words') as $word_index => $val) {
        if (!is_scalar($val)) {
            continue;
        }
        $wtemp = lmve_get_dataid_at_tab("lmve_words", array("word" => addslashes((string)$val)));
        $words_id_at_db[$word_index] = $wtemp['id'];
        $wordio_int += 1;
    }

    // Each link is "wordIndex:sentenceIndex". Group the word indexes by sentence
    // once, so each sentence no longer has to rescan the whole link list.
    $word_indexes_by_sent = array();
    foreach (lmve_json_list('links') as $val) {
        if (!is_scalar($val)) {
            continue;
        }
        $pair = explode(":", (string)$val);
        if (count($pair) < 2 || !ctype_digit($pair[0]) || !ctype_digit($pair[1])) {
            continue; // malformed link
        }
        $word_indexes_by_sent[(int)$pair[1]][] = (int)$pair[0];
    }

    // Process the sentences: drop the old sentence links of this URL, then link the current ones.
    $db1->deldata_on_tab("lmve_sentslinkurls", array("urlid" => $url_id['id']));
    $sents_int = 0;     // sentences processed
    $repeat_int = 0;    // sentences already linked to this domain
    $newsents_int = 0;  // sentences that were new to the database
    foreach (lmve_json_list('sents') as $sent_index => $val) {
        if (!is_scalar($val)) {
            continue;
        }
        // Get the sentence id and learn whether the sentence already existed.
        $stemp = lmve_get_dataid_at_tab("lmve_sents", array("sent" => addslashes((string)$val)));
        // Link the sentence to the domain; "isnew" is false when that link already existed.
        $domain_link = lmve_get_dataid_at_tab("lmve_domainlinksents", array("domainid" => $url_domain['id'], "sentid" => $stemp['id']));

        if ($stemp['isnew']) {
            // New sentence: link it to each of its words.
            $linked_words = isset($word_indexes_by_sent[$sent_index]) ? $word_indexes_by_sent[$sent_index] : array();
            foreach ($linked_words as $word_index) {
                if (!isset($words_id_at_db[$word_index])) {
                    continue; // link points at a word that was not posted
                }
                lmve_get_dataid_at_tab("lmve_wordslinksents", array("wordid" => $words_id_at_db[$word_index], "sentid" => $stemp['id']));
            }
            $newsents_int += 1;
        } elseif (!$domain_link['isnew']) {
            // Known sentence that was already linked to this domain: a repeat within the domain.
            $repeat_int += 1;
        }

        // Link the sentence to the URL.
        lmve_get_dataid_at_tab("lmve_sentslinkurls", array("sentid" => $stemp['id'], "urlid" => $url_id['id']));
        $sents_int += 1;
    }

    // Repetition rate in percent; a page without sentences counts as 0% instead of dividing by zero.
    $reppp = $sents_int > 0 ? ceil(($repeat_int / $sents_int) * 100) : 0;
    $url_score = intval(lmve_post_text('score')) + 100 - $reppp + intval($url_domain['score']);

    if ($url_id['isnew']) {
        $db1->updata_on_tab("lmve_urls", array("id" => $url_id['id']), array("domainid" => $url_domain['id'], "title" => $post_title, "mate" => $post_mate, "score" => $url_score, "laststatus" => $post_status, "contenttype" => $post_contenttype, "lastupdata" => $nowtime));
        echo "记录新url\n";
    } else {
        // Existing URL: refresh its metadata but keep the stored score.
        $db1->updata_on_tab("lmve_urls", array("id" => $url_id['id']), array("title" => $post_title, "mate" => $post_mate, "laststatus" => $post_status, "contenttype" => $post_contenttype, "lastupdata" => $nowtime));
        echo "不是新url,将不会更新评分\n";
    }
    echo "内容数量:".$newsents_int."\n";
    echo "重复率:".$reppp."%\n";
    echo "url得分:".$url_score."\n";
    echo "单词io:".$wordio_int."\n";
} else {
    // Non-HTML resource: record it with score 0 and no content analysis.
    $db1->updata_on_tab("lmve_urls", array("id" => $url_id['id']), array("domainid" => $url_domain['id'], "title" => $post_title, "score" => "0", "laststatus" => $post_status, "contenttype" => $post_contenttype, "lastupdata" => $nowtime));
}

// Mark the URL's queue entry with flag "3".
$db1->updata_on_tab("lmve_newurls", array("url" => $post_url), array("flag" => "3", "lastupdata" => $nowtime));
echo "url码:".lmve_post_text('status')."\n";

$oktime = time();
$runtime = $oktime - $starttime;
echo "响应时:".$runtime."\n";
?>
+31
View File
@@ -0,0 +1,31 @@
<?php
/**
 * Segmentation test page: tokenises the query-string parameter `s` with
 * jieba-php in search-engine mode, dumps the resulting segments and reports
 * the elapsed time in whole seconds.
 */
use Fukuball\Jieba\Jieba;

$starttime = time();
define('IN_LM', true);
include "./sql.php";
include "./config.php";

$db1 = new mysqldb;
$db1->conetdb(MYSQL_USERNAME, MYSQL_PASSWORD, MYSQL_DBNAME);
$nowtime = date('Y-m-d H:i:s');

//ini_set('memory_limit', '128M');
require_once "./jieba/vendor/multi-array/MultiArray.php";
require_once "./jieba/vendor/multi-array/Factory/MultiArrayFactory.php";
require_once "./jieba/class/Jieba.php";
require_once "./jieba/class/Finalseg.php";

// NOTE(review): Finalseg.php is loaded but never initialised here; confirm
// against the jieba-php usage instructions whether Finalseg::init() is needed.
Jieba::init();

// A missing or non-string `s` (e.g. ?s[]=x) is treated as an empty query
// instead of passing null/an array to the tokenizer.
$search_text = (isset($_GET['s']) && is_string($_GET['s'])) ? $_GET['s'] : '';

$seg_list = Jieba::cutForSearch($search_text); // search-engine mode
var_dump($seg_list);

$oktime = time();
$runtime = $oktime - $starttime;
echo "响应时:".$runtime."\n";
?>
+48 -4
View File
@@ -12,7 +12,7 @@ import re
import jieba
import html
import json
from myfunsion import myfunsion
#from bs4 import BeautifulSoup as bs
#from lxml import etree
@@ -323,8 +323,52 @@ def getoneurl():
# Manual mode switch for running this script:
#   0 - crawl forever: fetch a URL from the server, crawl and post it
#   1 - crawl a single fixed URL
#   2 - fetch one URL from the server, print it and crawl it
#   3 - raw HTTP fetch experiment (prints the response Content-Type)
#   4 - run the word-extraction step only
test=1

if test==0:
    while 1:
        theurl=getoneurl()
        # '-1' is the failure value of getoneurl(); keep polling until a URL arrives.
        if theurl!='-1':
            getandpost(theurl)
        else:
            print("获取url失败,自动重试")
elif test==1:
    theurl="https://lmve.net"
    getandpost(theurl)
elif test==2:
    theurl=getoneurl()
    print(theurl)
    # Do not hand the failure value to the crawler as if it were a URL.
    if theurl!='-1':
        getandpost(theurl)
elif test==3:
    headers = {
        'user-agent': 'my-app/0.0.1',
        'Content-Type': 'text/html; charset=utf-8'
    }
    #print(begin_url)
    # Fetch the whole page.
    url1="https://git.lmve.net/kevin/um-all-index-web/-/avatar"
    url2="https://yyyyyyounger.com/"
    try:
        # NOTE(review): verify=False disables TLS certificate verification.
        r = requests.get(url2,headers=headers,verify=False)
        htmlx_byt=r.content
        print(r.headers['Content-Type'])
    except Exception as exc:
        # Report what failed instead of swallowing it; a bare `except:` would
        # also have trapped KeyboardInterrupt and SystemExit.
        print("11", exc)
elif test==4:
    #js=get_page_words("https://lmve.net")
    #print(js)
    myfunsion.a()
    theurl="https://lmve.net"
    redata=get_page_words(theurl)
    print(redata['words'])