@ -104,22 +104,24 @@ class TujiguService
}
}
}
}
}
}
$baseUrl = " https://www.tujigu.net / " ;
$baseUrl = " https://www.tujidao01.com / " ;
usleep ( random_int ( 100 , 1000 ) * 1000 );
usleep ( random_int ( 100 , 1000 ) * 1000 );
$peopleUrl = " https://www.tujigu.net/t/ " ;
$peopleUrl = " https://www.tujidao01.com/t/?id= " ;
$peopleUrl .= $name [ 'code' ];
$peopleUrl .= $name [ 'code' ];
// 获取总的相册数量
// 获取总的相册数量
$albumNumSelector = " body > div:nth-child(4) > span " ;
$albumNumSelector = " body > div:nth-child(4) > span " ;
$albumNumSelector = " #pages > div > a:last-child " ;
echo " 111111111 " ;
echo " 111111111 " ;
$content = null ;
try {
try {
$baseQl = QueryList :: get ( $peopleUrl , null , [ 'timeout' => 5 ]);
$content = $this -> getContent ( $peopleUrl );
$baseQl = QueryList :: getInstance () -> setHtml ( $content );
} catch ( Exception $e ) {
} catch ( Exception $e ) {
dump ( $e -> getMessage ());
dump ( $e -> getMessage ());
}
}
echo " 222222222 " ;
echo " 222222222 " ;
$ql = $baseQl -> find ( $albumNumSelector ) -> htmls ();
dump ( $ql -> all ());
$ql = $baseQl -> find ( $albumNumSelector ) -> attrs ( " href " );
$onlyOnePage = false ;
$onlyOnePage = false ;
// 相册页数
// 相册页数
if ( count ( $ql -> all ()) == 0 ) {
if ( count ( $ql -> all ()) == 0 ) {
@ -127,32 +129,39 @@ class TujiguService
$totalAlbumPage = 1 ;
$totalAlbumPage = 1 ;
$onlyOnePage = true ;
$onlyOnePage = true ;
} else {
} else {
preg_match ( " # \ d+# " , $ql -> all ()[ 0 ], $result );
preg_match ( " #page=( \ d+) # " , $ql -> all ()[ 0 ], $result );
dump ( $result );
dump ( $result );
$totalAlbumNum = 0 ;
if ( is_numeric ( $result [ 0 ])) {
$totalAlbumNum = $result [ 0 ];
$totalAlbumPage = 1 ;
if ( is_numeric ( $result [ 1 ])) {
$totalAlbumPage = $result [ 1 ];
}
}
$totalAlbumPage = ceil ( $totalAlbumNum / 40 );
}
}
$baseAlbumUrl = " https://www.tujigu.net/t/ { $name [ 'code' ] } / " ;
$baseAlbumUrl = " https://www.tujidao01.com/t/?id= { $name [ 'code' ] } &page= " ;
// 表示限制抓取相册数
// 表示限制抓取相册数
$countLimit = 8 ;
$countLimit = 8 ;
if ( $isAll ) {
if ( $isAll ) {
$countLimit = 5000 ;
$countLimit = 5000 ;
}
}
for ( $i = 0 ; $i < $totalAlbumPage ; $i ++ ) {
if ( $onlyOnePage || $totalAlbumPage == 1 || $i == 0 ) {
$albumQl = QueryList :: get ( $peopleUrl );
echo " totalAlbumPage is $totalAlbumPage\n " ;
for ( $i = 1 ; $i <= $totalAlbumPage ; $i ++ ) {
if ( $onlyOnePage || $totalAlbumPage == 1 || $i == 1 ) {
$albumQl = QueryList :: getInstance () -> setHtml ( $content );
} else {
} else {
$albumQl = QueryList :: get ( $baseAlbumUrl . " index_ " . $i . " .html " );
$content = $this -> getContent ( $baseAlbumUrl . $i );
$albumQl = QueryList :: getInstance () -> setHtml ( $content );
}
}
// dump($albumQl->getHtml());
// dump($albumQl->getHtml());
$albumList = $albumQl -> find ( " body > div.hezi > ul > li > a " ) -> attrs ( " href " );
$pageAlbum = $albumList -> all ();
dump ( $pageAlbum );
foreach ( $pageAlbum as $album ) {
$rules = [
'num' => [ 'span.shuliang' , 'text' ],
'title' => [ 'p.biaoti' , 'text' ],
'img' => [ 'a>img' , 'src' ],
" code " => [ 'p.biaoti a' , " href " ]
];
$range = " div.hezi>ul li " ;
// "body > div.hezi > ul > li"
$albumList = $albumQl -> rules ( $rules ) -> range ( $range ) -> query () -> getData () -> all ();
foreach ( $albumList as $album ) {
if ( $countLimit <= 0 ) {
if ( $countLimit <= 0 ) {
dump ( " 相册已超过限制数量,跳出 " );
dump ( " 相册已超过限制数量,跳出 " );
break 2 ;
break 2 ;
@ -160,23 +169,18 @@ class TujiguService
dump ( " current album page no: " . $i );
dump ( " current album page no: " . $i );
usleep ( 1000 * random_int ( 100 , 1000 ));
usleep ( 1000 * random_int ( 100 , 1000 ));
dump ( " 相册: " , [ $album ]);
dump ( " 相册: " , [ $album ]);
$pageQL = QueryList :: get ( $album );
$page = $pageQL -> find ( " body > div.tuji > p:nth-child(5) " ) -> htmls ();
$pageAlternative = $pageQL -> find ( " body > div.tuji > p:nth-child(6) " ) -> htmls ();
$title = $pageQL -> find ( " body > div.tuji > div.weizhi > h1 " ) -> htmls ();
dump ( " pageTitle all " , [ $title -> all (), $album ]);
$titleStr = $title -> all ()[ 0 ];
$titleStr = preg_replace ( " #/# " , " - " , $titleStr );
preg_match ( " #图片数量: ( \ d+)P# " , $page -> all ()[ 0 ], $result );
if ( count ( $result ) < 2 ) {
preg_match ( " #图片数量: ( \ d+)P# " , $pageAlternative -> all ()[ 0 ], $result );
}
$title = $album [ " title " ];
$titleStr = preg_replace ( " #/# " , " - " , $title );
preg_match ( " #( \ d+)P# " , $album [ " num " ], $result );
$totalImageNum = $result [ 1 ];
$totalImageNum = $result [ 1 ];
for ( $j = 1 ; $j <= $totalImageNum ; $j ++ ) {
for ( $j = 1 ; $j <= $totalImageNum ; $j ++ ) {
// $albumCode = substr($album, 25, 5);
// $albumCode = substr($album, 25, 5);
$albumCode = explode ( " / " , $album )[ 4 ];
preg_match ( " #id \ =( \ d+)# " , $album [ " code " ], $albumCodeResult );
$albumCode = $albumCodeResult [ 1 ];
$baseImageUrl = " https://tjg.gzhuibei.com/a/1/ { $albumCode } / " ;
$baseImageUrl = " https://tjg.gzhuibei.com/a/1/ { $albumCode } / " ;
// if ($j == 0) {
// if ($j == 0) {
// $imageName = $j . ".jpg";
// $imageName = $j . ".jpg";
// } else {
// } else {
@ -203,16 +207,35 @@ class TujiguService
dump ( $albumPath . " / " . $imageName . " exists. skipped! " );
dump ( $albumPath . " / " . $imageName . " exists. skipped! " );
continue ;
continue ;
}
}
$opts = array ( 'http' => [ 'header' =>
" User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36 \r \n
Referer : " . $baseUrl . " \r\n "
]);
$curl_handle = curl_init ();
$curl_handle = curl_init ();
curl_setopt ( $curl_handle , CURLOPT_URL , $imageUrl );
curl_setopt ( $curl_handle , CURLOPT_CONNECTTIMEOUT , 2000 );
curl_setopt ( $curl_handle , CURLOPT_RETURNTRANSFER , 1 );
curl_setopt ( $curl_handle , CURLOPT_USERAGENT , 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36' );
curl_setopt ( $curl_handle , CURLOPT_REFERER , $baseUrl );
curl_setopt_array ( $curl_handle , array (
CURLOPT_URL => $imageUrl ,
CURLOPT_RETURNTRANSFER => true ,
CURLOPT_ENCODING => '' ,
CURLOPT_MAXREDIRS => 10 ,
CURLOPT_TIMEOUT => 0 ,
CURLOPT_FOLLOWLOCATION => true ,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1 ,
CURLOPT_CUSTOMREQUEST => 'GET' ,
CURLOPT_HTTPHEADER => array (
'Accept: image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8' ,
'Accept-Language: zh-CN,zh;q=0.9' ,
'Cache-Control: no-cache' ,
'Connection: keep-alive' ,
'Pragma: no-cache' ,
'Referer: https://www.tujidao01.com/' ,
'Sec-Fetch-Dest: image' ,
'Sec-Fetch-Mode: no-cors' ,
'Sec-Fetch-Site: cross-site' ,
'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36' ,
'sec-ch-ua: " Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"' ,
'sec-ch-ua-mobile: ?0' ,
'sec-ch-ua-platform: "macOS"'
),
));
$query = curl_exec ( $curl_handle );
$query = curl_exec ( $curl_handle );
$i = 1 ;
$i = 1 ;
while ( $query === false ) {
while ( $query === false ) {
@ -237,7 +260,6 @@ class TujiguService
usleep ( $sleepTime );
usleep ( $sleepTime );
echo $imageUrl ;
echo $imageUrl ;
}
}
dump ( $page -> all ());
// exit;
// exit;
$countLimit -- ;
$countLimit -- ;
}
}
@ -246,4 +268,44 @@ class TujiguService
}
}
}
}
/**
 * Download a page with an HTTP GET request and return the raw response body.
 *
 * The request is sent with a fixed set of browser-like headers (including a
 * session cookie and a referer on www.tujidao01.com), follows up to 10
 * redirects and is aborted after 10 seconds.
 *
 * @param string $url Absolute URL of the page to fetch.
 *
 * @return string|false The response body, or false when the handle could not
 *                      be created or the transfer failed. The cURL error is
 *                      written to the PHP error log before false is returned.
 */
public function getContent($url)
{
    $curl = curl_init();
    if ($curl === false) {
        error_log('getContent: curl_init() failed for ' . $url);
        return false;
    }

    curl_setopt_array($curl, array(
        // Hand the URL to cURL as-is: no surrounding whitespace, which
        // libcurl may reject as a malformed URL.
        CURLOPT_URL => (string) $url,
        CURLOPT_RETURNTRANSFER => true,
        // Empty string: advertise every content encoding this cURL build supports.
        CURLOPT_ENCODING => '',
        CURLOPT_MAXREDIRS => 10,
        CURLOPT_TIMEOUT => 10,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
        CURLOPT_CUSTOMREQUEST => 'GET',
        CURLOPT_HTTPHEADER => array(
            'authority: www.tujidao01.com',
            'accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'accept-language: zh-CN,zh;q=0.9',
            'cache-control: max-age=0',
            // NOTE(review): hard-coded session cookie (PHPSESSID / uid / name).
            // It is a credential committed to source and will stop working once
            // the session expires -- consider loading it from configuration.
            'cookie: PHPSESSID=ndm118vli42e1db7dfhqmvgjo7; __51vcke__Je64MI06Q1Neac4F=3d9a0d91-cf15-5bf7-ab90-90734f856aba; __51vuft__Je64MI06Q1Neac4F=1654567556100; uid=315696; name=nicksxs; leixing=0; __51uvsct__Je64MI06Q1Neac4F=2; __vtins__Je64MI06Q1Neac4F=%7B%22sid%22%3A%20%22d5d48e8b-a16e-5451-95f4-e629e6a4ec1b%22%2C%20%22vd%22%3A%205%2C%20%22stt%22%3A%20287951%2C%20%22dr%22%3A%205812%2C%20%22expires%22%3A%201654862307975%2C%20%22ct%22%3A%201654860507975%7D',
            'referer: https://www.tujidao01.com/sousu/?s0=%E6%9D%A8%E6%99%A8%E6%99%A8',
            'sec-ch-ua: " Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
            'sec-ch-ua-mobile: ?0',
            'sec-ch-ua-platform: "macOS"',
            'sec-fetch-dest: document',
            'sec-fetch-mode: navigate',
            'sec-fetch-site: same-origin',
            'sec-fetch-user: ?1',
            'upgrade-insecure-requests: 1',
            'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36'
        ),
    ));

    $response = curl_exec($curl);
    if ($response === false) {
        // Read the error details while the handle is still open; after
        // curl_close() they are no longer available.
        error_log(sprintf(
            'getContent: request to %s failed: [%d] %s',
            $url,
            curl_errno($curl),
            curl_error($curl)
        ));
    }
    curl_close($curl);

    // Same contract as before: the body on success, false on failure.
    return $response;
}
}
}