From bab77191c3fc6d4ec7a52da894c8180852993c1f Mon Sep 17 00:00:00 2001 From: shixuesen Date: Tue, 14 Jun 2022 11:15:44 +0800 Subject: [PATCH] modify bilibili --- app/Console/Commands/BiliVideoCode.php | 3 +- app/Console/Commands/RenameTest.php | 2 +- app/Console/Commands/TujiguCommand.php | 2 +- app/Http/Controllers/WeiboController.php | 16 +-- app/Services/BilibiliServiceV2.php | 6 +- app/Services/NewXiuGirlsService.php | 64 ++++++++-- app/Services/TujiguService.php | 146 ++++++++++++++++------- 7 files changed, 173 insertions(+), 66 deletions(-) diff --git a/app/Console/Commands/BiliVideoCode.php b/app/Console/Commands/BiliVideoCode.php index 178ae36..1c8fb18 100644 --- a/app/Console/Commands/BiliVideoCode.php +++ b/app/Console/Commands/BiliVideoCode.php @@ -55,7 +55,8 @@ class BiliVideoCode extends Command // exit; // $bilibili->queryDBCollectionList(); -// $bilibili->queryForVideoParts(); + dump($bilibili->requestVideoParts("33483603"));exit; + $bilibili->queryForVideoParts();exit; $bilibili->compareAndDownloadUpVideos(true); // $bilibili->compareAndDownloadCollectionVideos(); exit; diff --git a/app/Console/Commands/RenameTest.php b/app/Console/Commands/RenameTest.php index fd9135f..093fa1f 100644 --- a/app/Console/Commands/RenameTest.php +++ b/app/Console/Commands/RenameTest.php @@ -60,7 +60,7 @@ class RenameTest extends Command // $arr[2] = 1640488544; // asort($arr); // dump($arr);exit; - $rename->rename($path, $prefix); +// $rename->rename($path, $prefix); $rename->splitCustomSizeOfFolder($path, $prefix, 500);exit; // $rename->rename("/Volumes/WD/tmp/写真图/猫九", "猫九-"); diff --git a/app/Console/Commands/TujiguCommand.php b/app/Console/Commands/TujiguCommand.php index f5ef1c8..b4909eb 100644 --- a/app/Console/Commands/TujiguCommand.php +++ b/app/Console/Commands/TujiguCommand.php @@ -47,6 +47,6 @@ class TujiguCommand extends Command // echo $albumCode;exit; $service = new TujiguService(); - $service->scrapeTujiguGirls(); + $service->scrapeTujiguGirls(true); } } diff --git a/app/Http/Controllers/WeiboController.php b/app/Http/Controllers/WeiboController.php index 10148dd..639465f 100644 --- a/app/Http/Controllers/WeiboController.php +++ b/app/Http/Controllers/WeiboController.php @@ -67,14 +67,14 @@ class WeiboController extends Controller // for ($i = 10; $i >= 1; $i--) { // $url[] = 'https://m.weibo.cn/feed/group?gid=4423532052076817&&page=' . $i; // } - foreach ($list as $key => $value) { - for ($i = $size; $i >= 1; $i--) { - $url[] = "https://m.weibo.cn/api/container/getIndex?containerid=$value&page=" . $i; - } - } -// for ($i = 10; $i >= 1; $i--) { -// $url[] = 'https://m.weibo.cn/api/container/getIndex?containerid=230259&page=' . $i; -// } + // foreach ($list as $key => $value) { + // for ($i = $size; $i >= 1; $i--) { + // $url[] = "https://m.weibo.cn/api/container/getIndex?containerid=$value&page=" . $i; + // } + // } + for ($i = $size; $i >= 1; $i--) { + $url[] = 'https://m.weibo.cn/api/container/getIndex?containerid=230259&page=' . $i; + } // for ($i = 20; $i >= 1; $i--) { // $url[] = 'https://m.weibo.cn/api/container/getIndex?containerid=2304131916825084&page=' . $i; // } diff --git a/app/Services/BilibiliServiceV2.php b/app/Services/BilibiliServiceV2.php index ae1a859..b7e3239 100644 --- a/app/Services/BilibiliServiceV2.php +++ b/app/Services/BilibiliServiceV2.php @@ -658,7 +658,7 @@ class BilibiliServiceV2 public function queryForVideoParts() { - $i = 1; + $i = 9; $list = BilibiliVideos::orderBy('id', 'desc')->simplePaginate(2000, null, 'page', $i); // dump($list->items()[0]->aid); while ($list->isNotEmpty()) { @@ -704,9 +704,9 @@ class BilibiliServiceV2 } catch (Exception $e) { } } - break; +// break; $i++; - $list = BilibiliVideos::simplePaginate(50, null, 'page', $i); + $list = BilibiliVideos::simplePaginate(2000, null, 'page', $i); try { usleep(random_int(10, 1000) * 1000); } catch (Exception $e) { diff --git a/app/Services/NewXiuGirlsService.php b/app/Services/NewXiuGirlsService.php index 4d8bd2e..901b00a 100644 --- a/app/Services/NewXiuGirlsService.php +++ b/app/Services/NewXiuGirlsService.php @@ -110,7 +110,7 @@ class NewXiuGirlsService // $items = array_slice($items, 49); // print_r($items);exit; - $baseUrl = "https://www.xsnvshen.com"; + $baseUrl = "https://www.xsnvshen.co"; // $baseDir = "/Users/shixuesen/Documents/xg/" . $name["dir"]. "/"; $baseDir = "/Volumes/intel660p/image/xg/" . $name["dir"] . "/"; if (!file_exists($baseDir)) { @@ -165,7 +165,7 @@ class NewXiuGirlsService $username = $iUser; dump("here {$username}, {$name["code"]}"); usleep(random_int(1000, 10000) * 1000); - $peopleUrl = "https://xsnvshen.com/girl/"; + $peopleUrl = "https://xsnvshen.co/girl/"; $peopleUrl .= $name["code"]; $albumSelector = ".entryAblum > .star-mod-bd > ul > li > a"; @@ -176,7 +176,7 @@ class NewXiuGirlsService // $items = array_slice($items, 49); // print_r($items);exit; - $baseUrl = "https://www.xsnvshen.com"; + $baseUrl = "https://www.xsnvshen.co"; // $baseDir = "/Users/shixuesen/Documents/xg/" . $name["dir"]. "/"; $baseDir = "/Volumes/intel660p/image/xg/" . $name["dir"] . "/"; if (!file_exists($baseDir)) { @@ -198,7 +198,8 @@ class NewXiuGirlsService $queryItemUrlTimes = 1; do { try { - $html = (new QueryList)->get($baseUrl . $item); + $html = $this->getContent($baseUrl . $item); + // $html = (new QueryList)->get($baseUrl . $item); } catch (Exception $e) { Log::error("查询相册子链接失败,将重试, 异常信息: " . $e->getMessage()); $sleepTime = 1000 * random_int(1000, 10000); @@ -305,11 +306,12 @@ class NewXiuGirlsService private function queryAllAlbum($name, $isAll = false): array { CommonUtils::randomSleep(100); - $peopleUrl = "https://xsnvshen.com/girl/"; + $peopleUrl = "https://www.xsnvshen.co/girl/"; $peopleUrl .= $name["code"]; $albumSelector = ".entryAblum > .star-mod-bd > ul > li > a"; - $baseQl = $this->getQueryInstance()->get($peopleUrl); + $html = $this->getContent($peopleUrl); + $baseQl = $this->getQueryInstance()->setHtml($html); $ql = $baseQl->find($albumSelector)->attrs("href"); $items = $ql->all(); @@ -331,7 +333,9 @@ class NewXiuGirlsService $queryItemUrlTimes = 1; do { try { - $html = $this->getQueryInstance()->get($baseUrl . $item); + $content = $this->getContent($baseUrl . $item); + $html = $this->getQueryInstance()->setHtml($content); + // $html = $this->getQueryInstance()->get($baseUrl . $item); } catch (Exception $e) { Log::error("查询相册子链接失败,将重试, 异常信息: " . $e->getMessage()); $sleepTime = 1000 * random_int(1000, 10000); @@ -391,7 +395,9 @@ class NewXiuGirlsService $images = []; do { try { - $images = $this->getQueryInstance()->get($baseUrl . $item)->find(".swi-hd > img")->attrs("src"); + $content = $this->getContent($baseUrl . $item); + $baseQl = $this->getQueryInstance()->setHtml($content); + $images = $baseQl->find(".swi-hd > img")->attrs("src"); } catch (Exception $e) { echo $e->getTraceAsString() . "\n"; $sleepTime = 10000 * random_int(100, 1000); @@ -485,7 +491,7 @@ class NewXiuGirlsService CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1, CURLOPT_CUSTOMREQUEST => 'GET', CURLOPT_HTTPHEADER => array( - 'authority: img.xsnvshen.com', + 'authority: img.xsnvshen.co', 'pragma: no-cache', 'cache-control: no-cache', 'sec-ch-ua: " Not A;Brand";v="99", "Chromium";v="90", "Google Chrome";v="90"', @@ -495,7 +501,7 @@ class NewXiuGirlsService 'sec-fetch-site: same-site', 'sec-fetch-mode: no-cors', 'sec-fetch-dest: image', - 'referer: https://www.xsnvshen.com/album/' . $item, + 'referer: https://www.xsnvshen.co/album/' . $item, 'accept-language: zh-CN,zh;q=0.9', // 'cookie: __cfduid=dac5872f65e79a40a5b30229ba97beb6a1619333692' ), @@ -525,4 +531,42 @@ class NewXiuGirlsService echo "after write image sleep {$sleepTime} nano second \n"; usleep($sleepTime); } + + private function getContent(string $url): string + { + $curl = curl_init(); + + curl_setopt_array($curl, array( + CURLOPT_URL => $url, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_ENCODING => '', + CURLOPT_MAXREDIRS => 10, + CURLOPT_TIMEOUT => 0, + CURLOPT_FOLLOWLOCATION => true, + CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1, + CURLOPT_CUSTOMREQUEST => 'GET', + CURLOPT_HTTPHEADER => array( + 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', + 'Accept-Language: zh-CN,zh;q=0.9', + 'Cache-Control: max-age=0', + 'Connection: keep-alive', + 'Cookie: gcha_sf=1654239879; __51vcke__JNmlfXHHIrHMZgLq=7d2b0e0c-1ca8-56d7-bfb8-576c07ec9182; __51vuft__JNmlfXHHIrHMZgLq=1654479736820; __51uvsct__JNmlfXHHIrHMZgLq=2; __vtins__JNmlfXHHIrHMZgLq=%7B%22sid%22%3A%20%2207ccc540-953f-5f3c-8268-8c6c759cc5d8%22%2C%20%22vd%22%3A%204%2C%20%22stt%22%3A%20373842%2C%20%22dr%22%3A%2032187%2C%20%22expires%22%3A%201654654759205%2C%20%22ct%22%3A%201654652959205%7D; jpx=4; jpx=5', + 'Sec-Fetch-Dest: document', + 'Sec-Fetch-Mode: navigate', + 'Sec-Fetch-Site: none', + 'Sec-Fetch-User: ?1', + 'Upgrade-Insecure-Requests: 1', + 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36', + 'sec-ch-ua: " Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"', + 'sec-ch-ua-mobile: ?0', + 'sec-ch-ua-platform: "macOS"' + ), + )); + + $response = curl_exec($curl); + + curl_close($curl); + return $response; + + } } diff --git a/app/Services/TujiguService.php b/app/Services/TujiguService.php index e084a7d..1b5aec4 100644 --- a/app/Services/TujiguService.php +++ b/app/Services/TujiguService.php @@ -104,22 +104,24 @@ class TujiguService } } } - $baseUrl = "https://www.tujigu.net/"; + $baseUrl = "https://www.tujidao01.com/"; usleep(random_int(100, 1000) * 1000); - $peopleUrl = "https://www.tujigu.net/t/"; + $peopleUrl = "https://www.tujidao01.com/t/?id="; $peopleUrl .= $name['code']; // 获取总的相册数量 $albumNumSelector = "body > div:nth-child(4) > span"; + $albumNumSelector = "#pages > div > a:last-child"; echo "111111111"; + $content = null; try { - $baseQl = QueryList::get($peopleUrl, null, ['timeout' => 5]); + $content = $this->getContent($peopleUrl); + $baseQl = QueryList::getInstance()->setHtml($content); } catch (Exception $e) { dump($e->getMessage()); } echo "222222222"; - $ql = $baseQl->find($albumNumSelector)->htmls(); - dump($ql->all()); + $ql = $baseQl->find($albumNumSelector)->attrs("href"); $onlyOnePage = false; // 相册页数 if (count($ql->all()) == 0) { @@ -127,32 +129,39 @@ class TujiguService $totalAlbumPage = 1; $onlyOnePage = true; } else { - preg_match("#\d+#", $ql->all()[0], $result); + preg_match("#page=(\d+)#", $ql->all()[0], $result); dump($result); - $totalAlbumNum = 0; - if (is_numeric($result[0])) { - $totalAlbumNum = $result[0]; + $totalAlbumPage = 1; + if (is_numeric($result[1])) { + $totalAlbumPage = $result[1]; } - $totalAlbumPage = ceil($totalAlbumNum / 40); } - $baseAlbumUrl = "https://www.tujigu.net/t/{$name['code']}/"; + $baseAlbumUrl = "https://www.tujidao01.com/t/?id={$name['code']}&page="; // 表示限制抓取相册数 $countLimit = 8; if ($isAll) { - $countLimit = 5000; + $countLimit = 5000; } - for ($i = 0; $i < $totalAlbumPage; $i++) { - if ($onlyOnePage || $totalAlbumPage == 1 || $i == 0) { - $albumQl = QueryList::get($peopleUrl); + echo "totalAlbumPage is $totalAlbumPage\n"; + for ($i = 1; $i <= $totalAlbumPage; $i++) { + if ($onlyOnePage || $totalAlbumPage == 1 || $i == 1) { + $albumQl = QueryList::getInstance()->setHtml($content); } else { - $albumQl = QueryList::get($baseAlbumUrl . "index_".$i . ".html"); + $content = $this->getContent($baseAlbumUrl . $i); + $albumQl = QueryList::getInstance()->setHtml($content); } // dump($albumQl->getHtml()); - $albumList = $albumQl->find("body > div.hezi > ul > li > a")->attrs("href"); - $pageAlbum = $albumList->all(); - dump($pageAlbum); - foreach ($pageAlbum as $album) { + $rules = [ + 'num' => ['span.shuliang', 'text'], + 'title' => ['p.biaoti', 'text'], + 'img' => ['a>img', 'src'], + "code" => ['p.biaoti a', "href"] + ]; + $range = "div.hezi>ul li"; + // "body > div.hezi > ul > li" + $albumList = $albumQl->rules($rules)->range($range)->query()->getData()->all(); + foreach ($albumList as $album) { if ($countLimit <= 0) { dump("相册已超过限制数量,跳出"); break 2; @@ -160,23 +169,18 @@ class TujiguService dump("current album page no: " . $i); usleep(1000 * random_int(100, 1000)); dump("相册:", [$album]); - $pageQL = QueryList::get( $album); - $page = $pageQL->find("body > div.tuji > p:nth-child(5)")->htmls(); - $pageAlternative = $pageQL->find("body > div.tuji > p:nth-child(6)")->htmls(); - $title = $pageQL->find("body > div.tuji > div.weizhi > h1")->htmls(); - dump("pageTitle all", [$title->all(), $album]); - $titleStr = $title->all()[0]; - $titleStr = preg_replace("#/#", "-", $titleStr); - preg_match("#图片数量: (\d+)P#", $page->all()[0], $result); - if (count($result) < 2) { - preg_match("#图片数量: (\d+)P#", $pageAlternative->all()[0], $result); - } + $title = $album["title"]; + $titleStr = preg_replace("#/#", "-", $title); + preg_match("#(\d+)P#", $album["num"], $result); $totalImageNum = $result[1]; for ($j = 1; $j <= $totalImageNum; $j++) { // $albumCode = substr($album, 25, 5); - $albumCode = explode("/", $album)[4]; + preg_match("#id\=(\d+)#", $album["code"], $albumCodeResult); + $albumCode = $albumCodeResult[1]; $baseImageUrl = "https://tjg.gzhuibei.com/a/1/{$albumCode}/"; + + // if ($j == 0) { // $imageName = $j . ".jpg"; // } else { @@ -203,16 +207,35 @@ class TujiguService dump($albumPath . "/" . $imageName . " exists. skipped!"); continue; } - $opts = array('http' => ['header' => - "User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36 \r\n - Referer:" . $baseUrl . "\r\n" - ]); + $curl_handle = curl_init(); - curl_setopt($curl_handle, CURLOPT_URL, $imageUrl); - curl_setopt($curl_handle, CURLOPT_CONNECTTIMEOUT, 2000); - curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER, 1); - curl_setopt($curl_handle, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'); - curl_setopt($curl_handle, CURLOPT_REFERER, $baseUrl); + + curl_setopt_array($curl_handle, array( + CURLOPT_URL => $imageUrl, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_ENCODING => '', + CURLOPT_MAXREDIRS => 10, + CURLOPT_TIMEOUT => 0, + CURLOPT_FOLLOWLOCATION => true, + CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1, + CURLOPT_CUSTOMREQUEST => 'GET', + CURLOPT_HTTPHEADER => array( + 'Accept: image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8', + 'Accept-Language: zh-CN,zh;q=0.9', + 'Cache-Control: no-cache', + 'Connection: keep-alive', + 'Pragma: no-cache', + 'Referer: https://www.tujidao01.com/', + 'Sec-Fetch-Dest: image', + 'Sec-Fetch-Mode: no-cors', + 'Sec-Fetch-Site: cross-site', + 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36', + 'sec-ch-ua: " Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"', + 'sec-ch-ua-mobile: ?0', + 'sec-ch-ua-platform: "macOS"' + ), + )); + $query = curl_exec($curl_handle); $i = 1; while ($query === false) { @@ -237,7 +260,6 @@ class TujiguService usleep($sleepTime); echo $imageUrl; } - dump($page->all()); // exit; $countLimit--; } @@ -246,4 +268,44 @@ class TujiguService } } + public function getContent($url) + { + $curl = curl_init(); + + curl_setopt_array($curl, array( + CURLOPT_URL => "$url", + CURLOPT_RETURNTRANSFER => true, + CURLOPT_ENCODING => '', + CURLOPT_MAXREDIRS => 10, + CURLOPT_TIMEOUT => 10, + CURLOPT_FOLLOWLOCATION => true, + CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1, + CURLOPT_CUSTOMREQUEST => 'GET', + CURLOPT_HTTPHEADER => array( + 'authority: www.tujidao01.com', + 'accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', + 'accept-language: zh-CN,zh;q=0.9', + 'cache-control: max-age=0', + 'cookie: PHPSESSID=ndm118vli42e1db7dfhqmvgjo7; __51vcke__Je64MI06Q1Neac4F=3d9a0d91-cf15-5bf7-ab90-90734f856aba; __51vuft__Je64MI06Q1Neac4F=1654567556100; uid=315696; name=nicksxs; leixing=0; __51uvsct__Je64MI06Q1Neac4F=2; __vtins__Je64MI06Q1Neac4F=%7B%22sid%22%3A%20%22d5d48e8b-a16e-5451-95f4-e629e6a4ec1b%22%2C%20%22vd%22%3A%205%2C%20%22stt%22%3A%20287951%2C%20%22dr%22%3A%205812%2C%20%22expires%22%3A%201654862307975%2C%20%22ct%22%3A%201654860507975%7D', + 'referer: https://www.tujidao01.com/sousu/?s0=%E6%9D%A8%E6%99%A8%E6%99%A8', + 'sec-ch-ua: " Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"', + 'sec-ch-ua-mobile: ?0', + 'sec-ch-ua-platform: "macOS"', + 'sec-fetch-dest: document', + 'sec-fetch-mode: navigate', + 'sec-fetch-site: same-origin', + 'sec-fetch-user: ?1', + 'upgrade-insecure-requests: 1', + 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36' + ), + )); + + $response = curl_exec($curl); + + + curl_close($curl); + return $response; + + } + }