queryInstance = QueryList::getInstance();
    }

    /**
     * Returns the QueryList client that every scraping method uses to fetch pages.
     *
     * @return QueryList|null
     */
    public function getQueryInstance(): ?QueryList
    {
        return $this->queryInstance;
    }

    /**
     * Scrapes the album listing that starts at $rootUrl and hands the albums to processAlbum().
     *
     * The first listing page is always fetched. The total page count is read from the
     * pagination element; when that element is missing or contains no digits the listing
     * is treated as having a single page.
     *
     * NOTE(review): several debug short-circuits (`break;` / `exit;`) are still in place, so
     * at the moment only the first listing page is read and only the first album is processed
     * before the script terminates. They are kept as-is; remove them to enable a full crawl.
     *
     * @param string $rootUrl URL of the first listing page; further pages are "$rootUrl/page/N".
     */
    public function scrapeAllAlbum($rootUrl)
    {
        // Pagination element whose text carries the total number of listing pages.
        $totalPagesSelector = "body > section > div.content-wrap > div > div.pagination.pagination-multi > ul > li:nth-child(8) > span";

        $content = $this->getQueryInstance()->get($rootUrl);
        $rules = [
            // Album title, taken from the thumbnail's "title" attribute.
            'title' => ['.thumbnail', 'title'],
            // Album link, taken from the thumbnail's "href" attribute.
            'link' => ['.thumbnail', 'href'],
            // Text of the pagination element (evaluated relative to each range item).
            'totalNum' => [$totalPagesSelector, 'text'],
        ];
        $range = ".pb article";

        $list = $content->rules($rules)->range($range)->query()->getData();
        dump($list->all());
        $parsedList = array_merge([], $list->all());

        // Extract the first run of digits from the pagination text. Fall back to a single
        // page when nothing matches instead of reading an undefined array offset.
        $totalPageNumMatchedItems = $content->find($totalPagesSelector)->texts();
        $totalPageNumStr = (string) Arr::get($totalPageNumMatchedItems->all(), 0);
        $totalPageNum = 1;
        if (preg_match("#\d+#", $totalPageNumStr, $totalPageNumPregMatchedItems) === 1) {
            $totalPageNum = (int) $totalPageNumPregMatchedItems[0];
        }

        // Page 1 is already loaded above; the bound is inclusive so the last page is not skipped.
        for ($page = 2; $page <= $totalPageNum; $page++) {
            // NOTE(review): debug short-circuit - nothing below runs until this is removed.
            break;

            $pageUrl = $rootUrl . "/page/" . $page;
            $content = $this->getQueryInstance()->get($pageUrl);
            $pageItems = $content->rules($rules)->range($range)->query()->getData()->all();
            // Merge the page's albums into the flat list (appending the whole page as one
            // element would hand processAlbum() a list instead of a single album row).
            $parsedList = array_merge($parsedList, $pageItems);
            CommonUtils::randomSleep();
            // NOTE(review): second debug short-circuit - stops after the first extra page.
            break;
        }

        dump($parsedList);
        foreach ($parsedList as $item) {
            $this->processAlbum($item);
            // NOTE(review): debug short-circuit - terminates the script after the first album.
            exit;
        }
    }

    /**
     * Collects the image sources of one album and passes them to processSingleAlbumPage().
     *
     * NOTE(review): the loop over the album's additional pages starts with a debug `break;`,
     * so currently only the images of the album's first page are collected.
     *
     * @param array $item Album row with at least the keys "link" and "title".
     */
    public function processAlbum($item)
    {
        $content = $this->getQueryInstance()->get($item["link"]);

        // Links to the individual pages inside this album.
        $rules = [
            "link" => ["", "href"],
        ];
        $range = ".article-content > div:nth-child(2) a";
        $items = $content->rules($rules)->range($range)->query()->getData();
        dump("album link list", $items->all());

        // Every link except the last one (presumably a "next page" link - TODO confirm).
        $parsedItems = array_slice($items->all(), 0, -1);

        $imageRules = [
            "src" => ["", "src"],
        ];
        $imageRange = "body > section > div.content-wrap > div > article > p:nth-child(4) img";
        $imageItems = $content->rules($imageRules)->range($imageRange)->query()->getData();
        $parsedImageItems = $imageItems->all();

        foreach ($parsedItems as $parsedItem) {
            // NOTE(review): debug short-circuit - nothing below runs until this is removed.
            break;

            $content = $this->getQueryInstance()->get($parsedItem["link"]);
            $imageItems = $content->rules($imageRules)->range($imageRange)->query()->getData();
            $parsedImageItems = array_merge($parsedImageItems, $imageItems->all());
            // NOTE(review): second debug short-circuit - stops after the first extra page.
            break;
        }

        $this->processSingleAlbumPage($parsedImageItems, $item["title"]);
    }

    /**
     * Merges the first 50 images of an album via CommonService::mergeImage() and terminates.
     *
     * NOTE(review): because of the `exit;` right after the merge call, the download loop and
     * the final dump below are currently unreachable.
     *
     * @param array  $items Image rows, each with a "src" key.
     * @param string $title Album title; used as the folder name below the comic root path.
     */
    public function processSingleAlbumPage($items, $title)
    {
        $albumPath = $this->comicRootPath . "/" . $title;

        CommonService::mergeImage($albumPath, array_slice($items, 0, 50));
        exit;

        foreach ($items as $item) {
            CommonService::downloadImage($albumPath, $item["src"]);
        }
        dump(array_slice($items, 0, 10));
    }

    /**
     * Placeholder; intentionally does nothing at the moment.
     *
     * @param mixed $item Unused.
     */
    public function processImages($item)
    {
    }

    /**
     * Downloads every image found in the comic detail section of $url into a fixed folder.
     *
     * Images are located via their "data-src" attribute; entries without a usable source
     * are skipped.
     *
     * @param string $url URL of the comic detail page.
     */
    public function processHomeBusinessPeople($url)
    {
        $content = $this->getQueryInstance()->get($url);
        $imageRules = [
            "src" => ["", "data-src"],
        ];
        $imageRange = "#comicdetail > div:nth-child(7) img";
        $items = $content->rules($imageRules)->range($imageRange)->query()->getData();
        dump($items->all());

        // The target folder does not depend on the image, so build it once.
        $targetDir = $this->comicRootPath . "/" . "【周二连载】继母的朋友们(作者:Red-A&頸枕) 第1~48话/";

        foreach ($items->all() as $item) {
            $src = $item["src"] ?? null;
            // Skip images that have no data-src attribute or an empty one.
            if ($src !== null && $src !== '') {
                CommonService::downloadImage($targetDir, $src);
            }
            // Pause 1000 microseconds between images.
            usleep(1000);
        }
    }
}