queryInstance = QueryList::getInstance();
    }

    /**
     * Returns the QueryList instance this object uses for all HTTP fetches and DOM queries.
     *
     * @return QueryList|null
     */
    public function getQueryInstance(): ?QueryList
    {
        return $this->queryInstance;
    }

    /**
     * Scrapes the album listing under $rootUrl (first page plus every
     * "/page/N" follow-up page) and passes each album entry to processAlbum().
     *
     * Each entry is an array with the keys "title", "link" and "totalNum",
     * as produced by the scraping rules below.
     *
     * @param string $rootUrl URL of the first listing page; follow-up pages are
     *                        requested as "<rootUrl>/page/<n>".
     */
    public function scrapeAllAlbum($rootUrl)
    {
        // Pager element whose text carries the total number of listing pages.
        $paginationSelector = 'body > section > div.content-wrap > div > div.pagination.pagination-multi > ul > li:nth-child(8) > span';

        $rules = [
            // Album title, taken from the thumbnail anchor's "title" attribute.
            'title' => ['.thumbnail', 'title'],
            // Album URL, taken from the thumbnail anchor's "href" attribute.
            'link' => ['.thumbnail', 'href'],
            // Text of the pager's "total pages" element. Kept so the item shape
            // stays unchanged. NOTE(review): this selector is rooted at "body",
            // so it presumably never matches inside a single ".pb article"
            // range element - confirm whether the rule is still needed.
            'totalNum' => [$paginationSelector, 'text'],
        ];
        $range = '.pb article';

        // First listing page.
        $content = $this->getQueryInstance()->get($rootUrl);
        $list = $content->rules($rules)->range($range)->query()->getData();
        dump($list->all()); // debug output, retained from the original
        $parsedList = array_merge([], $list->all());

        // Derive the page count from the pager text; fall back to a single
        // page when the pager is missing or contains no number.
        $totalPageNumMatchedItems = $content->find($paginationSelector)->texts();
        $totalPageNumStr = (string) Arr::get($totalPageNumMatchedItems->all(), 0);
        $totalPageNum = 1;
        if (preg_match('#\d+#', $totalPageNumStr, $totalPageNumPregMatchedItems) === 1) {
            $totalPageNum = (int) $totalPageNumPregMatchedItems[0];
        }

        // Avoid "//page/N" when the caller passes a URL with a trailing slash.
        $baseUrl = rtrim((string) $rootUrl, '/');

        // Pages 2..N inclusive. Assumes the pager number is the index of the
        // last page - TODO confirm against the live site.
        for ($i = 2; $i <= $totalPageNum; $i++) {
            $pageUrl = $baseUrl . '/page/' . $i;
            $content = $this->getQueryInstance()->get($pageUrl);
            $pageItems = $content->rules($rules)->range($range)->query()->getData()->all();

            // Merge the page's items into the flat list; appending the page as
            // one element would nest it and break the $item["link"] lookup.
            $parsedList = array_merge($parsedList, $pageItems);

            // Pause between listing requests.
            CommonUtils::randomSleep();
        }

        dump($parsedList); // debug output, retained from the original

        foreach ($parsedList as $item) {
            $this->processAlbum($item);
            // Pause between album requests as well, now that every album
            // is visited instead of only the first one.
            CommonUtils::randomSleep();
        }
    }

    /**
     * Fetches one album page and dumps the href of every link found in the
     * first child div of ".article-content".
     *
     * @param array $item Album entry from scrapeAllAlbum(); only "link" is read.
     */
    public function processAlbum($item)
    {
        // Entries without a usable link cannot be fetched; skip them.
        if (empty($item['link'])) {
            return;
        }

        $content = $this->getQueryInstance()->get($item['link']);
        $list = $content->find('.article-content > div:nth-child(1) a')->attrs('href');
        dump($list->all());
    }
}