|
|
@ -3,11 +3,14 @@ |
|
|
namespace App\Services; |
|
|
namespace App\Services; |
|
|
|
|
|
|
|
|
use App\Utils\CommonUtils; |
|
|
use App\Utils\CommonUtils; |
|
|
|
|
|
use Illuminate\Support\Arr; |
|
|
use QL\QueryList; |
|
|
use QL\QueryList; |
|
|
|
|
|
|
|
|
class ComicsService { |
|
|
|
|
|
|
|
|
class ComicsService |
|
|
|
|
|
{ |
|
|
|
|
|
|
|
|
private $queryInstance; |
|
|
private $queryInstance; |
|
|
|
|
|
|
|
|
public function __construct() |
|
|
public function __construct() |
|
|
{ |
|
|
{ |
|
|
$this->queryInstance = QueryList::getInstance(); |
|
|
$this->queryInstance = QueryList::getInstance(); |
|
|
@ -25,18 +28,50 @@ class ComicsService { |
|
|
{ |
|
|
{ |
|
|
$content = $this->getQueryInstance()->get($rootUrl); |
|
|
$content = $this->getQueryInstance()->get($rootUrl); |
|
|
$rules = [ |
|
|
$rules = [ |
|
|
// 采集文章标题
|
|
|
|
|
|
'title' => ['.thumbnail','title'], |
|
|
|
|
|
// 采集链接
|
|
|
|
|
|
'link' => ['.thumbnail','href'], |
|
|
|
|
|
// 采集缩略图
|
|
|
|
|
|
'totalNum' => ['body > section > div.content-wrap > div > div.pagination.pagination-multi > ul > li:nth-child(8) > span','text'], |
|
|
|
|
|
]; |
|
|
|
|
|
|
|
|
// 采集文章标题
|
|
|
|
|
|
'title' => ['.thumbnail', 'title'], |
|
|
|
|
|
// 采集链接
|
|
|
|
|
|
'link' => ['.thumbnail', 'href'], |
|
|
|
|
|
// 采集缩略图
|
|
|
|
|
|
'totalNum' => ['body > section > div.content-wrap > div > div.pagination.pagination-multi > ul > li:nth-child(8) > span', 'text'], |
|
|
|
|
|
]; |
|
|
// $links = $content->find("a.thumbnail")->attrs("href");
|
|
|
// $links = $content->find("a.thumbnail")->attrs("href");
|
|
|
$range = ".pb article"; |
|
|
$range = ".pb article"; |
|
|
$list = $content->rules($rules)->range($range)->query()->getData();; |
|
|
|
|
|
|
|
|
$list = $content->rules($rules)->range($range)->query()->getData(); |
|
|
dump($list->all()); |
|
|
dump($list->all()); |
|
|
|
|
|
$parsedList = array_merge([], $list->all()); |
|
|
|
|
|
$totalPageNumMatchedItems = $content->find("body > section > div.content-wrap > div > div.pagination.pagination-multi > ul > li:nth-child(8) > span")->texts(); |
|
|
|
|
|
$totalPageNumStr = Arr::get($totalPageNumMatchedItems->all(), 0); |
|
|
|
|
|
preg_match("#\d+#", $totalPageNumStr, $totalPageNumPregMatchedItems); |
|
|
|
|
|
$totalPageNum = $totalPageNumPregMatchedItems[0]; |
|
|
// $totalPageNum = body > section > div.content-wrap > div > div.pagination.pagination-multi > ul > li:nth-child(8) > span
|
|
|
// $totalPageNum = body > section > div.content-wrap > div > div.pagination.pagination-multi > ul > li:nth-child(8) > span
|
|
|
// $links->dump();
|
|
|
// $links->dump();
|
|
|
|
|
|
|
|
|
|
|
|
for ($i = 2; $i < $totalPageNum; $i++) { |
|
|
|
|
|
break; |
|
|
|
|
|
$pageUrl = $rootUrl . "/page/" . $i; |
|
|
|
|
|
$content = $this->getQueryInstance()->get($pageUrl); |
|
|
|
|
|
$parsedList[] = $content->rules($rules)->range($range)->query()->getData()->all(); |
|
|
|
|
|
CommonUtils::randomSleep(); |
|
|
|
|
|
break; |
|
|
|
|
|
} |
|
|
|
|
|
dump($parsedList); |
|
|
|
|
|
foreach ($parsedList as $item) { |
|
|
|
|
|
$this->processAlbum($item); |
|
|
|
|
|
exit; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/**
 * Fetch a single album page and dump the href of every anchor found in
 * the first child <div> of ".article-content".
 *
 * Nothing is returned or stored; the collected links are only passed to
 * dump() for inspection.
 *
 * @param array $item Entry describing one album; only its "link" key is
 *                    read, and it is used as the URL to fetch.
 *                    NOTE(review): presumably one row produced by the
 *                    listing scrape — confirm against the caller.
 */
public function processAlbum($item)
{
    // Single source of truth for the selector (it used to be written twice,
    // once in an unused variable and once inline in find()).
    $range = ".article-content > div:nth-child(1) a";

    $content = $this->getQueryInstance()->get($item["link"]);
    $list = $content->find($range)->attrs("href");

    dump($list->all());
}
|
|
} |
|
|
} |
|
|
} |