<?php
|
|
|
|
namespace App\Services;
|
|
|
|
use App\Utils\CommonUtils;
|
|
use Illuminate\Support\Arr;
|
|
use QL\QueryList;
|
|
|
|
/**
 * Scrapes comic album listings with QueryList and hands the collected image
 * entries to CommonService for merging / downloading.
 *
 * All page fetches go through the single QueryList instance obtained in the
 * constructor, so every call to get() replaces the document that was loaded
 * before. Methods therefore read everything they need from a page before
 * fetching the next one.
 */
class ComicsService
{
    /** Selector of the pagination element whose text is parsed for the total page number. */
    private const PAGINATION_TOTAL_SELECTOR = 'body > section > div.content-wrap > div > div.pagination.pagination-multi > ul > li:nth-child(8) > span';

    /** Maximum number of image entries passed to CommonService::mergeImage() per album. */
    private const MERGE_IMAGE_LIMIT = 50;

    /** Root directory used when no other root path is given to the constructor. */
    private const DEFAULT_COMIC_ROOT_PATH = '/Users/shixuesen/Documents/fixnew';

    /** Directory (relative to the root path) used by processHomeBusinessPeople() by default. */
    private const DEFAULT_HOME_BUSINESS_DIR = '【周二连载】继母的朋友们(作者:Red-A&頸枕) 第1~48话/';

    /** @var QueryList */
    private $queryInstance;

    /** @var string Directory under which one sub-directory per album is used. */
    private $comicRootPath;

    /**
     * @param string|null $comicRootPath Root directory for album output; null keeps the built-in default.
     */
    public function __construct(?string $comicRootPath = null)
    {
        $this->queryInstance = QueryList::getInstance();
        $this->comicRootPath = $comicRootPath ?? self::DEFAULT_COMIC_ROOT_PATH;
    }

    /**
     * @return QueryList|null
     */
    public function getQueryInstance(): ?QueryList
    {
        return $this->queryInstance;
    }

    /**
     * Collects album entries (title + link) from the listing at $rootUrl and
     * processes each of them with processAlbum().
     *
     * The defaults restrict the run to the first listing page and the first
     * album; pass null for a limit to lift it.
     *
     * @param string   $rootUrl          URL of the first listing page; further pages are "$rootUrl/page/N".
     * @param int|null $maxPages         Maximum number of listing pages to read (including the first), null for all.
     * @param int|null $maxAlbums        Maximum number of albums to process, null for all.
     * @param bool     $followAlbumPages Forwarded to processAlbum() as $followPageLinks.
     *
     * @return void
     */
    public function scrapeAllAlbum($rootUrl, ?int $maxPages = 1, ?int $maxAlbums = 1, bool $followAlbumPages = false)
    {
        $rules = [
            // Album title
            'title' => ['.thumbnail', 'title'],
            // Album link
            'link' => ['.thumbnail', 'href'],
            // Pagination text. NOTE(review): this selector is absolute while the
            // rules are evaluated per ".pb article" range, so the value is
            // presumably always empty; it is not read anywhere in this class.
            'totalNum' => [self::PAGINATION_TOTAL_SELECTOR, 'text'],
        ];
        $range = ".pb article";

        $content = $this->getQueryInstance()->get($rootUrl);
        $albums = $content->rules($rules)->range($range)->query()->getData()->all();

        // Must be read before the next get(): the shared instance only holds one document.
        $totalPageNum = $this->parseTotalPageNum($content);
        $lastPage = $maxPages === null ? $totalPageNum : min($totalPageNum, $maxPages);

        // Inclusive upper bound: the parsed number is treated as the index of
        // the last page. NOTE(review): confirm against the site's pagination.
        for ($page = 2; $page <= $lastPage; $page++) {
            $pageUrl = $rootUrl . "/page/" . $page;
            $pageContent = $this->getQueryInstance()->get($pageUrl);

            // Merge (not append) so the list stays flat: one entry per album.
            $albums = array_merge(
                $albums,
                $pageContent->rules($rules)->range($range)->query()->getData()->all()
            );

            CommonUtils::randomSleep();
        }

        if ($maxAlbums !== null) {
            $albums = array_slice($albums, 0, max(0, $maxAlbums));
        }

        foreach ($albums as $album) {
            $this->processAlbum($album, $followAlbumPages);
        }
    }

    /**
     * Collects the image entries of one album and passes them, together with
     * the album title, to processSingleAlbumPage().
     *
     * @param array $item            Album entry with at least the keys "link" and "title".
     * @param bool  $followPageLinks When true, the page links found inside the album are fetched as
     *                               well and their images appended; by default only the page at
     *                               $item["link"] is read.
     *
     * @return void
     */
    public function processAlbum($item, bool $followPageLinks = false)
    {
        $content = $this->getQueryInstance()->get($item["link"]);

        $pageLinks = [];
        if ($followPageLinks) {
            // Page links within a single album.
            $linkRules = [
                "link" => ["", "href"],
            ];
            $linkRange = ".article-content > div:nth-child(2) a";
            $allLinks = $content->rules($linkRules)->range($linkRange)->query()->getData()->all();

            // The last link is dropped. NOTE(review): presumably a "next page"
            // anchor that duplicates an earlier entry — confirm.
            $pageLinks = array_slice($allLinks, 0, -1);
        }

        $imageRules = [
            "src" => ["", "src"],
        ];
        $imageRange = "body > section > div.content-wrap > div > article > p:nth-child(4) img";

        $images = $content->rules($imageRules)->range($imageRange)->query()->getData()->all();

        foreach ($pageLinks as $pageLink) {
            $pageContent = $this->getQueryInstance()->get($pageLink["link"]);
            $images = array_merge(
                $images,
                $pageContent->rules($imageRules)->range($imageRange)->query()->getData()->all()
            );
        }

        $this->processSingleAlbumPage($images, $item["title"]);
    }

    /**
     * Passes the first MERGE_IMAGE_LIMIT image entries of an album to
     * CommonService::mergeImage() and, on request, every entry's "src" to
     * CommonService::downloadImage().
     *
     * @param array  $items    Image entries, each with a "src" key.
     * @param string $title    Album title; used as directory name below the root path.
     * @param bool   $download When true, downloadImage() is called for every entry after the merge call.
     *                         NOTE(review): the merge-then-download order is kept from the original
     *                         statement order — confirm it is the intended one.
     *
     * @return void
     */
    public function processSingleAlbumPage($items, $title, bool $download = false)
    {
        $albumDir = $this->comicRootPath . "/" . $title;

        CommonService::mergeImage($albumDir, array_slice($items, 0, self::MERGE_IMAGE_LIMIT));

        if (!$download) {
            return;
        }

        foreach ($items as $item) {
            CommonService::downloadImage($albumDir, $item["src"]);
        }
    }

    /**
     * Placeholder; currently does nothing.
     *
     * @param mixed $item
     *
     * @return void
     */
    public function processImages($item)
    {
    }

    /**
     * Downloads every image found in the comic detail section of $url, using
     * the "data-src" attribute of each image as its source.
     *
     * @param string $url          Page to read.
     * @param string $albumDirName Target directory below the root path.
     *
     * @return void
     */
    public function processHomeBusinessPeople($url, $albumDirName = self::DEFAULT_HOME_BUSINESS_DIR)
    {
        $content = $this->getQueryInstance()->get($url);

        $imageRules = [
            "src" => ["", "data-src"],
        ];
        $imageRange = "#comicdetail > div:nth-child(7) img";

        $images = $content->rules($imageRules)->range($imageRange)->query()->getData()->all();
        $targetDir = $this->comicRootPath . "/" . $albumDirName;

        foreach ($images as $image) {
            $src = $image["src"] ?? null;

            // Images without a usable "data-src" value are skipped.
            if ($src !== null && $src !== '') {
                CommonService::downloadImage($targetDir, $src);
            }

            // Short pause (1 ms) between entries.
            usleep(1000);
        }
    }

    /**
     * Reads the total page number from the pagination element of the
     * currently loaded document.
     *
     * @param QueryList $content Document to inspect.
     *
     * @return int First run of digits in the pagination text, or 1 when the
     *             element is missing or contains no digits.
     */
    private function parseTotalPageNum(QueryList $content): int
    {
        $texts = $content->find(self::PAGINATION_TOTAL_SELECTOR)->texts();
        $text = (string) Arr::get($texts->all(), 0, '');

        if (preg_match('#\d+#', $text, $matches) !== 1) {
            return 1;
        }

        return (int) $matches[0];
    }
}
|