<?php
|
|
|
|
|
|
namespace App\Services;
|
|
|
|
|
|
use QL\QueryList;
|
|
|
|
/**
 * Scraper for the image albums published on www.mm24.cc.
 *
 * getAlbum() walks the paginated album listing and hands every album link to
 * getImage(), which downloads the album's images one page at a time into a
 * per-album directory below a fixed local base directory.
 */
class MM24Service
{
    /**
     * Crawl listing pages 1 through 19 and download every album linked there.
     *
     * Exceptions thrown while fetching a listing page are not caught here and
     * propagate to the caller.
     *
     * @return void
     */
    public function getAlbum()
    {
        $siteRoot = "https://www.mm24.cc";

        for ($page = 1; $page < 20; $page++) {
            // The first listing page has no page suffix; later ones are "1-<n>.html".
            $listingUrl = $page === 1
                ? "{$siteRoot}/taotu"
                : "{$siteRoot}/taotu/1-{$page}.html";

            $listing = QueryList::get($listingUrl);
            $albumUrls = $listing->find("body > div.main > div.boxs > ul > li > a")->attrs("href");

            foreach ($albumUrls as $albumUrl) {
                dump($albumUrl);

                // getImage() appends ".html" / "_<n>.html" itself, so pass the
                // album URL without its extension.
                $this->getImage($siteRoot . $this->stripHtmlSuffix($albumUrl));
            }
        }
    }

    /**
     * Download all images of one album.
     *
     * Page 1 of an album lives at "<albumUrl>.html", page n at
     * "<albumUrl>_<n>.html"; up to 99 pages are tried. The album title is read
     * from the first page and used as the directory name. Images that already
     * exist on disk are skipped. The method returns as soon as fetching a page
     * throws, which is how the end of an album is detected.
     *
     * @param string $albumUrl Absolute album URL without the ".html" extension.
     *
     * @return void
     */
    public function getImage($albumUrl)
    {
        $baseDir = "/Volumes/intel660p/image/bl/";
        $baseUrl = "https://www.mm24.cc/";
        $albumPath = '';

        for ($i = 1; $i < 100; $i++) {
            dump("new loop start");

            $pageUrl = $i === 1
                ? $albumUrl . ".html"
                : $albumUrl . "_" . $i . ".html";

            try {
                $pageQl = QueryList::get($pageUrl);
            } catch (\Exception $e) {
                // A page that cannot be fetched marks the end of the album.
                return;
            }

            if ($i === 1) {
                // The title (and therefore the target directory) is only
                // present on the first page and stays fixed for the album.
                $title = $pageQl->find("body > div.width > div.weizhi > h1")->htmls()[0];
                $albumPath = rtrim($baseDir, "/") . "/" . $title;

                if (!file_exists($albumPath)) {
                    mkdir($albumPath);
                }
            }

            $img = $pageQl->find("#img_view > img")->attr("src");
            if (empty($img)) {
                // Nothing to download on this page; do not fire a request
                // (and dozens of retries) at an empty URL.
                echo "no image found on {$pageUrl}, skipped \n";
                continue;
            }

            $targetFile = $albumPath . "/" . pathinfo($img, PATHINFO_FILENAME) . ".jpg";
            if (file_exists($targetFile)) {
                continue;
            }

            dump("start send curl request");
            $imageData = $this->downloadWithRetry($img, $baseUrl);

            if ($imageData === false) {
                // Do not create the file: an empty file would make the
                // "already downloaded" check above skip this image forever.
                echo "giving up on {$img} \n";
                continue;
            }

            // 'x' refuses to overwrite a file that appeared in the meantime.
            $fp = fopen($targetFile, 'x');
            if ($fp === false) {
                echo "cannot create {$targetFile} \n";
                continue;
            }
            fwrite($fp, $imageData);
            fclose($fp);

            // Short random pause between images (value is in microseconds).
            $sleepTime = 1000 * random_int(100, 1000);
            echo "after write image sleep {$sleepTime} microseconds \n";
            usleep($sleepTime);
        }
    }

    /**
     * Remove a trailing ".html" from a link, leaving other links untouched.
     *
     * @param mixed $href Link as returned by the listing page.
     *
     * @return string
     */
    private function stripHtmlSuffix($href)
    {
        $href = (string) $href;

        return substr($href, -5) === ".html" ? substr($href, 0, -5) : $href;
    }

    /**
     * Fetch a URL with curl, retrying after a failed transfer.
     *
     * After each failure the method waits one second plus a random extra
     * delay and tries again, giving up after 99 retries. The curl handle is
     * always closed before returning.
     *
     * @param string $url     URL to download.
     * @param string $referer Value sent as the Referer header.
     *
     * @return string|false Response body, or false when every attempt failed.
     */
    private function downloadWithRetry($url, $referer)
    {
        $handle = curl_init();
        curl_setopt($handle, CURLOPT_URL, $url);
        curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 20);
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($handle, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36');
        curl_setopt($handle, CURLOPT_REFERER, $referer);

        $body = curl_exec($handle);
        dump(" exec curl request");

        $attempt = 1;
        while ($body === false) {
            echo 'Curl error: ' . curl_error($handle) . "\n";
            echo "retry times: " . $attempt++ . " times \n";

            sleep(1);
            // usleep() takes microseconds: this adds another 1-10 seconds.
            $sleepTime = 1000 * random_int(1000, 10000);
            echo "retry sleep {$sleepTime} microseconds \n";
            usleep($sleepTime);

            $body = curl_exec($handle);
            if ($attempt >= 100) {
                break;
            }
        }
        dump("after fail curl request");

        if ($body === false) {
            echo curl_error($handle) . "\n";
        }
        curl_close($handle);

        return $body;
    }
}
|