|
|
@ -14,22 +14,23 @@ class XiurenjiService |
|
|
public $domainUrl = "https://www.xiurenji.vip"; |
|
|
public $domainUrl = "https://www.xiurenji.vip"; |
|
|
public $xiurenRootUrl = "https://www.xiurenji.vip/XiuRen/"; |
|
|
public $xiurenRootUrl = "https://www.xiurenji.vip/XiuRen/"; |
|
|
// public $rootDir = "/Users/shixuesen/Documents/tmp/xiuren/";
|
|
|
// public $rootDir = "/Users/shixuesen/Documents/tmp/xiuren/";
|
|
|
public $rootDir = "/Volumes/Backup/images/xiuren/"; |
|
|
|
|
|
// public $rootDir = "/Volumes/intel660p/image/xiuren/";
|
|
|
|
|
|
|
|
|
// public $rootDir = "/Volumes/Backup/images/xiuren/";
|
|
|
|
|
|
public $rootDir = "/Volumes/intel660p/image/xiuren/"; |
|
|
public $queryInstance; |
|
|
public $queryInstance; |
|
|
|
|
|
public $queryNew; |
|
|
public function __construct() |
|
|
public function __construct() |
|
|
{ |
|
|
{ |
|
|
$this->queryInstance = QueryList::getInstance(); |
|
|
$this->queryInstance = QueryList::getInstance(); |
|
|
|
|
|
$this->queryNew = new QueryList(); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
public function scrapeAlbum() |
|
|
public function scrapeAlbum() |
|
|
{ |
|
|
{ |
|
|
echo "111"; |
|
|
|
|
|
$pageSize = 20; |
|
|
$pageSize = 20; |
|
|
$pageCount = $this->getEncodeHtmlContent("https://www.xiurenji.vip/XiuRen/index.html")->find(".page span")->htmls()->get(0); |
|
|
$pageCount = $this->getEncodeHtmlContent("https://www.xiurenji.vip/XiuRen/index.html")->find(".page span")->htmls()->get(0); |
|
|
print_r($pageCount); |
|
|
print_r($pageCount); |
|
|
if ((int)$pageCount > 0) { |
|
|
if ((int)$pageCount > 0) { |
|
|
$pageCount = 40; |
|
|
|
|
|
|
|
|
$pageCount = 100; |
|
|
for ($i = 0; $i <= ceil($pageCount / $pageSize); $i++) { |
|
|
for ($i = 0; $i <= ceil($pageCount / $pageSize); $i++) { |
|
|
$urlSuffix = ""; |
|
|
$urlSuffix = ""; |
|
|
if ($i == 0) { |
|
|
if ($i == 0) { |
|
|
@ -111,7 +112,16 @@ class XiurenjiService |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
$images = $pageContent->find(".img p img")->getElements(); |
|
|
$images = $pageContent->find(".img p img")->getElements(); |
|
|
// dump($images);exit;
|
|
|
|
|
|
|
|
|
$user = $pageContent->find(".title_pc tr:eq(2) td a:eq(2)")->html(); |
|
|
|
|
|
// $user = $this->queryNew->find(".ina a:eq(-1) b")->html();
|
|
|
|
|
|
// body > div.nr3 > table.title_pc > tbody > tr:nth-child(3) > td > a:nth-child(2)
|
|
|
|
|
|
$user = iconv_gbk_to_uft8($user); |
|
|
|
|
|
dump("user is " . $user); |
|
|
|
|
|
if (str_contains($user, "#") || str_contains($user, "&") || trim($user) == "") { |
|
|
|
|
|
$user = $this->queryNew->find(".ina a:eq(-1) b")->html();; |
|
|
|
|
|
$user = iconv_gbk_to_uft8($user); |
|
|
|
|
|
dump("new user is " . $user); |
|
|
|
|
|
} |
|
|
foreach ($images as $image) { |
|
|
foreach ($images as $image) { |
|
|
usleep(random_int(10, 100) * 100); |
|
|
usleep(random_int(10, 100) * 100); |
|
|
$imageUrl = $image->getAttribute("src"); |
|
|
$imageUrl = $image->getAttribute("src"); |
|
|
@ -124,6 +134,11 @@ class XiurenjiService |
|
|
continue; |
|
|
continue; |
|
|
} |
|
|
} |
|
|
if (file_exists($dir . "/" . $imageNo . "-" . $fileInfo["basename"])) { |
|
|
if (file_exists($dir . "/" . $imageNo . "-" . $fileInfo["basename"])) { |
|
|
|
|
|
rename($dir . "/" . $imageNo . "-" . $fileInfo["basename"], $dir . "/" . trim($user) . "-" . $imageNo . "-" . $fileInfo["basename"]); |
|
|
|
|
|
$imageNo++; |
|
|
|
|
|
continue; |
|
|
|
|
|
} |
|
|
|
|
|
if (file_exists($dir . "/" . trim($user) . "-" . $imageNo . "-" . $fileInfo["basename"])) { |
|
|
$imageNo++; |
|
|
$imageNo++; |
|
|
continue; |
|
|
continue; |
|
|
} |
|
|
} |
|
|
@ -158,7 +173,7 @@ class XiurenjiService |
|
|
break; |
|
|
break; |
|
|
} while($attempts < 100); |
|
|
} while($attempts < 100); |
|
|
if ($content != "") { |
|
|
if ($content != "") { |
|
|
file_put_contents($dir . "/" . $imageNo . "-" . $fileInfo["basename"], $content); |
|
|
|
|
|
|
|
|
file_put_contents($dir . "/" . trim($user) . "-" . $imageNo . "-" . $fileInfo["basename"], $content); |
|
|
} else { |
|
|
} else { |
|
|
Log::error("image content is empty ". $trueImageUrl); |
|
|
Log::error("image content is empty ". $trueImageUrl); |
|
|
} |
|
|
} |
|
|
@ -185,7 +200,7 @@ class XiurenjiService |
|
|
curl_setopt_array($curl, array( |
|
|
curl_setopt_array($curl, array( |
|
|
CURLOPT_URL => $url, |
|
|
CURLOPT_URL => $url, |
|
|
CURLOPT_RETURNTRANSFER => true, |
|
|
CURLOPT_RETURNTRANSFER => true, |
|
|
CURLOPT_ENCODING => '', |
|
|
|
|
|
|
|
|
CURLOPT_ENCODING => 'UTF-8', |
|
|
CURLOPT_MAXREDIRS => 10, |
|
|
CURLOPT_MAXREDIRS => 10, |
|
|
CURLOPT_TIMEOUT => 0, |
|
|
CURLOPT_TIMEOUT => 0, |
|
|
CURLOPT_FOLLOWLOCATION => true, |
|
|
CURLOPT_FOLLOWLOCATION => true, |
|
|
@ -219,7 +234,10 @@ class XiurenjiService |
|
|
curl_close($curl); |
|
|
curl_close($curl); |
|
|
|
|
|
|
|
|
// echo $response;
|
|
|
// echo $response;
|
|
|
$html = iconv('gb2312','UTF-8//IGNORE', $response); |
|
|
|
|
|
|
|
|
// $html = iconv('gb2312','UTF-8//IGNORE', $response);
|
|
|
|
|
|
$html = iconv_gbk_to_uft8($response); |
|
|
|
|
|
$this->queryNew->setHtml($response); |
|
|
|
|
|
// $html = $response;
|
|
|
} catch (Exception $e) { |
|
|
} catch (Exception $e) { |
|
|
echo $e->getMessage(). "\n"; |
|
|
echo $e->getMessage(). "\n"; |
|
|
echo $e->getTraceAsString() . "\n"; |
|
|
echo $e->getTraceAsString() . "\n"; |
|
|
|