|
|
@ -12,70 +12,77 @@ class TujiguService |
|
|
private static $name_dir = [ |
|
|
private static $name_dir = [ |
|
|
// "周韦彤" => [
|
|
|
// "周韦彤" => [
|
|
|
// "dir" => "周韦彤",
|
|
|
// "dir" => "周韦彤",
|
|
|
// "code" => 16274,
|
|
|
|
|
|
|
|
|
// "code" => 1456,
|
|
|
// "name" => "周韦彤"
|
|
|
// "name" => "周韦彤"
|
|
|
// ],
|
|
|
// ],
|
|
|
// "ycc" => [
|
|
|
// "ycc" => [
|
|
|
// "dir" => "ycc",
|
|
|
// "dir" => "ycc",
|
|
|
// "code" => 22162,
|
|
|
|
|
|
|
|
|
// "code" => 459,
|
|
|
// "name" => "杨晨晨"
|
|
|
// "name" => "杨晨晨"
|
|
|
// ],
|
|
|
// ],
|
|
|
// "ry" => [
|
|
|
// "ry" => [
|
|
|
// "dir" => "忍野さら",
|
|
|
// "dir" => "忍野さら",
|
|
|
// "code" => "21250",
|
|
|
|
|
|
|
|
|
// "code" => "1875",
|
|
|
// "name" => "忍野さら"
|
|
|
// "name" => "忍野さら"
|
|
|
// ],
|
|
|
// ],
|
|
|
// "azu" => [
|
|
|
// "azu" => [
|
|
|
// "dir" => "azu",
|
|
|
// "dir" => "azu",
|
|
|
// "code" => 26002,
|
|
|
|
|
|
|
|
|
// "code" => 437,
|
|
|
// "name" => "阿朱"
|
|
|
// "name" => "阿朱"
|
|
|
// ],
|
|
|
// ],
|
|
|
// "xq" => [
|
|
|
// "xq" => [
|
|
|
// "dir" => "xq",
|
|
|
// "dir" => "xq",
|
|
|
// "code" => 22204,
|
|
|
|
|
|
|
|
|
// "code" => 2438,
|
|
|
// "name" => "小琪"
|
|
|
// "name" => "小琪"
|
|
|
// ],
|
|
|
// ],
|
|
|
// "ygh" => [
|
|
|
// "ygh" => [
|
|
|
// "dir" => "ygh",
|
|
|
// "dir" => "ygh",
|
|
|
// "code" => 15902,
|
|
|
|
|
|
|
|
|
// "code" => 550,
|
|
|
// "name" => "原干惠"
|
|
|
// "name" => "原干惠"
|
|
|
// ],
|
|
|
// ],
|
|
|
// "wyc" => [
|
|
|
// "wyc" => [
|
|
|
// "dir" => "wyc",
|
|
|
// "dir" => "wyc",
|
|
|
// "code" => 19702,
|
|
|
|
|
|
|
|
|
// "code" => 293,
|
|
|
// "name" => "王语纯"
|
|
|
// "name" => "王语纯"
|
|
|
// ],
|
|
|
// ],
|
|
|
// "zz" => [
|
|
|
|
|
|
// "dir" => "zz",
|
|
|
|
|
|
// "code" => 22899,
|
|
|
|
|
|
// "name" => "芝芝 booty"
|
|
|
|
|
|
// ],
|
|
|
|
|
|
// "hlr" => [
|
|
|
|
|
|
// "dir" => "hlr",
|
|
|
|
|
|
// "code" => 20015,
|
|
|
|
|
|
// "name" => "黄乐然"
|
|
|
|
|
|
// ],
|
|
|
|
|
|
|
|
|
"zz" => [ |
|
|
|
|
|
"dir" => "zz", |
|
|
|
|
|
"code" => 954, |
|
|
|
|
|
"name" => "芝芝 booty" |
|
|
|
|
|
], |
|
|
|
|
|
"hlr" => [ |
|
|
|
|
|
"dir" => "hlr", |
|
|
|
|
|
"code" => 1289, |
|
|
|
|
|
"name" => "黄乐然" |
|
|
|
|
|
], |
|
|
"jrq" => [ |
|
|
"jrq" => [ |
|
|
"dir" => "jrq", |
|
|
"dir" => "jrq", |
|
|
"code" => 5034, |
|
|
"code" => 5034, |
|
|
"name" => "姜仁卿" |
|
|
"name" => "姜仁卿" |
|
|
], |
|
|
], |
|
|
// "ny" => [
|
|
|
|
|
|
// "dir" => "ny",
|
|
|
|
|
|
// "code" => 26298,
|
|
|
|
|
|
// "name" => "奈月"
|
|
|
|
|
|
// ],
|
|
|
|
|
|
// "杉本有美" => [
|
|
|
|
|
|
// "dir" => "杉本有美",
|
|
|
|
|
|
// "code" => 15939,
|
|
|
|
|
|
// "name" => "杉本有美"
|
|
|
|
|
|
// ]
|
|
|
|
|
|
|
|
|
"ny" => [ |
|
|
|
|
|
"dir" => "ny", |
|
|
|
|
|
"code" => 5301, |
|
|
|
|
|
"name" => "奈月" |
|
|
|
|
|
], |
|
|
|
|
|
"杉本有美" => [ |
|
|
|
|
|
"dir" => "杉本有美", |
|
|
|
|
|
"code" => 632, |
|
|
|
|
|
"name" => "杉本有美" |
|
|
|
|
|
], |
|
|
|
|
|
"糯美子" => [ |
|
|
|
|
|
"dir" => "糯美子", |
|
|
|
|
|
"code" => 161, |
|
|
|
|
|
"name" => "糯美子" |
|
|
|
|
|
] |
|
|
|
|
|
|
|
|
]; |
|
|
]; |
|
|
|
|
|
|
|
|
public function scrapeTujiguGirls() |
|
|
public function scrapeTujiguGirls() |
|
|
{ |
|
|
{ |
|
|
foreach (self::$name_dir as $username => $name) { |
|
|
foreach (self::$name_dir as $username => $name) { |
|
|
|
|
|
// Reset this map for each person: album ids may collide across different sites and different people.
|
|
|
|
|
|
$albumCodeMap = []; |
|
|
// $baseDir = "/Users/shixuesen/Documents/tmp/image/xg/" . $name['dir'] . "/";
|
|
|
// $baseDir = "/Users/shixuesen/Documents/tmp/image/xg/" . $name['dir'] . "/";
|
|
|
$baseDir = "/Volumes/intel660p/image/xg/" . $name["dir"] . "/"; |
|
|
$baseDir = "/Volumes/intel660p/image/xg/" . $name["dir"] . "/"; |
|
|
// Map keyed by album id, used as the uniqueness check because the two sites name the same album differently.
|
|
|
// Map keyed by album id, used as the uniqueness check because the two sites name the same album differently.
|
|
|
@ -115,28 +122,36 @@ class TujiguService |
|
|
$totalAlbumPage = ceil($totalAlbumNum / 40); |
|
|
$totalAlbumPage = ceil($totalAlbumNum / 40); |
|
|
} |
|
|
} |
|
|
$baseAlbumUrl = "https://www.tujigu.com/t/{$name['code']}/"; |
|
|
$baseAlbumUrl = "https://www.tujigu.com/t/{$name['code']}/"; |
|
|
for ($i = 1; $i <= $totalAlbumPage; $i++) { |
|
|
|
|
|
if ($onlyOnePage || $totalAlbumPage == 1) { |
|
|
|
|
|
|
|
|
for ($i = 1; $i < $totalAlbumPage; $i++) { |
|
|
|
|
|
if ($onlyOnePage || $totalAlbumPage == 1 || $i == 0) { |
|
|
$albumQl = QueryList::get($peopleUrl); |
|
|
$albumQl = QueryList::get($peopleUrl); |
|
|
} else { |
|
|
} else { |
|
|
$albumQl = QueryList::get($baseAlbumUrl . $i . ".html"); |
|
|
|
|
|
|
|
|
$albumQl = QueryList::get($baseAlbumUrl . "index_".$i . ".html"); |
|
|
} |
|
|
} |
|
|
// dump($albumQl->getHtml());
|
|
|
// dump($albumQl->getHtml());
|
|
|
$albumList = $albumQl->find("body > div.hezi > ul > li > a")->attrs("href"); |
|
|
$albumList = $albumQl->find("body > div.hezi > ul > li > a")->attrs("href"); |
|
|
$pageAlbum = $albumList->all(); |
|
|
$pageAlbum = $albumList->all(); |
|
|
dump($pageAlbum); |
|
|
dump($pageAlbum); |
|
|
foreach ($pageAlbum as $album) { |
|
|
foreach ($pageAlbum as $album) { |
|
|
usleep(1000 * random_int(1000, 10000)); |
|
|
|
|
|
|
|
|
dump("current album page no: " . $i); |
|
|
|
|
|
usleep(10000 * random_int(1000, 10000)); |
|
|
dump("相册:", [$album]); |
|
|
dump("相册:", [$album]); |
|
|
$pageQL = QueryList::get( $album); |
|
|
$pageQL = QueryList::get( $album); |
|
|
$page = $pageQL->find("body > div.tuji > p:nth-child(6)")->htmls(); |
|
|
|
|
|
|
|
|
$page = $pageQL->find("body > div.tuji > p:nth-child(5)")->htmls(); |
|
|
|
|
|
$pageAlternative = $pageQL->find("body > div.tuji > p:nth-child(6)")->htmls(); |
|
|
$title = $pageQL->find("body > div.tuji > div.weizhi > h1")->htmls(); |
|
|
$title = $pageQL->find("body > div.tuji > div.weizhi > h1")->htmls(); |
|
|
dump($title->all()); |
|
|
dump($title->all()); |
|
|
$titleStr = $title->all()[0]; |
|
|
$titleStr = $title->all()[0]; |
|
|
preg_match("#\d+#", $page->all()[0], $result); |
|
|
|
|
|
$totalImageNum = $result[0]; |
|
|
|
|
|
|
|
|
$titleStr = preg_replace("#/#", "-", $titleStr); |
|
|
|
|
|
preg_match("#图片数量: (\d+)P#", $page->all()[0], $result); |
|
|
|
|
|
if (count($result) < 2) { |
|
|
|
|
|
preg_match("#图片数量: (\d+)P#", $pageAlternative->all()[0], $result); |
|
|
|
|
|
} |
|
|
|
|
|
$totalImageNum = $result[1]; |
|
|
for ($j = 1; $j <= $totalImageNum; $j++) { |
|
|
for ($j = 1; $j <= $totalImageNum; $j++) { |
|
|
$albumCode = substr($album, 25, 5); |
|
|
|
|
|
|
|
|
// $albumCode = substr($album, 25, 5);
|
|
|
|
|
|
$albumCode = explode("/", $album)[4]; |
|
|
|
|
|
|
|
|
$baseImageUrl = "https://lns.hywly.com/a/1/{$albumCode}/"; |
|
|
$baseImageUrl = "https://lns.hywly.com/a/1/{$albumCode}/"; |
|
|
// if ($j == 0) {
|
|
|
// if ($j == 0) {
|
|
|
// $imageName = $j . ".jpg";
|
|
|
// $imageName = $j . ".jpg";
|
|
|
|