[ "dir" => "ygh", "code" => 15902, "name" => "原干惠" ],
        "ycc" => [ "dir" => "ycc", "code" => 22162, "name" => "杨晨晨" ],
        "wyc" => [ "dir" => "wyc", "code" => 19702, "name" => "王语纯" ],
        "zz" => [ "dir" => "zz", "code" => 22899, "name" => "芝芝 booty" ],
        "hlr" => [ "dir" => "hlr", "code" => 20015, "name" => "黄乐然" ],
        "jrq" => [ "dir" => "jrq", "code" => 26560, "name" => "姜仁卿" ]
    ];

    /**
     * Scrape the photo albums of one model page on xsnvshen.com into a local folder.
     *
     * The body consists of three consecutive stages:
     *   0. a debugging dump of self::$name_dir followed by exit;
     *   1. a rename pass that appends each album's title to its existing
     *      "<album id>" directory, followed by exit;
     *   2. a download pass that stores every ".swi-hd > img" image of every
     *      album as "<album id>/<image name>.jpg".
     *
     * Because of the two exit statements only stage 0 runs today. They are kept
     * on purpose (see the NOTE(review) markers) so that this method does not
     * start touching the file system unexpectedly.
     *
     * The target model (page id 22162, directory "ycc") and the base directory
     * are hard-coded.
     *
     * @return void
     */
    public function scrapeXiuGirls()
    {
        // NOTE(review): debugging short-circuit. Everything below this exit is
        // currently unreachable; remove these two lines to enable stage 1.
        print_r(self::$name_dir);
        exit;

        $siteUrl = "https://www.xsnvshen.com";
        $baseDir = "/Users/shixuesen/Documents/xg/ycc/";
        $albumSelector = ".entryAblum > .star-mod-bd > ul > li > a";

        // Album listing of the model page; each matched <a> is one album.
        $listing = QueryList::get($siteUrl . "/girl/22162");
        $albumLinks = $listing->find($albumSelector)->attrs("href");
        $albumAttrs = $listing->find($albumSelector)->attrs("*");

        // ---- Stage 1: rename "<album id>" directories to "<album id>-<title>".
        foreach ($albumAttrs->all() as $album) {
            // The third path segment of the href is used as the directory name.
            $oldPath = $baseDir . explode("/", $album['href'])[2];
            if (file_exists($oldPath)) {
                echo "old name :" . $oldPath . "\n";
                rename($oldPath, $oldPath . "-" . $album['title']);
            }
        }

        // NOTE(review): second short-circuit; the download pass below never runs
        // while this is present. Also note that stage 2 looks for the plain
        // "<album id>" directory, so running it after stage 1 has renamed the
        // directories would download every album again - confirm the intended
        // order before removing this exit.
        exit;

        // ---- Stage 2: download every image of every album.
        foreach ($albumLinks->all() as $albumHref) {
            print_r($albumHref);

            $albumUrl = $siteUrl . $albumHref;
            // Fetch the album page once and reuse it for both title and images.
            $page = QueryList::get($albumUrl);
            print_r($page->find("h1 > a")->texts());

            $albumPath = $baseDir . explode("/", $albumHref)[2];
            if (!file_exists($albumPath) && !mkdir($albumPath) && !is_dir($albumPath)) {
                // Without the directory every fopen() below would fail as well.
                echo "cannot create album directory: " . $albumPath . "\n";
                continue;
            }

            $images = $page->find(".swi-hd > img")->attrs("src");

            // One handle per album: only the URL changes between images.
            $curl_handle = curl_init();
            // NOTE(review): CURLOPT_CONNECTTIMEOUT is expressed in seconds, so
            // 2000 is roughly 33 minutes - confirm this is intended.
            curl_setopt($curl_handle, CURLOPT_CONNECTTIMEOUT, 2000);
            curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($curl_handle, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36');
            // The album page is sent as referer for its own images.
            curl_setopt($curl_handle, CURLOPT_REFERER, $albumUrl);

            foreach ($images as $image) {
                // src values are protocol-relative ("//host/path"); the scheme is
                // prepended only so that pathinfo() sees a complete URL.
                $target = $albumPath . "/" . pathinfo("http:" . $image)['filename'] . ".jpg";
                if (file_exists($target)) {
                    // Already downloaded on a previous run.
                    continue;
                }

                curl_setopt($curl_handle, CURLOPT_URL, "https:" . $image);
                $query = curl_exec($curl_handle);

                // Up to 99 retries (100 attempts in total), one second apart.
                for ($attempt = 1; $query === false && $attempt < 100; $attempt++) {
                    echo 'Curl error: ' . curl_error($curl_handle) . "\n";
                    echo "retry times: " . $attempt . " times \n";
                    sleep(1);
                    $query = curl_exec($curl_handle);
                }

                if ($query === false) {
                    // Do not create the file: an empty .jpg would make the
                    // file_exists() check above skip this image forever.
                    echo curl_error($curl_handle) . "\n";
                    continue;
                }

                // 'x' creates the file exclusively and fails if it already exists.
                $fp = fopen($target, 'x');
                if ($fp === false) {
                    echo "cannot create file: " . $target . "\n";
                    continue;
                }
                fwrite($fp, $query);
                fclose($fp);
            }
        }
    }
}