You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

113 lines
4.3 KiB

<?php
namespace App\Services;
use function PHPSTORM_META\type;
use QL\QueryList;
use Illuminate\Http\File;
use Illuminate\Support\Facades\Storage;
/**
 * Downloads the album images linked from one index page of xiugirls.com.
 *
 * The index page is scanned for album links; every album page is then
 * fetched once and each image it references is saved as
 * "<base dir>/<album id>/<image name>.jpg". Images that already exist on
 * disk are skipped, so an interrupted run can simply be started again.
 */
class XiuGirlsService
{
    /** Site root; the album hrefs found on the index page are appended to it. */
    const SITE_URL = "http://www.xiugirls.com";

    /** Index page whose album list is scraped. */
    const INDEX_URL = "http://www.xiugirls.com/girl/22162";

    /** CSS selector for the album links on the index page. */
    const ALBUM_LINK_SELECTOR = ".entryAblum > .star-mod-bd > ul > li > a";

    /** CSS selector for the image tags on an album page. */
    const IMAGE_SELECTOR = ".swi-hd > img";

    /** Directory the albums are written to when the caller passes none. */
    const DEFAULT_BASE_DIR = "/Users/shixuesen/Documents/xiugirl/";

    /** User-Agent header sent with every image request. */
    const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36';

    /** Total number of curl_exec() attempts per image (first try + retries). */
    const MAX_ATTEMPTS = 100;

    /**
     * Value handed to CURLOPT_CONNECTTIMEOUT, which cURL interprets in seconds.
     * NOTE(review): 2000 seconds looks like it was meant as milliseconds
     * (CURLOPT_CONNECTTIMEOUT_MS) - confirm before changing.
     */
    const CONNECT_TIMEOUT = 2000;

    /**
     * Scrape the index page and download every album found after the first
     * $skip entries.
     *
     * @param int    $skip    Number of leading album links to ignore
     *                        (defaults to 49, the previously hard-coded value).
     * @param string $baseDir Directory that receives one sub-directory per album.
     *
     * @return void
     */
    public function scrapeXiuGirls($skip = 49, $baseDir = self::DEFAULT_BASE_DIR)
    {
        // Guarantee exactly one trailing slash so paths can be concatenated.
        $baseDir = rtrim($baseDir, "/") . "/";

        $albumLinks = QueryList::get(self::INDEX_URL)
            ->find(self::ALBUM_LINK_SELECTOR)
            ->attrs("href")
            ->all();

        foreach (array_slice($albumLinks, (int) $skip) as $albumLink) {
            $this->scrapeAlbum($albumLink, $baseDir);
        }
    }

    /**
     * Download all images of a single album into its own directory.
     *
     * @param mixed  $albumLink Site-relative href taken from the index page;
     *                          its third "/"-separated segment names the directory.
     * @param string $baseDir   Output directory, with a trailing slash.
     *
     * @return void
     */
    private function scrapeAlbum($albumLink, $baseDir)
    {
        print_r($albumLink);

        // The directory name comes from remote markup, so reject anything that
        // is missing or would resolve outside of $baseDir.
        $segments = is_string($albumLink) ? explode("/", $albumLink) : [];
        $albumId = isset($segments[2]) ? $segments[2] : "";
        if ($albumId === "" || $albumId === "." || $albumId === "..") {
            echo "skip album, unexpected link format\n";
            return;
        }

        $albumUrl = self::SITE_URL . $albumLink;

        // Fetch the album page once and query it for both title and images.
        $page = QueryList::get($albumUrl);
        print_r($page->find("h1 > a")->texts());

        $albumPath = $baseDir . $albumId;
        // The second is_dir() covers the directory appearing between check and mkdir.
        if (!is_dir($albumPath) && !mkdir($albumPath, 0777, true) && !is_dir($albumPath)) {
            echo "skip album, cannot create directory: " . $albumPath . "\n";
            return;
        }

        foreach ($page->find(self::IMAGE_SELECTOR)->attrs("src") as $imageSrc) {
            // The scheme is prepended because the src values are used without one.
            $imageUrl = "http:" . $imageSrc;
            $target = $albumPath . "/" . pathinfo($imageUrl, PATHINFO_FILENAME) . ".jpg";

            if (file_exists($target)) {
                continue;
            }

            $body = $this->downloadImage($imageUrl, $albumUrl);
            if ($body === false || $body === "") {
                // Do not leave an empty file behind: it would make the
                // file_exists() check above skip this image on every later run.
                echo "skip image, download failed: " . $imageUrl . "\n";
                continue;
            }

            if (file_put_contents($target, $body) === false) {
                echo "skip image, cannot write file: " . $target . "\n";
                // Remove a partially written file so the image is retried next run.
                if (is_file($target)) {
                    unlink($target);
                }
            }
        }
    }

    /**
     * Fetch one image with cURL, retrying failed transfers once per second.
     *
     * @param string $imageUrl Absolute URL of the image.
     * @param string $referer  Album page URL, sent as the Referer header.
     *
     * @return string|false The response body, or false when every attempt failed.
     */
    private function downloadImage($imageUrl, $referer)
    {
        $handle = curl_init();
        if ($handle === false) {
            echo "Curl error: unable to initialise handle\n";
            return false;
        }

        curl_setopt_array($handle, [
            CURLOPT_URL => $imageUrl,
            CURLOPT_CONNECTTIMEOUT => self::CONNECT_TIMEOUT,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_USERAGENT => self::USER_AGENT,
            CURLOPT_REFERER => $referer,
        ]);

        $body = curl_exec($handle);
        for ($attempt = 1; $body === false && $attempt < self::MAX_ATTEMPTS; $attempt++) {
            echo 'Curl error: ' . curl_error($handle) . "\n";
            echo "retry times: " . $attempt . " times \n";
            sleep(1);
            $body = curl_exec($handle);
        }

        // Prints the last error message; empty when the transfer succeeded.
        echo curl_error($handle);

        // The handle is released when $handle goes out of scope on return.
        return $body;
    }
}