<?php
|
|
|
|
|
|
namespace App\Services;
|
|
|
|
|
|
use function PHPSTORM_META\type;
|
|
use QL\QueryList;
|
|
use Illuminate\Http\File;
|
|
use Illuminate\Support\Facades\Storage;
|
|
|
|
class XiuGirlsService
{
    /**
     * Scrape the album links found on the listing page and download every
     * album image into a per-album directory below the local base directory.
     *
     * For each album link the method:
     *  - fetches the album page once and prints the link and the "h1 > a" texts,
     *  - creates (if needed) a directory named after the third "/"-separated
     *    segment of the link,
     *  - downloads each ".swi-hd > img" source that is not already on disk.
     *
     * Images that cannot be downloaded are NOT written to disk, so a later run
     * will try them again instead of skipping an empty placeholder file.
     *
     * A disabled one-off routine that renamed existing album directories to
     * "<id>-<title>" used to be kept here as commented-out code; it has been
     * removed because it never ran.
     *
     * @return void
     */
    public function scrapeXiuGirls()
    {
        $listingUrl = "http://www.xiugirls.com/girl/22162";
        $albumSelector = ".entryAblum > .star-mod-bd > ul > li > a";
        $siteUrl = "http://www.xiugirls.com";
        $baseDir = "/Users/shixuesen/Documents/xiugirl/";

        $albumLinks = QueryList::get($listingUrl)
            ->find($albumSelector)
            ->attrs("href")
            ->all();

        // Only links from index 49 onwards are processed.
        // NOTE(review): presumably the first 49 albums were handled by an
        // earlier run — confirm before changing this offset.
        $albumLinks = array_slice($albumLinks, 49);

        foreach ($albumLinks as $albumLink) {
            print_r($albumLink);

            $albumUrl = $siteUrl . $albumLink;

            // One fetch serves both the title lookup and the image lookup.
            $albumPage = QueryList::get($albumUrl);

            print_r($albumPage->find("h1 > a")->texts());

            // The directory name is the third "/"-separated segment of the link.
            $segments = explode("/", (string) $albumLink);
            if (!isset($segments[2]) || $segments[2] === "") {
                echo "Skipping album link without an id segment: " . $albumLink . "\n";
                continue;
            }
            $albumPath = $baseDir . $segments[2];

            // Recursive so that a missing base directory is created as well.
            if (!file_exists($albumPath) && !mkdir($albumPath, 0777, true) && !is_dir($albumPath)) {
                echo "Could not create directory: " . $albumPath . "\n";
                continue;
            }

            $images = $albumPage->find(".swi-hd > img")->attrs("src");

            foreach ($images as $image) {
                if (!is_string($image) || $image === "") {
                    continue;
                }

                // The src values are prefixed with the scheme before use.
                $imageUrl = "http:" . $image;
                $targetFile = $albumPath . "/" . pathinfo($imageUrl)['filename'] . ".jpg";

                // Already downloaded by an earlier run.
                if (file_exists($targetFile)) {
                    continue;
                }

                $body = $this->downloadWithRetry($imageUrl, $albumUrl);
                if ($body === false || $body === "") {
                    echo "Giving up on " . $imageUrl . "\n";
                    continue;
                }

                $this->saveNewFile($targetFile, $body);
            }
        }
    }

    /**
     * Download a URL with cURL, retrying once per second while the transfer fails.
     *
     * @param string $url         URL to download.
     * @param string $referer     Value sent as the Referer header.
     * @param int    $maxAttempts Total number of attempts (first try included).
     *
     * @return string|false Response body, or false when every attempt failed.
     */
    private function downloadWithRetry($url, $referer, $maxAttempts = 100)
    {
        $handle = curl_init();
        if ($handle === false) {
            echo "Curl error: could not initialise a handle\n";
            return false;
        }

        curl_setopt($handle, CURLOPT_URL, $url);
        // NOTE(review): CURLOPT_CONNECTTIMEOUT is expressed in seconds, so this
        // allows roughly 33 minutes per connection attempt — confirm whether
        // milliseconds (CURLOPT_CONNECTTIMEOUT_MS) were intended.
        curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 2000);
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($handle, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36');
        curl_setopt($handle, CURLOPT_REFERER, $referer);

        $body = curl_exec($handle);

        for ($retry = 1; $body === false && $retry < $maxAttempts; $retry++) {
            echo 'Curl error: ' . curl_error($handle) . "\n";
            echo "retry times: " . $retry . " times \n";
            sleep(1);
            $body = curl_exec($handle);
        }

        if ($body === false) {
            echo curl_error($handle) . "\n";
        }

        return $body;
    }

    /**
     * Write data to a file that must not exist yet.
     *
     * A partially written file is removed again so that it cannot be mistaken
     * for a finished download on a later run.
     *
     * @param string $path Destination path.
     * @param string $data Bytes to write.
     *
     * @return bool True when all bytes were written.
     */
    private function saveNewFile($path, $data)
    {
        // Mode 'x' creates the file and fails if it already exists.
        $fp = fopen($path, 'x');
        if ($fp === false) {
            echo "Could not create file: " . $path . "\n";
            return false;
        }

        $written = fwrite($fp, $data);
        fclose($fp);

        if ($written === false || $written < strlen($data)) {
            unlink($path);
            echo "Could not write file: " . $path . "\n";
            return false;
        }

        return true;
    }
}
|