@ -105,7 +105,7 @@ class NewXiuGirlsService
}
// $items = array_slice($items, 1);
// print_r($items);exit;
print_r ( $items ); continue ;
foreach ( $items as $item ) {
usleep ( random_int ( 1000 , 10000 ) * 1000 );
echo " 相册子链接: " . $item . " \n " ;
@ -306,4 +306,142 @@ class NewXiuGirlsService
// exit;
}
}
/**
 * Download every image of a single album for one configured user.
 *
 * Looks up the user's entry in self::$name_dir (its 'code' and 'dir' keys are
 * read), prints the album links found on the user's profile page, then
 * downloads the images of the album identified by $albumOuterCode into
 * "<base dir>/<album code>-<album title>/".
 *
 * Images that already exist on disk (either "<file>.jpg" or
 * "<user>-<album code>-<file>.jpg") are skipped, so the method can be re-run
 * to resume an interrupted download.
 *
 * NOTE: this method terminates the PHP process with exit once it is done,
 * exactly like the previous implementation; it never returns to the caller.
 *
 * @param string     $iUser          Key into self::$name_dir.
 * @param int|string $albumOuterCode Album id as it appears in "/album/<id>".
 *
 * @return void
 */
public function singleAlbum($iUser = "ycc", $albumOuterCode = 21429)
{
    self::singleAlbumRun($iUser, $albumOuterCode);
    exit;
}

/**
 * Perform the actual work of singleAlbum(); returns early on any
 * unrecoverable problem so that the caller can terminate uniformly.
 *
 * @param string     $iUser          Key into self::$name_dir.
 * @param int|string $albumOuterCode Album id as it appears in "/album/<id>".
 *
 * @return void
 */
private static function singleAlbumRun($iUser, $albumOuterCode)
{
    // Upper bounds for the retry loops below.
    $maxPageAttempts = 10;
    $maxDecoyRetries = 20;

    if (!isset(self::$name_dir[$iUser])) {
        echo "unknown user: {$iUser}\n";
        return;
    }
    $name = self::$name_dir[$iUser];
    $username = $iUser;
    print_r([$name]);
    dump("here {$username}, {$name['code']}");

    // Random 1-10 s pause before touching the site.
    usleep(random_int(1000, 10000) * 1000);

    // The profile page listing is printed for diagnostics only.
    $peopleUrl = "https://xsnvshen.com/girl/" . $name['code'];
    $albumSelector = ".entryAblum > .star-mod-bd > ul > li > a";
    $listedAlbums = QueryList::get($peopleUrl)->find($albumSelector)->attrs("href")->all();
    print_r($listedAlbums);

    $baseUrl = "https://www.xsnvshen.com";
    $baseDir = "/Volumes/intel660p/image/xg/" . $name['dir'] . "/";
    if (!self::singleAlbumEnsureDir($baseDir)) {
        echo "cannot create directory: {$baseDir}\n";
        return;
    }

    // Build the album link from the parameter instead of a hard-coded id.
    $albumCode = (string) $albumOuterCode;
    $item = "/album/" . $albumCode;
    if (!in_array($item, $listedAlbums, true)) {
        echo "note: {$item} is not listed on {$peopleUrl}, trying it anyway\n";
    }
    print_r($item);

    usleep(random_int(1000, 10000) * 1000);
    echo "相册子链接: " . $item . "\n";

    $html = self::singleAlbumFetchPage($baseUrl . $item, $maxPageAttempts);
    if ($html === null) {
        echo "giving up on {$item}: page could not be fetched\n";
        return;
    }

    $title = $html->find("h1 > a")->texts();
    dump($title);
    echo "相册名: " . ($title[0] ?? '') . "\n";

    // NOTE(review): a page titled "古诗文" is presumably a placeholder served
    // instead of the real album; the original code re-fetched on it forever.
    // The loop is now bounded.
    $decoyRetries = 0;
    while (trim((string) ($title[0] ?? '')) == "古诗文") {
        if (++$decoyRetries > $maxDecoyRetries) {
            echo "giving up on {$item}: still getting the placeholder page\n";
            return;
        }
        echo "here error happenned\n";
        usleep(random_int(1, 1000) * 50000);
        $html = self::singleAlbumFetchPage($baseUrl . $item, $maxPageAttempts);
        if ($html === null) {
            echo "giving up on {$item}: page could not be fetched\n";
            return;
        }
        $title = $html->find("h1 > a")->texts();
    }

    // The title is used untrimmed so directory names stay the same as before.
    $albumPath = $baseDir . $albumCode . "-" . ($title[0] ?? '');
    if (!self::singleAlbumEnsureDir($albumPath)) {
        echo "cannot create directory: {$albumPath}\n";
        return;
    }

    // Reuse the page fetched above rather than downloading it a second time.
    $images = $html->find(".swi-hd > img")->attrs("src");
    foreach ($images as $image) {
        // Strip the thumbnail path segment to get the full-size image URL.
        $image = str_replace("thumb_600x900/", "", $image);

        $fileStem = pathinfo("http:" . $image)['filename'];
        $legacyFile = $albumPath . "/" . $fileStem . ".jpg";
        $targetFile = $albumPath . "/" . $username . "-" . $albumCode . "-" . $fileStem . ".jpg";
        if (file_exists($legacyFile) || file_exists($targetFile)) {
            continue;
        }

        self::singleAlbumDownloadImage("https:" . $image, $baseUrl . $item, $targetFile);

        $sleepTime = 1000 * random_int(100, 1000);
        echo "after write image sleep {$sleepTime} microseconds\n";
        usleep($sleepTime);
    }
    usleep(1000 * random_int(100, 1000));
}

/**
 * Make sure a directory exists, creating missing parents as needed.
 *
 * @param string $dir Directory path.
 *
 * @return bool True when the directory exists after the call.
 */
private static function singleAlbumEnsureDir($dir)
{
    // The trailing is_dir() covers a concurrent creation between check and mkdir.
    return is_dir($dir) || mkdir($dir, 0777, true) || is_dir($dir);
}

/**
 * Fetch a page with QueryList, retrying with a random 1-10 s pause whenever
 * the request throws.
 *
 * @param string $url         Absolute page URL.
 * @param int    $maxAttempts Maximum number of requests to make.
 *
 * @return \QL\QueryList|null The loaded page, or null if every attempt threw.
 */
private static function singleAlbumFetchPage($url, $maxAttempts)
{
    for ($attempt = 1; $attempt <= $maxAttempts; $attempt++) {
        try {
            return (new \QL\QueryList)->get($url);
        } catch (\Exception $e) {
            \Log::error("查询相册子链接失败,将重试, 异常信息: " . $e->getMessage());
            if ($attempt >= $maxAttempts) {
                break;
            }
            $sleepTime = 1000 * random_int(1000, 10000);
            echo "查询相册子链接失败 sleep {$sleepTime} microseconds\n";
            usleep($sleepTime);
        }
    }

    return null;
}

/**
 * Download one image with curl and store it at $targetFile.
 *
 * A failed transfer is retried up to 99 times with a 2-11 s pause in between.
 * Nothing is written when the transfer ultimately fails, so a later run will
 * try the image again instead of finding an empty file.
 *
 * @param string $imageUrl   Absolute image URL.
 * @param string $referer    Value sent as the Referer header.
 * @param string $targetFile Destination path; must not exist yet.
 *
 * @return bool True when the image was written completely.
 */
private static function singleAlbumDownloadImage($imageUrl, $referer, $targetFile)
{
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $imageUrl);
    // NOTE(review): CURLOPT_CONNECTTIMEOUT is in seconds, so 2000 is roughly
    // 33 minutes; value kept from the original - confirm it is intended.
    curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 2000);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36');
    curl_setopt($curl, CURLOPT_REFERER, $referer);

    try {
        $body = curl_exec($curl);
        $retry = 1;
        while ($body === false && $retry < 100) {
            echo 'Curl error: ' . curl_error($curl) . "\n";
            echo "retry times: " . $retry++ . " times\n";
            sleep(1);
            $sleepTime = 1000 * random_int(1000, 10000);
            echo "retry sleep {$sleepTime} microseconds\n";
            usleep($sleepTime);
            $body = curl_exec($curl);
        }
        if ($body === false) {
            echo 'Curl error: ' . curl_error($curl) . "\n";
            echo "giving up on image: {$imageUrl}\n";
            return false;
        }
    } finally {
        // Always release the handle, including on the failure path.
        curl_close($curl);
    }

    // Mode 'x' refuses to overwrite an existing file.
    $fp = fopen($targetFile, 'x');
    if ($fp === false) {
        echo "cannot create file: {$targetFile}\n";
        return false;
    }
    $written = fwrite($fp, $body);
    fclose($fp);

    if ($written === false || $written < strlen($body)) {
        // Remove the truncated file so a later run downloads it again.
        unlink($targetFile);
        echo "incomplete write, removed: {$targetFile}\n";
        return false;
    }

    return true;
}
}