| @ -0,0 +1,160 @@ | |||
| <?php | |||
| namespace App\Services; | |||
| use Illuminate\Support\Arr; | |||
| use Illuminate\Support\Facades\Log; | |||
| use InstagramAPI\Response\Model\Item; | |||
/**
 * Scrapes media for a list of Instagram user names through the ins246.com
 * search endpoint and stores every downloaded file on local disk, one
 * directory per user.
 */
class Ins24Service
{
    /** Default directory that holds user.txt and receives one sub-directory per user. */
    const DEFAULT_BASE_IMAGE_DIR = "/Users/shixuesen/OneDrive/Pictures/instagram/";

    /** Endpoint that is POSTed to in order to list a user's media. */
    const SEARCH_URL = "https://www.ins246.com/ins/search.html";

    /** Referer sent with both the search request and the media downloads. */
    const REFERER_URL = "https://www.ins246.com/ins/gosearch.html";

    /** Seconds allowed for establishing a connection. */
    const CONNECT_TIMEOUT_SECONDS = 15;

    /** Seconds allowed for a whole request (the original had no limit and could hang forever). */
    const REQUEST_TIMEOUT_SECONDS = 120;

    /**
     * Downloads the media of every user listed in "<baseImageDir>/user.txt".
     *
     * The list file holds one user name per line; blank lines are ignored.
     * A failure while processing one user is logged and the next user is
     * processed, so a single bad account cannot stall the whole run.
     *
     * @param string $baseImageDir Directory containing user.txt; each user's
     *                             files go to "<baseImageDir>/<userName>/".
     * @return void
     */
    public function scrapeUsers($baseImageDir = self::DEFAULT_BASE_IMAGE_DIR)
    {
        $baseImageDir = rtrim($baseImageDir, "/") . "/";
        $userListFile = $baseImageDir . "user.txt";

        $userList = is_readable($userListFile)
            ? file($userListFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
            : false;
        if ($userList === false) {
            Log::error("ins user list is not readable: " . $userListFile);
            return;
        }

        foreach ($userList as $userName) {
            $trueName = trim($userName);
            if ($trueName === "") {
                // A whitespace-only line would otherwise query "instagram.com//".
                continue;
            }

            $this->politePause();
            try {
                $this->scrapeUser($trueName, $baseImageDir . $trueName . "/");
            } catch (\Exception $e) {
                // Give up on this user only; retrying the same page in a tight
                // loop (the previous behaviour) never terminated.
                Log::error("ins get user id for name error: " . $e->getMessage() . " username is " . $trueName);
            }
        }
    }

    /**
     * Walks the result pages of one user until there is nothing left to fetch.
     *
     * Stops when a page has no resources or when a file that already exists
     * locally is reached (everything after it is assumed to be downloaded
     * already, as in the original `break 2`).
     *
     * @param string $userName     Trimmed Instagram user name.
     * @param string $userImageDir Target directory for this user, with trailing slash.
     * @return void
     * @throws \RuntimeException When the search endpoint cannot be queried.
     */
    private function scrapeUser($userName, $userImageDir)
    {
        // NOTE(review): maxId looks like the pagination cursor returned by the
        // endpoint and echoed back on the next request - confirm against the API.
        $maxId = "";

        for ($pageNo = 1; ; $pageNo++) {
            $data = $this->queryImage($userName, $pageNo, $maxId);

            $resources = (isset($data["resourceList"]) && is_array($data["resourceList"]))
                ? $data["resourceList"]
                : [];
            if (empty($resources)) {
                // No more media: without this guard the loop would spin forever.
                return;
            }

            foreach ($resources as $resource) {
                if (empty($resource["mediaURL"])) {
                    continue;
                }

                $res = $this->downloadFile($resource["mediaURL"], 0, $userImageDir);
                if ($res === 0) {
                    // Reached a file we already have; nothing was fetched, so no pause is needed.
                    return;
                }
                $this->politePause();
            }

            $maxId = isset($data["maxId"]) ? $data["maxId"] : "";
        }
    }

    /**
     * Requests one page of media for a user from the search endpoint.
     *
     * @param string     $userName Instagram user name.
     * @param int        $pageNo   Page number sent as "pageNo".
     * @param string|int $maxId    Value sent as "maxId" (empty for the first page).
     * @return array The decoded "data" member of the response.
     * @throws \RuntimeException On transport failure, invalid JSON or a response
     *                           whose "code" is not 200.
     */
    private function queryImage($userName, $pageNo = 1, $maxId = "")
    {
        // Build the body with proper form encoding; raw interpolation broke as
        // soon as a value contained "&", "+", "=" or similar characters.
        $postFields = http_build_query([
            "search" => "https://www.instagram.com/$userName/",
            "pageNo" => $pageNo,
            "type" => 1,
            "maxId" => $maxId,
        ], "", "&");

        $curl = curl_init();
        curl_setopt_array($curl, [
            CURLOPT_URL => self::SEARCH_URL,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_ENCODING => "",
            CURLOPT_MAXREDIRS => 10,
            CURLOPT_CONNECTTIMEOUT => self::CONNECT_TIMEOUT_SECONDS,
            CURLOPT_TIMEOUT => self::REQUEST_TIMEOUT_SECONDS,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_CUSTOMREQUEST => "POST",
            CURLOPT_POSTFIELDS => $postFields,
            CURLOPT_HTTPHEADER => [
                "authority: www.ins246.com",
                "pragma: no-cache",
                "cache-control: no-cache",
                "accept: */*",
                "x-requested-with: XMLHttpRequest",
                "user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36",
                "content-type: application/x-www-form-urlencoded; charset=UTF-8",
                "origin: https://www.ins246.com",
                "sec-fetch-site: same-origin",
                "sec-fetch-mode: cors",
                "sec-fetch-dest: empty",
                "referer: https://www.ins246.com/ins/gosearch.html",
                "accept-language: en-US,en;q=0.9,zh-TW;q=0.8,zh;q=0.7,ja;q=0.6,zh-CN;q=0.5",
                // NOTE(review): hard-coded browser session cookie (JSESSIONID appears twice);
                // presumably it expires - confirm and move it to configuration.
                "cookie: __cfduid=d94e7974e4bbec11a34d72efca94204591604573518; JSESSIONID=EF65EB47872FB6EFEFADA05C8CD140EA; _ga=GA1.2.343334980.1604573522; _gid=GA1.2.1674010257.1604573522; JSESSIONID=6A642D962A119268816A05B9B3D7C8A0",
            ],
        ]);

        $response = curl_exec($curl);
        $transportError = curl_error($curl);
        curl_close($curl);

        if ($response === false) {
            throw new \RuntimeException("ins246 search request failed: " . $transportError);
        }

        $formatResponse = json_decode($response, true);
        if (!is_array($formatResponse)) {
            throw new \RuntimeException("ins246 search response is not valid JSON: " . json_last_error_msg());
        }

        if (!isset($formatResponse["code"]) || (int) $formatResponse["code"] !== 200) {
            $code = isset($formatResponse["code"]) ? $formatResponse["code"] : "missing";
            throw new \RuntimeException("ins246 search returned code " . $code);
        }

        return (isset($formatResponse["data"]) && is_array($formatResponse["data"]))
            ? $formatResponse["data"]
            : [];
    }

    /**
     * Downloads one media URL into "$filePrefix . <file name taken from the URL path>".
     *
     * @param string $filenameUrl URL of the media file.
     * @param int    $flag        Unused; kept so existing callers keep working.
     * @param string $filePrefix  Prefix prepended to the file name (normally a
     *                            directory path with a trailing slash).
     * @return int 0 when the file (or its ".back" sibling) already exists,
     *             1 when it was downloaded and written, -1 on any failure.
     */
    public function downloadFile($filenameUrl, $flag = 0, $filePrefix = "")
    {
        $filename = $this->fileNameFromUrl($filenameUrl);
        if ($filename === "") {
            $this->logFailUrl($filePrefix, $filenameUrl);
            return -1;
        }

        $targetPath = $filePrefix . $filename;
        if (file_exists($targetPath) || file_exists($targetPath . ".back")) {
            echo "\n file exists " . $targetPath;
            return 0;
        }

        $image = $this->fetchBinary($filenameUrl);
        if ($image === null) {
            // Nothing is written on failure: an empty placeholder file would be
            // mistaken for "already downloaded" on the next run.
            $this->logFailUrl($filePrefix, $filenameUrl);
            return -1;
        }

        echo "new filename: " . $targetPath . "\n";
        try {
            $targetDir = dirname($targetPath);
            if (!is_dir($targetDir) && !mkdir($targetDir, 0775, true) && !is_dir($targetDir)) {
                throw new \RuntimeException("cannot create directory " . $targetDir);
            }
            $written = file_put_contents($targetPath, $image);
        } catch (\Throwable $e) {
            // Filesystem warnings may surface as exceptions when an error
            // handler converts them; keep the documented -1 contract.
            Log::error("ins write file error: " . $e->getMessage());
            $written = false;
        }

        if ($written === false) {
            $this->logFailUrl($filePrefix, $filenameUrl);
            return -1;
        }

        return 1;
    }

    /**
     * Extracts the file name (with extension, without query string) from a URL.
     *
     * @param string $url Media URL.
     * @return string The last path segment, or "" when none can be determined.
     */
    private function fileNameFromUrl($url)
    {
        $path = parse_url((string) $url, PHP_URL_PATH);
        if (!is_string($path) || $path === "") {
            return "";
        }

        $name = basename($path);

        // "." and ".." would resolve to existing directories and be reported as "already downloaded".
        return ($name === "." || $name === "..") ? "" : $name;
    }

    /**
     * Fetches the body of a URL, sending the ins246 referer.
     *
     * @param string $url URL to fetch.
     * @return string|null The response body, or null on transport failure,
     *                     a non-2xx HTTP status or an empty body.
     */
    private function fetchBinary($url)
    {
        $ch = curl_init();
        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_AUTOREFERER => false,
            CURLOPT_REFERER => self::REFERER_URL,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_HEADER => false,
            CURLOPT_CONNECTTIMEOUT => self::CONNECT_TIMEOUT_SECONDS,
            CURLOPT_TIMEOUT => self::REQUEST_TIMEOUT_SECONDS,
        ]);

        $body = curl_exec($ch);
        $status = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $transportError = curl_error($ch);
        curl_close($ch);

        if ($body === false || $body === "" || $status < 200 || $status >= 300) {
            Log::error("ins download error: http status " . $status . " " . $transportError . " url is " . $url);
            return null;
        }

        return $body;
    }

    /**
     * Records a media URL that could not be downloaded or stored.
     *
     * @param string $filePrefix  Prefix/directory the file was meant for.
     * @param string $filenameUrl URL that failed.
     * @return void
     */
    private function logFailUrl($filePrefix, $filenameUrl)
    {
        Log::error("ins download failed, url is " . $filenameUrl . " target is " . $filePrefix);
    }

    /**
     * Sleeps for a random 1-10 seconds to throttle requests to the remote site.
     *
     * @return void
     */
    private function politePause()
    {
        usleep(random_int(100, 1000) * 10000);
    }
}