diff --git a/.idea/graph-tutorial.iml b/.idea/graph-tutorial.iml index f0aa528..e671eb0 100644 --- a/.idea/graph-tutorial.iml +++ b/.idea/graph-tutorial.iml @@ -9,6 +9,9 @@ + + + @@ -31,6 +34,9 @@ + + + @@ -68,6 +74,7 @@ + @@ -114,6 +121,7 @@ + diff --git a/.idea/php.xml b/.idea/php.xml index a061ba3..58df4ea 100644 --- a/.idea/php.xml +++ b/.idea/php.xml @@ -118,6 +118,14 @@ + + + + + + + + diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php index b041d7a..115d568 100644 --- a/app/Console/Kernel.php +++ b/app/Console/Kernel.php @@ -2,6 +2,7 @@ namespace App\Console; +use App\Services\XiuGirlsService; use Illuminate\Console\Scheduling\Schedule; use Illuminate\Foundation\Console\Kernel as ConsoleKernel; use App\Services\InstagramService; @@ -29,8 +30,11 @@ class Kernel extends ConsoleKernel // $schedule->command('inspire') // ->hourly(); $schedule->call(function () { - $service = new InstagramService(); - $service->scrapeLikedUsers(); +// $service = new InstagramService(); +// $service->scrapeLikedUsers(); +// $service->scanLocalFiles(); + $xiuService = new XiuGirlsService(); + $xiuService->scrapeXiuGirls(); echo "111"; })->everyMinute(); } diff --git a/app/Http/Controllers/InstagramController.php b/app/Http/Controllers/InstagramController.php index 0e3c7f8..30ffff8 100644 --- a/app/Http/Controllers/InstagramController.php +++ b/app/Http/Controllers/InstagramController.php @@ -8,6 +8,7 @@ use Illuminate\Http\Request; use Microsoft\Graph\Exception\GraphException; use Microsoft\Graph\Graph; use Microsoft\Graph\Model; +use Illuminate\Support\Facades\Log; class InstagramController extends Controller @@ -50,11 +51,27 @@ class InstagramController extends Controller // var_dump($event);exit; $subItemUrl = '/me/drive/items/' . $event->getId(). '/children?'.http_build_query($queryParams); $images = $graph->createRequest('GET', $subItemUrl) - ->setReturnType(Model\DriveItem::class) +// ->setReturnType(Model\DriveItem::class) ->execute(); - dd($images); - exit; - var_dump($event); +// dd($images->getBody()); + $body = $images->getBody(); + $values = $body['value']; + foreach ($values as $value) { + Log::debug("current user".$event->getName() ." current image " . $value['name']); + Instagram::where('image_name', $value['name'])->update(['is_uploaded' => 1]); + } + while (array_key_exists('@odata.nextLink', $body)) { + $nextSubItemUrl = $body['@odata.nextLink']; + $response = $graph->createRequest('GET', $nextSubItemUrl) + ->execute(); + $body = $response->getBody(); + $values = $body['value']; + foreach ($values as $value) { + Log::debug("current user".$event->getName() ." current image " . $value['name']); + Instagram::where('image_name', $value['name'])->update(['is_uploaded' => 1]); + } + } +// var_dump($event); } // $viewData['events'] = $events; diff --git a/app/Instagram.php b/app/Instagram.php index f336edb..fea1198 100644 --- a/app/Instagram.php +++ b/app/Instagram.php @@ -17,5 +17,5 @@ class Instagram extends Model // protected $table = "instagram"; - protected $fillable = ["image_name", "username", "image_url"]; + protected $fillable = ["image_name", "username", "image_url", 'is_uploaded']; } diff --git a/app/Services/InstagramService.php b/app/Services/InstagramService.php index 63d5753..eccc016 100644 --- a/app/Services/InstagramService.php +++ b/app/Services/InstagramService.php @@ -93,6 +93,14 @@ class InstagramService return $filename; } + public function checkFileExists($fileName, $isDb = true, $filePrefix = '') { + if ($isDb) { + return Ins::where('image_name', $fileName)->where('is_uploaded', 1)->count() > 0; + } else { + return file_exists($filePrefix . $fileName) || file_exists($filePrefix . $fileName . ".back"); + } + } + public function scrapeLikedUsers() { $ig = new Instagram($this->debug, $this->truncatedDebug); @@ -113,7 +121,8 @@ class InstagramService try { $userList = file("/Users/shixuesen/OneDrive/Pictures/instagram/user.txt"); - $userList = array_slice($userList, 17); +// $userList = array_slice($userList, 17); +// $userList = ['1992.ai_']; // print_r($userList);exit; // print_r($userList); foreach ($userList as $userName) { @@ -184,6 +193,7 @@ class InstagramService // This will be a null value again when we've reached the last page! // And we will stop looping through pages as soon as maxId becomes null. $maxId = $response->getNextMaxId(); + echo "\n new maxId: ". $maxId ."\n"; // Sleep for 5 seconds before requesting the next page. This is just an // example of an okay sleep time. It is very important that your scripts @@ -226,4 +236,30 @@ class InstagramService echo 'Something went wrong: ' . $e->getMessage() . "\n"; } } + + public function scanLocalFiles($baseDir = '/Users/shixuesen/OneDrive/Pictures/instagram/') + { + $dirs = scandir($baseDir, 1); + foreach ($dirs as $subDir) { + if ($subDir == '.' || $subDir == '..') { + continue; + } + $subPath = $baseDir . $subDir; + if (is_dir($subPath)) { + $files = scandir($subPath); + foreach ($files as $file){ + if ($file == '.' || $file == '..') { + continue; + } + echo "now file: ". $file. "\n"; + Ins::firstOrCreate(['image_name' => $file], [ + 'username' => $subDir, + 'is_uploaded' => 1, + 'image_url' => '' + ]); + } + + } + } + } } \ No newline at end of file diff --git a/app/Services/XiuGirlsService.php b/app/Services/XiuGirlsService.php new file mode 100644 index 0000000..7a01345 --- /dev/null +++ b/app/Services/XiuGirlsService.php @@ -0,0 +1,94 @@ + .star-mod-bd > ul > li > a"; + $ql = QueryList::get($baseUrl)->find($albumSelector)->attrs("href"); + $items = $ql->all(); + $items = array_slice($items, 49); +// print_r($items);exit; + + $baseUrl = "http://www.xiugirls.com"; + $baseDir = "/Users/shixuesen/Documents/xiugirl/"; +// $items = array_slice($items, 1); +// print_r($items);exit; + foreach ($items as $item) { + + print_r($item); + $html = QueryList::get($baseUrl. $item); + $title = $html->find("h1 > a")->texts(); + print_r($title); +// print_r(is_dir($baseDir));exit; + $albumPath = ""; +// if (count($title) >0 ) { +// if (!file_exists($baseDir .$title[0])) { +// mkdir($baseDir .$title[0]); +// } +// $albumPath = $baseDir.$title[0]; +// } else { + if (!file_exists($baseDir .explode("/", $item)[2])) { + mkdir($baseDir .explode("/", $item)[2]); + } + $albumPath = $baseDir.explode("/", $item)[2]; +// } + $images = QueryList::get($baseUrl . $item)->find(".swi-hd > img")->attrs("src"); +// mkdir("") +// print_r($images); + foreach ($images as $image) { + if (file_exists($albumPath."/".pathinfo("http:".$image)['filename'].".jpg")) { + continue; + // unlink($albumPath."/".pathinfo("http:".$image)['filename'].".jpg"); + } + $opts = array('http'=> ['header' => + "User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36 \r\n + Referer:".$baseUrl.$item."\r\n" + ]); + $curl_handle=curl_init(); + curl_setopt($curl_handle, CURLOPT_URL,"http:".$image); + curl_setopt($curl_handle, CURLOPT_CONNECTTIMEOUT, 2000); + curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER, 1); + curl_setopt($curl_handle, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'); + curl_setopt($curl_handle, CURLOPT_REFERER, $baseUrl . $item); + $query = curl_exec($curl_handle); + $i = 1; + while ($query === false) { + echo 'Curl error: ' . curl_error($curl_handle) ."\n"; + echo "retry times: " .$i++ ." times \n"; + sleep(1); + $query = curl_exec($curl_handle); + if ($i >= 100) { + break; + } + } + + echo curl_error($curl_handle); + $fp = fopen($albumPath."/".pathinfo("http:".$image)['filename'].".jpg", 'x'); + fwrite($fp, $query); + fclose($fp); +// file_put_contents("1.jpg", $query); +// curl_close($curl_handle); +// exit; + +// $context = stream_context_create($opts); + +// $a = file_get_contents("http:".$image, false, $context);exit; +// Storage::put($item.pathinfo($image)["filename"], file_get_contents("http:".$image, false, $context)); + + } + +// exit; + } + } +} \ No newline at end of file diff --git a/composer.json b/composer.json index f01e0b5..d8c0c44 100644 --- a/composer.json +++ b/composer.json @@ -12,6 +12,7 @@ "barryvdh/laravel-ide-helper": "^2.5", "doctrine/dbal": "^2.9", "fideloper/proxy": "^4.0", + "jaeger/querylist": "^4.1", "laravel/framework": "5.7.*", "laravel/tinker": "^1.0", "league/oauth2-client": "dev-master", diff --git a/composer.lock b/composer.lock index fc5afba..b3e7dd2 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "def97ea3aae1e41be78340bb99ea6723", + "content-hash": "dfcabc6cfdad87f334d118b901a24414", "packages": [ { "name": "barryvdh/laravel-ide-helper", @@ -261,6 +261,208 @@ ], "time": "2017-08-20T08:06:53+00:00" }, + { + "name": "cache/adapter-common", + "version": "1.1.0", + "source": { + "type": "git", + "url": "https://github.com/php-cache/adapter-common.git", + "reference": "6320bb5f5574cb88438059b59f8708da6b6f1d32" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-cache/adapter-common/zipball/6320bb5f5574cb88438059b59f8708da6b6f1d32", + "reference": "6320bb5f5574cb88438059b59f8708da6b6f1d32", + "shasum": "", + "mirrors": [ + { + "url": "https://dl.laravel-china.org/%package%/%reference%.%type%", + "preferred": true + } + ] + }, + "require": { + "cache/tag-interop": "^1.0", + "php": "^5.6 || ^7.0", + "psr/cache": "^1.0", + "psr/log": "^1.0", + "psr/simple-cache": "^1.0" + }, + "require-dev": { + "cache/integration-tests": "^0.16", + "phpunit/phpunit": "^5.7.21" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.1-dev" + } + }, + "autoload": { + "psr-4": { + "Cache\\Adapter\\Common\\": "" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Aaron Scherer", + "email": "aequasi@gmail.com", + "homepage": "https://github.com/aequasi" + }, + { + "name": "Tobias Nyholm", + "email": "tobias.nyholm@gmail.com", + "homepage": "https://github.com/nyholm" + } + ], + "description": "Common classes for PSR-6 adapters", + "homepage": "http://www.php-cache.com/en/latest/", + "keywords": [ + "cache", + "psr-6", + "tag" + ], + "time": "2018-07-08T13:04:33+00:00" + }, + { + "name": "cache/filesystem-adapter", + "version": "1.0.0", + "source": { + "type": "git", + "url": "https://github.com/php-cache/filesystem-adapter.git", + "reference": "d50680b6dabbe39f9831f5fc9efa61c09d936017" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-cache/filesystem-adapter/zipball/d50680b6dabbe39f9831f5fc9efa61c09d936017", + "reference": "d50680b6dabbe39f9831f5fc9efa61c09d936017", + "shasum": "", + "mirrors": [ + { + "url": "https://dl.laravel-china.org/%package%/%reference%.%type%", + "preferred": true + } + ] + }, + "require": { + "cache/adapter-common": "^1.0", + "league/flysystem": "^1.0", + "php": "^5.6 || ^7.0", + "psr/cache": "^1.0", + "psr/simple-cache": "^1.0" + }, + "provide": { + "psr/cache-implementation": "^1.0" + }, + "require-dev": { + "cache/integration-tests": "^0.16", + "phpunit/phpunit": "^5.7.21" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0-dev" + } + }, + "autoload": { + "psr-4": { + "Cache\\Adapter\\Filesystem\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Aaron Scherer", + "email": "aequasi@gmail.com", + "homepage": "https://github.com/aequasi" + }, + { + "name": "Tobias Nyholm", + "email": "tobias.nyholm@gmail.com", + "homepage": "https://github.com/nyholm" + } + ], + "description": "A PSR-6 cache implementation using filesystem. This implementation supports tags", + "homepage": "http://www.php-cache.com/en/latest/", + "keywords": [ + "cache", + "filesystem", + "psr-6", + "tag" + ], + "time": "2017-07-16T21:09:25+00:00" + }, + { + "name": "cache/tag-interop", + "version": "1.0.0", + "source": { + "type": "git", + "url": "https://github.com/php-cache/tag-interop.git", + "reference": "c7496dd81530f538af27b4f2713cde97bc292832" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-cache/tag-interop/zipball/c7496dd81530f538af27b4f2713cde97bc292832", + "reference": "c7496dd81530f538af27b4f2713cde97bc292832", + "shasum": "", + "mirrors": [ + { + "url": "https://dl.laravel-china.org/%package%/%reference%.%type%", + "preferred": true + } + ] + }, + "require": { + "php": "^5.5 || ^7.0", + "psr/cache": "^1.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.0-dev" + } + }, + "autoload": { + "psr-4": { + "Cache\\TagInterop\\": "" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Tobias Nyholm", + "email": "tobias.nyholm@gmail.com", + "homepage": "https://github.com/nyholm" + }, + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com", + "homepage": "https://github.com/nicolas-grekas" + } + ], + "description": "Framework interoperable interfaces for tags", + "homepage": "http://www.php-cache.com/en/latest/", + "keywords": [ + "cache", + "psr", + "psr6", + "tag" + ], + "time": "2017-03-13T09:14:27+00:00" + }, { "name": "clue/http-proxy-react", "version": "v1.3.0", @@ -1662,6 +1864,153 @@ ], "time": "2018-12-04T20:46:45+00:00" }, + { + "name": "jaeger/g-http", + "version": "V1.6.0", + "source": { + "type": "git", + "url": "https://github.com/jae-jae/GHttp.git", + "reference": "eb34d266a07c687aef45087370ef47d48321bd2e" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/jae-jae/GHttp/zipball/eb34d266a07c687aef45087370ef47d48321bd2e", + "reference": "eb34d266a07c687aef45087370ef47d48321bd2e", + "shasum": "", + "mirrors": [ + { + "url": "https://dl.laravel-china.org/%package%/%reference%.%type%", + "preferred": true + } + ] + }, + "require": { + "cache/filesystem-adapter": "^1.0", + "guzzlehttp/guzzle": "^6.2" + }, + "type": "library", + "autoload": { + "psr-4": { + "Jaeger\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Jaeger", + "email": "JaegerCode@gmail.com" + } + ], + "description": "Simple Http client base on GuzzleHttp", + "time": "2018-12-12T04:21:15+00:00" + }, + { + "name": "jaeger/phpquery-single", + "version": "0.9.9", + "source": { + "type": "git", + "url": "https://github.com/jae-jae/phpQuery-single.git", + "reference": "6c12e3a1648dc619f16920f96082f397d74e0127" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/jae-jae/phpQuery-single/zipball/6c12e3a1648dc619f16920f96082f397d74e0127", + "reference": "6c12e3a1648dc619f16920f96082f397d74e0127", + "shasum": "", + "mirrors": [ + { + "url": "https://dl.laravel-china.org/%package%/%reference%.%type%", + "preferred": true + } + ] + }, + "require": { + "php": ">=5.3.0" + }, + "type": "library", + "autoload": { + "classmap": [ + "phpQuery.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Tobiasz Cudnik", + "email": "tobiasz.cudnik@gmail.com", + "homepage": "https://github.com/TobiaszCudnik", + "role": "Developer" + }, + { + "name": "Jaeger", + "role": "Packager" + } + ], + "description": "phpQuery单文件版本,是Querylist的依赖(http://querylist.cc/),phpQuery项目主页:http://code.google.com/p/phpquery/", + "homepage": "http://code.google.com/p/phpquery/", + "time": "2017-12-07T02:13:37+00:00" + }, + { + "name": "jaeger/querylist", + "version": "V4.1.0", + "source": { + "type": "git", + "url": "https://github.com/jae-jae/QueryList.git", + "reference": "df9e3bbf1943b77ac7ab91bd3fc59643fe458044" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/jae-jae/QueryList/zipball/df9e3bbf1943b77ac7ab91bd3fc59643fe458044", + "reference": "df9e3bbf1943b77ac7ab91bd3fc59643fe458044", + "shasum": "", + "mirrors": [ + { + "url": "https://dl.laravel-china.org/%package%/%reference%.%type%", + "preferred": true + } + ] + }, + "require": { + "jaeger/g-http": "^1.1", + "jaeger/phpquery-single": "^0.9", + "php": ">=7.0", + "tightenco/collect": "^5" + }, + "require-dev": { + "phpunit/phpunit": "^7.5", + "symfony/var-dumper": "^3.3" + }, + "type": "library", + "autoload": { + "psr-4": { + "QL\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Jaeger", + "email": "JaegerCode@gmail.com" + } + ], + "description": "Simple, elegant, extensible PHP Web Scraper (crawler/spider),Use the css3 dom selector,Based on phpQuery! 简洁、优雅、可扩展的PHP采集工具(爬虫),基于phpQuery。", + "homepage": "http://querylist.cc", + "keywords": [ + "QueryList", + "phpQuery", + "spider" + ], + "time": "2018-12-12T07:29:31+00:00" + }, { "name": "jakub-onderka/php-console-color", "version": "v0.2", @@ -3158,6 +3507,58 @@ ], "time": "2016-01-26T13:27:02+00:00" }, + { + "name": "psr/cache", + "version": "1.0.1", + "source": { + "type": "git", + "url": "https://github.com/php-fig/cache.git", + "reference": "d11b50ad223250cf17b86e38383413f5a6764bf8" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-fig/cache/zipball/d11b50ad223250cf17b86e38383413f5a6764bf8", + "reference": "d11b50ad223250cf17b86e38383413f5a6764bf8", + "shasum": "", + "mirrors": [ + { + "url": "https://dl.laravel-china.org/%package%/%reference%.%type%", + "preferred": true + } + ] + }, + "require": { + "php": ">=5.3.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0.x-dev" + } + }, + "autoload": { + "psr-4": { + "Psr\\Cache\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "PHP-FIG", + "homepage": "http://www.php-fig.org/" + } + ], + "description": "Common interface for caching libraries", + "keywords": [ + "cache", + "psr", + "psr-6" + ], + "time": "2016-08-06T20:24:11+00:00" + }, { "name": "psr/container", "version": "1.0.0", @@ -5280,6 +5681,62 @@ ], "time": "2019-01-03T09:07:35+00:00" }, + { + "name": "tightenco/collect", + "version": "v5.7.26", + "source": { + "type": "git", + "url": "https://github.com/tightenco/collect.git", + "reference": "c1a36a2a8a0aa731c1acdcd83f57724ffe630d00" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/tightenco/collect/zipball/c1a36a2a8a0aa731c1acdcd83f57724ffe630d00", + "reference": "c1a36a2a8a0aa731c1acdcd83f57724ffe630d00", + "shasum": "", + "mirrors": [ + { + "url": "https://dl.laravel-china.org/%package%/%reference%.%type%", + "preferred": true + } + ] + }, + "require": { + "php": "^7.1.3", + "symfony/var-dumper": ">=3.4 <5" + }, + "require-dev": { + "mockery/mockery": "^1.0", + "nesbot/carbon": "^1.26.3", + "phpunit/phpunit": "^7.0" + }, + "type": "library", + "autoload": { + "files": [ + "src/Collect/Support/helpers.php", + "src/Collect/Support/alias.php" + ], + "psr-4": { + "Tightenco\\Collect\\": "src/Collect" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Taylor Otwell", + "email": "taylorotwell@gmail.com" + } + ], + "description": "Collect - Illuminate Collections as a separate package.", + "keywords": [ + "collection", + "laravel" + ], + "time": "2019-02-13T19:40:13+00:00" + }, { "name": "tijsverkoyen/css-to-inline-styles", "version": "2.2.1", diff --git a/public/index.php b/public/index.php index 4584cbc..f269dbe 100644 --- a/public/index.php +++ b/public/index.php @@ -6,6 +6,7 @@ * @package Laravel * @author Taylor Otwell */ +set_time_limit(0); define('LARAVEL_START', microtime(true));