argument("path"));
        $list = scandir($path);
        foreach ($list as $file) {
            if ($file == "." || $file == "..") {
                continue;
            }
            // echo strlen($file) . "\n";
            // continue;
            if (strlen($file) > 100) {
                $repeat = self::get_max_substring($file);
                echo "repeat =============: " . $repeat . "\n";
                // continue;
                if (strlen($repeat) > 10) {
                    $newName = $path . DIRECTORY_SEPARATOR . str_replace_first($repeat, "", $file);
                    if (!is_file($newName)) {
                        echo "old filename: $file rename to " . $newName . "\n";
                        rename($path . DIRECTORY_SEPARATOR . $file, $path . DIRECTORY_SEPARATOR . str_replace_first($repeat, "", $file));
                    } else {
                        echo $newName . "exists";
                    }
                    // echo "$file \n rename to " . $newName . "\n";
                }
                # code...
            }
        }
        exit;
        // NOTE(review): the statements below follow an unconditional exit and are never executed.
        // FileUtils::listFiles($path);
        $filename = "大忽悠丝袜第104弹-001-清纯空姐装油亮连裤袜学生美女换上油亮灰丝女警装户外换高跟凉鞋展示美腿-【成功案例第104弹续集】清纯空姐装油亮连裤袜学生美女换上油亮灰丝女警装户外换高跟凉鞋展示美腿-720p";
        // $filename = "你好,世界.txt";
        $max_substring = self::get_max_substring($filename);
        $newFile = str_replace_first($max_substring, "", $filename);
        echo $newFile; // outputs "world"
    }

    /**
     * Find the longest substring that occurs at least twice in a string.
     *
     * All suffix start offsets are sorted by the suffix text they denote, and
     * each pair of neighbours in that order is compared. The longest prefix
     * shared by two neighbouring suffixes is the longest repeated substring.
     * Offsets and lengths are counted in UTF-8 characters, not bytes.
     *
     * @param string $filename UTF-8 text to scan.
     * @return string The longest repeated substring, or "" when nothing repeats.
     */
    public function get_max_substring($filename)
    {
        $suffix_array = self::get_suffix_array($filename);
        $suffix_count = count($suffix_array);

        $best_length = 0;
        $best_index = 0;
        for ($i = 1; $i < $suffix_count; $i++) {
            $shared = self::get_common_prefix_length($suffix_array[$i - 1], $suffix_array[$i], $filename);
            if ($shared > $best_length) {
                $best_length = $shared;
                $best_index = $suffix_array[$i];
            }
        }

        return mb_substr($filename, $best_index, $best_length, 'UTF-8');
    }

    /**
     * Build the suffix array of a UTF-8 string.
     *
     * @param string $str UTF-8 text.
     * @return int[] Character offsets 0..n-1, ordered so that the suffixes
     *               starting at those offsets are in ascending strcmp() order.
     *               Empty when $str is empty.
     */
    public function get_suffix_array($str)
    {
        $char_count = mb_strlen($str, 'UTF-8');

        // Slice every suffix once up front so the comparator does not have to
        // re-run mb_substr() on both operands for each comparison.
        $suffixes = array();
        $suffix_array = array();
        for ($i = 0; $i < $char_count; $i++) {
            $suffixes[$i] = mb_substr($str, $i, null, 'UTF-8');
            $suffix_array[] = $i;
        }

        // Suffixes all have different lengths, so strcmp() never reports a tie
        // between two distinct offsets and the resulting order is deterministic.
        usort($suffix_array, function ($a, $b) use ($suffixes) {
            return strcmp($suffixes[$a], $suffixes[$b]);
        });

        return $suffix_array;
    }

    /**
     * Count how many leading characters two suffixes of the same string share.
     *
     * @param int    $a   Character offset of the first suffix.
     * @param int    $b   Character offset of the second suffix.
     * @param string $str UTF-8 text both offsets refer to.
     * @return int Number of identical characters starting at $a and $b.
     */
    public function get_common_prefix_length($a, $b, $str)
    {
        // Bound the scan by the character count: $a and $b are character
        // offsets, so a byte-indexed check such as isset($str[$a]) would allow
        // reads past the last character of multibyte text.
        $char_count = mb_strlen($str, 'UTF-8');

        $length = 0;
        while ($a < $char_count && $b < $char_count) {
            // Characters must match exactly. Treating any two CJK characters
            // as equal would report text that is not actually repeated.
            if (mb_substr($str, $a, 1, 'UTF-8') !== mb_substr($str, $b, 1, 'UTF-8')) {
                break;
            }
            $length++;
            $a++;
            $b++;
        }

        return $length;
    }

    /**
     * Test whether a string contains a character in the range U+4E00..U+9FA5.
     *
     * The pattern is not anchored, so a longer string matches as soon as any
     * one of its characters falls in that range.
     *
     * @param string $char Text to test, normally a single UTF-8 character.
     * @return int|false 1 on a match, 0 on no match, false if preg_match() fails.
     */
    public function is_chinese($char)
    {
        $pattern = '/[\x{4e00}-\x{9fa5}]/u';
        return preg_match($pattern, $char);
    }
}