निम्नलिखित PERL स्क्रिप्ट 'RenameFilesWithAccentedAndDiacriticalLatinChars.pl' एक्सेंट (accent) और विशेषक चिह्नों (diacritical marks) वाले लैटिन वर्णों वाली फ़ाइलों का नाम बदलती है:
- This PERL script starts from the folder given in parameter, or else from
the current folder.
- It recursively searches for files with characters belonging to 80 - FF of
CP 1250, CP 1252, CP 1254 and CP 1257 (mostly accented Latin characters)
or Latin characters having diacritical marks.
- It calculates new file names by removing the accents and diacritical marks
only from Latin characters (For example, Été --> Ete).
- It displays all proposed renaming and perhaps conflicts, and asks the user
for global approval.
- If the user has approved, it renames all files having no conflict.
विकल्प '--batch' इंटरैक्टिव प्रश्नों से बचाता है। सावधानी से प्रयोग करें।
विकल्प '--' अगले पैरामीटर को विकल्प के रूप में समझे जाने से रोकता है।
Special Warning :
- यह स्क्रिप्ट मूल रूप से UTF-8 में एन्कोड की गई थी, और इसे ऐसा ही रहना चाहिए।
- यह स्क्रिप्ट कई फाइलों का नाम बदल सकती है।
- फ़ाइल नाम सैद्धांतिक रूप से केवल UTF-8 में एन्कोड किए जाते हैं। लेकिन कुछ फ़ाइल
नामों में पुरानी (legacy) एन्कोडिंग वाले कुछ वर्ण भी पाए जा सकते हैं।
- लेखक ने संगति जाँच (consistency checks), मज़बूती, टकराव (conflict) की पहचान
और उचित एन्कोडिंग के उपयोग के लिए प्रयास किए हैं।
इसलिए इस स्क्रिप्ट को केवल लैटिन वर्णों से एक्सेंट और विशेषक चिह्न
हटाकर ही फ़ाइलों का नाम बदलना चाहिए।
- लेकिन इस स्क्रिप्ट का परीक्षण केवल सीमित संख्या में ओएस
(Windows, Mac OS X, Linux) और सीमित संख्या में टर्मिनल एन्कोडिंग
(CP 850, ISO-8859-1, UTF-8) के तहत किया गया है।
- इसलिए, अजीब परिस्थितियों में, यह स्क्रिप्ट कई फ़ाइलों को बेतरतीब (random)
नाम दे सकती है।
- इसलिए, इस स्क्रिप्ट का उपयोग सावधानी से किया जाना चाहिए, और इसमें बदलाव अत्यधिक
सावधानी से किया जाना चाहिए (आंतरिक स्ट्रिंग, इनपुट, आउटपुट और कमांड की एन्कोडिंग से सावधान रहें)।
#!/usr/bin/perl -w
#=============================================================================
#
# Copyright 2010 Etienne URBAH
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details at
# http://www.gnu.org/licenses/gpl.html
#
# For usage and SPECIAL WARNING, see the 'Help' section below.
#
#=============================================================================
use 5.008_000; # For correct Unicode support
use warnings;
use strict;
use Encode;
$| = 1; # Autoflush STDOUT
#-----------------------------------------------------------------------------
# Function ucRemoveEolUnderscoreDash :
# Set Uppercase, remove End of line, Underscores and Dashes
#-----------------------------------------------------------------------------
sub ucRemoveEolUnderscoreDash
{
    # Normalise a name (typically an encoding name) for use as a lookup key:
    #   - convert it to uppercase,
    #   - strip one trailing end of line,
    #   - delete every underscore and every dash.
    # Parameter : the string to normalise (first argument).
    # Returns   : the normalised copy; the caller's string is left untouched.
    my $Normalised = uc($_[0]);
    chomp($Normalised);
    $Normalised =~ tr/_\-//d;
    return $Normalised;
}
#-----------------------------------------------------------------------------
# Constants
#-----------------------------------------------------------------------------
# Names of the ISO-8859 encodings referenced by the script
my $Encoding_Western  = 'ISO-8859-1';
my $Encoding_Central  = 'ISO-8859-2';
my $Encoding_Baltic   = 'ISO-8859-4';
my $Encoding_Turkish  = 'ISO-8859-9';
my $Encoding_W_Euro   = 'ISO-8859-15';

# Code page numbers referenced by the script
my $Code_Page_OldWest = 850;
my $Code_Page_Central = 1250;
my $Code_Page_Western = 1252;
my $Code_Page_Turkish = 1254;
my $Code_Page_Baltic  = 1257;
my $Code_Page_UTF8    = 65001;

# One string holding every byte value from 0x80 to 0xFF, in order
my $HighBitSetChars   = pack('C*', 0x80..0xFF);

# %superEncodings : normalised encoding name (uppercase, without '_' and '-')
#                   --> name 'cpNNNN' of the associated code page
my %superEncodings;
for my $Pair ( [ $Encoding_Western,        $Code_Page_Western ],
               [ $Encoding_Central,        $Code_Page_Central ],
               [ $Encoding_Baltic,         $Code_Page_Baltic  ],
               [ $Encoding_Turkish,        $Code_Page_Turkish ],
               [ $Encoding_W_Euro,         $Code_Page_Western ],
               [ 'cp'.$Code_Page_OldWest,  $Code_Page_Western ] )
{
    my ( $Name, $CodePage ) = @$Pair;
    $superEncodings{ ucRemoveEolUnderscoreDash($Name) } = 'cp' . $CodePage;
}

# %EncodingNames : code page name 'cpNNNN' --> label of that code page
my %EncodingNames = ( "cp$Code_Page_Central" => 'Central European',
                      "cp$Code_Page_Western" => 'Western European',
                      "cp$Code_Page_Turkish" => ' Turkish ',
                      "cp$Code_Page_Baltic"  => ' Baltic ' );
# %NonAccenChars : code page name 'cpNNNN' --> string of 128 ASCII characters
# giving, position by position, the replacement for bytes 0x80 to 0xFF of
# that code page (4 rows of 32 characters each).  The comment above each row
# shows the characters being replaced.
#
# A character present in several code pages MUST get the same replacement in
# every table : the tables are concatenated into one single 'tr', which only
# honours the first occurrence of a repeated character, and the result is
# then compared with the concatenated replacements.  In particular, 'Š'
# (0x8A in cp1250, cp1252 and cp1254, 0xD0 in cp1257) is replaced by
# uppercase 'S' everywhere.
my %NonAccenChars = (
                               #--------------------------------#
    'cp'.$Code_Page_Central,   # Central European (cp1250)      #
                               #--------------------------------#
    #€_‚_„…†‡_‰Š‹ŚŤŽŹ_‘’“”•–—_™š›śťžź#
    'E_,_,.++_%S_STZZ_````.--_Ts_stzz'.
    # ˇ˘Ł¤Ą¦§¨©Ş«¬®Ż°±˛ł´µ¶·¸ąş»Ľ˝ľż#
    '_``LoAlS`CS_--RZ`+,l`uP.,as_L~lz'.
    #ŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢß#
    'RAAAALCCCEEEEIIDDNNOOOOxRUUUUYTS'.
    #ŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙#
    'raaaalccceeeeiiddnnoooo%ruuuuyt`',
                               #--------------------------------#
    'cp'.$Code_Page_Western,   # Western European (cp1252)      #
                               #--------------------------------#
    #€_‚ƒ„…†‡ˆ‰Š‹Œ_Ž__‘’“”•–—˜™š›œ_žŸ#
    'E_,f,.++^%S_O_Z__````.--~Ts_o_zY'.
    # ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿#
    '_!cLoYlS`Ca_--R-`+23`uP.,10_qh3_'.
    #ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß#
    'AAAAAAACEEEEIIIIDNOOOOOxOUUUUYTS'.
    #àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ#
    'aaaaaaaceeeeiiiidnooooo%ouuuuyty',
                               #--------------------------------#
    'cp'.$Code_Page_Turkish,   # Turkish (cp1254)               #
                               #--------------------------------#
    #€_‚ƒ„…†‡ˆ‰Š‹Œ____‘’“”•–—˜™š›œ__Ÿ#
    'E_,f,.++^%S_O____````.--~Ts_o__Y'.
    # ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿#
    '_!cLoYlS`Ca_--R-`+23`uP.,10_qh3_'.
    #ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏĞÑÒÓÔÕÖ×ØÙÚÛÜİŞß#
    'AAAAAAACEEEEIIIIGNOOOOOxOUUUUISS'.
    #àáâãäåæçèéêëìíîïğñòóôõö÷øùúûüışÿ#
    'aaaaaaaceeeeiiiignooooo%ouuuuisy',
                               #--------------------------------#
    'cp'.$Code_Page_Baltic,    # Baltic (cp1257)                #
                               #--------------------------------#
    #€_‚_„…†‡_‰_‹_¨ˇ¸_‘’“”•–—_™_›_¯˛_#
    'E_,_,.++_%___``,_````.--_T___-,_'.
    # �¢£¤�¦§Ø©Ŗ«¬®Æ°±²³´µ¶·ø¹ŗ»¼½¾æ#
    '__cLo_lSOCR_--RA`+23`uP.o1r_qh3a'.
    #ĄĮĀĆÄÅĘĒČÉŹĖĢĶĪĻŠŃŅÓŌÕÖ×ŲŁŚŪÜŻŽß#
    'AIACAAEECEZEGKILSNNOOOOxULSUUZZS'.
    #ąįāćäåęēčéźėģķīļšńņóōõö÷ųłśūüżž˙#
    'aiacaaeecezegkilsnnoooo%ulsuuzz`' );
# %AccentedChars : code page name 'cpNNNN' --> bytes 0x80 to 0xFF decoded
#                  from that code page
# $AccentedChars : concatenation of the 4 decoded strings
# $NonAccenChars : concatenation of the 4 replacement strings, in the same
#                  order, so that both strings correspond position by position
my %AccentedChars;
my $AccentedChars = '';
my $NonAccenChars = '';
foreach my $CodePage ( $Code_Page_Central, $Code_Page_Western,
                       $Code_Page_Turkish, $Code_Page_Baltic )
{
    my $Key     = 'cp' . $CodePage;
    my $Decoded = decode($Key, $HighBitSetChars);
    $AccentedChars{$Key} = $Decoded;
    $AccentedChars      .= $Decoded;
    $NonAccenChars      .= $NonAccenChars{$Key};
}
#print "\n", length($NonAccenChars), ' ', $NonAccenChars,"\n";
#print "\n", length($AccentedChars), ' ', $AccentedChars,"\n";

# Replacement characters with every non-word character escaped
my $QuotedMetaNonAccenChars = quotemeta($NonAccenChars);

# All characters of the ranges U+0300..U+036F and U+1DC0..U+1DFF
my $DiacriticalChars = join( '', map { chr($_) } 0x0300..0x036F,
                                                 0x1DC0..0x1DFF );
#-----------------------------------------------------------------------------
# Parse options and parameters
#-----------------------------------------------------------------------------
# Flags and parameter filled from the command line :
#   $b_Help        : set when an unrecognised option (such as '--help') is met
#   $b_Interactive : cleared by '--batch'
#   $b_UTF8        : set by '--utf8'
#   $b_Parameter   : set once '--' has been met; from then on, every argument
#                    is taken as a parameter, even if it begins with '-'
#   $Folder        : the single parameter accepted, if any
my $b_Help        = 0;
my $b_Interactive = 1;
my $b_UTF8        = 0;
my $b_Parameter   = 0;
my $Folder;
ARGUMENT:
foreach my $Argument ( @ARGV )
{
    # '--' always switches to 'parameters only' and is never a parameter itself
    if ( $Argument eq '--' )
    {
        $b_Parameter = 1;
        next ARGUMENT;
    }

    # Options are recognised (case insensitively) only before '--'
    unless ( $b_Parameter )
    {
        my $Option = lc($Argument);
        if ( $Option eq '--batch' )
        {
            $b_Interactive = 0;
            next ARGUMENT;
        }
        if ( $Option eq '--utf8' )
        {
            $b_UTF8 = 1;
            next ARGUMENT;
        }
        if ( substr($Argument, 0, 1) eq '-' )
        {
            $b_Help = 1;
            next ARGUMENT;
        }
    }

    # Anything else is the folder, which may be given only once
    die "$0 accepts only 1 parameter\n"  if defined($Folder);
    $Folder = $Argument;
}
#-----------------------------------------------------------------------------
# Help
#-----------------------------------------------------------------------------
# When help has been requested, display the usage text and stop.
# The text is emitted through 'die' : it is written on STDERR and the script
# then terminates.
# NOTE(review): option '--utf8' is accepted by the option parser but is not
# described in this text -- confirm its purpose before documenting it here.
if ( $b_Help )
{
die << "END_OF_HELP"
$0 [--help] [--batch] [--] [folder]
This script renames files with accented and diacritical Latin characters :
- This PERL script starts from the folder given in parameter, or else from
the current folder.
- It recursively searches for files with characters belonging to 80 - FF of
CP 1250, CP 1252, CP 1254 and CP 1257 (mostly accented Latin characters)
or Latin characters having diacritical marks.
- It calculates new file names by removing the accents and diacritical marks
only from Latin characters (For example, Été --> Ete).
- It displays all proposed renaming and perhaps conflicts, and asks the user
for global approval.
- If the user has approved, it renames all files having no conflict.
Option '--batch' avoids interactive questions. Use with care.
Option '--' avoids the next parameter to be interpreted as option.
SPECIAL WARNING :
- This script was originally encoded in UTF-8, and should stay so.
- This script may rename a lot of files.
- Files names are theoretically all encoded only with UTF-8. But some file
names may be found to contain also some characters having legacy encoding.
- The author has applied efforts for consistency checks, robustness, conflict
detection and use of appropriate encoding.
So this script should only rename files by removing accents and diacritical
marks from Latin characters.
- But this script has been tested only under a limited number of OS
(Windows, Mac OS X, Linux) and a limited number of terminal encodings
(CP 850, ISO-8859-1, UTF-8).
- So, under weird circumstances, this script could rename many files with
random names.
- Therefore, this script should be used with care, and modified with extreme
care (beware encoding of internal strings, inputs, outputs and commands)
END_OF_HELP
}
#-----------------------------------------------------------------------------
# If requested, change current folder
#-----------------------------------------------------------------------------
# A folder given in parameter becomes the current folder; failure is fatal
if ( defined($Folder) )
{
    unless ( chdir($Folder) )
    { die "Can NOT set '$Folder' as current folder\n" }
}
#-----------------------------------------------------------------------------
# Following instruction is MANDATORY.
# The return value should be non-zero, but on some systems it is zero.
#-----------------------------------------------------------------------------
# Decode $AccentedChars in place.  The result of the call is deliberately
# NOT checked : the check below stays disabled.
utf8::decode($AccentedChars);
# or die "$0: '\$AccentedChars' should be UTF-8 but is NOT.\n";
#-----------------------------------------------------------------------------
# Check consistency on 'tr'
#-----------------------------------------------------------------------------
# Apply the complete transliteration to the list of accented characters
# itself : the result must be exactly the list of replacement characters.
# The 'tr' is built from variables, hence the string 'eval'; any error it
# raises is available in $@ , which is reported, then treated as a failure.
$_ = $AccentedChars;
eval "tr/$AccentedChars/$QuotedMetaNonAccenChars/";
if ( $@ ) { warn $@ }
if ( $@ or ($_ ne $NonAccenChars) )
{ die "$0: Consistency check on 't