#! /home/phd/.local/bin/python3
# -*- coding: koi8-r -*-
"""Rename files, recoding their names from one encoding to another.

Besides real codecs, the pseudo-encodings ``translit`` and ``url`` are
recognised by :func:`build_recode`.
"""

import argparse
import sys
from m_lib.defenc import default_encoding


def parse_args(default=None):
    """Parse the command line and fill in whichever encoding was omitted.

    Guessing rules (``default_encoding`` comes from ``m_lib.defenc``):

    * both ``-f`` and ``-t`` given: used as is;
    * only ``-f`` given: the target is ``utf-8``, unless the source is
      already ``utf-8``, in which case the target is ``default_encoding``;
    * only ``-t`` given: the source is ``default_encoding``, unless the
      target equals ``default_encoding``, in which case the source is
      ``utf-8``;
    * neither given: recode from ``default_encoding`` to ``utf-8``.

    Whenever a rule would make both sides ``utf-8`` the process exits
    through ``sys.exit`` with a "Cannot guess ..." message.

    :param default: optional default filename; when truthy the positional
        ``filename`` argument becomes optional and defaults to ``[default]``.
    :returns: a ``(from_encoding, to_encoding, filenames)`` tuple.
    """
    parser = argparse.ArgumentParser(description='Recode filenames')
    parser.add_argument('-f', '--from-encoding', help='from encoding')
    parser.add_argument('-t', '--to-encoding', help='to encoding')
    parser.add_argument('filename', nargs='*' if default else '+',
                        default=[default], help='filenames to recode')
    args = parser.parse_args()

    from_encoding = args.from_encoding
    to_encoding = args.to_encoding

    if from_encoding:
        if to_encoding:
            pass  # Everything is defined, no need to guess
        elif from_encoding == 'utf-8':
            if default_encoding == 'utf-8':
                sys.exit('Cannot guess to_encoding')
            else:
                to_encoding = default_encoding
        else:
            to_encoding = 'utf-8'
    elif to_encoding:
        if to_encoding == default_encoding:
            if default_encoding == 'utf-8':
                sys.exit('Cannot guess from_encoding')
            else:
                from_encoding = 'utf-8'
        else:
            from_encoding = default_encoding
    else:
        if default_encoding == 'utf-8':
            sys.exit('Cannot guess encodings')
        else:
            from_encoding = default_encoding
            to_encoding = 'utf-8'

    return from_encoding, to_encoding, args.filename


def build_recode(from_encoding, to_encoding):
    """Return a one-argument callable that recodes a single filename.

    Supported combinations:

    * ``from_encoding == "translit"``: Latin transliteration to Russian;
      the target must be ``koi8-r`` or ``cp1251``.
    * ``to_encoding == "translit"``: Russian to Latin transliteration; the
      source must be ``koi8-r`` or ``cp1251``.
    * ``from_encoding == "url"``: URL-unquote the name; ``to_encoding`` may
      be a ``"src/dst"`` pair, in which case the unquoted name is also
      recoded from ``src`` to ``dst``.
    * ``to_encoding == "url"``: URL-quote the name; ``from_encoding`` may be
      a ``"src/dst"`` pair, in which case the name is recoded first.
    * anything else: a plain codec round trip.

    :raises NotImplementedError: if a transliteration is requested with an
        encoding other than ``koi8-r`` or ``cp1251`` on the other side.
    """
    if from_encoding == "translit":
        if to_encoding == "koi8-r":
            from m_lib.rus.lat2rus import lat2koi as _recode
        elif to_encoding == "cp1251":
            from m_lib.rus.lat2rus import lat2win as _recode
        else:
            raise NotImplementedError(
                "destination encoding must be koi8-r or cp1251, "
                "not `%s'" % to_encoding)

    elif to_encoding == "translit":
        if from_encoding == "koi8-r":
            from m_lib.rus.rus2lat import koi2lat as _recode
        elif from_encoding == "cp1251":
            from m_lib.rus.rus2lat import win2lat as _recode
        else:
            raise NotImplementedError(
                "source encoding must be koi8-r or cp1251, "
                "not `%s'" % from_encoding)

        # Map these four letters to the empty string so that no apostrophes
        # are produced for them: apostrophes are poor characters for
        # filenames, especially on Windows.
        # NOTE(review): the keys look like KOI8-R bytes (hard/soft signs?)
        # displayed as Latin-1 - confirm the file is stored as KOI8-R.
        from m_lib.rus.rus2lat import koi2lat_d
        koi2lat_d["ÿ"] = ''
        koi2lat_d["ø"] = ''
        koi2lat_d["ß"] = ''
        koi2lat_d["Ø"] = ''

    elif from_encoding == "url":
        # to_encoding is either "src/dst" or a single encoding name; with a
        # single name (or more than one '/') no recoding is performed.
        try:
            from_encoding, to_encoding = to_encoding.split('/')
        except ValueError:
            from_encoding = to_encoding
        from urllib.parse import unquote
        from m_lib.opstring import recode

        def _recode(s):
            s = unquote(s)
            if from_encoding != to_encoding:
                s = recode(s, from_encoding, to_encoding, "replace")
            return s

    elif to_encoding == "url":
        # from_encoding is either "src/dst" or a single encoding name.
        try:
            from_encoding, to_encoding = from_encoding.split('/')
        except ValueError:
            to_encoding = from_encoding
        from urllib.parse import quote
        from m_lib.opstring import recode

        def _recode(s):
            if from_encoding != to_encoding:
                s = recode(s, from_encoding, to_encoding, "replace")
            # wget treats the characters listed in ``safe`` as safe,
            # so they are left unquoted here as well.
            return quote(s, safe=";/?:@&=+$,()'")

    else:
        def _recode(s):
            # Encode with the *target* codec, then decode with the *source*
            # codec; "surrogateescape" keeps undecodable bytes intact.
            # NOTE(review): the order looks inverted but presumably relies
            # on the OS re-encoding the result with the filesystem encoding
            # when the file is renamed - confirm before changing.
            return s.encode(to_encoding, "surrogateescape").\
                decode(from_encoding, "surrogateescape")

    return _recode


def _encode_for_display(name, encoding):
    """Encode *name* to bytes for the progress message.

    *encoding* may be a pseudo-encoding accepted by :func:`build_recode`
    ("translit", "url" or a "src/dst" pair).  Those are not real codecs
    and make ``str.encode`` raise ``LookupError``, so fall back to
    ``default_encoding`` instead of aborting before the rename.
    """
    try:
        return name.encode(encoding, 'replace')
    except LookupError:
        return name.encode(default_encoding, 'replace')


def main():
    """Recode every filename given on the command line and rename the file.

    For each name that actually changes, a line of the form
    ``<from> <to> / <old name> -> <new name>`` is written to stdout as
    bytes, and the file is then renamed with ``os.rename``.
    """
    import os

    from_encoding, to_encoding, filenames = parse_args()
    _recode = build_recode(from_encoding, to_encoding)

    for filename in filenames:
        new_name = _recode(filename)
        # The recoder may hand back bytes; rename needs the same type
        # as the original (str) name.
        if not isinstance(new_name, str):
            new_name = new_name.decode()
        if new_name != filename:
            sys.stdout.buffer.write(
                b'%s %s / %s -> %s\n' % (
                    from_encoding.encode(), to_encoding.encode(),
                    filename.encode(default_encoding, 'replace'),
                    _encode_for_display(new_name, to_encoding))
            )
            os.rename(filename, new_name)


if __name__ == "__main__":
    main()