"""Convert text files under a directory tree to UTF-8, in place."""
import os
import codecs
import fnmatch
import logging
import argparse

try:
    import chardet
except ImportError:
    # chardet is third-party and only needed when an encoding has to be
    # guessed; the failure is deferred to the first detection attempt so the
    # module (and ``--help``) still works without it.
    chardet = None


class ConvertCode(object):
    """Re-encode the files of a directory tree as UTF-8.

    The directory (``--dir``) and the file-name pattern (``--suffix``) are
    read from the command line when the instance is created.
    """

    def __init__(self, debug=False, argv=None):
        """Parse the command-line options.

        Args:
            debug: when true, configure the root logger at DEBUG level.
            argv: list of argument strings to parse; ``None`` (the default)
                makes argparse read ``sys.argv[1:]``.
        """
        super(ConvertCode, self).__init__()
        parser = argparse.ArgumentParser(
            description='Convert file encode to utf8.')
        parser.add_argument(
            "--dir",
            help="set a file directory to convert, eg: /workspace. default: ./",
            type=str,
            default="./")
        parser.add_argument(
            "--suffix",
            help="set the file suffix to convert, eg: *.java. default: *.*",
            type=str,
            default="*.*")
        args = parser.parse_args(argv)
        # Always define both attributes; an explicitly empty value falls back
        # to the documented default instead of leaving the attribute unset.
        self.dir = args.dir or "./"
        self.suffix = args.suffix or "*.*"
        if debug:
            logging.basicConfig(level=logging.DEBUG)

    @staticmethod
    def WriteFile(filePath, u, encoding="utf-8"):
        """Write the text ``u`` to ``filePath`` encoded as ``encoding``.

        The file is written in binary mode so line endings contained in ``u``
        are stored unchanged.
        """
        with open(filePath, "wb") as f:
            f.write(u.encode(encoding))

    @staticmethod
    def _report(message):
        """Print ``message``; fall back to a fixed text if it cannot be encoded."""
        try:
            print(message)
        except UnicodeError:
            # The console encoding may not be able to represent the path.
            print("print error")

    @staticmethod
    def _detect_encoding(raw):
        """Return chardet's guess for the encoding of ``raw`` (may be ``None``).

        Raises:
            ImportError: if chardet is not installed.
        """
        if chardet is None:
            raise ImportError(
                "chardet is required to detect file encodings; "
                "install it or pass the encoding explicitly")
        return chardet.detect(raw)["encoding"]

    @staticmethod
    def GBK_2_UTF8(src, dst, coding=None):
        """Re-encode the file ``src`` as UTF-8 and write the result to ``dst``.

        Args:
            src: path of the file to read.
            dst: path of the file to write (may be the same as ``src``).
            coding: encoding of ``src``; detected with chardet when ``None``.

        Returns:
            True if ``dst`` was written, False if the file was skipped (empty,
            already UTF-8, encoding unknown, or a read/decode/write error,
            which is reported on stdout).

        Raises:
            ImportError: if detection is needed and chardet is not installed.
        """
        try:
            with open(src, "rb") as f:
                raw = f.read()
        except (IOError, OSError):
            ConvertCode._report(src + " read error")
            return False

        if not raw:
            # Nothing to convert in an empty file.
            return False

        if coding is None:
            coding = ConvertCode._detect_encoding(raw)
            if coding is None:
                # The detector could not make a guess for this content.
                ConvertCode._report(src + " encoding not detected, skipped")
                return False

        try:
            # Normalise aliases ("UTF8", "utf_8", ...) before comparing.
            if codecs.lookup(coding).name == "utf-8":
                return False
            # Decode completely before touching dst, so a failure leaves an
            # in-place conversion (src == dst) with the file untouched.
            text = raw.decode(coding)
        except (LookupError, UnicodeError):
            ConvertCode._report(src + " " + coding + " read error")
            return False

        try:
            ConvertCode.WriteFile(dst, text, encoding="utf-8")
        except (IOError, OSError):
            ConvertCode._report(dst + " write error")
            return False

        ConvertCode._report(src + " " + coding + " to utf-8 converted!")
        return True

    def _matches(self, filename):
        """Tell whether ``filename`` is selected by ``self.suffix``.

        A value containing glob characters (``*``, ``?``, ``[``) is matched
        with fnmatch; a plain value such as ``.java`` is treated as a suffix.
        """
        pattern = self.suffix
        if any(ch in pattern for ch in "*?["):
            return fnmatch.fnmatch(filename, pattern)
        return filename.endswith(pattern)

    def _iter_files(self):
        """Yield the path of every selected file below ``self.dir``."""
        # os.walk already descends into every subdirectory.
        for parent, _dirnames, filenames in os.walk(self.dir):
            for filename in filenames:
                if self._matches(filename):
                    yield os.path.join(parent, filename)

    def run(self):
        """Convert every selected file below ``self.dir`` to UTF-8 in place."""
        for path in self._iter_files():
            logging.debug("checking %s", path)
            ConvertCode.GBK_2_UTF8(path, path)


if __name__ == "__main__":
    converter = ConvertCode()
    converter.run()