12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364 |
- import os
- import chardet
- import codecs
- import logging
- import argparse
class ConvertCode(object):
    """Re-encode text files under a directory tree to UTF-8, in place.

    The directory and the file-name pattern come from the ``--dir`` and
    ``--suffix`` command-line options.
    """

    def __init__(self, debug=False, argv=None):
        """Parse the command-line options and store them on the instance.

        Args:
            debug: when truthy, configure the root logger at DEBUG level.
            argv: optional list of argument strings to parse instead of
                ``sys.argv[1:]`` (``None`` keeps the command-line behaviour).
        """
        super(ConvertCode, self).__init__()
        parser = argparse.ArgumentParser(
            description='Convert file encode to utf8.')
        parser.add_argument(
            "--dir",
            help="set a file directory to convert, eg: /workspace. default: ./",
            type=str, default="./")
        parser.add_argument(
            "--suffix",
            help="set the file suffix to convert, eg: *.java. default: *.*",
            type=str, default="*.*")
        args = parser.parse_args(argv)
        # Fall back to the defaults when an empty string is passed, so that
        # run() never hits a missing attribute.
        self.dir = args.dir or "./"
        self.suffix = args.suffix or "*.*"
        if debug:
            logging.basicConfig(level=logging.DEBUG)

    @staticmethod
    def WriteFile(filePath, u, encoding="utf-8"):
        """Write the text ``u`` to ``filePath`` using ``encoding``."""
        with codecs.open(filePath, "w", encoding) as f:
            f.write(u)

    @staticmethod
    def GBK_2_UTF8(src, dst):
        """Detect the encoding of ``src`` and write it to ``dst`` as UTF-8.

        Empty files, files whose encoding cannot be detected, and files that
        are already UTF-8/ASCII are left untouched. Decode and write failures
        are reported on stdout instead of being raised; a failure to open or
        read ``src`` propagates to the caller.
        """
        with open(src, "rb") as f:
            raw = f.read()
        if not raw:
            return

        coding = chardet.detect(raw)["encoding"]
        if coding is None:
            # Detection can fail (e.g. binary data); there is nothing safe
            # to decode with, so skip the file.
            print(src + " unknown encoding, skipped")
            return
        if coding.lower() in ("utf-8", "ascii"):
            # ASCII is a subset of UTF-8, so rewriting would be a no-op.
            return

        # Decode fully before touching dst: src and dst are usually the same
        # path, and a failed decode must not truncate the file.
        try:
            text = raw.decode(coding)
        except (UnicodeError, LookupError):
            print(src + " " + coding + " read error")
            return

        try:
            ConvertCode.WriteFile(dst, text, encoding="utf-8")
        except (IOError, OSError):
            print(dst + " write error")
            return

        try:
            print(src + " " + coding + " to utf-8 converted!")
        except UnicodeError:
            # The console encoding may not be able to represent the path.
            print("print error")

    def _matches(self, filename):
        """Return True when ``filename`` is selected by ``self.suffix``."""
        import fnmatch  # local import: not among the file-level imports

        pattern = self.suffix
        if pattern in ("*", "*.*"):
            # "*.*" is the documented default and means "every file",
            # including names without a dot.
            return True
        if any(ch in pattern for ch in "*?["):
            return fnmatch.fnmatch(filename, pattern)
        # A pattern without wildcards is treated as a plain suffix.
        return filename.endswith(pattern)

    def run(self):
        """Convert every matching file under ``self.dir`` to UTF-8 in place."""
        # os.walk already descends into sub-directories, so no explicit
        # recursion is needed.
        for parent, _dirnames, filenames in os.walk(self.dir):
            for filename in filenames:
                if self._matches(filename):
                    path = os.path.join(parent, filename)
                    ConvertCode.GBK_2_UTF8(path, path)
# Script entry point: build the converter from the command-line options
# and process the selected directory.
if __name__ == "__main__":
    ConvertCode().run()
|