#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@Contact :   liuyuqi.gov@msn.cn
@Time    :   2023/03/08 23:05:51
@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
@Desc    :   Preprocess b.csv: keep the first 500 popular English words
             whose length is less than 5 characters.
'''

import pandas as pd


def select_short_words(lines, max_len=5, limit=500):
    """Return the first ``limit`` unique short words found in ``lines``.

    Each line is stripped of surrounding whitespace and lower-cased.
    Blank entries and duplicates are dropped (the first occurrence wins),
    and only words strictly shorter than ``max_len`` characters are kept.
    The input order is preserved; no sorting is applied.

    Args:
        lines: Iterable of strings, one candidate word per item.
        max_len: Exclusive upper bound on the word length.
        limit: Maximum number of words to return.

    Returns:
        A ``pandas.Series`` holding at most ``limit`` words.
    """
    words = pd.Series(
        [line.strip().lower() for line in lines], dtype="object"
    )
    # A blank line strips down to "", whose length 0 would pass the length
    # filter and waste one of the output slots, so remove it explicitly.
    words = words[words != ""].drop_duplicates()
    short_words = words[words.str.len() < max_len]
    return short_words.head(limit)


def main():
    """Read ``data/b.csv`` and write the selected words to ``res3.csv``."""
    with open("data/b.csv", "r", encoding="utf-8") as file:
        words = select_short_words(file)
    # One word per row, without index column or header row.
    words.to_csv("res3.csv", index=False, header=False)


if __name__ == '__main__':
    main()