123456789101112131415161718192021 |
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@Contact : liuyuqi.gov@msn.cn
@Time : 2023/03/08 23:05:51
@License : Copyright © 2017-2022 liuyuqi. All Rights Reserved.
@Desc : Preprocess b.csv: keep the first 500 popular English words that are shorter than 5 characters.
'''
from typing import Iterable, List

import pandas as pd


def select_short_words(
    lines: Iterable[str], max_len: int = 5, limit: int = 500
) -> List[str]:
    """Return the first ``limit`` distinct words shorter than ``max_len``.

    Each input line is stripped of surrounding whitespace and lower-cased
    before comparison. Duplicates are dropped, keeping the first occurrence,
    and the input order is preserved.

    Args:
        lines: Raw text lines, one candidate word per line.
        max_len: Exclusive upper bound on word length (in characters).
        limit: Maximum number of words to return.

    Returns:
        The selected words, in input order.
    """
    normalized = (line.strip().lower() for line in lines)
    # Blank lines normalize to "" (length 0), which would otherwise pass the
    # length filter and be emitted as a bogus empty "word"; drop them here.
    # dict.fromkeys de-duplicates while keeping first-seen order.
    unique = dict.fromkeys(word for word in normalized if word)
    return [word for word in unique if len(word) < max_len][:limit]


def main(src: str = "data/b.csv", dst: str = "res3.csv") -> None:
    """Read words from ``src`` and write the selected short words to ``dst``.

    The output has one word per line, with no header row and no index column.

    Args:
        src: Path of the UTF-8 input file, one word per line.
        dst: Path of the output file to create or overwrite.
    """
    with open(src, "r", encoding="utf-8") as file:
        words = select_short_words(file)
    # Written through pandas so the quoting and line terminators stay the
    # same as before.
    pd.Series(words, dtype=object, name="name").to_csv(
        dst, index=False, header=False
    )


if __name__ == "__main__":
    main()
|