instance.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. '''
  4. 按照app对instance分类,存储一个新的instance.csv文件,后面添加了cpu,mem,disk,P,M,PM等几列
  5. @Auther :liuyuqi.gov@msn.cn
  6. @Time :2018/7/6 16:13
  7. @File :instance.py
  8. '''
  9. from configparser import ConfigParser
  10. import pandas as pd
  11. cf = ConfigParser()
  12. config_path = "../conf/config.ini"
  13. section_name = "data_file_name"
  14. cf.read(config_path)
  15. app_interference = cf.get(section_name, "app_interference")
  16. app = cf.get(section_name, "app")
  17. instance_deploy = cf.get(section_name, "instance_deploy")
  18. machine_resources = cf.get(section_name, "machine_resources")
  19. # app
  20. df1 = pd.read_csv(app, encoding="utf-8")
  21. # instance
  22. df3 = pd.read_csv(instance_deploy, header=None,
  23. names=list(["instanceid", "appid", "machineid"]))
  24. # instance分类统计
  25. group1 = df3.groupby("appid").count()
  26. print(type(group1))
  27. # print(group1["instanceid"].sort_values(ascending=False))
  28. # plt.plot(group1["instanceid"].sort_values(ascending=False))
  29. # plt.savefig("../submit/group1.jpg")
  30. # 找到每个instance消耗的disk
  31. df3["cpu"] = None
  32. df3["disk"] = None
  33. df3["mem"] = None
  34. df3["P"] = None
  35. df3["M"] = None
  36. df3["PM"] = None
  37. for i in range(0, int(cf.get("table_size", "instance_size"))):
  38. # df1[df1["appid"] == df3["appid"][i]]["disk"]返回一个pd.Series对象(列表),其实只有一个值,需要选定第一个即可
  39. df3["mem"][i] = df1[df1["appid"] == df3["appid"][i]]["mem_avg"].values[0]
  40. df3["cpu"][i] = df1[df1["appid"] == df3["appid"][i]]["cpu_avg"].values[0]
  41. df3["disk"][i] = df1[df1["appid"] == df3["appid"][i]]["disk"].values[0]
  42. df3["P"][i] = df1[df1["appid"] == df3["appid"][i]]["P"].values[0]
  43. df3["M"][i] = df1[df1["appid"] == df3["appid"][i]]["M"].values[0]
  44. df3["PM"][i] = df1[df1["appid"] == df3["appid"][i]]["PM"].values[0]
  45. # ascending=False 降序
  46. df3 = df3.sort_values(ascending=False, by="disk")
  47. df3.to_csv("../data/instance.csv")