Browse Source

继续优化代码

liuyuqi-dellpc 6 years ago
parent
commit
02714c6e9c
3 changed files with 46 additions and 24 deletions
  1. 39 20
      code/data_preview.py
  2. 1 0
      code/read_file.py
  3. 6 4
      code/read_file2.py

+ 39 - 20
code/data_preview.py

@@ -6,17 +6,32 @@
 @File :data_preview.py
 @File :data_preview.py
 '''
 '''
 
 
-# 后台做图,不需要GUI
-# %matplotlib inline
+# 后台做图,不需要GUI:需要在文件头部加入下面两行代码
+# %matplotlib inline   在jupyter中则加入这一行
 import matplotlib
 import matplotlib
 matplotlib.use('Agg')
 matplotlib.use('Agg')
+
 # 数据预览
 # 数据预览
-import numpy as np,pandas as pd
+import pandas as pd
 import matplotlib.pyplot as plt
 import matplotlib.pyplot as plt
+from configparser import ConfigParser
+
+# step1: 数据参数初始化
+
+cf = ConfigParser()
+config_path = "../conf/config.ini"
+section_name = "data_file_name"
+cf.read(config_path)
+
+app_interference = cf.get(section_name, "app_interference")
+app_resources = cf.get(section_name, "app_resources")
+instance_deploy = cf.get(section_name, "instance_deploy")
+machine_resources = cf.get(section_name, "machine_resources")
 
 
 def for_df1():
 def for_df1():
     # 应用app表: 应用id/cpu占用量/内存占用/磁盘占用/P/M/PM等指标
     # 应用app表: 应用id/cpu占用量/内存占用/磁盘占用/P/M/PM等指标
-    df1=pd.read_csv("../data/scheduling_preliminary_app_resources_20180606.csv", header=None,names=list(["appid", "cpu", "mem", "disk", "P", "M", "PM"]))
+    df1 = pd.read_csv(app_resources, header=None,
+                      names=list(["appid", "cpu", "mem", "disk", "P", "M", "PM"]))
     print(df1.dtypes)
     print(df1.dtypes)
     # appid    object
     # appid    object
     # cpu      object
     # cpu      object
@@ -30,19 +45,20 @@ def for_df1():
     # [5 rows x 7 columns]
     # [5 rows x 7 columns]
     # print(df1.head())
     # print(df1.head())
     # app_3 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0....... 0 0
     # app_3 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0.5 | 0....... 0 0
-    tmp=df1["cpu"].str.split('|',expand=True).astype('float')
+    tmp = df1["cpu"].str.split('|', expand=True).astype('float')
     # [5 rows x 98 columns]
     # [5 rows x 98 columns]
-    df1["cpu"]=tmp.T.mean().T #转置,求均值,再转置回来,这样求得一行的均值。
+    df1["cpu"] = tmp.T.mean().T  # 转置,求均值,再转置回来,这样求得一行的均值。
 
 
-    tmp=df1["mem"].str.split('|',expand=True).astype('float')
+    tmp = df1["mem"].str.split('|', expand=True).astype('float')
     # [5 rows x 98 columns]
     # [5 rows x 98 columns]
-    df1["mem"]=tmp.T.mean().T #转置,求均值,再转置回来,这样求得一行的均值。
+    df1["mem"] = tmp.T.mean().T  # 转置,求均值,再转置回来,这样求得一行的均值。
     print(df1.head())
     print(df1.head())
-    print("总共应用:",df1["appid"].unique().shape)
+    print("总共应用:", df1["appid"].unique().shape)
+
 
 
 def for_df2():
 def for_df2():
     # 主机表 :宿主机id/ cpu规格/mem规格/disk规格/P上限/M上限/PM上限
     # 主机表 :宿主机id/ cpu规格/mem规格/disk规格/P上限/M上限/PM上限
-    df2=pd.read_csv("../data/scheduling_preliminary_machine_resources_20180606.csv", header=None,names=list(
+    df2 = pd.read_csv(machine_resources, header=None, names=list(
         ["machineid", "cpu", "mem", "disk", "P", "M", "PM"]))
         ["machineid", "cpu", "mem", "disk", "P", "M", "PM"]))
     # df2 = pd.DataFrame(pd.read_csv("../data/scheduling_preliminary_machine_resources_20180606.csv", header=None),columns=list(["machineid", "cpu", "mem", "disk", "P", "M", "PM"]))
     # df2 = pd.DataFrame(pd.read_csv("../data/scheduling_preliminary_machine_resources_20180606.csv", header=None),columns=list(["machineid", "cpu", "mem", "disk", "P", "M", "PM"]))
     print(df2.dtypes)
     print(df2.dtypes)
@@ -57,30 +73,32 @@ def for_df2():
     # (6000, 7)
     # (6000, 7)
     print(df2.head())
     print(df2.head())
     # machine_3   32   64   600  7  3   7
     # machine_3   32   64   600  7  3   7
-    print("总共主机:",df2["machineid"].unique().shape)
+    print("总共主机:", df2["machineid"].unique().shape)
     # 6000
     # 6000
 
 
     # 这里主机主要就两类:
     # 这里主机主要就两类:
     # machine_1	32	64	600  	7	3	7    数量:3000
     # machine_1	32	64	600  	7	3	7    数量:3000
     # machine_2	92	288	1024	7	7	9   数量:3000
     # machine_2	92	288	1024	7	7	9   数量:3000
 
 
+
 def for_df3():
 def for_df3():
     # 主机machine/实例instance/应用app 关系表
     # 主机machine/实例instance/应用app 关系表
-    df3=pd.read_csv("../data/scheduling_preliminary_instance_deploy_20180606.csv", header=None,names=list(["instanceid", "appid", "machineid"]))
+    df3 = pd.read_csv(instance_deploy, header=None,
+                      names=list(["instanceid", "appid", "machineid"]))
     print(df3.dtypes)
     print(df3.dtypes)
-    print("df数据大小:",df3.shape)
-    print("instance唯一数量:",df3["instanceid"].unique().shape)
+    print("df数据大小:", df3.shape)
+    print("instance唯一数量:", df3["instanceid"].unique().shape)
     # print(df2["instanceid"])
     # print(df2["instanceid"])
-    print("总共实例:",df3["instanceid"].unique().shape)
-
+    print("总共实例:", df3["instanceid"].unique().shape)
 
 
 
 
 def for_df4():
 def for_df4():
     # 主机和实例表。部署appid1的interference最多可以部署n个appid2
     # 主机和实例表。部署appid1的interference最多可以部署n个appid2
-    df=pd.read_csv("../data/scheduling_preliminary_app_interference_20180606.csv",header=None,names=list(["appid1","appid2","max_interference"]))
+    df = pd.read_csv(app_interference, header=None,
+                     names=list(["appid1", "appid2", "max_interference"]))
     # 查看数据类型
     # 查看数据类型
     # print(df.dtypes)
     # print(df.dtypes)
-    print("df数据大小:",df.shape)
+    print("df数据大小:", df.shape)
 
 
     # 查看头尾部数据
     # 查看头尾部数据
     # app_8361  app_2163  0
     # app_8361  app_2163  0
@@ -103,9 +121,10 @@ def for_df4():
     # 第三列
     # 第三列
 
 
     # 描述性统计
     # 描述性统计
-    print("数据预览:",df.describe())
+    print("数据预览:", df.describe())
 
 
     plt.plot(df["max_interference"])
     plt.plot(df["max_interference"])
     plt.savefig("../submit/fig1.png")
     plt.savefig("../submit/fig1.png")
 
 
-for_df4()
+
+for_df4()

+ 1 - 0
code/read_file.py

@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 '''
 '''
+使用 csv读取数据
 @Author :liuyuqi.gov@msn.cn
 @Author :liuyuqi.gov@msn.cn
 @Time :2018/7/4 16:46
 @Time :2018/7/4 16:46
 @File :read_file.py
 @File :read_file.py

+ 6 - 4
code/read_file2.py

@@ -1,15 +1,16 @@
 #!/usr/bin/env python
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 '''
 '''
+使用pandas 读取数据
 @Author :liuyuqi.gov@msn.cn
 @Author :liuyuqi.gov@msn.cn
 @Time :2018/7/4 17:15
 @Time :2018/7/4 17:15
 @File :read_file2.py
 @File :read_file2.py
 '''
 '''
 
 
-import pandas as pd,numpy as np
-import os,sys
 from  configparser import ConfigParser
 from  configparser import ConfigParser
 
 
+import pandas as pd
+
 config_path = "../conf/config.ini"
 config_path = "../conf/config.ini"
 section_name = "data_file_name"
 section_name = "data_file_name"
 cf = ConfigParser()
 cf = ConfigParser()
@@ -19,5 +20,6 @@ cf.read(config_path)
 # instance_deploy
 # instance_deploy
 # machine_resources
 # machine_resources
 
 
-df=pd.read_csv(cf.get(section_name, "app_interference"),encoding="utf-8")
-print(df)
+df = pd.read_csv(cf.get(section_name, "app_interference"), encoding="utf-8")
+print("数据类型:", df.dtypes)
+print("数据大小:", df.shape)