{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 上海考点数据统计\n",
    "\n",
    "读取 `../data/res.csv`，去重并转换类型后，按考点和按日期统计记录数。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import re\n",
    "import sys\n",
    "import time\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Raw CSV export. It is read with header=None, so columns are addressed by position.\n",
    "DATA_PATH = \"../data/res.csv\"\n",
    "\n",
    "# Positional column labels used throughout the notebook.\n",
    "UNUSED_COLS = [0, 1, 2]    # dropped right after loading\n",
    "DATE_COL = 3               # parsed with the format %Y%m%d\n",
    "SITE_COL = 4               # exam-site name, prefixed with a subject such as 科目一\n",
    "COUNT_COLS = [5, 6, 7, 8]  # numeric columns in which \"--\" is replaced by 0\n",
    "\n",
    "# Subject prefixes stripped from the site name before sites are counted.\n",
    "# NOTE(review): only subjects one to three are handled, exactly as before;\n",
    "# confirm whether names prefixed with 科目四 occur in the data.\n",
    "SUBJECT_PREFIXES = (\"科目一\", \"科目二\", \"科目三\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "raw_df = pd.read_csv(DATA_PATH, header=None)\n",
    "raw_df.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 清洗数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the unused leading columns, then remove exact duplicate rows.\n",
    "# The index is reset only after de-duplication and is not kept as a column:\n",
    "# a unique \"index\" column makes every row distinct, which would turn\n",
    "# drop_duplicates() into a no-op.\n",
    "df = (\n",
    "    raw_df\n",
    "    .drop(columns=UNUSED_COLS)\n",
    "    .drop_duplicates()\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "print(f\"removed {len(raw_df) - len(df)} duplicate rows\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse the yyyymmdd dates and replace the \"--\" placeholder with 0 so that\n",
    "# the count columns can be stored as integers.\n",
    "df[DATE_COL] = pd.to_datetime(df[DATE_COL], format=\"%Y%m%d\")\n",
    "for count_col in COUNT_COLS:\n",
    "    df[count_col] = df[count_col].replace(\"--\", 0).astype(int)\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 聚合函数统计\n",
    "\n",
    "可以看到 科目1,2,3,4等,上海总共有249个考点。 \n",
    "\n",
    "前十考点:\n",
    "第01考点市中心, 第08考点大众 , 第07考点马陆 , 第02考点安技 , 第22考点恒通 , 第04考点小昆山, 第27考点和悦 , 第17考点邮佳 , 第11考点刘行 , 三分所\n",
    "\n",
    "> 注：以上数字来自修正去重逻辑之前的运行结果，重新运行后请核对并更新。\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Strip the subject prefix so that site names compare equal across subjects.\n",
    "# regex=False keeps the match literal regardless of the pandas version.\n",
    "for subject_prefix in SUBJECT_PREFIXES:\n",
    "    df[SITE_COL] = df[SITE_COL].str.replace(subject_prefix, \"\", regex=False)\n",
    "\n",
    "# Number of distinct exam-site names.\n",
    "df[SITE_COL].nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows per site, largest first. count() reports the non-null cells of each\n",
    "# column, so the columns show the same number wherever no value is missing.\n",
    "df_line = (\n",
    "    df.groupby(SITE_COL)\n",
    "    .count()\n",
    "    .reset_index()\n",
    "    .sort_values(by=[COUNT_COLS[0]], ascending=False)\n",
    ")\n",
    "df_line.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 按日期统计记录数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows per date, largest first.\n",
    "# SQL equivalent: SELECT date, COUNT(*) FROM car GROUP BY date\n",
    "# This counts records; it does not sum the values of the count columns.\n",
    "df_by_date = (\n",
    "    df.groupby(DATE_COL)\n",
    "    .count()\n",
    "    .sort_values(by=[SITE_COL], ascending=False)\n",
    ")\n",
    "df_by_date.head(10)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.6.0 64-bit ('root': conda)",
   "language": "python",
   "name": "python36064bitrootconda12dcd85ef9c147fdbdf4c10492696076"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}