{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 上市公司分析报告\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据合并整理\n",
    "\n",
    "地区分类数据和行业分类数据整理为一个表,排序保存"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Region table; expected columns: 地区名称, 股票代码, 名称\n",
    "df_sse_diqu = pd.read_csv('sse_diqu.csv')\n",
    "# Industry table; expected columns: 行业名称, 行业代码, 股票代码, 名称\n",
    "df_sse_hangye = pd.read_csv('sse_hangye.csv')\n",
    "# NOTE(review): 股票代码 is read with pandas' inferred dtype; if codes can carry\n",
    "# leading zeros, pass dtype={'股票代码': str} to both reads - confirm against the data.\n",
    "\n",
    "# Left-join the industry name onto the region table by stock code, so every\n",
    "# row of the region table is kept even when no industry is known for it.\n",
    "# Only 股票代码 and 行业名称 are taken from the industry table: 行业代码 is not\n",
    "# part of the saved result, and leaving the industry table's 名称 out of the\n",
    "# merge keeps the region table's 名称 column free of _x/_y suffixes.\n",
    "# Saved columns: 地区名称, 行业名称, 股票代码, 名称 (sorted by region, then industry).\n",
    "df_sse_diqu_hangye = (\n",
    "    df_sse_diqu\n",
    "    .merge(df_sse_hangye[['股票代码', '行业名称']], on='股票代码', how='left')\n",
    "    .sort_values(by=['地区名称', '行业名称'])\n",
    "    .loc[:, ['地区名称', '行业名称', '股票代码', '名称']]\n",
    ")\n",
    "df_sse_diqu_hangye.to_csv('sse_diqu_hangye.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "对 2018–2023 年的数据进行去重"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os, sys, re\n",
    "\n",
    "years = [2018, 2019, 2020, 2021, 2022, 2023]\n",
    "for year in years:\n",
    "    file_path = f'股东大会公告链接_{year}.xlsx'\n",
    "    if not os.path.exists(file_path):\n",
    "        # Best-effort: a year without an input workbook is skipped, but the\n",
    "        # skip is reported so a missing file does not go unnoticed.\n",
    "        print(f'skipped {year}: {file_path} not found')\n",
    "        continue\n",
    "    df_year = pd.read_excel(file_path)\n",
    "    # Keep only the first row for each distinct value of the 年报链接 column.\n",
    "    df_year_dedup = df_year.drop_duplicates(subset=['年报链接'], keep='first')\n",
    "    df_year_dedup.to_excel(f'股东大会公告链接_{year}_rep.xlsx', index=False)"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}