{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "a8be6839",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import janitor\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from matplotlib.ticker import MaxNLocator\n",
    "import math\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "211ba466",
   "metadata": {},
   "outputs": [],
   "source": [
    "outdir = \"EU_CH_scope\"\n",
    "\n",
    "# One CSV per table: read in the original order and bind to the original names.\n",
    "appln, appln_title, pers, appln_pers, appln_cpc = (\n",
    "    pd.read_csv(csv_path)\n",
    "    for csv_path in (\n",
    "        f\"{outdir}/tls_201_scope.csv\",\n",
    "        f\"{outdir}/tls_202_scope.csv\",\n",
    "        f\"{outdir}/tls_206_scope.csv\",\n",
    "        f\"{outdir}/tls_207_scope.csv\",\n",
    "        f\"{outdir}/tls_224_scope.csv\",\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "f878b151",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: disabled draft loader, kept for reference only; the next cell builds cpc_ids from the same title-list folder.\n",
    "# workdir_path=r\"CPCTitleList202302\"\n",
    "# # outfile='wos_extract_complete.csv'\n",
    "# # with_header=True\n",
    "# cpc_ids = pd.DataFrame()\n",
    "# for root, dirs, files in os.walk(workdir_path):\n",
    "#     for filename in files:\n",
    "#         path=os.path.join(root, filename)\n",
    "#         section = pd.read_csv(path, sep='\\t', header=None)\n",
    "#         cpc_ids=pd.concat([cpc_ids,section], ignore_index=True)\n",
    "# cpc_ids.columns =[\"cpc_id\",\"idk\",\"cpc_name\"]\n",
    "# cpc_ids = cpc_ids.drop(columns=\"idk\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "id": "95ea20da",
   "metadata": {},
   "outputs": [],
   "source": [
    "def split_cpc_code(code):\n",
    "    \"\"\"Split a CPC symbol into the positional pieces stored next to it.\n",
    "\n",
    "    The symbol is cut purely by character position: ``code[0]`` is the\n",
    "    section, ``code[1:3]`` the class, ``code[3]`` the subclass, the text\n",
    "    between the subclass and the first ``/`` is stored as ``group`` and the\n",
    "    text after the last ``/`` as ``main_group``. Pieces the symbol is too\n",
    "    short to contain are ``None``.\n",
    "\n",
    "    NOTE(review): for ``A01B1/02`` this gives group='1' and main_group='02'.\n",
    "    The column names are left unchanged because other tables may rely on\n",
    "    them - confirm before renaming.\n",
    "\n",
    "    Args:\n",
    "        code: CPC symbol without blanks, e.g. ``'A01B1/02'``.\n",
    "\n",
    "    Returns:\n",
    "        dict with keys 'section', 'class', 'subclass', 'group', 'main_group'.\n",
    "    \"\"\"\n",
    "    return {\n",
    "        'section': code[0] if len(code) >= 1 else None,\n",
    "        'class': code[1:3] if len(code) >= 3 else None,\n",
    "        'subclass': code[3] if len(code) >= 4 else None,\n",
    "        'group': code.split('/')[0][4:] if len(code) >= 5 else None,\n",
    "        'main_group': code.split('/')[-1] if \"/\" in code else None,\n",
    "    }\n",
    "\n",
    "\n",
    "# Column name -> list of values; one entry per title-list row that is kept.\n",
    "parsed = {x: [] for x in ['code', 'title', 'section', 'class', 'subclass', 'group', 'main_group']}\n",
    "for letter in 'ABCDEFGHY':\n",
    "    file = f'CPCTitleList202302/cpc-section-{letter}_20230201.txt'\n",
    "    # Explicit encoding: without it open() falls back to the platform locale,\n",
    "    # so the decoded titles could differ from one machine to the next.\n",
    "    # NOTE(review): UTF-8 is assumed for the CPC title lists - confirm.\n",
    "    with open(file, encoding='utf-8') as f:\n",
    "        for line in f:\n",
    "            vals = line.strip().split('\\t')\n",
    "            # Only 2-field (code, title) and 3-field (code, <ignored>, title)\n",
    "            # rows are used; any other field count is skipped.\n",
    "            if len(vals) not in (2, 3):\n",
    "                continue\n",
    "            code, title = vals[0], vals[-1]\n",
    "            parsed['code'].append(code)\n",
    "            parsed['title'].append(title)\n",
    "            # Fill the positional columns in the same pass so every list in\n",
    "            # `parsed` always has the same length.\n",
    "            for column, value in split_cpc_code(code).items():\n",
    "                parsed[column].append(value)\n",
    "\n",
    "# 2023 titles parsed above; tag every row with the scheme year it came from.\n",
    "cpc_ids2023 = pd.DataFrame.from_dict(parsed).assign(cpc_version=2023)\n",
    "\n",
    "# dtype=str stops read_csv from inferring numbers for code fragments such as\n",
    "# class \"01\" or main_group \"00\"; inferred numbers would lose the leading\n",
    "# zeros and no longer match the string values built from the 2023 lists.\n",
    "# NOTE(review): assumes the 2022 CSV carries the same columns as `parsed` - confirm.\n",
    "cpc_ids2022 = pd.read_csv(\"CPC_data/cpc_titles_2022.csv\", dtype=str).assign(cpc_version=2022)\n",
    "\n",
    "# 2023 rows are concatenated first, so drop_duplicates (which keeps the first\n",
    "# occurrence by default) prefers the 2023 row when a symbol is in both years.\n",
    "cpc_ids = (\n",
    "    pd.concat([cpc_ids2023, cpc_ids2022], ignore_index=True)\n",
    "    .rename(columns={\"code\": \"cpc_id\", \"title\": \"cpc_name\"})\n",
    "    .drop_duplicates(subset=\"cpc_id\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "id": "907d9c3e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cpc_id</th>\n",
       "      <th>cpc_name</th>\n",
       "      <th>section</th>\n",
       "      <th>class</th>\n",
       "      <th>subclass</th>\n",
       "      <th>group</th>\n",
       "      <th>main_group</th>\n",
       "      <th>cpc_version</th>\n",
       "      <th>cpc_taxonomy</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>HUMAN NECESSITIES</td>\n",
       "      <td>A</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>2023</td>\n",
       "      <td>[(A, HUMAN NECESSITIES)]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A01</td>\n",
       "      <td>AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI...</td>\n",
       "      <td>A</td>\n",
       "      <td>01</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>2023</td>\n",
       "      <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A01B</td>\n",
       "      <td>SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS...</td>\n",
       "      <td>A</td>\n",
       "      <td>01</td>\n",
       "      <td>B</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>2023</td>\n",
       "      <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A01B1/00</td>\n",
       "      <td>Hand tools (edge trimmers for lawns A01G3/06  ...</td>\n",
       "      <td>A</td>\n",
       "      <td>01</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>00</td>\n",
       "      <td>2023</td>\n",
       "      <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A01B1/02</td>\n",
       "      <td>Spades; Shovels {(hand-operated dredgers E02F3...</td>\n",
       "      <td>A</td>\n",
       "      <td>01</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>02</td>\n",
       "      <td>2023</td>\n",
       "      <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     cpc_id                                           cpc_name section class   \n",
       "0         A                                  HUMAN NECESSITIES       A  None  \\\n",
       "1       A01  AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI...       A    01   \n",
       "2      A01B  SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS...       A    01   \n",
       "3  A01B1/00  Hand tools (edge trimmers for lawns A01G3/06  ...       A    01   \n",
       "4  A01B1/02  Spades; Shovels {(hand-operated dredgers E02F3...       A    01   \n",
       "\n",
       "  subclass group main_group  cpc_version   \n",
       "0     None  None       None         2023  \\\n",
       "1     None  None       None         2023   \n",
       "2        B  None       None         2023   \n",
       "3        B     1         00         2023   \n",
       "4        B     1         02         2023   \n",
       "\n",
       "                                        cpc_taxonomy  \n",
       "0                           [(A, HUMAN NECESSITIES)]  \n",
       "1  [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...  \n",
       "2  [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...  \n",
       "3  [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...  \n",
       "4  [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...  "
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "id": "1be8971a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "40 cpc_ids not found\n",
      "0.11297201118422912 % lost\n"
     ]
    }
   ],
   "source": [
    "# Strip blanks from the application symbols, then left-join the title table.\n",
    "appln_cpc[\"cpc_id\"] = appln_cpc[\"cpc_class_symbol\"].str.replace(\" \",\"\")\n",
    "appln_cpc_tax = pd.merge(appln_cpc, cpc_ids, how=\"left\", on=\"cpc_id\")\n",
    "\n",
    "# Distinct symbols that received no title from the join vs. all distinct symbols.\n",
    "unmatched_ids = appln_cpc_tax.loc[appln_cpc_tax[\"cpc_name\"].isna(), \"cpc_id\"].unique()\n",
    "distinct_ids = appln_cpc_tax[\"cpc_id\"].unique()\n",
    "print(len(unmatched_ids), \"cpc_ids not found\")\n",
    "print(len(unmatched_ids)/len(distinct_ids)*100, \"% lost\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "id": "b1274c34",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Title lookup keyed by the blank-free CPC symbol; a repeated key keeps the last title seen.\n",
    "cpc_dict = {\n",
    "    symbol: title\n",
    "    for symbol, title in zip(cpc_ids.cpc_id.str.replace(\" \",\"\"), cpc_ids.cpc_name)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "id": "2a7e39ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "def cpc_classifier(id_text, lookup=None):\n",
    "    \"\"\"Collect every prefix of a CPC symbol that has a title.\n",
    "\n",
    "    Blanks are removed from ``id_text`` and each leading substring (1\n",
    "    character, 2 characters, ... the full symbol) is looked up; prefixes\n",
    "    with a non-empty title are returned shortest first.\n",
    "\n",
    "    Args:\n",
    "        id_text: CPC symbol, e.g. ``\"A01B1/065\"``. Anything that is not a\n",
    "            string (``None``, or the NaN passed in for a missing value)\n",
    "            yields ``[]`` instead of raising.\n",
    "        lookup: optional mapping of blank-free symbol -> title. Defaults to\n",
    "            the notebook-level ``cpc_dict``.\n",
    "\n",
    "    Returns:\n",
    "        list of ``(prefix, title)`` tuples.\n",
    "\n",
    "    Note:\n",
    "        Matching is by string prefix only, so a related symbol that is not\n",
    "        a literal prefix of ``id_text`` never appears in the result.\n",
    "    \"\"\"\n",
    "    if not isinstance(id_text, str):\n",
    "        return []\n",
    "    titles = cpc_dict if lookup is None else lookup\n",
    "    iter_text = id_text.replace(\" \", \"\")\n",
    "    taxonomy = []\n",
    "    # Start at 1 so the empty prefix is not looked up.\n",
    "    for end in range(1, len(iter_text) + 1):\n",
    "        tax_id = iter_text[:end]\n",
    "        tax_name = titles.get(tax_id)\n",
    "        if tax_name:\n",
    "            taxonomy.append((tax_id, tax_name))\n",
    "    return taxonomy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "id": "e31a013f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('A', 'HUMAN NECESSITIES'),\n",
       " ('A01',\n",
       "  'AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTING; TRAPPING; FISHING'),\n",
       " ('A01B',\n",
       "  'SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS, DETAILS, OR ACCESSORIES OF AGRICULTURAL MACHINES OR IMPLEMENTS, IN GENERAL (making or covering furrows or holes for sowing, planting, or manuring A01C5/00; soil working for engineering purposes E01, E02, E21; {measuring areas for agricultural purposes G01B})'),\n",
       " ('A01B1/06',\n",
       "  'Hoes; Hand cultivators {(rakes A01D7/00; forks A01D9/00; picks B25D)}'),\n",
       " ('A01B1/065', '{powered}')]"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot-check the prefix lookup on a single symbol.\n",
    "cpc_classifier(\"A01B1/065\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "id": "f09a616c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cpc_id</th>\n",
       "      <th>cpc_name</th>\n",
       "      <th>section</th>\n",
       "      <th>class</th>\n",
       "      <th>subclass</th>\n",
       "      <th>group</th>\n",
       "      <th>main_group</th>\n",
       "      <th>cpc_version</th>\n",
       "      <th>cpc_taxonomy</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>HUMAN NECESSITIES</td>\n",
       "      <td>A</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>2023</td>\n",
       "      <td>[(A, HUMAN NECESSITIES)]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A01</td>\n",
       "      <td>AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI...</td>\n",
       "      <td>A</td>\n",
       "      <td>01</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>2023</td>\n",
       "      <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A01B</td>\n",
       "      <td>SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS...</td>\n",
       "      <td>A</td>\n",
       "      <td>01</td>\n",
       "      <td>B</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>2023</td>\n",
       "      <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A01B1/00</td>\n",
       "      <td>Hand tools (edge trimmers for lawns A01G3/06  ...</td>\n",
       "      <td>A</td>\n",
       "      <td>01</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>00</td>\n",
       "      <td>2023</td>\n",
       "      <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A01B1/02</td>\n",
       "      <td>Spades; Shovels {(hand-operated dredgers E02F3...</td>\n",
       "      <td>A</td>\n",
       "      <td>01</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>02</td>\n",
       "      <td>2023</td>\n",
       "      <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     cpc_id                                           cpc_name section class   \n",
       "0         A                                  HUMAN NECESSITIES       A  None  \\\n",
       "1       A01  AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI...       A    01   \n",
       "2      A01B  SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS...       A    01   \n",
       "3  A01B1/00  Hand tools (edge trimmers for lawns A01G3/06  ...       A    01   \n",
       "4  A01B1/02  Spades; Shovels {(hand-operated dredgers E02F3...       A    01   \n",
       "\n",
       "  subclass group main_group  cpc_version   \n",
       "0     None  None       None         2023  \\\n",
       "1     None  None       None         2023   \n",
       "2        B  None       None         2023   \n",
       "3        B     1         00         2023   \n",
       "4        B     1         02         2023   \n",
       "\n",
       "                                        cpc_taxonomy  \n",
       "0                           [(A, HUMAN NECESSITIES)]  \n",
       "1  [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...  \n",
       "2  [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...  \n",
       "3  [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...  \n",
       "4  [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...  "
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Attach, for every symbol, the list of (prefix, title) pairs returned by cpc_classifier.\n",
    "taxonomy_per_symbol = cpc_ids[\"cpc_id\"].apply(cpc_classifier)\n",
    "cpc_ids[\"cpc_taxonomy\"] = taxonomy_per_symbol\n",
    "cpc_ids.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "id": "f3fa8bf3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "40 cpc_ids not found\n",
      "0.11297201118422912 % lost\n"
     ]
    }
   ],
   "source": [
    "# Rebuild the join against the current cpc_ids and report the symbols left without a title.\n",
    "appln_cpc[\"cpc_id\"] = appln_cpc[\"cpc_class_symbol\"].str.replace(\" \",\"\")\n",
    "appln_cpc_tax = pd.merge(appln_cpc, cpc_ids, how=\"left\", on=\"cpc_id\")\n",
    "title_missing = appln_cpc_tax[\"cpc_name\"].isna()\n",
    "n_unmatched = len(appln_cpc_tax.loc[title_missing, \"cpc_id\"].unique())\n",
    "n_distinct = len(appln_cpc_tax[\"cpc_id\"].unique())\n",
    "print(n_unmatched, \"cpc_ids not found\")\n",
    "print(n_unmatched/n_distinct*100, \"% lost\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "id": "58701721",
   "metadata": {},
   "outputs": [],
   "source": [
    "# appln_cpc_tax[appln_cpc_tax[\"cpc_name\"].isna()][\"cpc_id\"].unique()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ca631acf",
   "metadata": {},
   "source": [
    "## 'AI' keywords"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "6c3baa5b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cpc_id</th>\n",
       "      <th>cpc_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>12725</th>\n",
       "      <td>A61B1/000096</td>\n",
       "      <td>{using artificial intelligence}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13764</th>\n",
       "      <td>A61B5/7264</td>\n",
       "      <td>{Classification of physiological signals or da...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45143</th>\n",
       "      <td>B23K31/006</td>\n",
       "      <td>{relating to using of neural networks}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47746</th>\n",
       "      <td>B25J9/161</td>\n",
       "      <td>{Hardware, e.g. neural networks, fuzzy logic, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53935</th>\n",
       "      <td>B29C66/965</td>\n",
       "      <td>{using artificial neural networks}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>240426</th>\n",
       "      <td>H04Q2213/343</td>\n",
       "      <td>Neural network</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>240673</th>\n",
       "      <td>H04R25/507</td>\n",
       "      <td>{implemented by neural network or fuzzy logic}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>246159</th>\n",
       "      <td>Y10S128/924</td>\n",
       "      <td>using artificial intelligence</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>246160</th>\n",
       "      <td>Y10S128/925</td>\n",
       "      <td>Neural network</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>250570</th>\n",
       "      <td>Y10S706/00</td>\n",
       "      <td>Data processing: artificial intelligence</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>105 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              cpc_id                                           cpc_name\n",
       "12725   A61B1/000096                    {using artificial intelligence}\n",
       "13764     A61B5/7264  {Classification of physiological signals or da...\n",
       "45143     B23K31/006             {relating to using of neural networks}\n",
       "47746      B25J9/161  {Hardware, e.g. neural networks, fuzzy logic, ...\n",
       "53935     B29C66/965                 {using artificial neural networks}\n",
       "...              ...                                                ...\n",
       "240426  H04Q2213/343                                     Neural network\n",
       "240673    H04R25/507     {implemented by neural network or fuzzy logic}\n",
       "246159   Y10S128/924                      using artificial intelligence\n",
       "246160   Y10S128/925                                     Neural network\n",
       "250570    Y10S706/00           Data processing: artificial intelligence\n",
       "\n",
       "[105 rows x 2 columns]"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Titles are lower-cased first, so the phrase search is case-insensitive.\n",
    "ai_pattern = \"machine learn|neural network|deep learn|deep network|artificial intelligence\"\n",
    "ai_title_mask = cpc_ids[\"cpc_name\"].str.lower().str.contains(ai_pattern, regex=True)\n",
    "cpc_ids.loc[ai_title_mask]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "id": "2e8368b4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>appln_id</th>\n",
       "      <th>appln_auth</th>\n",
       "      <th>appln_nr</th>\n",
       "      <th>appln_kind</th>\n",
       "      <th>appln_filing_date</th>\n",
       "      <th>appln_filing_year</th>\n",
       "      <th>appln_nr_original</th>\n",
       "      <th>ipr_type</th>\n",
       "      <th>receiving_office</th>\n",
       "      <th>internat_appln_id</th>\n",
       "      <th>...</th>\n",
       "      <th>earliest_pat_publn_id</th>\n",
       "      <th>granted</th>\n",
       "      <th>docdb_family_id</th>\n",
       "      <th>inpadoc_family_id</th>\n",
       "      <th>docdb_family_size</th>\n",
       "      <th>nb_citing_docdb_fam</th>\n",
       "      <th>nb_applicants</th>\n",
       "      <th>nb_inventors</th>\n",
       "      <th>appln_title_lg</th>\n",
       "      <th>appln_title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>340657036</td>\n",
       "      <td>EP</td>\n",
       "      <td>12000117</td>\n",
       "      <td>A</td>\n",
       "      <td>2012-01-09</td>\n",
       "      <td>2012</td>\n",
       "      <td>12000117</td>\n",
       "      <td>PI</td>\n",
       "      <td></td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>407623142</td>\n",
       "      <td>Y</td>\n",
       "      <td>45507394</td>\n",
       "      <td>340657036</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>en</td>\n",
       "      <td>Rotating membrane filter disc apparatus</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>340982410</td>\n",
       "      <td>EP</td>\n",
       "      <td>12151915</td>\n",
       "      <td>A</td>\n",
       "      <td>2012-01-20</td>\n",
       "      <td>2012</td>\n",
       "      <td>12151915</td>\n",
       "      <td>PI</td>\n",
       "      <td></td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>365158710</td>\n",
       "      <td>Y</td>\n",
       "      <td>45531220</td>\n",
       "      <td>340982410</td>\n",
       "      <td>2</td>\n",
       "      <td>16</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>en</td>\n",
       "      <td>Heating-Cooling-Capacity measurement controlli...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>341078960</td>\n",
       "      <td>EP</td>\n",
       "      <td>12700310</td>\n",
       "      <td>A</td>\n",
       "      <td>2012-01-11</td>\n",
       "      <td>2012</td>\n",
       "      <td>12700310</td>\n",
       "      <td>PI</td>\n",
       "      <td></td>\n",
       "      <td>340778427</td>\n",
       "      <td>...</td>\n",
       "      <td>413564969</td>\n",
       "      <td>Y</td>\n",
       "      <td>45491582</td>\n",
       "      <td>340778427</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "      <td>TRANSMISSION DEVICE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>341078962</td>\n",
       "      <td>EP</td>\n",
       "      <td>12700311</td>\n",
       "      <td>A</td>\n",
       "      <td>2012-01-11</td>\n",
       "      <td>2012</td>\n",
       "      <td>12700311</td>\n",
       "      <td>PI</td>\n",
       "      <td></td>\n",
       "      <td>340778431</td>\n",
       "      <td>...</td>\n",
       "      <td>413564970</td>\n",
       "      <td>Y</td>\n",
       "      <td>45491583</td>\n",
       "      <td>340778431</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "      <td>TRANSMISSION DEVICE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>341127772</td>\n",
       "      <td>EP</td>\n",
       "      <td>12700372</td>\n",
       "      <td>A</td>\n",
       "      <td>2012-01-02</td>\n",
       "      <td>2012</td>\n",
       "      <td>12700372</td>\n",
       "      <td>PI</td>\n",
       "      <td></td>\n",
       "      <td>340460188</td>\n",
       "      <td>...</td>\n",
       "      <td>421840120</td>\n",
       "      <td>Y</td>\n",
       "      <td>45495923</td>\n",
       "      <td>340460188</td>\n",
       "      <td>4</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>en</td>\n",
       "      <td>POWER CONTROL IN A WIRELESS COMMUNICATION SYST...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    appln_id appln_auth  appln_nr appln_kind appln_filing_date   \n",
       "0  340657036         EP  12000117         A         2012-01-09  \\\n",
       "1  340982410         EP  12151915         A         2012-01-20   \n",
       "2  341078960         EP  12700310         A         2012-01-11   \n",
       "3  341078962         EP  12700311         A         2012-01-11   \n",
       "4  341127772         EP  12700372         A         2012-01-02   \n",
       "\n",
       "   appln_filing_year appln_nr_original ipr_type receiving_office   \n",
       "0               2012          12000117       PI                   \\\n",
       "1               2012          12151915       PI                    \n",
       "2               2012          12700310       PI                    \n",
       "3               2012          12700311       PI                    \n",
       "4               2012          12700372       PI                    \n",
       "\n",
       "   internat_appln_id  ... earliest_pat_publn_id granted docdb_family_id   \n",
       "0                  0  ...             407623142       Y        45507394  \\\n",
       "1                  0  ...             365158710       Y        45531220   \n",
       "2          340778427  ...             413564969       Y        45491582   \n",
       "3          340778431  ...             413564970       Y        45491583   \n",
       "4          340460188  ...             421840120       Y        45495923   \n",
       "\n",
       "  inpadoc_family_id  docdb_family_size  nb_citing_docdb_fam nb_applicants   \n",
       "0         340657036                  3                    6             1  \\\n",
       "1         340982410                  2                   16             2   \n",
       "2         340778427                  3                    2             1   \n",
       "3         340778431                  3                    3             1   \n",
       "4         340460188                  4                    8             1   \n",
       "\n",
       "   nb_inventors  appln_title_lg   \n",
       "0             2              en  \\\n",
       "1             6              en   \n",
       "2             1              en   \n",
       "3             1              en   \n",
       "4             2              en   \n",
       "\n",
       "                                         appln_title  \n",
       "0            Rotating membrane filter disc apparatus  \n",
       "1  Heating-Cooling-Capacity measurement controlli...  \n",
       "2                                TRANSMISSION DEVICE  \n",
       "3                                TRANSMISSION DEVICE  \n",
       "4  POWER CONTROL IN A WIRELESS COMMUNICATION SYST...  \n",
       "\n",
       "[5 rows x 28 columns]"
      ]
     },
     "execution_count": 121,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inner join (the pandas default): only applications with a matching title row are kept.\n",
    "appln_data = pd.merge(appln, appln_title, on=\"appln_id\")\n",
    "appln_data.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}