# %%
import math
import os
import warnings

import janitor
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib.ticker import MaxNLocator

# Plain-Python spelling of the `%matplotlib inline` line magic.
get_ipython().run_line_magic("matplotlib", "inline")

# %%
# Folder holding the scoped table extracts; later cells also write into it.
outdir = "WESTERN_CH_scope"

appln = pd.read_csv(f"{outdir}/tls_201_scope.csv")
appln_title = pd.read_csv(f"{outdir}/tls_202_scope.csv")
pers = pd.read_csv(f"{outdir}/tls_206_scope.csv")
appln_pers = pd.read_csv(f"{outdir}/tls_207_scope.csv")
appln_cpc = pd.read_csv(f"{outdir}/tls_224_scope.csv")

# %%
CPC_TITLE_DIR = "CPC_data/CPCTitleList202302"
CPC_SECTION_LETTERS = "ABCDEFGHY"
CPC_COLUMNS = ["code", "title", "section", "class", "subclass", "group", "main_group"]


def split_cpc_code(code):
    """Slice a CPC symbol into positional parts.

    Parameters
    ----------
    code : str
        Symbol as it appears in the first column of the title list,
        e.g. ``"A01B1/02"``.

    Returns
    -------
    dict
        Keys ``section``, ``class``, ``subclass``, ``group`` and
        ``main_group``. A part is ``None`` when the symbol is too short
        to contain it (or, for ``main_group``, when it has no ``/``).
        ``group`` is the text between the 4-character prefix and the
        slash; ``main_group`` is the text after the slash.
    """
    return {
        "section": code[0] if len(code) >= 1 else None,
        "class": code[1:3] if len(code) >= 3 else None,
        "subclass": code[3] if len(code) >= 4 else None,
        "group": code.split("/")[0][4:] if len(code) >= 5 else None,
        "main_group": code.split("/")[-1] if "/" in code else None,
    }


title_records = []
for letter in CPC_SECTION_LETTERS:
    title_path = f"{CPC_TITLE_DIR}/cpc-section-{letter}_20230201.txt"
    # NOTE(review): no explicit encoding is given, so the platform default is
    # used -- confirm the encoding of the title list files.
    with open(title_path) as handle:
        for line in handle:
            fields = line.strip().split("\t")
            if len(fields) == 2:
                code, title = fields
            elif len(fields) == 3:
                # Three-column rows carry the title last; the middle field is unused.
                code, title = fields[0], fields[2]
            else:
                # Rows with any other field count are ignored.
                continue
            title_records.append({"code": code, "title": title, **split_cpc_code(code)})

cpc_ids2023 = pd.DataFrame(title_records, columns=CPC_COLUMNS)
cpc_ids2023["cpc_version"] = 2023

cpc_ids2022 = pd.read_csv("CPC_data/cpc_titles_2022.csv")
if {"code", "title"}.issubset(cpc_ids2022.columns):
    cpc_ids2022["cpc_version"] = 2022
    cpc_frames = [cpc_ids2023, cpc_ids2022]
else:
    # The saved outputs of this notebook show a
    # "version https://git-lfs.github.com/spec/v1" column and an all-NaN row in
    # cpc_ids, i.e. the CSV was an unfetched Git LFS pointer. Such a file has
    # no usable rows, so leave it out instead of polluting the table.
    warnings.warn(
        "CPC_data/cpc_titles_2022.csv has no 'code'/'title' columns "
        f"(found {list(cpc_ids2022.columns)}); skipping the 2022 titles.",
        stacklevel=2,
    )
    cpc_frames = [cpc_ids2023]

# 2023 rows come first, so they win when both versions define the same symbol.
cpc_ids = (
    pd.concat(cpc_frames, ignore_index=True)
    .rename(columns={"code": "cpc_id", "title": "cpc_name"})
    .drop_duplicates(subset="cpc_id")
)

# %%
# Coverage check: which symbols used by the applications have no title entry?
appln_cpc["cpc_id"] = appln_cpc["cpc_class_symbol"].str.replace(" ", "")
appln_cpc_tax = appln_cpc.merge(cpc_ids, on="cpc_id", how="left")

missing_ids = appln_cpc_tax.loc[appln_cpc_tax["cpc_name"].isna(), "cpc_id"].unique()
print(len(missing_ids), "cpc_ids not found")
print(len(missing_ids) / len(appln_cpc_tax["cpc_id"].unique()) * 100, "% lost")

# %%
# Symbol (spaces removed) -> title lookup used by cpc_classifier.
cpc_dict = dict(zip(cpc_ids.cpc_id.str.replace(" ", ""), cpc_ids.cpc_name))
# %%
def cpc_classifier(id_text, lookup=None):
    """Collect every titled prefix of a CPC symbol, shortest first.

    Spaces are removed from ``id_text`` and each leading substring of the
    result is looked up in the title table; prefixes that have a truthy
    title are returned in order of increasing length.

    Parameters
    ----------
    id_text : str
        CPC symbol, with or without embedded spaces. Anything that is not
        a string (e.g. ``None`` or a float NaN) yields an empty list.
    lookup : mapping, optional
        Symbol -> title table. When omitted, the notebook-level
        ``cpc_dict`` is used, which must then already be defined.

    Returns
    -------
    list of tuple
        ``(prefix, title)`` pairs.

    Example
    -------
    In the saved run of this notebook ``cpc_classifier("A01B1/065")``
    returned entries for ``A``, ``A01``, ``A01B``, ``A01B1/06`` and
    ``A01B1/065``.
    """
    if not isinstance(id_text, str):
        return []

    table = cpc_dict if lookup is None else lookup
    symbol = id_text.replace(" ", "")

    taxonomy = []
    # Start at 1: the empty prefix can never be a CPC symbol.
    for end in range(1, len(symbol) + 1):
        prefix = symbol[:end]
        title = table.get(prefix)
        if title:
            taxonomy.append((prefix, title))
    return taxonomy
# %%
# Give every CPC entry the list of (symbol, title) ancestors produced by
# cpc_classifier. Missing ids are replaced by "" first so the classifier
# always receives a string.
cpc_ids["cpc_taxonomy"] = cpc_ids["cpc_id"].fillna("").apply(cpc_classifier)
cpc_ids.head()

# %%
# Redo the join now that cpc_ids carries the taxonomy column, and report how
# many distinct application symbols still have no title.
appln_cpc["cpc_id"] = appln_cpc["cpc_class_symbol"].str.replace(" ", "")
appln_cpc_tax = pd.merge(appln_cpc, cpc_ids, how="left", on="cpc_id")

untitled_ids = appln_cpc_tax.loc[appln_cpc_tax["cpc_name"].isna(), "cpc_id"]
# dropna=False keeps the count identical to len(Series.unique()).
n_untitled = untitled_ids.nunique(dropna=False)
n_symbols = appln_cpc_tax["cpc_id"].nunique(dropna=False)
print(n_untitled, "cpc_ids not found")
print(n_untitled / n_symbols * 100, "% lost")

# %%
# appln_cpc_tax[appln_cpc_tax["cpc_name"].isna()]["cpc_id"].unique()

# %% [markdown]
# ## 'AI/Big Data' keywords
# %%
# Built with os.path.join so the relative path also resolves on non-Windows
# systems (the previous literal used backslashes).
keywords_oklist_source = os.path.join("..", "WOS", "kw_token_ranked_bibliometrics_okset.xlsx")
keyword_df = pd.read_excel(keywords_oklist_source)

ACCEPTED_PRIORITIES = ["High", "Medium"]
EXCLUDED_TOKENS = ["classifier", "clustering", "loss function", "classification"]
EXTRA_TOKENS = ["relational database"]

raw_tokens = (
    keyword_df.loc[keyword_df["u_Priority (done)"].isin(ACCEPTED_PRIORITIES), "kw_token"]
    .dropna()  # a missing token would otherwise fail the `"?" in token` test below
    .str.replace('"', "", regex=False)
    .tolist()
)

# Tokens containing "?" are dropped; "*" and "$" are stripped. The length
# filter is applied to the token as stored, i.e. before those characters
# are removed.
clean_tokens = [
    token.replace("*", "").replace("$", "").lower()
    for token in raw_tokens
    if "?" not in token and len(token) > 3
]
kept_tokens = [token for token in clean_tokens if token not in EXCLUDED_TOKENS] + EXTRA_TOKENS

# dict.fromkeys removes repeated alternatives while keeping first-seen order;
# the set of titles matched by the pattern is unchanged.
# NOTE(review): tokens are joined unescaped and used as a regular expression,
# and tokens such as "object detection not brain" (see the saved output) keep
# their "not ..." clause as literal text, so they only match titles containing
# that whole phrase -- decide whether the clause should be stripped or turned
# into an exclusion.
keywords = "|".join(dict.fromkeys(kept_tokens))
keywords

# %%
# Exploratory search with a hand-written pattern. `scope_df` is reassigned by
# the keyword-driven search in the next cell.
# NOTE(review): in a regex "intel*" means "inte" followed by any number of
# "l", and " big data" requires a leading space -- confirm both are intended.
dummy_hits = cpc_ids["cpc_name"].str.lower().str.contains("machine learn|neural network|deep learn|deep network|artificial intel*| big data|database|recommender system|computer vision|image processing|language model|language processing|fuzzy logic|principal component|image classification|video classification", regex=True, na=False)
scope_df = cpc_ids[dummy_hits]
scope_df
# %%
# CPC entries whose lower-cased title matches the keyword pattern; entries
# without a title count as non-matching (na=False).
keyword_hits = cpc_ids["cpc_name"].str.lower().str.contains(keywords, regex=True, na=False)
scope_df = cpc_ids.loc[keyword_hits]
scope_df

# %%
# Flag the matched symbols on the full table and write it next to the
# scoped extracts.
scope_ids = scope_df["cpc_id"].unique()
cpc_ids["data_scope"] = cpc_ids["cpc_id"].isin(scope_ids)
cpc_ids.to_csv(path_or_buf=f"{outdir}/cpc_defs.csv", index=False)

# %%
outdir

# %%
cpc_ids
# %%
# Attach the title columns to each application row. The join is pandas'
# default inner join on appln_id, so only ids present in both tables remain.
appln_data = pd.merge(appln, appln_title, how="inner", on="appln_id")
appln_data.head()