ADD: PATSTAT CPC taxonomy and AI/ML/DB related groupsearch based on WOS keywords

main
radvanyimome 2 years ago
parent fe67cff886
commit 444158b9f0

File diff suppressed because one or more lines are too long

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 60,
"id": "a8be6839",
"metadata": {},
"outputs": [],
@ -19,7 +19,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 61,
"id": "211ba466",
"metadata": {},
"outputs": [],
@ -39,7 +39,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 62,
"id": "f878b151",
"metadata": {},
"outputs": [],
@ -59,7 +59,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 63,
"id": "95ea20da",
"metadata": {},
"outputs": [],
@ -103,7 +103,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 63,
"id": "907d9c3e",
"metadata": {},
"outputs": [],
@ -111,7 +111,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 64,
"id": "1be8971a",
"metadata": {},
"outputs": [
@ -134,7 +134,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 65,
"id": "b1274c34",
"metadata": {},
"outputs": [],
@ -145,7 +145,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 66,
"id": "2a7e39ee",
"metadata": {},
"outputs": [],
@ -164,7 +164,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 67,
"id": "e31a013f",
"metadata": {},
"outputs": [
@ -172,7 +172,7 @@
"data": {
"text/plain": "[('A', 'HUMAN NECESSITIES'),\n ('A01',\n 'AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTING; TRAPPING; FISHING'),\n ('A01B',\n 'SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS, DETAILS, OR ACCESSORIES OF AGRICULTURAL MACHINES OR IMPLEMENTS, IN GENERAL (making or covering furrows or holes for sowing, planting, or manuring A01C5/00; soil working for engineering purposes E01, E02, E21; {measuring areas for agricultural purposes G01B})'),\n ('A01B1/06',\n 'Hoes; Hand cultivators {(rakes A01D7/00; forks A01D9/00; picks B25D)}'),\n ('A01B1/065', '{powered}')]"
},
"execution_count": 10,
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
@ -183,7 +183,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 68,
"id": "f09a616c",
"metadata": {},
"outputs": [
@ -192,7 +192,7 @@
"text/plain": " cpc_id cpc_name section class \n0 A HUMAN NECESSITIES A None \\\n1 A01 AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI... A 01 \n2 A01B SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS... A 01 \n3 A01B1/00 Hand tools (edge trimmers for lawns A01G3/06 ... A 01 \n4 A01B1/02 Spades; Shovels {(hand-operated dredgers E02F3... A 01 \n\n subclass group main_group cpc_version \n0 None None None 2023 \\\n1 None None None 2023 \n2 B None None 2023 \n3 B 1 00 2023 \n4 B 1 02 2023 \n\n version https://git-lfs.github.com/spec/v1 \n0 NaN \\\n1 NaN \n2 NaN \n3 NaN \n4 NaN \n\n cpc_taxonomy \n0 [(A, HUMAN NECESSITIES)] \n1 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... \n2 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... \n3 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... \n4 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>cpc_id</th>\n <th>cpc_name</th>\n <th>section</th>\n <th>class</th>\n <th>subclass</th>\n <th>group</th>\n <th>main_group</th>\n <th>cpc_version</th>\n <th>version https://git-lfs.github.com/spec/v1</th>\n <th>cpc_taxonomy</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>A</td>\n <td>HUMAN NECESSITIES</td>\n <td>A</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES)]</td>\n </tr>\n <tr>\n <th>1</th>\n <td>A01</td>\n <td>AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI...</td>\n <td>A</td>\n <td>01</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n </tr>\n <tr>\n <th>2</th>\n <td>A01B</td>\n <td>SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS...</td>\n <td>A</td>\n <td>01</td>\n <td>B</td>\n <td>None</td>\n <td>None</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n </tr>\n <tr>\n <th>3</th>\n <td>A01B1/00</td>\n <td>Hand tools (edge trimmers for lawns A01G3/06 ...</td>\n <td>A</td>\n <td>01</td>\n <td>B</td>\n <td>1</td>\n <td>00</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n </tr>\n <tr>\n <th>4</th>\n <td>A01B1/02</td>\n <td>Spades; Shovels {(hand-operated dredgers E02F3...</td>\n <td>A</td>\n <td>01</td>\n <td>B</td>\n <td>1</td>\n <td>02</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 12,
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
@ -204,7 +204,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 69,
"id": "f3fa8bf3",
"metadata": {},
"outputs": [
@ -226,7 +226,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 70,
"id": "58701721",
"metadata": {},
"outputs": [],
@ -244,13 +244,13 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 71,
"outputs": [
{
"data": {
"text/plain": "'neural network|machine learn|deep learn|remote sensing|convolutional neural|internet of things|feature extraction|genetic algorithm|big data|artificial intelligence|data driven|support vector machine|logistic regression not p=|optimization algorithm|principal component analysis|artificial neural network|swarm optimization|regularization|linear regression not p=|optimization algorithm|random forest|cloud computing|reinforcement learning|computer vision|kalman filter|image processing|data mining|evolutionary algorithm|edge computing|supervised learning|computational modeling|pattern recognition|image classification|long short-term memor|robotics|image segmentation|convex optimization|covariance matri|attention mechanism|markov chain|object detection not brain|clustering algorithm|recurrent neural network|data augmentation|transfer learning|adversarial network|decision tree|multi agent system|fuzzy set|convolutional network|image reconstruction|data analytic|smart grid|autoencoder|fuzzy logic|radial basis function|bayesian network|dimensionality reduction|face recognition not brain|gaussian process|anomaly detection|k-nearest neighbor|natural language processing|monte carlo method|large dataset|gradient descent|support vector regression|extreme learning machine|perceptron|model selection|ensemble learning|representation learning|recommender system|target tracking|singular value decomposition|feature learning|smart city|sentiment analy|markov decision process|k-means clustering|independent component analysis|brain computer interface|human-computer interaction|markov chain monte carlo|hierarchical clustering|semantic web|semi-supervised learning|human-robot interact|knowledge graph|speech recognition not brain|ensemble model|fog computing|mapreduce|evolutionary computation|data science|text mining|generative model|active learning|swarm intelligence|multi-task learning|language model|collaborative filtering|backpropagation|machine vision|computer-aided diagnosis|gated recurrent unit|lagrange multiplier|expert system|learning rate|hadoop|markov process|nonlinear optimization|learning system|self-organizing map|smart manufacturing|smart home|few shot learning|few-shot learning|meta-learning|meta learning|adversarial training|zero-shot learning|word embedding|expectation maximization algorithm|stochastic gradient descent|ridge regression|deep belief network|non-negative matrix factorization|affective computing|latent dirichlet allocation|kernel method|kernel learning|feature engineering|variational inference|image representation|manifold learning|adversarial example|knowledge distillation|time series forecast|variational autoencoder|lasso regression|smart energy|dbscan|multi-label classification|intelligent robot|ubiquitous computing|gaussian mixture models|smart technolog|boltzmann machine|smart buildings|predictive analytic|pervasive computing|smart agriculture|capsule network|human-in-the-loop|intelligent agent|ai applications|word vector|transformer model|facial recognition|unstructured data|restricted boltzmann machine|albert|lifelong learning|autonomous agents|chatbot|cholesky decomposition|nosql|nosql|explainable ai|seq2seq|probabilistic graphical model|qr decomposition|unsupervised deep learning|data warehouse|quantum machine learning|continual learning|smart environment|multimodal learning|smart health|artificial immune system|swarm robotics|kernel machine|latent factor model|eigendecomposition|adversarial machine|adversarial machine learning|smart mobility|sequence-to-sequence model|eigen decomposition|adversarial robustness|smart parking|adversarial neural|roberta|bidirectional encoder representations from transformer|locally linear embedding|hebbian learning|one-shot learning|multimodal representation|smart tourism|entity extraction|adaptive moment estimation|ontology learning|topic modeling|relational database'"
},
"execution_count": 49,
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
@ -269,19 +269,29 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 72,
"outputs": [
{
"data": {
"text/plain": " cpc_id cpc_name \n12725 A61B1/000096 {using artificial intelligence} \\\n13764 A61B5/7264 {Classification of physiological signals or da... \n13897 A61B6/52 {Devices using data or image processing specia... \n14016 A61B8/52 {Devices using data or image processing specia... \n15252 A61B2018/0069 {using fuzzy logic} \n... ... ... \n250685 Y10S707/99946 Object-oriented database structure network \n250686 Y10S707/99947 Object-oriented database structure reference \n250687 Y10S707/99948 Application of database or data structure, e.g... \n250688 Y10S707/99951 File or database maintenance \n250703 Y10S715/968 interface for database querying and retrieval \n\n section class subclass group main_group cpc_version \n12725 A 61 B 1 000096 2023 \\\n13764 A 61 B 5 7264 2023 \n13897 A 61 B 6 52 2023 \n14016 A 61 B 8 52 2023 \n15252 A 61 B 2018 0069 2023 \n... ... ... ... ... ... ... \n250685 Y 10 S 707 99946 2023 \n250686 Y 10 S 707 99947 2023 \n250687 Y 10 S 707 99948 2023 \n250688 Y 10 S 707 99951 2023 \n250703 Y 10 S 715 968 2023 \n\n version https://git-lfs.github.com/spec/v1 \n12725 NaN \\\n13764 NaN \n13897 NaN \n14016 NaN \n15252 NaN \n... ... \n250685 NaN \n250686 NaN \n250687 NaN \n250688 NaN \n250703 NaN \n\n cpc_taxonomy \n12725 [(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... \n13764 [(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... \n13897 [(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... \n14016 [(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... \n15252 [(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... \n... ... \n250685 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... \n250686 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... \n250687 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... \n250688 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... \n250703 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... \n\n[317 rows x 10 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>cpc_id</th>\n <th>cpc_name</th>\n <th>section</th>\n <th>class</th>\n <th>subclass</th>\n <th>group</th>\n <th>main_group</th>\n <th>cpc_version</th>\n <th>version https://git-lfs.github.com/spec/v1</th>\n <th>cpc_taxonomy</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>12725</th>\n <td>A61B1/000096</td>\n <td>{using artificial intelligence}</td>\n <td>A</td>\n <td>61</td>\n <td>B</td>\n <td>1</td>\n <td>000096</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE...</td>\n </tr>\n <tr>\n <th>13764</th>\n <td>A61B5/7264</td>\n <td>{Classification of physiological signals or da...</td>\n <td>A</td>\n <td>61</td>\n <td>B</td>\n <td>5</td>\n <td>7264</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE...</td>\n </tr>\n <tr>\n <th>13897</th>\n <td>A61B6/52</td>\n <td>{Devices using data or image processing specia...</td>\n <td>A</td>\n <td>61</td>\n <td>B</td>\n <td>6</td>\n <td>52</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE...</td>\n </tr>\n <tr>\n <th>14016</th>\n <td>A61B8/52</td>\n <td>{Devices using data or image processing specia...</td>\n <td>A</td>\n <td>61</td>\n <td>B</td>\n <td>8</td>\n <td>52</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE...</td>\n </tr>\n <tr>\n <th>15252</th>\n <td>A61B2018/0069</td>\n <td>{using fuzzy logic}</td>\n <td>A</td>\n <td>61</td>\n <td>B</td>\n <td>2018</td>\n <td>0069</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE...</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>250685</th>\n <td>Y10S707/99946</td>\n <td>Object-oriented database structure network</td>\n <td>Y</td>\n <td>10</td>\n <td>S</td>\n <td>707</td>\n <td>99946</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE...</td>\n </tr>\n <tr>\n <th>250686</th>\n <td>Y10S707/99947</td>\n <td>Object-oriented database structure reference</td>\n <td>Y</td>\n <td>10</td>\n <td>S</td>\n <td>707</td>\n <td>99947</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE...</td>\n </tr>\n <tr>\n <th>250687</th>\n <td>Y10S707/99948</td>\n <td>Application of database or data structure, e.g...</td>\n <td>Y</td>\n <td>10</td>\n <td>S</td>\n <td>707</td>\n <td>99948</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE...</td>\n </tr>\n <tr>\n <th>250688</th>\n <td>Y10S707/99951</td>\n <td>File or database maintenance</td>\n <td>Y</td>\n <td>10</td>\n <td>S</td>\n <td>707</td>\n <td>99951</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE...</td>\n </tr>\n <tr>\n <th>250703</th>\n <td>Y10S715/968</td>\n <td>interface for database querying and retrieval</td>\n <td>Y</td>\n <td>10</td>\n <td>S</td>\n <td>715</td>\n <td>968</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE...</td>\n </tr>\n </tbody>\n</table>\n<p>317 rows × 10 columns</p>\n</div>"
},
"execution_count": 54,
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#dummy search\n",
"scope_df = cpc_ids[cpc_ids[\"cpc_name\"].str.lower().str.contains(\"machine learn|neural network|deep learn|deep network|artificial intel*| big data|database|recommender system|computer vision|image processing|language model|language processing|fuzzy logic|principal component|image classification|video classification\", regex=True, na=False)]\n",
"scope_df"
],
@ -291,7 +301,29 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 73,
"outputs": [
{
"data": {
"text/plain": " cpc_id cpc_name \n12725 A61B1/000096 {using artificial intelligence} \\\n13746 A61B5/72 {Signal processing specially adapted for physi... \n13764 A61B5/7264 {Classification of physiological signals or da... \n13897 A61B6/52 {Devices using data or image processing specia... \n14016 A61B8/52 {Devices using data or image processing specia... \n... ... ... \n246159 Y10S128/924 using artificial intelligence \n246160 Y10S128/925 Neural network \n248454 Y10S323/909 Remote sensing \n250570 Y10S706/00 Data processing: artificial intelligence \n250571 Y10S706/90 Fuzzy logic \n\n section class subclass group main_group cpc_version \n12725 A 61 B 1 000096 2023 \\\n13746 A 61 B 5 72 2023 \n13764 A 61 B 5 7264 2023 \n13897 A 61 B 6 52 2023 \n14016 A 61 B 8 52 2023 \n... ... ... ... ... ... ... \n246159 Y 10 S 128 924 2023 \n246160 Y 10 S 128 925 2023 \n248454 Y 10 S 323 909 2023 \n250570 Y 10 S 706 00 2023 \n250571 Y 10 S 706 90 2023 \n\n version https://git-lfs.github.com/spec/v1 \n12725 NaN \\\n13746 NaN \n13764 NaN \n13897 NaN \n14016 NaN \n... ... \n246159 NaN \n246160 NaN \n248454 NaN \n250570 NaN \n250571 NaN \n\n cpc_taxonomy \n12725 [(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... \n13746 [(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... \n13764 [(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... \n13897 [(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... \n14016 [(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... \n... ... \n246159 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... \n246160 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... \n248454 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... \n250570 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... \n250571 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... \n\n[358 rows x 10 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>cpc_id</th>\n <th>cpc_name</th>\n <th>section</th>\n <th>class</th>\n <th>subclass</th>\n <th>group</th>\n <th>main_group</th>\n <th>cpc_version</th>\n <th>version https://git-lfs.github.com/spec/v1</th>\n <th>cpc_taxonomy</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>12725</th>\n <td>A61B1/000096</td>\n <td>{using artificial intelligence}</td>\n <td>A</td>\n <td>61</td>\n <td>B</td>\n <td>1</td>\n <td>000096</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE...</td>\n </tr>\n <tr>\n <th>13746</th>\n <td>A61B5/72</td>\n <td>{Signal processing specially adapted for physi...</td>\n <td>A</td>\n <td>61</td>\n <td>B</td>\n <td>5</td>\n <td>72</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE...</td>\n </tr>\n <tr>\n <th>13764</th>\n <td>A61B5/7264</td>\n <td>{Classification of physiological signals or da...</td>\n <td>A</td>\n <td>61</td>\n <td>B</td>\n <td>5</td>\n <td>7264</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE...</td>\n </tr>\n <tr>\n <th>13897</th>\n <td>A61B6/52</td>\n <td>{Devices using data or image processing specia...</td>\n <td>A</td>\n <td>61</td>\n <td>B</td>\n <td>6</td>\n <td>52</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE...</td>\n </tr>\n <tr>\n <th>14016</th>\n <td>A61B8/52</td>\n <td>{Devices using data or image processing specia...</td>\n <td>A</td>\n <td>61</td>\n <td>B</td>\n <td>8</td>\n <td>52</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE...</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>246159</th>\n <td>Y10S128/924</td>\n <td>using artificial intelligence</td>\n <td>Y</td>\n <td>10</td>\n <td>S</td>\n <td>128</td>\n <td>924</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE...</td>\n </tr>\n <tr>\n <th>246160</th>\n <td>Y10S128/925</td>\n <td>Neural network</td>\n <td>Y</td>\n <td>10</td>\n <td>S</td>\n <td>128</td>\n <td>925</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE...</td>\n </tr>\n <tr>\n <th>248454</th>\n <td>Y10S323/909</td>\n <td>Remote sensing</td>\n <td>Y</td>\n <td>10</td>\n <td>S</td>\n <td>323</td>\n <td>909</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE...</td>\n </tr>\n <tr>\n <th>250570</th>\n <td>Y10S706/00</td>\n <td>Data processing: artificial intelligence</td>\n <td>Y</td>\n <td>10</td>\n <td>S</td>\n <td>706</td>\n <td>00</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE...</td>\n </tr>\n <tr>\n <th>250571</th>\n <td>Y10S706/90</td>\n <td>Fuzzy logic</td>\n <td>Y</td>\n <td>10</td>\n <td>S</td>\n <td>706</td>\n <td>90</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE...</td>\n </tr>\n </tbody>\n</table>\n<p>358 rows × 10 columns</p>\n</div>"
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scope_df = cpc_ids[cpc_ids[\"cpc_name\"].str.lower().str.contains(keywords, regex=True, na=False)]\n",
"scope_df"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 74,
"id": "6c3baa5b",
"metadata": {},
"outputs": [],
@ -303,13 +335,13 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 75,
"outputs": [
{
"data": {
"text/plain": "'WESTERN_CH_scope'"
},
"execution_count": 59,
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
@ -323,14 +355,14 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 76,
"outputs": [
{
"data": {
"text/plain": " cpc_id cpc_name \n0 A HUMAN NECESSITIES \\\n1 A01 AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI... \n2 A01B SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS... \n3 A01B1/00 Hand tools (edge trimmers for lawns A01G3/06 ... \n4 A01B1/02 Spades; Shovels {(hand-operated dredgers E02F3... \n... ... ... \n260486 Y10T483/1873 Indexing matrix \n260487 Y10T483/1882 Rotary disc \n260488 Y10T483/1891 Chain or belt \n260489 Y10T483/19 Miscellaneous \n260490 NaN NaN \n\n section class subclass group main_group cpc_version \n0 A None None None None 2023 \\\n1 A 01 None None None 2023 \n2 A 01 B None None 2023 \n3 A 01 B 1 00 2023 \n4 A 01 B 1 02 2023 \n... ... ... ... ... ... ... \n260486 Y 10 T 483 1873 2023 \n260487 Y 10 T 483 1882 2023 \n260488 Y 10 T 483 1891 2023 \n260489 Y 10 T 483 19 2023 \n260490 NaN NaN NaN NaN NaN 2022 \n\n version https://git-lfs.github.com/spec/v1 \n0 NaN \\\n1 NaN \n2 NaN \n3 NaN \n4 NaN \n... ... \n260486 NaN \n260487 NaN \n260488 NaN \n260489 NaN \n260490 oid sha256:f138d6bdf2939ba576b96b633d81366123b... \n\n cpc_taxonomy data_scope \n0 [(A, HUMAN NECESSITIES)] False \n1 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... False \n2 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... False \n3 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... False \n4 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... False \n... ... ... \n260486 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... False \n260487 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... False \n260488 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... False \n260489 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... False \n260490 [] False \n\n[260491 rows x 11 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>cpc_id</th>\n <th>cpc_name</th>\n <th>section</th>\n <th>class</th>\n <th>subclass</th>\n <th>group</th>\n <th>main_group</th>\n <th>cpc_version</th>\n <th>version https://git-lfs.github.com/spec/v1</th>\n <th>cpc_taxonomy</th>\n <th>data_scope</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>A</td>\n <td>HUMAN NECESSITIES</td>\n <td>A</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES)]</td>\n <td>False</td>\n </tr>\n <tr>\n <th>1</th>\n <td>A01</td>\n <td>AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI...</td>\n <td>A</td>\n <td>01</td>\n <td>None</td>\n <td>None</td>\n <td>None</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n <td>False</td>\n </tr>\n <tr>\n <th>2</th>\n <td>A01B</td>\n <td>SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS...</td>\n <td>A</td>\n <td>01</td>\n <td>B</td>\n <td>None</td>\n <td>None</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n <td>False</td>\n </tr>\n <tr>\n <th>3</th>\n <td>A01B1/00</td>\n <td>Hand tools (edge trimmers for lawns A01G3/06 ...</td>\n <td>A</td>\n <td>01</td>\n <td>B</td>\n <td>1</td>\n <td>00</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n <td>False</td>\n </tr>\n <tr>\n <th>4</th>\n <td>A01B1/02</td>\n <td>Spades; Shovels {(hand-operated dredgers E02F3...</td>\n <td>A</td>\n <td>01</td>\n <td>B</td>\n <td>1</td>\n <td>02</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n <td>False</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>260486</th>\n <td>Y10T483/1873</td>\n <td>Indexing matrix</td>\n <td>Y</td>\n <td>10</td>\n <td>T</td>\n <td>483</td>\n <td>1873</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE...</td>\n <td>False</td>\n </tr>\n <tr>\n <th>260487</th>\n <td>Y10T483/1882</td>\n <td>Rotary disc</td>\n <td>Y</td>\n <td>10</td>\n <td>T</td>\n <td>483</td>\n <td>1882</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE...</td>\n <td>False</td>\n </tr>\n <tr>\n <th>260488</th>\n <td>Y10T483/1891</td>\n <td>Chain or belt</td>\n <td>Y</td>\n <td>10</td>\n <td>T</td>\n <td>483</td>\n <td>1891</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE...</td>\n <td>False</td>\n </tr>\n <tr>\n <th>260489</th>\n <td>Y10T483/19</td>\n <td>Miscellaneous</td>\n <td>Y</td>\n <td>10</td>\n <td>T</td>\n <td>483</td>\n <td>19</td>\n <td>2023</td>\n <td>NaN</td>\n <td>[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE...</td>\n <td>False</td>\n </tr>\n <tr>\n <th>260490</th>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>2022</td>\n <td>oid sha256:f138d6bdf2939ba576b96b633d81366123b...</td>\n <td>[]</td>\n <td>False</td>\n </tr>\n </tbody>\n</table>\n<p>260491 rows × 11 columns</p>\n</div>"
},
"execution_count": 56,
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}

Loading…
Cancel
Save