diff --git a/.gitignore b/.gitignore index 435d880..0cf5551 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -/PATSTAT/EU_CH_scope/cpc_defs.csv /misc_code/ /PATSTAT/appln_data.xlsx /PATSTAT/person_data.xlsx diff --git a/PATSTAT/WESTERN_CH_scope/cpc_defs.csv b/PATSTAT/WESTERN_CH_scope/cpc_defs.csv new file mode 100644 index 0000000..f6dc1fe --- /dev/null +++ b/PATSTAT/WESTERN_CH_scope/cpc_defs.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab0f4acc10a622f8a162a9f1f2aaf39b06799f65feab44412aed2dc2d6f27cf8 +size 159305379 diff --git a/PATSTAT/WESTERN_CH_scope/scope_cpc_defs.csv b/PATSTAT/WESTERN_CH_scope/scope_cpc_defs.csv new file mode 100644 index 0000000..a062eab --- /dev/null +++ b/PATSTAT/WESTERN_CH_scope/scope_cpc_defs.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76479394378a76774399904f0aa8104a2fdf0d2ec39d22a928a0c07eb80e6e0c +size 209293 diff --git a/PATSTAT/patstat_cpc_parse.ipynb b/PATSTAT/patstat_cpc_parse.ipynb index 7302714..057f87c 100644 --- a/PATSTAT/patstat_cpc_parse.ipynb +++ b/PATSTAT/patstat_cpc_parse.ipynb @@ -244,30 +244,99 @@ }, { "cell_type": "code", - "execution_count": 22, - "id": "6c3baa5b", - "metadata": {}, + "execution_count": 49, + "outputs": [ + { + "data": { + "text/plain": "'neural network|machine learn|deep learn|remote sensing|convolutional neural|internet of things|feature extraction|genetic algorithm|big data|artificial intelligence|data driven|support vector machine|logistic regression not p=|optimization algorithm|principal component analysis|artificial neural network|swarm optimization|regularization|linear regression not p=|optimization algorithm|random forest|cloud computing|reinforcement learning|computer vision|kalman filter|image processing|data mining|evolutionary algorithm|edge computing|supervised learning|computational modeling|pattern recognition|image classification|long short-term memor|robotics|image segmentation|convex optimization|covariance matri|attention mechanism|markov chain|object detection not brain|clustering algorithm|recurrent neural network|data augmentation|transfer learning|adversarial network|decision tree|multi agent system|fuzzy set|convolutional network|image reconstruction|data analytic|smart grid|autoencoder|fuzzy logic|radial basis function|bayesian network|dimensionality reduction|face recognition not brain|gaussian process|anomaly detection|k-nearest neighbor|natural language processing|monte carlo method|large dataset|gradient descent|support vector regression|extreme learning machine|perceptron|model selection|ensemble learning|representation learning|recommender system|target tracking|singular value decomposition|feature learning|smart city|sentiment analy|markov decision process|k-means clustering|independent component analysis|brain computer interface|human-computer interaction|markov chain monte carlo|hierarchical clustering|semantic web|semi-supervised learning|human-robot interact|knowledge graph|speech recognition not brain|ensemble model|fog computing|mapreduce|evolutionary computation|data science|text mining|generative model|active learning|swarm intelligence|multi-task learning|language model|collaborative filtering|backpropagation|machine vision|computer-aided diagnosis|gated recurrent unit|lagrange multiplier|expert system|learning rate|hadoop|markov process|nonlinear optimization|learning system|self-organizing map|smart manufacturing|smart home|few shot learning|few-shot learning|meta-learning|meta learning|adversarial training|zero-shot learning|word embedding|expectation maximization algorithm|stochastic gradient descent|ridge regression|deep belief network|non-negative matrix factorization|affective computing|latent dirichlet allocation|kernel method|kernel learning|feature engineering|variational inference|image representation|manifold learning|adversarial example|knowledge distillation|time series forecast|variational autoencoder|lasso regression|smart energy|dbscan|multi-label classification|intelligent robot|ubiquitous computing|gaussian mixture models|smart technolog|boltzmann machine|smart buildings|predictive analytic|pervasive computing|smart agriculture|capsule network|human-in-the-loop|intelligent agent|ai applications|word vector|transformer model|facial recognition|unstructured data|restricted boltzmann machine|albert|lifelong learning|autonomous agents|chatbot|cholesky decomposition|nosql|nosql|explainable ai|seq2seq|probabilistic graphical model|qr decomposition|unsupervised deep learning|data warehouse|quantum machine learning|continual learning|smart environment|multimodal learning|smart health|artificial immune system|swarm robotics|kernel machine|latent factor model|eigendecomposition|adversarial machine|adversarial machine learning|smart mobility|sequence-to-sequence model|eigen decomposition|adversarial robustness|smart parking|adversarial neural|roberta|bidirectional encoder representations from transformer|locally linear embedding|hebbian learning|one-shot learning|multimodal representation|smart tourism|entity extraction|adaptive moment estimation|ontology learning|topic modeling|relational database'" + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "keywords_oklist_source= r'..\\WOS\\kw_token_ranked_bibliometrics_okset.xlsx'\n", + "keyword_df = pd.read_excel(keywords_oklist_source)\n", + "keywords = keyword_df[keyword_df[\"u_Priority (done)\"].isin([\"High\",\"Medium\"])][\"kw_token\"].str.replace('\"','').tolist()\n", + "keywords = [kw.replace(\"*\",\"\").replace(\"$\",\"\").lower() for kw in keywords if (\"?\" not in kw and len(kw)>3)]\n", + "keywords = \"|\".join([kw for kw in keywords if kw not in [\"classifier\",\"clustering\",\"loss function\",'classification']]+[\"relational database\"])\n", + "keywords" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 54, "outputs": [ { "data": { "text/plain": " cpc_id cpc_name \n12725 A61B1/000096 {using artificial intelligence} \\\n13764 A61B5/7264 {Classification of physiological signals or da... \n13897 A61B6/52 {Devices using data or image processing specia... \n14016 A61B8/52 {Devices using data or image processing specia... \n15252 A61B2018/0069 {using fuzzy logic} \n... ... ... \n250685 Y10S707/99946 Object-oriented database structure network \n250686 Y10S707/99947 Object-oriented database structure reference \n250687 Y10S707/99948 Application of database or data structure, e.g... \n250688 Y10S707/99951 File or database maintenance \n250703 Y10S715/968 interface for database querying and retrieval \n\n section class subclass group main_group cpc_version \n12725 A 61 B 1 000096 2023 \\\n13764 A 61 B 5 7264 2023 \n13897 A 61 B 6 52 2023 \n14016 A 61 B 8 52 2023 \n15252 A 61 B 2018 0069 2023 \n... ... ... ... ... ... ... \n250685 Y 10 S 707 99946 2023 \n250686 Y 10 S 707 99947 2023 \n250687 Y 10 S 707 99948 2023 \n250688 Y 10 S 707 99951 2023 \n250703 Y 10 S 715 968 2023 \n\n version https://git-lfs.github.com/spec/v1 \n12725 NaN \\\n13764 NaN \n13897 NaN \n14016 NaN \n15252 NaN \n... ... \n250685 NaN \n250686 NaN \n250687 NaN \n250688 NaN \n250703 NaN \n\n cpc_taxonomy \n12725 [(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... \n13764 [(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... \n13897 [(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... \n14016 [(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... \n15252 [(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... \n... ... \n250685 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... \n250686 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... \n250687 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... \n250688 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... \n250703 [(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... \n\n[317 rows x 10 columns]", "text/html": "
\n | cpc_id | \ncpc_name | \nsection | \nclass | \nsubclass | \ngroup | \nmain_group | \ncpc_version | \nversion https://git-lfs.github.com/spec/v1 | \ncpc_taxonomy | \n
---|---|---|---|---|---|---|---|---|---|---|
12725 | \nA61B1/000096 | \n{using artificial intelligence} | \nA | \n61 | \nB | \n1 | \n000096 | \n2023 | \nNaN | \n[(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... | \n
13764 | \nA61B5/7264 | \n{Classification of physiological signals or da... | \nA | \n61 | \nB | \n5 | \n7264 | \n2023 | \nNaN | \n[(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... | \n
13897 | \nA61B6/52 | \n{Devices using data or image processing specia... | \nA | \n61 | \nB | \n6 | \n52 | \n2023 | \nNaN | \n[(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... | \n
14016 | \nA61B8/52 | \n{Devices using data or image processing specia... | \nA | \n61 | \nB | \n8 | \n52 | \n2023 | \nNaN | \n[(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... | \n
15252 | \nA61B2018/0069 | \n{using fuzzy logic} | \nA | \n61 | \nB | \n2018 | \n0069 | \n2023 | \nNaN | \n[(A, HUMAN NECESSITIES), (A61, MEDICAL OR VETE... | \n
... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
250685 | \nY10S707/99946 | \nObject-oriented database structure network | \nY | \n10 | \nS | \n707 | \n99946 | \n2023 | \nNaN | \n[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... | \n
250686 | \nY10S707/99947 | \nObject-oriented database structure reference | \nY | \n10 | \nS | \n707 | \n99947 | \n2023 | \nNaN | \n[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... | \n
250687 | \nY10S707/99948 | \nApplication of database or data structure, e.g... | \nY | \n10 | \nS | \n707 | \n99948 | \n2023 | \nNaN | \n[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... | \n
250688 | \nY10S707/99951 | \nFile or database maintenance | \nY | \n10 | \nS | \n707 | \n99951 | \n2023 | \nNaN | \n[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... | \n
250703 | \nY10S715/968 | \ninterface for database querying and retrieval | \nY | \n10 | \nS | \n715 | \n968 | \n2023 | \nNaN | \n[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... | \n
317 rows × 10 columns
\n\n | cpc_id | \ncpc_name | \nsection | \nclass | \nsubclass | \ngroup | \nmain_group | \ncpc_version | \nversion https://git-lfs.github.com/spec/v1 | \ncpc_taxonomy | \ndata_scope | \n
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \nA | \nHUMAN NECESSITIES | \nA | \nNone | \nNone | \nNone | \nNone | \n2023 | \nNaN | \n[(A, HUMAN NECESSITIES)] | \nFalse | \n
1 | \nA01 | \nAGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI... | \nA | \n01 | \nNone | \nNone | \nNone | \n2023 | \nNaN | \n[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... | \nFalse | \n
2 | \nA01B | \nSOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS... | \nA | \n01 | \nB | \nNone | \nNone | \n2023 | \nNaN | \n[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... | \nFalse | \n
3 | \nA01B1/00 | \nHand tools (edge trimmers for lawns A01G3/06 ... | \nA | \n01 | \nB | \n1 | \n00 | \n2023 | \nNaN | \n[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... | \nFalse | \n
4 | \nA01B1/02 | \nSpades; Shovels {(hand-operated dredgers E02F3... | \nA | \n01 | \nB | \n1 | \n02 | \n2023 | \nNaN | \n[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... | \nFalse | \n
... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
260486 | \nY10T483/1873 | \nIndexing matrix | \nY | \n10 | \nT | \n483 | \n1873 | \n2023 | \nNaN | \n[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... | \nFalse | \n
260487 | \nY10T483/1882 | \nRotary disc | \nY | \n10 | \nT | \n483 | \n1882 | \n2023 | \nNaN | \n[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... | \nFalse | \n
260488 | \nY10T483/1891 | \nChain or belt | \nY | \n10 | \nT | \n483 | \n1891 | \n2023 | \nNaN | \n[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... | \nFalse | \n
260489 | \nY10T483/19 | \nMiscellaneous | \nY | \n10 | \nT | \n483 | \n19 | \n2023 | \nNaN | \n[(Y, GENERAL TAGGING OF NEW TECHNOLOGICAL DEVE... | \nFalse | \n
260490 | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \nNaN | \n2022 | \noid sha256:f138d6bdf2939ba576b96b633d81366123b... | \n[] | \nFalse | \n
260491 rows × 11 columns
\n