You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
blabla/WOS/wos_extract/wos_query_generator_simples...

327 lines
17 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import pandas as pd\n",
"focal_countries_list = [\"Peoples R china\", \"Hong Kong\"]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"outputs": [],
"source": [
"country_mode = \"CU\"  # WoS field tag: CU = country/region, AD = address (AU is the author tag)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 16,
"outputs": [],
"source": [
"# (TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"computer vision\") OR TS=(\"pattern recognition\")) AND"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 17,
"outputs": [],
"source": [
"# Keyword list file, one directory above the working directory. os.path.join\n",
"# (os is imported in the first cell) picks the right separator; the former\n",
"# hard-coded backslash only resolved on Windows.\n",
"keywords_source = os.path.join('..', 'ai_scope_keywords.txt')\n",
"with open(keywords_source, 'r') as f:\n",
"    # Remove every line break so the whole file becomes a single string.\n",
"    keywords = f.read().replace('\\n', '')"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 18,
"outputs": [
{
"data": {
"text/plain": "'artificial intelligence*,machine* learn*,neural network*,big data*,deep learn*,pattern recognition,computer vision,image classification,reinforcement learning,support vector machine*,recommender system*,random forest,ensemble model*,image processing,generative network*,ai ethic*,natural language processing,clustering algorithm*,feature extraction,time series forecast*,anomaly detection,identity fraud detection,dimensionality reduction,feature elicitation,chatbot*,clustering,*supervised learning,convolutional network*,convolutional neural,adversarial network*,adversarial neural,adversarial machine,autoencoder*,gated recurrent unit*,perceptron*,feature learning,feature engineering,long short-term memor*,word embedding*,word vector*,gradient descent,k-nearest neighbor*,naive bayes,transfer learning,fuzzy logic,backpropagation,computational modeling,computational statistic*,intelligent agent*,expert system*,decision tree*,Bayesian network*,genetic algorithm*,swarm intelligence,cognitive computing,artificial neural network*,convolutional neural network*,recurrent neural network*,ensemble learning,data mining,artificial general intelligence,artificial consciousness,evolutionary algorithm*,self-organizing map*,deep reinforcement learning,adversarial machine learning,machine vision,neural-symbolic integration,probabilistic graphical model*,hybrid intelligent system*,machine creativity,explainable AI,interactive machine learning,artificial emotional intelligence,evolutionary computation*,human-in-the-loop,unsupervised deep learning,deep belief network*,quantum machine learning,artificial immune system*,swarm robotics,autonomous agents,machine ethics,collaborative filtering,content based filtering,pervasive computing,ubiquitous computing,human-computer interaction,cloud computing,Internet of Things,artificial cognition,computational creativity,sentiment analy*,robotics,boltzmann machine*,kernel machine*,Hopfield network*,Hebbian learning,latent factor 
model*,non-negative matrix factorization,independent component analysis,principal component analysis,data augmentation,image segmentation,autoregressive language model*,generative pre-trained transformer*,smart city,smart home,smart grid,smart health,smart manufacturing,smart agriculture,smart environment,smart energy,smart mobility,smart buildings,smart tourism,smart logistics,smart supply chain,smart retail,smart waste management,smart parking,smart governance,smart education,smart technolog*,smart diagnostic*,data* analytic*,hadoop*,mapreduce,map$reduce,large$ dataset*,data warehouse*,predictive analytic*,no$sql,nosql,no sql,unstructured data*,data science*'"
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"keywords"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 19,
"outputs": [
{
"data": {
"text/plain": "'\"artificial intelligence*\" OR \"machine* learn*\" OR \"neural network*\" OR \"big data*\" OR \"deep learn*\" OR \"pattern recognition\" OR \"computer vision\" OR \"image classification\" OR \"reinforcement learning\" OR \"support vector machine*\" OR \"recommender system*\" OR \"random forest\" OR \"ensemble model*\" OR \"image processing\" OR \"generative network*\" OR \"ai ethic*\" OR \"natural language processing\" OR \"clustering algorithm*\" OR \"feature extraction\" OR \"time series forecast*\" OR \"anomaly detection\" OR \"identity fraud detection\" OR \"dimensionality reduction\" OR \"feature elicitation\" OR \"chatbot*\" OR \"clustering\" OR \"*supervised learning\" OR \"convolutional network*\" OR \"convolutional neural\" OR \"adversarial network*\" OR \"adversarial neural\" OR \"adversarial machine\" OR \"autoencoder*\" OR \"gated recurrent unit*\" OR \"perceptron*\" OR \"feature learning\" OR \"feature engineering\" OR \"long short-term memor*\" OR \"word embedding*\" OR \"word vector*\" OR \"gradient descent\" OR \"k-nearest neighbor*\" OR \"naive bayes\" OR \"transfer learning\" OR \"fuzzy logic\" OR \"backpropagation\" OR \"computational modeling\" OR \"computational statistic*\" OR \"intelligent agent*\" OR \"expert system*\" OR \"decision tree*\" OR \"Bayesian network*\" OR \"genetic algorithm*\" OR \"swarm intelligence\" OR \"cognitive computing\" OR \"artificial neural network*\" OR \"convolutional neural network*\" OR \"recurrent neural network*\" OR \"ensemble learning\" OR \"data mining\" OR \"artificial general intelligence\" OR \"artificial consciousness\" OR \"evolutionary algorithm*\" OR \"self-organizing map*\" OR \"deep reinforcement learning\" OR \"adversarial machine learning\" OR \"machine vision\" OR \"neural-symbolic integration\" OR \"probabilistic graphical model*\" OR \"hybrid intelligent system*\" OR \"machine creativity\" OR \"explainable AI\" OR \"interactive machine learning\" OR \"artificial emotional 
intelligence\" OR \"evolutionary computation*\" OR \"human-in-the-loop\" OR \"unsupervised deep learning\" OR \"deep belief network*\" OR \"quantum machine learning\" OR \"artificial immune system*\" OR \"swarm robotics\" OR \"autonomous agents\" OR \"machine ethics\" OR \"collaborative filtering\" OR \"content based filtering\" OR \"pervasive computing\" OR \"ubiquitous computing\" OR \"human-computer interaction\" OR \"cloud computing\" OR \"Internet of Things\" OR \"artificial cognition\" OR \"computational creativity\" OR \"sentiment analy*\" OR \"robotics\" OR \"boltzmann machine*\" OR \"kernel machine*\" OR \"Hopfield network*\" OR \"Hebbian learning\" OR \"latent factor model*\" OR \"non-negative matrix factorization\" OR \"independent component analysis\" OR \"principal component analysis\" OR \"data augmentation\" OR \"image segmentation\" OR \"autoregressive language model*\" OR \"generative pre-trained transformer*\" OR \"smart city\" OR \"smart home\" OR \"smart grid\" OR \"smart health\" OR \"smart manufacturing\" OR \"smart agriculture\" OR \"smart environment\" OR \"smart energy\" OR \"smart mobility\" OR \"smart buildings\" OR \"smart tourism\" OR \"smart logistics\" OR \"smart supply chain\" OR \"smart retail\" OR \"smart waste management\" OR \"smart parking\" OR \"smart governance\" OR \"smart education\" OR \"smart technolog*\" OR \"smart diagnostic*\" OR \"data* analytic*\" OR \"hadoop*\" OR \"mapreduce\" OR \"map$reduce\" OR \"large$ dataset*\" OR \"data warehouse*\" OR \"predictive analytic*\" OR \"no$sql\" OR \"nosql\" OR \"no sql\" OR \"unstructured data*\" OR \"data science*\"'"
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# `keywords` arrives as one comma-separated string and is replaced by a list of\n",
"# terms. The isinstance guard keeps the cell re-runnable: on a second run the\n",
"# name already holds the list, which has no .split().\n",
"if isinstance(keywords, str):\n",
"    # Skip blank entries (e.g. from a trailing comma) so no empty \"\" term reaches the query.\n",
"    keywords = [term.strip() for term in keywords.split(\",\") if term.strip()]\n",
"\n",
"# Wrap each term in double quotes and OR them together for a TS=(...) clause.\n",
"keywords_str = ' OR '.join(f'\"{term}\"' for term in keywords)\n",
"keywords_str"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 26,
"outputs": [
{
"data": {
"text/plain": "138"
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(keywords)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 20,
"outputs": [],
"source": [
"# Country list file, one directory above the working directory. os.path.join\n",
"# (os is imported in the first cell) replaces the hard-coded backslash, which\n",
"# only resolved on Windows.\n",
"scope_country_source = os.path.join('..', 'eu_scope_countries.txt')\n",
"\n",
"with open(scope_country_source, 'r') as f:\n",
"    # Only the first line of the file is parsed; any later lines are ignored.\n",
"    coop_countries = [c.strip().upper() for c in f.readline().split(\",\")]\n",
"focal_countries = [c.strip().upper() for c in focal_countries_list]\n",
"\n",
"# The slices below rely on the list ending with seven non-EU entries. In the\n",
"# saved run these were NORWAY, SWITZERLAND and five UK spellings, in that order.\n",
"eu_countries = coop_countries[:-7]\n",
"assoc_countries = coop_countries[-7:]\n",
"\n",
"nor_c = [coop_countries[-7]]\n",
"swi_c = [coop_countries[-6]]\n",
"uk_c = coop_countries[-5:]\n",
"\n",
"# OR-joined country groups, ready to be placed inside a CU=(...) clause.\n",
"foc_str = ' OR '.join(focal_countries)\n",
"coop_str = ' OR '.join(coop_countries)\n",
"eu_str = ' OR '.join(eu_countries)\n",
"assoc_str = ' OR '.join(assoc_countries)\n",
"\n",
"nor_str = ' OR '.join(nor_c)\n",
"swi_str = ' OR '.join(swi_c)\n",
"uk_str = ' OR '.join(uk_c)\n",
"# Split the joined EU string back into its individual entries.\n",
"eu_sub_str = eu_str.split(' OR ')"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 21,
"outputs": [
{
"data": {
"text/plain": "'AUSTRIA OR BELGIUM OR BULGARIA OR CROATIA OR CYPRUS OR CZECH REPUBLIC OR DENMARK OR ESTONIA OR FINLAND OR FRANCE OR GERMANY OR GREECE OR HUNGARY OR IRELAND OR ITALY OR LATVIA OR LITHUANIA OR LUXEMBOURG OR MALTA OR NETHERLANDS OR POLAND OR PORTUGAL OR ROMANIA OR SLOVAKIA OR SLOVENIA OR SPAIN OR SWEDEN OR NORWAY OR SWITZERLAND OR UNITED KINGDOM OR ENGLAND OR WALES OR SCOTLAND OR N IRELAND'"
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Concatenate the four country groups with ' OR ' separators.\n",
"eu_assoc = f'{eu_str} OR {nor_str} OR {swi_str} OR {uk_str}'\n",
"eu_assoc"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 22,
"outputs": [
{
"data": {
"text/plain": "'PEOPLES R CHINA OR HONG KONG'"
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"foc_str"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 22,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 22,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 23,
"outputs": [
{
"data": {
"text/plain": "'CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUSTRIA OR BELGIUM OR BULGARIA OR CROATIA OR CYPRUS OR CZECH REPUBLIC OR DENMARK OR ESTONIA OR FINLAND OR FRANCE OR GERMANY OR GREECE OR HUNGARY OR IRELAND OR ITALY OR LATVIA OR LITHUANIA OR LUXEMBOURG OR MALTA OR NETHERLANDS OR POLAND OR PORTUGAL OR ROMANIA OR SLOVAKIA OR SLOVENIA OR SPAIN OR SWEDEN OR NORWAY OR SWITZERLAND OR UNITED KINGDOM OR ENGLAND OR WALES OR SCOTLAND OR N IRELAND) AND TS=(\"artificial intelligence*\" OR \"machine* learn*\" OR \"neural network*\" OR \"big data*\" OR \"deep learn*\" OR \"pattern recognition\" OR \"computer vision\" OR \"image classification\" OR \"reinforcement learning\" OR \"support vector machine*\" OR \"recommender system*\" OR \"random forest\" OR \"ensemble model*\" OR \"image processing\" OR \"generative network*\" OR \"ai ethic*\" OR \"natural language processing\" OR \"clustering algorithm*\" OR \"feature extraction\" OR \"time series forecast*\" OR \"anomaly detection\" OR \"identity fraud detection\" OR \"dimensionality reduction\" OR \"feature elicitation\" OR \"chatbot*\" OR \"clustering\" OR \"*supervised learning\" OR \"convolutional network*\" OR \"convolutional neural\" OR \"adversarial network*\" OR \"adversarial neural\" OR \"adversarial machine\" OR \"autoencoder*\" OR \"gated recurrent unit*\" OR \"perceptron*\" OR \"feature learning\" OR \"feature engineering\" OR \"long short-term memor*\" OR \"word embedding*\" OR \"word vector*\" OR \"gradient descent\" OR \"k-nearest neighbor*\" OR \"naive bayes\" OR \"transfer learning\" OR \"fuzzy logic\" OR \"backpropagation\" OR \"computational modeling\" OR \"computational statistic*\" OR \"intelligent agent*\" OR \"expert system*\" OR \"decision tree*\" OR \"Bayesian network*\" OR \"genetic algorithm*\" OR \"swarm intelligence\" OR \"cognitive computing\" OR \"artificial neural network*\" OR \"convolutional neural network*\" OR \"recurrent neural network*\" OR \"ensemble learning\" OR \"data mining\" OR 
\"artificial general intelligence\" OR \"artificial consciousness\" OR \"evolutionary algorithm*\" OR \"self-organizing map*\" OR \"deep reinforcement learning\" OR \"adversarial machine learning\" OR \"machine vision\" OR \"neural-symbolic integration\" OR \"probabilistic graphical model*\" OR \"hybrid intelligent system*\" OR \"machine creativity\" OR \"explainable AI\" OR \"interactive machine learning\" OR \"artificial emotional intelligence\" OR \"evolutionary computation*\" OR \"human-in-the-loop\" OR \"unsupervised deep learning\" OR \"deep belief network*\" OR \"quantum machine learning\" OR \"artificial immune system*\" OR \"swarm robotics\" OR \"autonomous agents\" OR \"machine ethics\" OR \"collaborative filtering\" OR \"content based filtering\" OR \"pervasive computing\" OR \"ubiquitous computing\" OR \"human-computer interaction\" OR \"cloud computing\" OR \"Internet of Things\" OR \"artificial cognition\" OR \"computational creativity\" OR \"sentiment analy*\" OR \"robotics\" OR \"boltzmann machine*\" OR \"kernel machine*\" OR \"Hopfield network*\" OR \"Hebbian learning\" OR \"latent factor model*\" OR \"non-negative matrix factorization\" OR \"independent component analysis\" OR \"principal component analysis\" OR \"data augmentation\" OR \"image segmentation\" OR \"autoregressive language model*\" OR \"generative pre-trained transformer*\" OR \"smart city\" OR \"smart home\" OR \"smart grid\" OR \"smart health\" OR \"smart manufacturing\" OR \"smart agriculture\" OR \"smart environment\" OR \"smart energy\" OR \"smart mobility\" OR \"smart buildings\" OR \"smart tourism\" OR \"smart logistics\" OR \"smart supply chain\" OR \"smart retail\" OR \"smart waste management\" OR \"smart parking\" OR \"smart governance\" OR \"smart education\" OR \"smart technolog*\" OR \"smart diagnostic*\" OR \"data* analytic*\" OR \"hadoop*\" OR \"mapreduce\" OR \"map$reduce\" OR \"large$ dataset*\" OR \"data warehouse*\" OR \"predictive analytic*\" OR \"no$sql\" OR 
\"nosql\" OR \"no sql\" OR \"unstructured data*\" OR \"data science*\") AND PY=(2011-2022)'"
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# country_mode is the WoS field tag applied to both country filters; it was\n",
"# previously defined but ignored. With country_mode = \"CU\" the query text is\n",
"# identical to the former hard-coded version.\n",
"scope_query = (\n",
"    f'{country_mode}=({foc_str}) AND {country_mode}=({eu_assoc}) '\n",
"    f'AND TS=({keywords_str}) AND PY=(2011-2022)'\n",
")\n",
"scope_query"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 23,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 24,
"outputs": [],
"source": [
"from wossel_miners import wos_fetch_entries,wos_fetch_yearly_output"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 25,
"outputs": [],
"source": [
"# wos_fetch_yearly_output(query_str_list=sub_queries)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 27,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hoooold...\n",
"41511 records found! Here we go in 139 steps...\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 138/138 [15:20<00:00, 6.67s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"final batch of 41401-41511\n"
]
}
],
"source": [
"wos_fetch_entries(query_str=scope_query)"
],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}