From da720a61318b5abb7f803ccd836b3e1e97f036be Mon Sep 17 00:00:00 2001
From: radvanyimome <97281689+radvanyimome@users.noreply.github.com>
Date: Tue, 4 Apr 2023 15:37:22 +0200
Subject: [PATCH] added WOS Selenium crawler; slightly updated WOS data
 processing
---
WOS/wos_extract/wos_query_generator.ipynb | 230 +++++++++--
WOS/wos_extract/wossel_miners.py | 266 ++++++++++++
WOS/wos_processing.ipynb | 479 +++++++++++++++++-----
3 files changed, 855 insertions(+), 120 deletions(-)
create mode 100644 WOS/wos_extract/wossel_miners.py
diff --git a/WOS/wos_extract/wos_query_generator.ipynb b/WOS/wos_extract/wos_query_generator.ipynb
index db1cf5f..bda3936 100644
--- a/WOS/wos_extract/wos_query_generator.ipynb
+++ b/WOS/wos_extract/wos_query_generator.ipynb
@@ -2,19 +2,21 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 50,
+ "execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
+ "import os\n",
+ "\n",
"import pandas as pd\n",
"focal_countries_list = [\"Peoples R china\", \"Hong Kong\"]"
]
},
{
"cell_type": "code",
- "execution_count": 51,
+ "execution_count": 2,
"outputs": [],
"source": [
"country_mode = \"CU\" #CU-country-region AU-address"
@@ -28,7 +30,7 @@
},
{
"cell_type": "code",
- "execution_count": 52,
+ "execution_count": 3,
"outputs": [],
"source": [
"# (TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"computer vision\") OR TS=(\"pattern recognition\")) AND"
@@ -42,13 +44,13 @@
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": 4,
"outputs": [
{
"data": {
"text/plain": "'TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"pattern recognition\") OR TS=(\"computer vision\") OR TS=(\"image classification\") OR TS=(\"reinforcement learning\") OR TS=(\"support vector machines\") OR TS=(\"recommender system\") OR TS=(\"random forest\") OR TS=(\"ensemble model\") OR TS=(\"image processing\") OR TS=(\"generative network\") OR TS=(\"ai ethic\") OR TS=(\"natural language processing\") OR TS=(\"clustering algorithm\") OR TS=(\"feature extraction\") OR TS=(\"time series forecast\") OR TS=(\"anomaly detection\") OR TS=(\"identity fraud detection\") OR TS=(\"dimensionality reduction\") OR TS=(\"feature elicitation\") OR TS=(\"chatbot\") OR TS=(\"clustering\") OR TS=(\"unsupervised learning\") OR TS=(\"supervised learning\") OR TS=(\"convolutional network\") OR TS=(\"adversarial network\")'"
},
- "execution_count": 53,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -60,7 +62,7 @@
"\n",
"keywords = [c.strip() for c in keywords[0].split(\",\")]\n",
"\n",
- "keywords_str = ' OR '.join('TS=(\"'+k+'\")' for k in keywords)\n",
+ "keywords_str = ' OR '.join('TS=(\\\"'+k+'\\\")' for k in keywords)\n",
"keywords_str"
],
"metadata": {
@@ -72,17 +74,8 @@
},
{
"cell_type": "code",
- "execution_count": 54,
- "outputs": [
- {
- "data": {
- "text/plain": "'CU=AUSTRIA OR CU=BELGIUM OR CU=BULGARIA OR CU=CROATIA OR CU=CYPRUS OR CU=CZECH REPUBLIC OR CU=DENMARK OR CU=ESTONIA OR CU=FINLAND OR CU=FRANCE OR CU=GERMANY OR CU=GREECE OR CU=HUNGARY OR CU=IRELAND OR CU=ITALY OR CU=LATVIA OR CU=LITHUANIA OR CU=LUXEMBOURG OR CU=MALTA OR CU=NETHERLANDS OR CU=POLAND OR CU=PORTUGAL OR CU=ROMANIA OR CU=SLOVAKIA OR CU=SLOVENIA OR CU=SPAIN OR CU=SWEDEN OR CU=NORWAY OR CU=SWITZERLAND OR CU=UNITED KINGDOM OR CU=ENGLAND OR CU=WALES OR CU=SCOTLAND OR CU=N IRELAND'"
- },
- "execution_count": 54,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "execution_count": 5,
+ "outputs": [],
"source": [
"scope_country_source = r'..\\eu_scope_countries.txt'\n",
"\n",
@@ -90,11 +83,58 @@
" coop_countries = f.readlines()\n",
"coop_countries = [c.strip().upper() for c in coop_countries[0].split(\",\")]\n",
"focal_countries = [c.strip().upper() for c in focal_countries_list]\n",
+ "eu_countries = coop_countries[0:-7]\n",
+ "assoc_countries = coop_countries[-7:]\n",
+ "\n",
+ "nor_c = [coop_countries[-7],]\n",
+ "swi_c = [coop_countries[-6],]\n",
+ "uk_c = coop_countries[-5:]\n",
"\n",
"foc_str = ' OR '.join([country_mode+'='+c for c in focal_countries])\n",
"coop_str = ' OR '.join([country_mode+'='+c for c in coop_countries])\n",
+ "eu_str = ' OR '.join([country_mode+'='+c for c in eu_countries])\n",
+ "assoc_str = ' OR '.join([country_mode+'='+c for c in assoc_countries])\n",
"\n",
- "coop_str"
+ "nor_str =' OR '.join([country_mode+'='+c for c in nor_c])\n",
+ "swi_str =' OR '.join([country_mode+'='+c for c in swi_c])\n",
+ "uk_str =' OR '.join([country_mode+'='+c for c in uk_c])\n",
+ "eu_sub_str = eu_str.split(' OR ')\n",
+ "# eu_sub_str"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "outputs": [],
+ "source": [],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "['UNITED KINGDOM', 'ENGLAND', 'WALES', 'SCOTLAND', 'N IRELAND']"
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "coop_countries[-5:]"
],
"metadata": {
"collapsed": false,
@@ -105,13 +145,13 @@
},
{
"cell_type": "code",
- "execution_count": 55,
+ "execution_count": 7,
"outputs": [
{
"data": {
"text/plain": "'CU=PEOPLES R CHINA OR CU=HONG KONG'"
},
- "execution_count": 55,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -128,19 +168,19 @@
},
{
"cell_type": "code",
- "execution_count": 58,
+ "execution_count": 8,
"outputs": [
{
"data": {
- "text/plain": "'(CU=PEOPLES R CHINA OR CU=HONG KONG) AND (CU=AUSTRIA OR CU=BELGIUM OR CU=BULGARIA OR CU=CROATIA OR CU=CYPRUS OR CU=CZECH REPUBLIC OR CU=DENMARK OR CU=ESTONIA OR CU=FINLAND OR CU=FRANCE OR CU=GERMANY OR CU=GREECE OR CU=HUNGARY OR CU=IRELAND OR CU=ITALY OR CU=LATVIA OR CU=LITHUANIA OR CU=LUXEMBOURG OR CU=MALTA OR CU=NETHERLANDS OR CU=POLAND OR CU=PORTUGAL OR CU=ROMANIA OR CU=SLOVAKIA OR CU=SLOVENIA OR CU=SPAIN OR CU=SWEDEN OR CU=NORWAY OR CU=SWITZERLAND OR CU=UNITED KINGDOM OR CU=ENGLAND OR CU=WALES OR CU=SCOTLAND OR CU=N IRELAND) AND (TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"pattern recognition\") OR TS=(\"computer vision\") OR TS=(\"image classification\") OR TS=(\"reinforcement learning\") OR TS=(\"support vector machines\") OR TS=(\"recommender system\") OR TS=(\"random forest\") OR TS=(\"ensemble model\") OR TS=(\"image processing\") OR TS=(\"generative network\") OR TS=(\"ai ethic\") OR TS=(\"natural language processing\") OR TS=(\"clustering algorithm\") OR TS=(\"feature extraction\") OR TS=(\"time series forecast\") OR TS=(\"anomaly detection\") OR TS=(\"identity fraud detection\") OR TS=(\"dimensionality reduction\") OR TS=(\"feature elicitation\") OR TS=(\"chatbot\") OR TS=(\"clustering\") OR TS=(\"unsupervised learning\") OR TS=(\"supervised learning\") OR TS=(\"convolutional network\") OR TS=(\"adversarial network\"))'"
+ "text/plain": "'(CU=PEOPLES R CHINA OR CU=HONG KONG) AND (CU=AUSTRIA OR CU=BELGIUM OR CU=BULGARIA OR CU=CROATIA OR CU=CYPRUS OR CU=CZECH REPUBLIC OR CU=DENMARK OR CU=ESTONIA OR CU=FINLAND OR CU=FRANCE OR CU=GERMANY OR CU=GREECE OR CU=HUNGARY OR CU=IRELAND OR CU=ITALY OR CU=LATVIA OR CU=LITHUANIA OR CU=LUXEMBOURG OR CU=MALTA OR CU=NETHERLANDS OR CU=POLAND OR CU=PORTUGAL OR CU=ROMANIA OR CU=SLOVAKIA OR CU=SLOVENIA OR CU=SPAIN OR CU=SWEDEN OR CU=NORWAY OR CU=SWITZERLAND OR CU=UNITED KINGDOM OR CU=ENGLAND OR CU=WALES OR CU=SCOTLAND OR CU=N IRELAND) AND (TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"pattern recognition\") OR TS=(\"computer vision\") OR TS=(\"image classification\") OR TS=(\"reinforcement learning\") OR TS=(\"support vector machines\") OR TS=(\"recommender system\") OR TS=(\"random forest\") OR TS=(\"ensemble model\") OR TS=(\"image processing\") OR TS=(\"generative network\") OR TS=(\"ai ethic\") OR TS=(\"natural language processing\") OR TS=(\"clustering algorithm\") OR TS=(\"feature extraction\") OR TS=(\"time series forecast\") OR TS=(\"anomaly detection\") OR TS=(\"identity fraud detection\") OR TS=(\"dimensionality reduction\") OR TS=(\"feature elicitation\") OR TS=(\"chatbot\") OR TS=(\"clustering\") OR TS=(\"unsupervised learning\") OR TS=(\"supervised learning\") OR TS=(\"convolutional network\") OR TS=(\"adversarial network\")) AND PY=(2011-2022)'"
},
- "execution_count": 58,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "scope_query = f'({foc_str}) AND ({coop_str}) AND ({keywords_str})'\n",
+ "scope_query = f'({foc_str}) AND ({coop_str}) AND ({keywords_str}) AND PY=(2011-2022)'\n",
"scope_query"
],
"metadata": {
@@ -152,19 +192,19 @@
},
{
"cell_type": "code",
- "execution_count": 60,
+ "execution_count": 9,
"outputs": [
{
"data": {
- "text/plain": "'(CU=AUSTRIA OR CU=BELGIUM OR CU=BULGARIA OR CU=CROATIA OR CU=CYPRUS OR CU=CZECH REPUBLIC OR CU=DENMARK OR CU=ESTONIA OR CU=FINLAND OR CU=FRANCE OR CU=GERMANY OR CU=GREECE OR CU=HUNGARY OR CU=IRELAND OR CU=ITALY OR CU=LATVIA OR CU=LITHUANIA OR CU=LUXEMBOURG OR CU=MALTA OR CU=NETHERLANDS OR CU=POLAND OR CU=PORTUGAL OR CU=ROMANIA OR CU=SLOVAKIA OR CU=SLOVENIA OR CU=SPAIN OR CU=SWEDEN OR CU=NORWAY OR CU=SWITZERLAND OR CU=UNITED KINGDOM OR CU=ENGLAND OR CU=WALES OR CU=SCOTLAND OR CU=N IRELAND) AND (TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"pattern recognition\") OR TS=(\"computer vision\") OR TS=(\"image classification\") OR TS=(\"reinforcement learning\") OR TS=(\"support vector machines\") OR TS=(\"recommender system\") OR TS=(\"random forest\") OR TS=(\"ensemble model\") OR TS=(\"image processing\") OR TS=(\"generative network\") OR TS=(\"ai ethic\") OR TS=(\"natural language processing\") OR TS=(\"clustering algorithm\") OR TS=(\"feature extraction\") OR TS=(\"time series forecast\") OR TS=(\"anomaly detection\") OR TS=(\"identity fraud detection\") OR TS=(\"dimensionality reduction\") OR TS=(\"feature elicitation\") OR TS=(\"chatbot\") OR TS=(\"clustering\") OR TS=(\"unsupervised learning\") OR TS=(\"supervised learning\") OR TS=(\"convolutional network\") OR TS=(\"adversarial network\"))'"
+ "text/plain": "'(CU=PEOPLES R CHINA OR CU=HONG KONG) AND (TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"pattern recognition\") OR TS=(\"computer vision\") OR TS=(\"image classification\") OR TS=(\"reinforcement learning\") OR TS=(\"support vector machines\") OR TS=(\"recommender system\") OR TS=(\"random forest\") OR TS=(\"ensemble model\") OR TS=(\"image processing\") OR TS=(\"generative network\") OR TS=(\"ai ethic\") OR TS=(\"natural language processing\") OR TS=(\"clustering algorithm\") OR TS=(\"feature extraction\") OR TS=(\"time series forecast\") OR TS=(\"anomaly detection\") OR TS=(\"identity fraud detection\") OR TS=(\"dimensionality reduction\") OR TS=(\"feature elicitation\") OR TS=(\"chatbot\") OR TS=(\"clustering\") OR TS=(\"unsupervised learning\") OR TS=(\"supervised learning\") OR TS=(\"convolutional network\") OR TS=(\"adversarial network\"))'"
},
- "execution_count": 60,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "ch_scope_query = f'({coop_str}) AND ({keywords_str})'\n",
+ "ch_scope_query = f'({foc_str}) AND ({keywords_str})'\n",
"ch_scope_query"
],
"metadata": {
@@ -173,6 +213,140 @@
"name": "#%%\n"
}
}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "'(CU=AUSTRIA OR CU=BELGIUM OR CU=BULGARIA OR CU=CROATIA OR CU=CYPRUS OR CU=CZECH REPUBLIC OR CU=DENMARK OR CU=ESTONIA OR CU=FINLAND OR CU=FRANCE OR CU=GERMANY OR CU=GREECE OR CU=HUNGARY OR CU=IRELAND OR CU=ITALY OR CU=LATVIA OR CU=LITHUANIA OR CU=LUXEMBOURG OR CU=MALTA OR CU=NETHERLANDS OR CU=POLAND OR CU=PORTUGAL OR CU=ROMANIA OR CU=SLOVAKIA OR CU=SLOVENIA OR CU=SPAIN OR CU=SWEDEN) AND (TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"pattern recognition\") OR TS=(\"computer vision\") OR TS=(\"image classification\") OR TS=(\"reinforcement learning\") OR TS=(\"support vector machines\") OR TS=(\"recommender system\") OR TS=(\"random forest\") OR TS=(\"ensemble model\") OR TS=(\"image processing\") OR TS=(\"generative network\") OR TS=(\"ai ethic\") OR TS=(\"natural language processing\") OR TS=(\"clustering algorithm\") OR TS=(\"feature extraction\") OR TS=(\"time series forecast\") OR TS=(\"anomaly detection\") OR TS=(\"identity fraud detection\") OR TS=(\"dimensionality reduction\") OR TS=(\"feature elicitation\") OR TS=(\"chatbot\") OR TS=(\"clustering\") OR TS=(\"unsupervised learning\") OR TS=(\"supervised learning\") OR TS=(\"convolutional network\") OR TS=(\"adversarial network\"))'"
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "eu_scope_query = f'({eu_str}) AND ({keywords_str})'\n",
+ "eu_scope_query"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "outputs": [],
+ "source": [
+ "sub_queries = [f'PY=(2011-2022) AND ({i_str}) AND ({keywords_str})' for i_str in [foc_str,eu_str,assoc_str,nor_str,swi_str,uk_str]+eu_sub_str]"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "outputs": [],
+ "source": [
+ "from wossel_miners import wos_fetch_entries,wos_fetch_yearly_output"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 33/33 [12:49<00:00, 23.31s/it]\n"
+ ]
+ }
+ ],
+ "source": [
+ "wos_fetch_yearly_output(query_str_list=sub_queries)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "'(CU=PEOPLES R CHINA OR CU=HONG KONG) AND (CU=AUSTRIA OR CU=BELGIUM OR CU=BULGARIA OR CU=CROATIA OR CU=CYPRUS OR CU=CZECH REPUBLIC OR CU=DENMARK OR CU=ESTONIA OR CU=FINLAND OR CU=FRANCE OR CU=GERMANY OR CU=GREECE OR CU=HUNGARY OR CU=IRELAND OR CU=ITALY OR CU=LATVIA OR CU=LITHUANIA OR CU=LUXEMBOURG OR CU=MALTA OR CU=NETHERLANDS OR CU=POLAND OR CU=PORTUGAL OR CU=ROMANIA OR CU=SLOVAKIA OR CU=SLOVENIA OR CU=SPAIN OR CU=SWEDEN OR CU=NORWAY OR CU=SWITZERLAND OR CU=UNITED KINGDOM OR CU=ENGLAND OR CU=WALES OR CU=SCOTLAND OR CU=N IRELAND) AND (TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"pattern recognition\") OR TS=(\"computer vision\") OR TS=(\"image classification\") OR TS=(\"reinforcement learning\") OR TS=(\"support vector machines\") OR TS=(\"recommender system\") OR TS=(\"random forest\") OR TS=(\"ensemble model\") OR TS=(\"image processing\") OR TS=(\"generative network\") OR TS=(\"ai ethic\") OR TS=(\"natural language processing\") OR TS=(\"clustering algorithm\") OR TS=(\"feature extraction\") OR TS=(\"time series forecast\") OR TS=(\"anomaly detection\") OR TS=(\"identity fraud detection\") OR TS=(\"dimensionality reduction\") OR TS=(\"feature elicitation\") OR TS=(\"chatbot\") OR TS=(\"clustering\") OR TS=(\"unsupervised learning\") OR TS=(\"supervised learning\") OR TS=(\"convolutional network\") OR TS=(\"adversarial network\")) AND PY=(2011-2022)'"
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "scope_query"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Hoooold...\n",
+ "27672 records found! Here we go in 93 steps...\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 92/92 [09:38<00:00, 6.29s/it]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "final batch of 27601-27672\n"
+ ]
+ }
+ ],
+ "source": [
+ "wos_fetch_entries(query_str=scope_query)"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
}
],
"metadata": {
diff --git a/WOS/wos_extract/wossel_miners.py b/WOS/wos_extract/wossel_miners.py
new file mode 100644
index 0000000..d926672
--- /dev/null
+++ b/WOS/wos_extract/wossel_miners.py
@@ -0,0 +1,266 @@
+import os
+import glob
+import pytest
+import time
+from datetime import datetime
+import json
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.action_chains import ActionChains
+from selenium.webdriver.support import expected_conditions
+from selenium.webdriver.support.wait import WebDriverWait
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+# from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.firefox.options import Options
+
+from tqdm import tqdm
+import random
+
+def close_pendo_windows(driver):
+    """Best-effort dismissal of the cookie banner and Pendo guide pop-ups.
+
+    Each known overlay control is clicked if it can be located; controls
+    that are absent or cannot be clicked are skipped silently.
+
+    :param driver: Selenium WebDriver whose current page is checked.
+    """
+    overlay_xpaths = (
+        # Cookies
+        '//*[@id="onetrust-accept-btn-handler"]',
+        # "Got it"
+        '//button[contains(@class, "_pendo-button-primaryButton")]',
+        # "No thanks"
+        '//button[contains(@class, "_pendo-button-secondaryButton")]',
+        # Guide close control (the closing "]" was missing, so it never matched)
+        '//span[contains(@class, "_pendo-close-guide")]',
+        # Overlay (same missing "]" fixed)
+        '//div[contains(@class, "cdk-overlay-container")]',
+    )
+    for xpath in overlay_xpaths:
+        try:
+            driver.find_element(By.XPATH, xpath).click()
+        except Exception:
+            # Deliberate best effort, but unlike a bare ``except`` this lets
+            # KeyboardInterrupt/SystemExit propagate.
+            continue
+
+
+def wos_fetch_entries(query_str="TS=\"web of science\" AND PY=(2008-2010)",
+                      wait_mu=1, wait_sigma=0.2, debug=False, download_root=None):
+    """Export every record matching a Web of Science advanced-search query.
+
+    Drives Firefox through the Advanced Search form and the export dialog
+    (``exportToTabWinButton``) in batches of 300 records. Each batch arrives
+    as ``savedrecs.txt`` and is renamed to ``records_<first>_<last>.txt`` in
+    a fresh time-stamped folder, next to ``query.txt`` holding the query.
+
+    Compared with the first version: result sets of 301 records or fewer no
+    longer crash on an unbound loop variable, the browser is shut down even
+    when a step fails, the rename waits for the download to finish, and a
+    negative Gaussian draw can no longer reach ``time.sleep``.
+
+    :param query_str: query in Web of Science advanced-search syntax.
+    :param wait_mu: mean of the random pause between exports, in seconds.
+    :param wait_sigma: standard deviation of that pause, in seconds.
+    :param debug: if true, show the browser window instead of running headless.
+    :param download_root: parent folder for the time-stamped download folder;
+        defaults to the original developer-machine location.
+    :raises TimeoutError: if an export file is not complete within 60 s.
+    """
+    batch_size = 300
+    if download_root is None:
+        download_root = (r'C:\Users\radvanyi\PycharmProjects\ZSI_analytics'
+                         r'\WOS\wos_extract\wos_downloads\entry_batches')
+
+    def pause():
+        # random.gauss can return a negative number, which time.sleep rejects.
+        time.sleep(max(0.0, random.gauss(wait_mu, wait_sigma)))
+
+    def collect(target_name, timeout=60.0):
+        # Rename savedrecs.txt only once it exists, is non-empty and no
+        # "*.part" file is left (assumes Firefox keeps in-progress downloads
+        # in "*.part" files - TODO confirm for the Firefox version in use).
+        source = os.path.join(download_path, "savedrecs.txt")
+        deadline = time.monotonic() + timeout
+        while (not os.path.exists(source) or os.path.getsize(source) == 0
+               or glob.glob(os.path.join(download_path, "*.part"))):
+            if time.monotonic() > deadline:
+                raise TimeoutError(f"export did not finish: {source}")
+            time.sleep(0.1)
+        os.rename(source, os.path.join(download_path, target_name))
+
+    # init directory
+    date_time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f") + "save"
+    download_path = os.path.join(download_root, date_time)
+    os.makedirs(download_path, exist_ok=True)
+    for stale in glob.glob(os.path.join(download_path, "*")):
+        os.remove(stale)
+    with open(os.path.join(download_path, "query.txt"), "w", encoding="utf-8") as f:
+        f.write(query_str)
+
+    options = Options()
+    options.set_preference("browser.download.folderList", 2)
+    options.set_preference("browser.download.manager.showWhenStarting", False)
+    options.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv/xls")
+    options.set_preference("browser.download.dir", download_path)
+    if not debug:
+        options.add_argument('--headless')
+
+    driver = webdriver.Firefox(options=options)
+    try:
+        driver.get("https://www.webofscience.com/")
+        driver.set_window_size(974, 1040)
+        wait = WebDriverWait(driver, 30)
+        try:
+            wait.until(expected_conditions.visibility_of_element_located(
+                (By.ID, "onetrust-reject-all-handler")))
+            driver.find_element(By.ID, "onetrust-reject-all-handler").click()
+        except Exception:
+            # No clickable "reject all" cookie button: fall back to dismissing
+            # whichever known overlays are present.
+            close_pendo_windows(driver)
+        wait.until(expected_conditions.visibility_of_element_located(
+            (By.LINK_TEXT, "Advanced Search")))
+        # NOTE(review): this string looks like a class list rather than an id,
+        # so the wait probably passes immediately - confirm the locator.
+        wait.until(expected_conditions.invisibility_of_element_located(
+            (By.ID, "onetrust-pc-dark-filter ot-fade-in")))
+
+        print("Hoooold...")
+        time.sleep(2)
+        wait.until(expected_conditions.element_to_be_clickable(
+            (By.LINK_TEXT, "Advanced Search")))
+        driver.find_element(By.LINK_TEXT, "Advanced Search").click()
+
+        wait.until(expected_conditions.element_to_be_clickable(
+            (By.ID, "advancedSearchInputArea")))
+        driver.find_element(By.ID, "advancedSearchInputArea").click()
+        driver.find_element(By.ID, "advancedSearchInputArea").send_keys(query_str)
+        driver.find_element(By.CSS_SELECTOR, ".mat-menu-trigger > svg").click()
+        driver.find_element(By.CSS_SELECTOR, ".cdk-focused > span").click()
+
+        wait.until(expected_conditions.visibility_of_element_located(
+            (By.CSS_SELECTOR, ".brand-blue")))
+        driver.execute_script("window.scrollTo(0,0)")
+        count_str = driver.find_element(By.CSS_SELECTOR, ".brand-blue").text
+        # Strip "," and "." (presumably thousands separators) before parsing.
+        count_int = int(count_str.replace(",", "").replace(".", "").strip())
+        steps = -(-count_int // batch_size)  # ceiling division
+        print(f'{count_int} records found! Here we go in {steps} steps...')
+
+        export_done = expected_conditions.invisibility_of_element_located(
+            (By.CSS_SELECTOR, ".mat-flat-button .ng-star-inserted"))
+        # One pass per batch, including the (possibly shorter) last one, so
+        # small result sets never depend on a leftover loop variable.
+        for first in tqdm(range(1, count_int + 1, batch_size), position=0, leave=True):
+            last = min(first + batch_size - 1, count_int)
+            wait.until(export_done)
+            driver.find_element(By.XPATH, "//app-export-menu/div/button").click()
+            driver.find_element(By.ID, "exportToTabWinButton").click()
+            driver.find_element(By.CSS_SELECTOR, "#radio3 .mat-radio-container").click()
+            driver.find_element(By.NAME, "markFrom").clear()
+            driver.find_element(By.NAME, "markFrom").send_keys(f"{first}")
+            driver.find_element(By.NAME, "markTo").clear()
+            driver.find_element(By.NAME, "markTo").send_keys(f"{last}")
+            driver.find_element(By.CSS_SELECTOR, ".margin-top-5 > .dropdown").click()
+            driver.find_element(By.XPATH, "//span[contains(.,\'Full Record\')]").click()
+            driver.find_element(By.CSS_SELECTOR, ".mat-flat-button .ng-star-inserted").click()
+            wait.until(export_done)
+            pause()
+            collect(f"records_{first}_{last}.txt")
+
+        time.sleep(2)
+        pause()
+    finally:
+        # quit() ends the whole session; close() only closed the window and
+        # was skipped entirely whenever an earlier step raised.
+        driver.quit()
+
+def wos_fetch_yearly_output(query_str_list=(
+        "TS=\"web of science\" AND PY=(2008-2010)",
+        "TS=\"artificial intelligence\" AND PY=(2011-2022)"),
+        wait_mu=1, wait_sigma=0.2, debug=False, download_root=None):
+    """Download the "Publication Years" analysis table for each query.
+
+    Every query gets its own Firefox session (always shut down, even on
+    failure) and time-stamped folder holding ``query.txt`` and the table
+    from "Analyze Results", renamed to ``analyze_PY_<stamp>_.txt``.
+
+    :param query_str_list: iterable of advanced-search queries; a single
+        string is treated as one query instead of being iterated by character.
+    :param wait_mu: mean of the random pause after each query, in seconds.
+    :param wait_sigma: standard deviation of that pause, in seconds.
+    :param debug: if true, show the browser window instead of running headless.
+    :param download_root: parent folder for the per-query folders; defaults
+        to the original developer-machine location.
+    :raises TimeoutError: if ``analyze.txt`` is not complete within 60 s.
+    """
+    if isinstance(query_str_list, str):
+        query_str_list = (query_str_list,)
+    if download_root is None:
+        download_root = (r'C:\Users\radvanyi\PycharmProjects\ZSI_analytics'
+                         r'\WOS\wos_extract\wos_downloads\aggregated')
+
+    for query_str in tqdm(query_str_list):
+        # init directory
+        date_time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f") + "save"
+        download_path = os.path.join(download_root, date_time)
+        os.makedirs(download_path, exist_ok=True)
+        for stale in glob.glob(os.path.join(download_path, "*")):
+            os.remove(stale)
+        with open(os.path.join(download_path, "query.txt"), "w", encoding="utf-8") as f:
+            f.write(query_str)
+
+        options = Options()
+        options.set_preference("browser.download.folderList", 2)
+        options.set_preference("browser.download.manager.showWhenStarting", False)
+        options.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv/xls")
+        options.set_preference("browser.download.dir", download_path)
+        if not debug:
+            options.add_argument('--headless')
+        driver = webdriver.Firefox(options=options)
+        try:
+            driver.get("https://www.webofscience.com/")
+            driver.set_window_size(974, 1040)
+            wait = WebDriverWait(driver, 30)
+            try:
+                wait.until(expected_conditions.visibility_of_element_located(
+                    (By.ID, "onetrust-reject-all-handler")))
+                driver.find_element(By.ID, "onetrust-reject-all-handler").click()
+            except Exception:
+                close_pendo_windows(driver)
+            wait.until(expected_conditions.visibility_of_element_located((By.LINK_TEXT, "Advanced Search")))
+            wait.until(expected_conditions.invisibility_of_element_located((By.ID, "onetrust-pc-dark-filter ot-fade-in")))
+            time.sleep(2)
+            wait.until(expected_conditions.element_to_be_clickable((By.LINK_TEXT, "Advanced Search")))
+            driver.find_element(By.LINK_TEXT, "Advanced Search").click()
+            wait.until(expected_conditions.element_to_be_clickable((By.ID, "advancedSearchInputArea")))
+            driver.find_element(By.ID, "advancedSearchInputArea").click()
+            driver.find_element(By.ID, "advancedSearchInputArea").send_keys(query_str)
+            driver.find_element(By.CSS_SELECTOR, ".mat-menu-trigger > svg").click()
+            driver.find_element(By.CSS_SELECTOR, ".cdk-focused > span").click()
+
+            wait.until(expected_conditions.visibility_of_element_located((By.CSS_SELECTOR, ".brand-blue")))
+            driver.execute_script("window.scrollTo(0,0)")
+            driver.find_element(By.XPATH, "//span[contains(.,\'Analyze Results\')]").click()
+            driver.find_element(By.CSS_SELECTOR, "#snSelectCategories svg").click()
+            driver.find_element(By.XPATH, "//span[contains(.,\'Publication Years\')]").click()
+            driver.find_element(By.XPATH, "//mat-radio-button[@id=\'mat-radio-3\']/label/span/span").click()
+            driver.find_element(By.XPATH, "//span[contains(.,\'Download data table\')]").click()
+            # Rename only once the download is complete (non-empty, no "*.part" left).
+            source = os.path.join(download_path, "analyze.txt")
+            deadline = time.monotonic() + 60
+            while (not os.path.exists(source) or os.path.getsize(source) == 0
+                   or glob.glob(os.path.join(download_path, "*.part"))):
+                if time.monotonic() > deadline:
+                    raise TimeoutError(f"download did not finish: {source}")
+                time.sleep(0.1)
+            os.rename(source, os.path.join(download_path, f'analyze_PY_{date_time}_.txt'))
+            time.sleep(max(0.0, random.gauss(wait_mu, wait_sigma)))
+        finally:
+            driver.quit()
+
+if __name__ == "__main__":
+    for demo_run in (wos_fetch_entries, wos_fetch_yearly_output):
+        demo_run(debug=False)  # headless run with each function's default queries
\ No newline at end of file
diff --git a/WOS/wos_processing.ipynb b/WOS/wos_processing.ipynb
index 0fe5e0b..03e92de 100644
--- a/WOS/wos_processing.ipynb
+++ b/WOS/wos_processing.ipynb
@@ -2,20 +2,196 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
- "metadata": {},
+ "execution_count": 35,
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import os\n",
"import shutil\n",
- "from flashgeotext.geotext import GeoText"
- ]
+ "from flashgeotext.geotext import GeoText\n",
+ "import re\n",
+ "import spacy"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "I like salty fries and hamburgers. <-> Fast food tastes very good. 0.691649353055761\n",
+ "salty fries <-> hamburgers 0.6938489675521851\n"
+ ]
+ }
+ ],
+ "source": [
+ "import spacy\n",
+ "\n",
+ "nlp = spacy.load(\"en_core_web_md\") # make sure to use larger package!\n",
+ "doc1 = nlp(\"I like salty fries and hamburgers.\")\n",
+ "doc2 = nlp(\"Fast food tastes very good.\")\n",
+ "\n",
+ "# Similarity of two documents\n",
+ "print(doc1, \"<->\", doc2, doc1.similarity(doc2))\n",
+ "# Similarity of tokens and spans\n",
+ "french_fries = doc1[2:4]\n",
+ "burgers = doc1[5]\n",
+ "print(french_fries, \"<->\", burgers, french_fries.similarity(burgers))"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "I\n",
+ "salty fry\n",
+ "hamburger\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": "[None, None, None]"
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "[print(i.lemma_) for i in doc1.noun_chunks]"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "outputs": [],
+ "source": [
+ "doc_test = nlp(\"On the inevitability of neural networks and other tasty topics of the 21st century\")"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "['the inevitability',\n 'neural network',\n 'other tasty topic',\n 'the 21st century']"
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "[i.lemma_ for i in doc_test.noun_chunks]"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 24,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "(300,)"
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "doc1.vector.shape"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": "\"tokens = []\\nlemma = []\\npos = []\\n\\nfor doc in nlp.pipe(df['species'].astype('unicode').values, batch_size=50,\\n n_threads=3):\\n if doc.is_parsed:\\n tokens.append([n.text for n in doc])\\n lemma.append([n.lemma_ for n in doc])\\n pos.append([n.pos_ for n in doc])\\n else:\\n # We want to make sure that the lists of parsed results have the\\n # same number of entries of the original Dataframe, so add some blanks in case the parse fails\\n tokens.append(None)\\n lemma.append(None)\\n pos.append(None)\\n\\ndf['species_tokens'] = tokens\\ndf['species_lemma'] = lemma\\ndf['species_pos'] = pos\""
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#spacy pipe example\n",
+ "\"\"\"tokens = []\n",
+ "lemma = []\n",
+ "pos = []\n",
+ "\n",
+ "for doc in nlp.pipe(df['species'].astype('unicode').values, batch_size=50,\n",
+ " n_threads=3):\n",
+ " if doc.is_parsed:\n",
+ " tokens.append([n.text for n in doc])\n",
+ " lemma.append([n.lemma_ for n in doc])\n",
+ " pos.append([n.pos_ for n in doc])\n",
+ " else:\n",
+ " # We want to make sure that the lists of parsed results have the\n",
+ " # same number of entries of the original Dataframe, so add some blanks in case the parse fails\n",
+ " tokens.append(None)\n",
+ " lemma.append(None)\n",
+ " pos.append(None)\n",
+ "\n",
+ "df['species_tokens'] = tokens\n",
+ "df['species_lemma'] = lemma\n",
+ "df['species_pos'] = pos\"\"\""
+ ],
+ "metadata": {
+ "collapsed": false,
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
@@ -34,7 +210,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
@@ -43,7 +219,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
@@ -66,14 +242,14 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "0 Publication Type\n1 Authors\n2 Book Authors\n3 Book Editors\n4 Book Group Authors\n ... \n76 SubField_English\n77 2.00 SEQ\n78 Source_title\n79 srcid\n80 issn_type\nLength: 81, dtype: object"
},
- "execution_count": 5,
+ "execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
@@ -84,14 +260,14 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "0 Salucci, Marco/S-8654-2016; Arrebola, Manuel/L...\n9714 Huang, Yu/AAY-5464-2020\n9697 Kakavand, Mohammad Reza Azadi/X-9556-2019; Fen...\n9699 Dong, Sheng/AAE-3619-2021; Soares, Carlos Gued...\n9701 Han, Guoqi/T-7365-2019; Nan, Yang/HKD-9687-202...\n ... \n3066 ; Liotta, Antonio/G-9532-2014\n5097 , 卢帅/AAK-2185-2020; Popp, József/AFN-1250-2022\n11369 NaN\n11368 Rossiter, D G/D-3842-2009\n11362 Jin, Shuanggen/B-8094-2008\nName: Researcher Ids, Length: 9889, dtype: object"
},
- "execution_count": 6,
+ "execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
@@ -102,7 +278,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 31,
"metadata": {},
"outputs": [
{
@@ -110,7 +286,7 @@
"text/plain": " Publication Type Authors \n16979 J Zhang, MS; Huang, J; Cao, Y; Xiong, CH; Mohamm... \\\n1880 J Zhang, MS; Huang, J; Cao, Y; Xiong, CH; Mohamm... \n\n Book Authors Book Editors Book Group Authors \n16979 NaN NaN NaN \\\n1880 NaN NaN NaN \n\n Author Full Names \n16979 Zhang, Mengshi; Huang, Jian; Cao, Yu; Xiong, C... \\\n1880 Zhang, Mengshi; Huang, Jian; Cao, Yu; Xiong, C... \n\n Book Author Full Names Group Authors \n16979 NaN NaN \\\n1880 NaN NaN \n\n Article Title \n16979 Echo State Network-Enhanced Super-Twisting Con... \\\n1880 Echo State Network-Enhanced Super-Twisting Con... \n\n Source Title ... Web of Science Record \n16979 IEEE-ASME TRANSACTIONS ON MECHATRONICS ... 0 \\\n1880 IEEE-ASME TRANSACTIONS ON MECHATRONICS ... 0 \n\n issn_var issn Domain_English Field_English \n16979 issn 10834435 Applied Sciences Engineering \\\n1880 issn 10834435 Applied Sciences Engineering \n\n SubField_English 2.00 SEQ \n16979 Industrial Engineering & Automation 27 \\\n1880 Industrial Engineering & Automation 27 \n\n Source_title srcid issn_type \n16979 IEEE/ASME Transactions on Mechatronics 19113.0 issn1 \n1880 IEEE/ASME Transactions on Mechatronics 19113.0 issn1 \n\n[2 rows x 81 columns]",
"text/html": "
\n\n
\n \n
\n
\n
Publication Type
\n
Authors
\n
Book Authors
\n
Book Editors
\n
Book Group Authors
\n
Author Full Names
\n
Book Author Full Names
\n
Group Authors
\n
Article Title
\n
Source Title
\n
...
\n
Web of Science Record
\n
issn_var
\n
issn
\n
Domain_English
\n
Field_English
\n
SubField_English
\n
2.00 SEQ
\n
Source_title
\n
srcid
\n
issn_type
\n
\n \n \n
\n
16979
\n
J
\n
Zhang, MS; Huang, J; Cao, Y; Xiong, CH; Mohamm...
\n
NaN
\n
NaN
\n
NaN
\n
Zhang, Mengshi; Huang, Jian; Cao, Yu; Xiong, C...
\n
NaN
\n
NaN
\n
Echo State Network-Enhanced Super-Twisting Con...
\n
IEEE-ASME TRANSACTIONS ON MECHATRONICS
\n
...
\n
0
\n
issn
\n
10834435
\n
Applied Sciences
\n
Engineering
\n
Industrial Engineering & Automation
\n
27
\n
IEEE/ASME Transactions on Mechatronics
\n
19113.0
\n
issn1
\n
\n
\n
1880
\n
J
\n
Zhang, MS; Huang, J; Cao, Y; Xiong, CH; Mohamm...
\n
NaN
\n
NaN
\n
NaN
\n
Zhang, Mengshi; Huang, Jian; Cao, Yu; Xiong, C...
\n
NaN
\n
NaN
\n
Echo State Network-Enhanced Super-Twisting Con...
\n
IEEE-ASME TRANSACTIONS ON MECHATRONICS
\n
...
\n
0
\n
issn
\n
10834435
\n
Applied Sciences
\n
Engineering
\n
Industrial Engineering & Automation
\n
27
\n
IEEE/ASME Transactions on Mechatronics
\n
19113.0
\n
issn1
\n
\n \n
\n
2 rows × 81 columns
\n
"
},
- "execution_count": 7,
+ "execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
@@ -121,14 +297,14 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 32,
"outputs": [
{
"data": {
"text/plain": " UT (Unique WOS ID) Keywords Plus \n0 WOS:000852293800024 CONVOLUTIONAL NEURAL-NETWORK; DEEP LEARNING FR... \\\n9714 WOS:000540750000002 STATE-SPACE RECONSTRUCTION; SURFACE AIR-TEMPER... \n9697 WOS:000600708400002 COMPRESSIVE STRENGTH; MODELS; ADABOOST.RT; DUC... \n9699 WOS:000511965100005 STRUCTURAL RELIABILITY; FAILURE MODES \n9701 WOS:000663142500003 REFLECTED GPS SIGNALS; SOIL-MOISTURE; OCEAN; S... \n... ... ... \n3066 WOS:000528727500074 LOCAL SEARCH; ALGORITHM; VARIANCE; MODEL \n5097 WOS:000596139400001 INDUSTRY 4.0; MANAGEMENT; RISK; ANALYTICS; CHA... \n11369 WOS:000436774300069 NaN \n11368 WOS:000846290700001 PARTIAL LEAST-SQUARES; INFRARED-SPECTROSCOPY; ... \n11362 WOS:000480527800025 MICROWAVE DIELECTRIC BEHAVIOR; GPS SIGNALS; RE... \n\n Author Keywords \n0 Imaging; Three-dimensional displays; Electroma... \\\n9714 NaN \n9697 Plastic hinge length; RC columns; Machine lear... \n9699 system reliability; jacket platform; beta-unzi... \n9701 Cyclone GNSS (CYGNSS); Sea surface wind speed;... \n... ... \n3066 sea surface temperature; sea surface temperatu... \n5097 Big data finance; Big data in financial servic... \n11369 planetary gear; fault diagnosis; VMD; center f... \n11368 soil fertility class; reflectance spectroscopy... \n11362 global navigation satellite system (GNSS)-refl... \n\n Article Title \n0 Artificial Intelligence: New Frontiers in Real... \\\n9714 Detecting causality from time series in a mach... \n9697 Data-Driven Approach to Predict the Plastic Hi... \n9699 System Reliability Analysis of an Offshore Jac... \n9701 Analysis of coastal wind speed retrieval from ... \n... ... \n3066 Improved Particle Swarm Optimization for Sea S... \n5097 Current landscape and influence of big data on... \n11369 Planetary Gear Fault Diagnosis via Feature Ima... \n11368 How Well Can Reflectance Spectroscopy Allocate... \n11362 GNSS-R Soil Moisture Retrieval Based on a XGbo... \n\n Abstract \n0 In recent years, artificial intelligence (AI) ... 
\n9714 Detecting causality from observational data is... \n9697 Inelastic response of reinforced concrete colu... \n9699 This study investigates strategies for solving... \n9701 This paper demonstrates the capability and per... \n... ... \n3066 The Sea Surface Temperature (SST) is one of th... \n5097 Big data is one of the most recent business an... \n11369 Poor working environment leads to frequent fai... \n11368 Fertilization decisions depend on the measurem... \n11362 Global navigation satellite system (GNSS)-refl... \n\n[9889 rows x 5 columns]",
"text/html": "