diff --git a/WOS/wos_analysis/wos_analyses.ipynb b/WOS/wos_analysis/wos_analyses.ipynb
index 6ee3ade..f9cd4dd 100644
--- a/WOS/wos_analysis/wos_analyses.ipynb
+++ b/WOS/wos_analysis/wos_analyses.ipynb
@@ -238094,7 +238094,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": "
"
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -239236,7 +239236,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -240285,7 +240285,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -243263,7 +243263,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -246137,7 +246137,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -248389,7 +248389,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -250526,7 +250526,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -250662,7 +250662,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "C:\\Users\\radvanyi\\AppData\\Local\\Temp\\ipykernel_20376\\1606125869.py:24: SettingWithCopyWarning:\n",
+ "C:\\Users\\radvanyi\\AppData\\Local\\Temp\\ipykernel_13036\\1606125869.py:24: SettingWithCopyWarning:\n",
"\n",
"\n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
@@ -250670,7 +250670,7 @@
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
"\n",
- "C:\\Users\\radvanyi\\AppData\\Local\\Temp\\ipykernel_20376\\1606125869.py:24: SettingWithCopyWarning:\n",
+ "C:\\Users\\radvanyi\\AppData\\Local\\Temp\\ipykernel_13036\\1606125869.py:24: SettingWithCopyWarning:\n",
"\n",
"\n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
@@ -250678,7 +250678,7 @@
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
"\n",
- "C:\\Users\\radvanyi\\AppData\\Local\\Temp\\ipykernel_20376\\1606125869.py:24: SettingWithCopyWarning:\n",
+ "C:\\Users\\radvanyi\\AppData\\Local\\Temp\\ipykernel_13036\\1606125869.py:24: SettingWithCopyWarning:\n",
"\n",
"\n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
@@ -251983,7 +251983,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -254032,7 +254032,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -256045,7 +256045,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -262809,7 +262809,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -269623,7 +269623,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -273697,7 +273697,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -278067,7 +278067,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -282558,7 +282558,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -287072,7 +287072,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -288332,7 +288332,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -289368,7 +289368,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -290332,7 +290332,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -292114,7 +292114,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -293847,7 +293847,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -295576,7 +295576,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -297312,7 +297312,7 @@
"plotlyServerURL": "https://plotly.com"
}
},
- "text/html": ""
+ "text/html": ""
},
"metadata": {},
"output_type": "display_data"
@@ -299127,7 +299127,7 @@
},
{
"cell_type": "code",
- "execution_count": 81,
+ "execution_count": 76,
"outputs": [],
"source": [
"%%capture\n",
@@ -299630,7 +299630,7 @@
" data = np.where(mask,inst_co_occur,inst_co_occur)\n",
"\n",
" fig = px.imshow(data,\n",
- " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),\n",
+ " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),text_auto=True,\n",
" x=list(inst_co_occur.columns),\n",
" y=list(inst_co_occur.index), title=f\"Top {TOPN} institutes within Europe ({t})\"\n",
" )\n",
@@ -299683,7 +299683,7 @@
" mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n",
" data = np.where(mask,inst_co_occur,inst_co_occur)\n",
" fig = px.imshow(data,\n",
- " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),\n",
+ " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),text_auto=True,\n",
" x=list(inst_co_occur.columns),\n",
" y=list(inst_co_occur.index), title=f\"Top {TOPN} institutes within EU-28 ({t})\"\n",
" )\n",
@@ -299721,6 +299721,862 @@
"metadata": {
"collapsed": false
}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "# Trending fields\n",
+ "# Built environment & design\n",
+ "\n",
+ "# Adding trending subfields\n",
+ "# Applied sciences\n",
+ "trending_topics = [\"Distributed Computing\", \"Nanoscience & Nanotechnology\", \"Building & Construction\"]\n",
+ "\n",
+ "# Natural Sciences -> Analytical chemistry\n",
+ "\n",
+ "trending_topics = [\"Distributed Computing\", \"Nanoscience & Nanotechnology\", \"Building & Construction\",\"Analytical Chemistry\"]\n",
+ "for t in trending_topics:\n",
+ " os.makedirs(rf'plot_html/PPT_plots/trending_topics/{t}',exist_ok=True)\n",
+ "\n",
+ " if t == \"Analytical Chemistry\":\n",
+ " subset = \"Natural Sciences\"\n",
+ " else:\n",
+ " subset = \"Applied Sciences\"\n",
+ "\n",
+ "\n",
+ " id_subset = wos[((wos[\"Domain_English\"]==subset)&\n",
+ " (wos[\"SubField_English\"]==t))][record_col].unique()\n",
+ "\n",
+ " data = (wos[wos[record_col].isin(id_subset)]\n",
+ " .groupby(['Publication Year','SubField_English'],)[record_col].nunique(dropna=False).unstack()\n",
+ " .fillna(0)\n",
+ " .stack()\n",
+ " .reset_index()\n",
+ " .rename(columns={0:record_col}))\n",
+ " print(data)\n",
+ "\n",
+ " data = data.merge(wos[wos[record_col].isin(id_subset)][[\"Domain_English\",'SubField_English']].drop_duplicates(),on=\"SubField_English\")\n",
+ "\n",
+ " data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset='SubField_English'),\n",
+ " on='SubField_English', suffixes=[None,\"_relative_growth\"])\n",
+ " data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]\n",
+ "\n",
+ " data = data.sort_values(by =[\"SubField_English\",\"Publication Year\"], ascending=[True,True])\n",
+ " data[record_col+\"_cumsum\"] = (data.groupby('SubField_English',as_index=False)[record_col].cumsum())\n",
+ "\n",
+ " # country contributions\n",
+ " wos_univ_locations = wos_univ[wos_univ[record_col].isin(id_subset)].merge(wos_country_types, on=\"Country\")\n",
+ " wos_collabs = wos_univ_locations[wos_univ_locations[\"Country_Type\"]!=\"Other\"][[record_col,\"Country\"]].drop_duplicates()\n",
+ "\n",
+ " collab_desc = wos_collabs[wos_collabs[\"Country\"]!=\"China\"][\"Country\"].value_counts().reset_index()\n",
+ " collab_desc[\"percent_of_copubs\"] = collab_desc[\"count\"]/wos_collabs[record_col].nunique()#*100\n",
+ " collab_desc[\"percent_contrib_in_copubs\"] = collab_desc[\"count\"]/wos_collabs[record_col].size#*100\n",
+ " collab_desc = collab_desc.merge(wos_country_types, on=\"Country\")\n",
+ " # collab_desc\n",
+ "\n",
+ " c_dict = {\"count\":\"Number of co-publications\",\n",
+ " \"percent_of_copubs\":\"Percent of co-publications\",\n",
+ " \"percent_contrib_in_copubs\":\"Contribution to co-publications\"}\n",
+ "\n",
+ " color_discrete_map= {'China': '#EF553B',\n",
+ " 'EU': '#636EFA',\n",
+ " 'Non-EU associate': '#00CC96'}\n",
+ "\n",
+ " fig_dict = dict()\n",
+ " for c in c_dict.keys():\n",
+ " data = collab_desc[[\"Country\",c,\"Country_Type\"]]\n",
+ " data[\"ISO3\"] = cc.pandas_convert(series=data[\"Country\"], to='ISO3')\n",
+ " col_by=\"Country_Type\"\n",
+ " y_lab=\"ISO3\"\n",
+ " fig = px.bar(data, x=c, y=y_lab, color=col_by, color_discrete_map=color_discrete_map,text_auto=True,\n",
+ " labels=dict({\n",
+ " record_col: 'Number of co-publications',\n",
+ " \"Institution_harm\": \"Institution\",\n",
+ " \"Institution_harm_label\": \"Institution\",\n",
+ " \"Country_Type\":\"Country type\",\n",
+ " \"Eurovoc_Class\":\"Region\"\n",
+ " },**c_dict),\n",
+ " title=c_dict[c], template='plotly')\n",
+ " fig.update_layout(xaxis_tickformat='d',font_family=\"Montserrat\",\n",
+ " yaxis={'categoryorder':'total ascending'},\n",
+ " width=1000, height=1000,)\n",
+ " if \"percent\" in c:\n",
+ " fig.update_traces(hovertemplate='%{y}
%{x}')\n",
+ " fig.update_xaxes(tickformat=\".1%\")\n",
+ " else:\n",
+ " fig.update_traces(hovertemplate='%{y}
%{x:d}')\n",
+ " fig_dict[c] = go.Figure(fig)\n",
+ "\n",
+ " figsuper = make_subplots(rows=1, cols=3, subplot_titles =list(c_dict.values()))\n",
+ " for i,f in enumerate(fig_dict.keys()):\n",
+ " sfig = fig_dict[f]\n",
+ " for trace in list(sfig.select_traces()):\n",
+ " trace.showlegend=False\n",
+ " figsuper.add_trace(trace,\n",
+ " row=1, col=i+1)\n",
+ "\n",
+ " figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative',yaxis2={'categoryorder':'total ascending'},yaxis3={'categoryorder':'total ascending'})\n",
+ " figsuper.update_yaxes(\n",
+ " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
+ " ticks=\"outside\")\n",
+ " figsuper.update_xaxes(\n",
+ " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
+ " ticks=\"outside\")\n",
+ " figsuper.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n",
+ " figsuper['layout']['xaxis1'].update(tickformat=\".0f\")\n",
+ " figsuper['layout']['xaxis2'].update(tickformat=\".1%\")\n",
+ " figsuper['layout']['xaxis3'].update(tickformat=\".1%\")\n",
+ " figsuper['layout'][\"font\"][\"size\"]=12\n",
+ " for a in figsuper['layout'][\"annotations\"]:\n",
+ " a[\"font\"][\"size\"] = 14\n",
+ " figsuper[\"layout\"][\"yaxis\"][\"tickfont\"][\"size\"] = 10\n",
+ " figsuper[\"layout\"][\"yaxis2\"][\"tickfont\"][\"size\"] = 10\n",
+ " figsuper[\"layout\"][\"yaxis3\"][\"tickfont\"][\"size\"] = 10\n",
+ "\n",
+ " figsuper.update_layout(uniformtext_minsize=10)\n",
+ " figsuper.update_layout(title=f\"Contribution of european countries ({t})\")\n",
+ " # figsuper.write_html(f\"plot_html/{cat}/{cat}_europe_contribution_bar.html\",config= dict(displayModeBar = False, responsive = True))\n",
+ "\n",
+ " figsuper_ppt = go.Figure(figsuper)\n",
+ "\n",
+ " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n",
+ " for a in figsuper_ppt['layout'][\"annotations\"]:\n",
+ " a[\"font\"][\"size\"] = 22\n",
+ " figsuper_ppt.show()\n",
+ " figsuper_ppt.update_yaxes(tickfont=dict(size=18))\n",
+ " figsuper_ppt.write_image(f\"plot_html/PPT_plots/trending_topics/{t}/{t}_europe_contribution_bar.png\",height=900,width=1600,scale = 4)\n",
+ "\n",
+ "\n",
+ " # intraeurope collabs\n",
+ " wos_collabs_EU = wos_univ_locations[~wos_univ_locations[\"Country_Type\"].isin([\"Other\",\"China\"])][[record_col,\"Country\"]].drop_duplicates()\n",
+ " wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)\n",
+ " EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique').fillna(0).astype(int)\n",
+ "\n",
+ "\n",
+ " eu_list = wos_collabs_EU.groupby(['Country_x'])[record_col].count().sort_values(ascending=False).index\n",
+ "\n",
+ " EU_co_occur = EU_co_occur.reindex(index = eu_list, columns=eu_list)\n",
+ "\n",
+ " # Generate a mask for the upper triangle\n",
+ " mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))\n",
+ " data = np.where(mask,None,EU_co_occur)\n",
+ "\n",
+ " fig = px.imshow(data,\n",
+ " labels=dict(x=\"Country\", y=\"Country\", color=\"Co-publication with China\"),\n",
+ " x=list(EU_co_occur.columns),\n",
+ " y=list(EU_co_occur.index), title=f\"Intraeuropean patterns: Co-occurences of countries in chinese co-publications ({t})\"\n",
+ " )\n",
+ " fig.update_layout(\n",
+ " width=1000, height=1000,\n",
+ " xaxis_showgrid=False,\n",
+ " yaxis_showgrid=False,\n",
+ " yaxis_autorange='reversed', template='plotly_white',font_family=\"Montserrat\",)\n",
+ " # fig.update_traces(hovertemplate='%{y}<br>%{x}<br>Co-publications: %{hovertext}')\n",
+ " fig.update_xaxes(tickangle= -90)\n",
+ " fig.update_yaxes(\n",
+ " ticks=\"outside\")\n",
+ " fig.update_xaxes(\n",
+ " ticks=\"outside\")\n",
+ " # fig.write_html(f\"plot_html/{cat}/{cat}_intraeurope_collabs.html\",config= dict(displayModeBar = False, responsive = True))\n",
+ " figsuper_ppt = go.Figure(fig)\n",
+ "\n",
+ " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n",
+ " for a in figsuper_ppt['layout'][\"annotations\"]:\n",
+ " a[\"font\"][\"size\"] = 22\n",
+ "\n",
+ " s=16\n",
+ " figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n",
+ " figsuper_ppt.update_xaxes(tickfont=dict(size=s),tickangle=45)\n",
+ " figsuper_ppt.write_image(f\"plot_html/PPT_plots/trending_topics/{t}/{t}_intraeurope_collabs.png\",height=900,width=1600,scale = 4)\n",
+ "\n",
+ "\n",
+ " # country trends\n",
+ " collab_year = wos_collabs[wos_collabs[\"Country\"]!=\"China\"].copy()\n",
+ " collab_year = collab_year.merge(wos_country_types, on=\"Country\").merge(wos[[record_col,\"Publication Year\"]],on=record_col).drop_duplicates()\n",
+ "\n",
+ " data = (collab_year.groupby(['Publication Year',\"Country\"])[record_col]\n",
+ " .nunique(dropna=False).unstack()\n",
+ " .fillna(0)\n",
+ " .stack()\n",
+ " .reset_index()\n",
+ " .rename(columns={0:record_col}))\n",
+ " data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset=\"Country\"),\n",
+ " on=[\"Country\"], suffixes=[None,\"_relative_growth\"])\n",
+ " data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]\n",
+ " data = data.sort_values(by =[\"Country\",\"Publication Year\"], ascending=[True,True])\n",
+ " data[record_col+\"_cumsum\"] = (data.groupby('Country',as_index=False)[record_col].cumsum())\n",
+ " data = data.merge(wos_country_types, on='Country')\n",
+ "\n",
+ " yearsum = collab_year.groupby(\"Publication Year\")[record_col].nunique().reset_index().rename(columns={record_col:\"year_unique\"})\n",
+ " data = data.merge(yearsum, on=\"Publication Year\")\n",
+ " data[\"pub_output_percent\"] = data[record_col]/data[\"year_unique\"]\n",
+ " data[\"ISO3\"] = cc.pandas_convert(series=data[\"Country\"], to='ISO3')\n",
+ "\n",
+ "\n",
+ " fig = px.choropleth(data[data[\"Publication Year\"] == 2022], locations=\"ISO3\", color=record_col+\"_cumsum\", hover_name=\"Country\",\n",
+ " scope=\"europe\", template='plotly',\n",
+ " range_color=[data[record_col+\"_cumsum\"].min(),data[record_col+\"_cumsum\"].max()],hover_data=[\"Eurovoc_Class\"])\n",
+ "\n",
+ " fig.update_traces(hovertemplate='%{hovertext}'\n",
+ " '
Region: %{customdata[0]}
'\n",
+ " 'Co-pubications: %{z:d}')\n",
+ "\n",
+ " cumsum_country = go.Figure(fig)\n",
+ "\n",
+ " figsuper = make_subplots(rows=3, cols=2, subplot_titles=[\"Number of publications (2022)\",\"Cumulative number of co-publications\",\n",
+ " \"Yearly output of co-publications\",\"Relative growth of co-publications\"],\n",
+ " specs=[\n",
+ " [{\"type\": \"geo\", \"rowspan\":3}, {\"type\": \"xy\"}],\n",
+ " [None,{\"type\": \"xy\"}],\n",
+ " [None, {\"type\": \"xy\"}]\n",
+ " ])\n",
+ "\n",
+ " for trace in list(cumsum_country.select_traces()):\n",
+ " figsuper.add_trace(trace,\n",
+ " row=1, col=1\n",
+ " )\n",
+ "\n",
+ " fig = px.area(data.sort_values(ascending=True, by='Publication Year'), y=record_col+\"_cumsum\",\n",
+ " x='Publication Year',\n",
+ " color=\"Eurovoc_Class\",\n",
+ " line_group=\"Country\",\n",
+ " labels={\n",
+ " record_col: 'Number of co-publications',\n",
+ " \"Eurovoc_Class\": \"Region\"\n",
+ " },\n",
+ " title=\"Cumulative number of co-publications\",\n",
+ " hover_name= \"Country\")\n",
+ " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Co-publications: %{y}')\n",
+ "\n",
+ " for trace in list(fig.select_traces()):\n",
+ " figsuper.add_trace(trace,\n",
+ " row=1, col=2\n",
+ " )\n",
+ "\n",
+ "\n",
+ " fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n",
+ " y=record_col,\n",
+ " x='Publication Year',\n",
+ " color=\"Eurovoc_Class\",\n",
+ " line_group=\"Country\",\n",
+ " markers=True,\n",
+ " labels={\n",
+ " record_col: 'Number of co-publications',\n",
+ " \"Eurovoc_Class\": \"Region\"\n",
+ " },\n",
+ " title=\"Yearly output of co-publications\",hover_name= \"Country\")\n",
+ " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Co-publications: %{y}')\n",
+ "\n",
+ " for trace in list(fig.select_traces()):\n",
+ " trace.showlegend=False\n",
+ " figsuper.add_trace(trace,\n",
+ " row=2, col=2\n",
+ " )\n",
+ "\n",
+ " fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n",
+ " y=record_col+\"_relative_growth\",\n",
+ " x='Publication Year',\n",
+ " color=\"Eurovoc_Class\",line_group=\"Country\",markers=True,\n",
+ " labels={\n",
+ " record_col+\"_relative_growth\": 'Relative growth of co-publications (%)',\"Eurovoc_Class\": \"Region\"\n",
+ " },\n",
+ " title=\"Relative growth of co-publications\", template='plotly',hover_name= \"Country\")\n",
+ " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Relative growth: %{y}%')\n",
+ " fig.add_shape(\n",
+ " # Rectangle with reference to the plot\n",
+ " type=\"rect\",\n",
+ " xref=\"paper\",\n",
+ " yref=\"paper\",\n",
+ " x0=0,\n",
+ " y0=0,\n",
+ " x1=1.0,\n",
+ " y1=1.0,\n",
+ " line=dict(\n",
+ " color=\"black\",\n",
+ " width=0.5,\n",
+ " )\n",
+ " )\n",
+ "\n",
+ " for trace in list(fig.select_traces()):\n",
+ " trace.showlegend=False\n",
+ " figsuper.add_trace(trace,\n",
+ " row=3, col=2\n",
+ " )\n",
+ "\n",
+ " figsuper.update_yaxes(\n",
+ " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
+ " ticks=\"outside\")\n",
+ " figsuper.update_xaxes(\n",
+ " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
+ " ticks=\"outside\")\n",
+ " figsuper.update_layout({'template':\"plotly\"})\n",
+ " figsuper.update_layout(font_family=\"Montserrat\")\n",
+ " figsuper.layout[\"geo\"][\"scope\"] = 'europe'\n",
+ " figsuper.update_coloraxes(colorbar=dict(lenmode='fraction',len=0.55, orientation=\"v\",yanchor='top', title=\"Co-publications\",\n",
+ " ticks=\"outside\", ticksuffix=\" \",outlinewidth=0.5))\n",
+ " for i in[\"xaxis\",\"xaxis2\",\"xaxis3\"]:\n",
+ " figsuper['layout'][f'{i}'][\"range\"] = [2010.8,2022.2]\n",
+ " # figsuper.write_html(f\"plot_html/{cat}/{cat}_country_trends_overall.html\",config= dict(displayModeBar = False, responsive = True))\n",
+ "\n",
+ " figsuper_ppt = go.Figure(figsuper)\n",
+ "\n",
+ " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n",
+ " for a in figsuper_ppt['layout'][\"annotations\"]:\n",
+ " a[\"font\"][\"size\"] = 22\n",
+ " figsuper_ppt.show()\n",
+ " figsuper_ppt.update_yaxes(tickfont=dict(size=18))\n",
+ " figsuper_ppt.write_image(f\"plot_html/PPT_plots/trending_topics/{t}/{t}_country_trends_overall.png\",height=900,width=1600,scale = 4)\n",
+ "\n",
+ " fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n",
+ " y=record_col,\n",
+ " x='Publication Year',\n",
+ " color=\"Eurovoc_Class\",\n",
+ " line_group=\"Country\",facet_col=\"Country\",facet_col_wrap=6,category_orders={\"Country\": sorted(data[\"Country\"].unique())},\n",
+ " markers=True,\n",
+ " labels={\n",
+ " record_col: 'Number of co-publications',\n",
+ " \"Eurovoc_Class\": \"Region\"\n",
+ " },\n",
+ " title=f\"Yearly output of co-publications ({t})\",hover_name= \"Country\")\n",
+ " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Co-publications: %{y}')\n",
+ " fig.update_yaxes(\n",
+ " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
+ " ticks=\"outside\")\n",
+ " fig.update_xaxes(\n",
+ " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
+ " ticks=\"outside\")\n",
+ " fig.update_layout({'template':\"plotly\"})\n",
+ " fig.update_layout(font_family=\"Montserrat\")\n",
+ " fig.update_yaxes(title='')\n",
+ " fig.update_xaxes(title='')\n",
+ " fig.for_each_annotation(lambda a: a.update(text=a.text.split(\"=\")[-1]))\n",
+ " fig.show(config= dict(displayModeBar = False, responsive = True))\n",
+ " figsuper_ppt = go.Figure(fig)\n",
+ "\n",
+ " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n",
+ " for a in figsuper_ppt['layout'][\"annotations\"]:\n",
+ " a[\"font\"][\"size\"] = 22\n",
+ "\n",
+ " s=16\n",
+ " figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n",
+ " figsuper_ppt.update_xaxes(tickfont=dict(size=s),tickangle=45)\n",
+ " figsuper_ppt.write_image(f\"plot_html//PPT_plots/trending_topics/{t}/{t}_country_year_trends.png\",height=900,width=1600,scale = 4)\n",
+ "\n",
+ "\n",
+ " fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n",
+ " y=record_col+\"_relative_growth\",\n",
+ " x='Publication Year',\n",
+ " color=\"Eurovoc_Class\",line_group=\"Country\",markers=True,facet_col=\"Country\",facet_col_wrap=6,category_orders={\"Country\": sorted(data[\"Country\"].unique())},\n",
+ " labels={\n",
+ " record_col+\"_relative_growth\": 'Relative growth of co-publications (%)',\"Eurovoc_Class\": \"Region\"\n",
+ " },\n",
+ " title=f\"Relative growth of co-publication output ({t})\", template='plotly',hover_name= \"Country\")\n",
+ " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Relative growth: %{y}')\n",
+ "\n",
+ "\n",
+ " fig.update_yaxes(\n",
+ " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
+ " ticks=\"outside\")\n",
+ " fig.update_xaxes(\n",
+ " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
+ " ticks=\"outside\")\n",
+ " # for candidate in fig[\"layout\"].keys():\n",
+ " # if \"yaxis\" in candidate:\n",
+ " # fig[\"layout\"][candidate].update(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey',tickformat=\".0%\")\n",
+ " fig.update_layout({'template':\"plotly\"})\n",
+ " fig.update_layout(font_family=\"Montserrat\")\n",
+ " fig.update_yaxes(title='',zeroline=True, zerolinewidth=0.5, zerolinecolor='grey',tickformat=\".0%\")\n",
+ " fig.update_xaxes(title='')\n",
+ " fig.for_each_annotation(lambda a: a.update(text=a.text.split(\"=\")[-1]))\n",
+ " figsuper_ppt = go.Figure(fig)\n",
+ "\n",
+ " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n",
+ " for a in figsuper_ppt['layout'][\"annotations\"]:\n",
+ " a[\"font\"][\"size\"] = 22\n",
+ "\n",
+ " s=16\n",
+ " figsuper_ppt.update_yaxes(title='',zeroline=True, zerolinewidth=2, zerolinecolor='grey',tickformat=\".0%\")\n",
+ " figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n",
+ " figsuper_ppt.update_xaxes(tickfont=dict(size=s),tickangle=45)\n",
+ " figsuper_ppt.write_image(f\"plot_html/PPT_plots/trending_topics/{t}/{t}_country_year_rel_trends.png\",height=900,width=1600,scale = 4)\n",
+ "\n",
+ " fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n",
+ " y=\"pub_output_percent\",\n",
+ " x='Publication Year',\n",
+ " color=\"Eurovoc_Class\",line_group=\"Country\",markers=True,facet_col=\"Country\",facet_col_wrap=6,category_orders={\"Country\": sorted(data[\"Country\"].unique())},\n",
+ " labels={\n",
+ " record_col+\"_relative_growth\": 'Relative growth of co-publications (%)',\"Eurovoc_Class\": \"Region\"\n",
+ " },\n",
+ " title=f\"Relative changes in co-publication focus of China ({t})\", template='plotly',hover_name= \"Country\")\n",
+ " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Relative growth: %{y}')\n",
+ "\n",
+ "\n",
+ " fig.update_yaxes(\n",
+ " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
+ " ticks=\"outside\")\n",
+ " fig.update_xaxes(\n",
+ " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
+ " ticks=\"outside\")\n",
+ " # for candidate in fig[\"layout\"].keys():\n",
+ " # if \"yaxis\" in candidate:\n",
+ " # fig[\"layout\"][candidate].update(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey',tickformat=\".0%\")\n",
+ " fig.update_layout({'template':\"plotly\"})\n",
+ " fig.update_layout(font_family=\"Montserrat\")\n",
+ " fig.update_yaxes(title='',zeroline=True, zerolinewidth=0.5, zerolinecolor='grey',tickformat=\".0%\")\n",
+ " fig.update_xaxes(title='')\n",
+ " fig.for_each_annotation(lambda a: a.update(text=a.text.split(\"=\")[-1]))\n",
+ " figsuper_ppt = go.Figure(fig)\n",
+ "\n",
+ " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n",
+ " for a in figsuper_ppt['layout'][\"annotations\"]:\n",
+ " a[\"font\"][\"size\"] = 22\n",
+ "\n",
+ " s=16\n",
+ " figsuper_ppt.update_yaxes(title='',zeroline=True, zerolinewidth=2, zerolinecolor='grey',tickformat=\".0%\")\n",
+ " figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n",
+ " figsuper_ppt.update_xaxes(tickfont=dict(size=s),tickangle=45)\n",
+ " figsuper_ppt.write_image(f\"plot_html/PPT_plots/trending_topics/{t}/{t}_country_year_rel_focus_trend.png\",height=900,width=1600,scale = 4)\n",
+ "\n",
+ "\n",
+ "\n",
+ " TOPN = 15\n",
+ " wos_univ_locations = wos_univ[wos_univ[record_col].isin(id_subset)].merge(wos_country_types, on=\"Country\")\n",
+ " wos_univ_collabs = wos_univ_locations[wos_univ_locations[\"Country_Type\"]!=\"Other\"][[record_col,\"Country\",\"Institution_harm\",\"Country_Type\",\"Eurovoc_Class\"]].drop_duplicates()\n",
+ " wos_univ_collabs[\"ISO3\"] = cc.pandas_convert(series=wos_univ_collabs[\"Country\"], to='ISO3')\n",
+ " wos_univ_collabs[\"Institution_harm_label\"] = wos_univ_collabs[\"Institution_harm\"] + \" (\"+wos_univ_collabs[\"ISO3\"]+ \")\"\n",
+ "\n",
+ "\n",
+ " wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"China\"]\n",
+ " wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]!=\"China\"]\n",
+ "\n",
+ " wos_univ_eu_strict = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"EU\"]\n",
+ "\n",
+ " data_eu = (wos_univ_eu.groupby([\"Country\",\"Institution_harm_label\",\"Country_Type\"], as_index=False)[record_col].nunique()\n",
+ " .sort_values(by=record_col,ascending=False).head(TOPN).copy()).sort_values(by=\"Country_Type\")\n",
+ "\n",
+ " data_eu_strict = (wos_univ_eu_strict.groupby([\"Country\",\"Institution_harm_label\",\"Eurovoc_Class\"], as_index=False)[record_col].nunique()\n",
+ " .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n",
+ "\n",
+ " data_ch = (wos_univ_ch.groupby([\"Country\",\"Institution_harm\",\"Country_Type\"], as_index=False)[record_col].nunique()\n",
+ " .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n",
+ "\n",
+ "\n",
+ " for data,c_scope, y_lab, col_by, pat in zip([data_eu,data_eu_strict,data_ch],\n",
+ " [\"Europe\",\"EU-28 only\",\"China\"],\n",
+ " [\"Institution_harm_label\",\"Institution_harm_label\",\"Institution_harm\"],\n",
+ " [\"Country\",\"Eurovoc_Class\",\"Country_Type\"],\n",
+ " [\"Country_Type\",None,None]):\n",
+ " fig = px.bar(data, x=record_col, y=y_lab, color=col_by, color_discrete_map=color_discrete_map,pattern_shape=pat,text_auto=True,\n",
+ " labels={\n",
+ " record_col: 'Number of co-publications',\n",
+ " \"Institution_harm\": \"Institution\",\n",
+ " \"Institution_harm_label\": \"Institution\",\n",
+ " \"Country_Type\":\"Country type\",\n",
+ " \"Eurovoc_Class\":\"Region\"\n",
+ " },\n",
+ " title=f\"Top {TOPN} institutes within {c_scope}
({t})\", template='plotly')\n",
+ " fig.update_layout(xaxis_tickformat='d',font_family=\"Montserrat\",yaxis={'categoryorder':'total ascending'},\n",
+ " width=1000, height=1000,)\n",
+ " fig.update_traces(hovertemplate='%{x:d}')\n",
+ " fig.add_shape(\n",
+ " # Rectangle with reference to the plot\n",
+ " type=\"rect\",\n",
+ " xref=\"paper\",\n",
+ " yref=\"paper\",\n",
+ " x0=0,\n",
+ " y0=0,\n",
+ " x1=1.0,\n",
+ " y1=1.0,\n",
+ " line=dict(\n",
+ " color=\"black\",\n",
+ " width=0.5,\n",
+ " )\n",
+ " )\n",
+ " fig.update_yaxes(\n",
+ " showgrid=True,\n",
+ " ticks=\"outside\")\n",
+ " fig.update_xaxes(\n",
+ " showgrid=True,\n",
+ " ticks=\"outside\")\n",
+ " # fig.write_html(f\"plot_html/{cat}/{cat}_overall_inst_collab_bar_{c_scope}.html\",config= dict(displayModeBar = False, responsive = True))\n",
+ " # fig.write_image(f\"plot_html/overall_inst_collab_bar_{c_scope}.svg\",height=800,width=1600)\n",
+ " fig.write_image(f\"plot_html/overall_inst_collab_bar_{c_scope}.png\",height=800,width=1600)\n",
+ " figsuper_ppt = go.Figure(fig)\n",
+ " figsuper_ppt.update_traces(textposition='inside')\n",
+ " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n",
+ "\n",
+ " s=16\n",
+ " figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n",
+ " figsuper_ppt.update_xaxes(tickfont=dict(size=s))\n",
+ " figsuper_ppt.write_image(f\"plot_html/PPT_plots/trending_topics/{t}/{t}_overall_inst_collab_bar_{c_scope}.png\",height=900,width=1000,scale = 4)\n",
+ "\n",
+ " wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"China\"]\n",
+ " wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]!=\"China\"]\n",
+ "\n",
+ " wos_univ_dipol = wos_univ_eu.merge(wos_univ_ch, on=record_col, suffixes=('_eu', '_ch')).merge(wos[[record_col,\"Domain_English\",\"Field_English\",\"SubField_English\"]], on =record_col)\n",
+ "\n",
+ " subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu[\"Institution_harm_label\"]))&\n",
+ " (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n",
+ "\n",
+ " fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Institution_harm_eu\",\"Domain_English\",\"Institution_harm_ch\"]])\n",
+ " # fig.show()\n",
+ " sub_df = wos_univ_dipol[subfilter]\n",
+ "\n",
+ " inst_co_occur = pd.crosstab(sub_df['Institution_harm_label_eu'], sub_df['Institution_harm_ch'],\n",
+ " values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)\n",
+ "\n",
+ " eu_list = sub_df.groupby(['Institution_harm_label_eu'])[record_col].count().sort_values(ascending=False).index\n",
+ " ch_list = sub_df.groupby(['Institution_harm_ch'])[record_col].count().sort_values(ascending=False).index\n",
+ "\n",
+ " inst_co_occur = inst_co_occur.reindex(index = eu_list, columns=ch_list)\n",
+ "\n",
+ " mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n",
+ " data = np.where(mask,inst_co_occur,inst_co_occur)\n",
+ "\n",
+ " fig = px.imshow(data,\n",
+ " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),text_auto=True,\n",
+ " x=list(inst_co_occur.columns),\n",
+ " y=list(inst_co_occur.index), title=f\"Top {TOPN} institutes within Europe ({t})\"\n",
+ " )\n",
+ " fig.update_layout(\n",
+ " width=1000, height=1000,\n",
+ " xaxis_showgrid=False,\n",
+ " yaxis_showgrid=False,\n",
+ " yaxis_autorange='reversed',\n",
+ " template='plotly_white',font_family=\"Montserrat\",\n",
+ " coloraxis_colorbar=dict(\n",
+ " thicknessmode=\"pixels\", thickness=25,\n",
+ " ticks=\"outside\", ticksuffix=\" \",\n",
+ " dtick=20,outlinewidth=1,\n",
+ " ))\n",
+ " fig.update_xaxes(tickangle= -45)\n",
+ " fig.update_yaxes(\n",
+ " ticks=\"outside\")\n",
+ " fig.update_xaxes(\n",
+ " ticks=\"outside\")\n",
+ "\n",
+ " # fig.write_html(f\"plot_html/{cat}/{cat}_overall_inst_collab_europe.html\",config= dict(displayModeBar = False, responsive = True))\n",
+ " figsuper_ppt = go.Figure(fig)\n",
+ "\n",
+ " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n",
+ " for a in figsuper_ppt['layout'][\"annotations\"]:\n",
+ " a[\"font\"][\"size\"] = 22\n",
+ "\n",
+ " s=16\n",
+ " figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n",
+ " figsuper_ppt.update_xaxes(tickfont=dict(size=s),tickangle=45)\n",
+ " figsuper_ppt.write_image(f\"plot_html/PPT_plots/trending_topics/{t}/{t}_overall_inst_collab_europe.png\",height=900,width=1600,scale = 4)\n",
+ "\n",
+ "\n",
+ "\n",
+ " subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu_strict[\"Institution_harm_label\"]))&\n",
+ " (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n",
+ "\n",
+ " fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Institution_harm_eu\",\"Domain_English\",\"Institution_harm_ch\"]])\n",
+ " # fig.show()\n",
+ " sub_df =wos_univ_dipol[subfilter]\n",
+ "\n",
+ " inst_co_occur = pd.crosstab(sub_df['Institution_harm_label_eu'], sub_df['Institution_harm_ch'],\n",
+ " values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)\n",
+ "\n",
+ " eu_list = sub_df.groupby(['Institution_harm_label_eu'])[record_col].count().sort_values(ascending=False).index\n",
+ " ch_list = sub_df.groupby(['Institution_harm_ch'])[record_col].count().sort_values(ascending=False).index\n",
+ "\n",
+ " inst_co_occur = inst_co_occur.reindex(index = eu_list, columns=ch_list)\n",
+ "\n",
+ " mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n",
+ " data = np.where(mask,inst_co_occur,inst_co_occur)\n",
+ " fig = px.imshow(data,\n",
+ " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),text_auto=True,\n",
+ " x=list(inst_co_occur.columns),\n",
+ " y=list(inst_co_occur.index), title=f\"Top {TOPN} institutes within EU-28 ({t})\"\n",
+ " )\n",
+ " fig.update_layout(\n",
+ " width=1000, height=1000,\n",
+ " xaxis_showgrid=False,\n",
+ " yaxis_showgrid=False,\n",
+ " yaxis_autorange='reversed',\n",
+ " template='plotly_white',font_family=\"Montserrat\",\n",
+ " coloraxis_colorbar=dict(\n",
+ " thicknessmode=\"pixels\", thickness=25,\n",
+ " ticks=\"outside\", ticksuffix=\" \",\n",
+ " dtick=20,outlinewidth=1,\n",
+ " ))\n",
+ " fig.update_xaxes(tickangle= -45)\n",
+ " fig.update_yaxes(\n",
+ " ticks=\"outside\")\n",
+ " fig.update_xaxes(\n",
+ " ticks=\"outside\")\n",
+ "\n",
+ " # fig.show(config= dict(displayModeBar = False))\n",
+ " # fig.write_html(f\"plot_html/{cat}/{cat}_overall_inst_collab_eu28.html\",config= dict(displayModeBar = False, responsive = True))\n",
+ "\n",
+ " figsuper_ppt = go.Figure(fig)\n",
+ "\n",
+ " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n",
+ " for a in figsuper_ppt['layout'][\"annotations\"]:\n",
+ " a[\"font\"][\"size\"] = 22\n",
+ "\n",
+ " s=16\n",
+ " figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n",
+ " figsuper_ppt.update_xaxes(tickfont=dict(size=s),tickangle=45)\n",
+ " figsuper_ppt.write_image(f\"plot_html/PPT_plots/trending_topics/{t}/{t}_overall_inst_collab_eu28.png\",height=900,width=1600,scale = 4)\n"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 97,
+ "outputs": [],
+ "source": [
+ "# Adding emphasized countries\n",
+ "# General: Croatia, Cyprus, Luxembourg\n",
+ "#\n",
+ "# For every highlighted country this cell writes PNG files into\n",
+ "# plot_html/PPT_plots/highlight_countries/general_<country>/ :\n",
+ "#   sunburst.png         - domain / field / subfield breakdown of the records listing the country\n",
+ "#   inst_bar_<scope>.png - top-TOPN institutions by distinct records (the country itself, then China)\n",
+ "#   inst_collab.png      - heatmap of distinct records shared between those two top lists\n",
+ "# It relies on names created by earlier cells: wos, wos_country, wos_univ, wos_country_types,\n",
+ "# record_col, replace_nth, cc, color_discrete_map (plus the os / pd / px / go imports).\n",
+ "os.makedirs(\"plot_html/PPT_plots/highlight_countries\", exist_ok=True)\n",
+ "\n",
+ "# Loop-invariant settings, hoisted out of the per-country loop.\n",
+ "t = \"General\"  # scope label shown in the heatmap title\n",
+ "TOPN = 10  # number of institutions kept per side\n",
+ "groups = ['Domain_English',\"Field_English\",'SubField_English']  # sunburst path, root to leaves\n",
+ "\n",
+ "for c in [\"Croatia\",\"Cyprus\",\"Luxembourg\"]:\n",
+ "    os.makedirs(f\"plot_html/PPT_plots/highlight_countries/general_{c}\", exist_ok=True)\n",
+ "\n",
+ "    # IDs of every record that lists country c.\n",
+ "    id_subset = wos_country[wos_country[\"Country\"]==c][record_col].unique()\n",
+ "\n",
+ "    # --- sunburst distribution ---\n",
+ "    metrix_counts = (wos[wos[record_col].isin(id_subset)]\n",
+ "                     .groupby(groups, as_index=False)[record_col]\n",
+ "                     .nunique()\n",
+ "                     .sort_values(ascending=False, by=record_col))\n",
+ "    metrix_counts[\"percent\"] = metrix_counts[record_col]/metrix_counts[record_col].sum()*100\n",
+ "    metrix_counts[groups] = metrix_counts[groups].applymap(replace_nth)\n",
+ "    fig = px.sunburst(metrix_counts, path=groups, values=record_col,\n",
+ "                      color='Domain_English', template='plotly')\n",
+ "    fig.update_traces(textinfo=\"label+value+percent root\")\n",
+ "    # NOTE(review): this literal was split by a raw line break in the saved notebook;\n",
+ "    # <br> is assumed to be the intended separator - confirm against the rendered hover text.\n",
+ "    fig.update_traces(hovertemplate='%{id}<br>%{value}')\n",
+ "    metrix_distr = go.Figure(fig)\n",
+ "    metrix_distr.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n",
+ "    figsuper_ppt = go.Figure(metrix_distr)\n",
+ "\n",
+ "    # Enlarge fonts for the slide export.\n",
+ "    figsuper_ppt['layout'][\"font\"][\"size\"]=22\n",
+ "    for a in figsuper_ppt['layout'][\"annotations\"]:\n",
+ "        a[\"font\"][\"size\"] = 22\n",
+ "    figsuper_ppt.write_image(f\"plot_html/PPT_plots/highlight_countries/general_{c}/sunburst.png\",height=900,width=900,scale = 4)\n",
+ "\n",
+ "    # --- institution tables ---\n",
+ "    wos_univ_locations = wos_univ[wos_univ[record_col].isin(id_subset)].merge(wos_country_types, on=\"Country\")\n",
+ "    collab_cols = [record_col,\"Country\",\"Institution_harm\",\"Country_Type\",\"Eurovoc_Class\"]\n",
+ "    # .copy() detaches the frame from wos_univ_locations before new columns are assigned,\n",
+ "    # which avoids pandas' SettingWithCopyWarning.\n",
+ "    wos_univ_collabs = (wos_univ_locations.loc[wos_univ_locations[\"Country_Type\"]!=\"Other\", collab_cols]\n",
+ "                        .drop_duplicates()\n",
+ "                        .copy())\n",
+ "    wos_univ_collabs[\"ISO3\"] = cc.pandas_convert(series=wos_univ_collabs[\"Country\"], to='ISO3')\n",
+ "    wos_univ_collabs[\"Institution_harm_label\"] = wos_univ_collabs[\"Institution_harm\"] + \" (\"+wos_univ_collabs[\"ISO3\"]+ \")\"\n",
+ "\n",
+ "    # Top-TOPN institutions of country c, ranked by number of distinct records.\n",
+ "    wos_univ_eu_strict = wos_univ_collabs[wos_univ_collabs[\"Country\"]==c]\n",
+ "    data_eu_strict = (wos_univ_eu_strict.groupby([\"Country\",\"Institution_harm\",\"Eurovoc_Class\"], as_index=False)[record_col].nunique()\n",
+ "                      .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n",
+ "\n",
+ "    # Top-TOPN Chinese institutions; for this ranking \"Chinese\" means Country_Type == \"China\".\n",
+ "    wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"China\"]\n",
+ "    data_ch = (wos_univ_ch.groupby([\"Country\",\"Institution_harm\",\"Country_Type\"], as_index=False)[record_col].nunique()\n",
+ "               .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n",
+ "\n",
+ "    # --- one horizontal bar chart per side (country c, then China) ---\n",
+ "    for top_df, c_scope, y_lab in zip(\n",
+ "            [data_eu_strict,data_ch],\n",
+ "            [c,\"China\"],\n",
+ "            [\"Institution_harm\",\"Institution_harm\"]):\n",
+ "        fig = px.bar(top_df, x=record_col, y=y_lab, color_discrete_map=color_discrete_map,text_auto=True,\n",
+ "                     labels={\n",
+ "                         record_col: 'Number of co-publications',\n",
+ "                         \"Institution_harm\": \"Institution\",\n",
+ "                         \"Institution_harm_label\": \"Institution\",\n",
+ "                         \"Country_Type\":\"Country type\",\n",
+ "                         \"Eurovoc_Class\":\"Region\"\n",
+ "                     },\n",
+ "                     title=f\"Top {TOPN} institutes ({c_scope})\", template='plotly')\n",
+ "        fig.update_layout(xaxis_tickformat='d',font_family=\"Montserrat\",yaxis={'categoryorder':'total ascending'},\n",
+ "                          width=1000, height=1000,)\n",
+ "        fig.update_traces(hovertemplate='%{x:d}')\n",
+ "        # Thin black frame around the plotting area (paper coordinates 0..1).\n",
+ "        fig.add_shape(\n",
+ "            type=\"rect\",\n",
+ "            xref=\"paper\",\n",
+ "            yref=\"paper\",\n",
+ "            x0=0,\n",
+ "            y0=0,\n",
+ "            x1=1.0,\n",
+ "            y1=1.0,\n",
+ "            line=dict(\n",
+ "                color=\"black\",\n",
+ "                width=0.5,\n",
+ "            )\n",
+ "        )\n",
+ "        fig.update_yaxes(\n",
+ "            showgrid=True,\n",
+ "            ticks=\"outside\")\n",
+ "        fig.update_xaxes(\n",
+ "            showgrid=True,\n",
+ "            ticks=\"outside\")\n",
+ "        figsuper_ppt = go.Figure(fig)\n",
+ "        figsuper_ppt.update_traces(textposition='inside')\n",
+ "        figsuper_ppt['layout'][\"font\"][\"size\"]=22\n",
+ "\n",
+ "        s=16\n",
+ "        figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n",
+ "        figsuper_ppt.update_xaxes(tickfont=dict(size=s))\n",
+ "        figsuper_ppt.write_image(f\"plot_html/PPT_plots/highlight_countries/general_{c}/inst_bar_{c_scope}.png\",height=900,width=900,scale = 4)\n",
+ "\n",
+ "    # --- co-publication heatmap: top institutions of c vs. top Chinese institutions ---\n",
+ "    # For the pairing, Chinese partners are selected by Country == \"China\".\n",
+ "    wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country\"]==\"China\"]\n",
+ "    wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country\"]==c]\n",
+ "\n",
+ "    # One row per (record, institution of c, Chinese institution) pair, with the record's classification attached.\n",
+ "    wos_univ_dipol = wos_univ_eu.merge(wos_univ_ch, on=record_col, suffixes=('_eu', '_ch')).merge(wos[[record_col,\"Domain_English\",\"Field_English\",\"SubField_English\"]], on =record_col)\n",
+ "\n",
+ "    # Keep only pairs where both institutions are in their respective top-TOPN list.\n",
+ "    subfilter = ((wos_univ_dipol[\"Institution_harm_eu\"].isin(data_eu_strict[\"Institution_harm\"]))&\n",
+ "                 (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n",
+ "    sub_df = wos_univ_dipol[subfilter]\n",
+ "\n",
+ "    # Distinct records per institution pair; pairs that never co-occur become 0.\n",
+ "    inst_co_occur = pd.crosstab(sub_df['Institution_harm_eu'], sub_df['Institution_harm_ch'],\n",
+ "                                values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)\n",
+ "\n",
+ "    # Order both axes by descending row count in sub_df.\n",
+ "    eu_list = sub_df.groupby(['Institution_harm_eu'])[record_col].count().sort_values(ascending=False).index\n",
+ "    ch_list = sub_df.groupby(['Institution_harm_ch'])[record_col].count().sort_values(ascending=False).index\n",
+ "\n",
+ "    inst_co_occur = inst_co_occur.reindex(index = eu_list, columns=ch_list)\n",
+ "\n",
+ "    # The former np.where(mask, x, x) returned x for every cell, so the values are passed directly.\n",
+ "    fig = px.imshow(inst_co_occur.to_numpy(),\n",
+ "                    labels=dict(x=\"Institute (CH)\", y=f\"Institute ({c})\", color=\"Co-publication\"),text_auto=True,\n",
+ "                    x=list(inst_co_occur.columns),\n",
+ "                    y=list(inst_co_occur.index), title=f\"Top {TOPN} institutes ({t})\"\n",
+ "                    )\n",
+ "    fig.update_layout(\n",
+ "        width=1000, height=1000,\n",
+ "        xaxis_showgrid=False,\n",
+ "        yaxis_showgrid=False,\n",
+ "        yaxis_autorange='reversed',\n",
+ "        template='plotly_white',font_family=\"Montserrat\",\n",
+ "        coloraxis_colorbar=dict(\n",
+ "            thicknessmode=\"pixels\", thickness=25,\n",
+ "            ticks=\"outside\", ticksuffix=\" \",\n",
+ "            dtick=20,outlinewidth=1,\n",
+ "        ))\n",
+ "    fig.update_xaxes(tickangle= -45)\n",
+ "    # Hide the legend and the colour bar.\n",
+ "    fig.update_traces(showlegend=False)\n",
+ "    fig.update_traces(showscale=False)\n",
+ "    fig.update_layout(coloraxis_showscale=False)\n",
+ "    fig.update_yaxes(\n",
+ "        ticks=\"outside\")\n",
+ "    fig.update_xaxes(\n",
+ "        ticks=\"outside\")\n",
+ "\n",
+ "    figsuper_ppt = go.Figure(fig)\n",
+ "\n",
+ "    # Enlarge fonts for the slide export; the x tick angle set here (45) overrides the -45 set above.\n",
+ "    figsuper_ppt['layout'][\"font\"][\"size\"]=22\n",
+ "    for a in figsuper_ppt['layout'][\"annotations\"]:\n",
+ "        a[\"font\"][\"size\"] = 22\n",
+ "\n",
+ "    s=16\n",
+ "    figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n",
+ "    figsuper_ppt.update_xaxes(tickfont=dict(size=s),tickangle=45)\n",
+ "    figsuper_ppt.write_image(f\"plot_html/PPT_plots/highlight_countries/general_{c}/inst_collab.png\",height=900,width=900,scale = 4)\n",
+ "\n",
+ "# Applied Sciences: Hungary, Poland\n",
+ "\n",
+ "# Natural Sciences: Ireland\n",
+ "\n",
+ "# Health Sciences: Austria, Czech Republic, Ireland, Poland, Portugal\n",
+ "\n",
+ "# Economic & Social Sciences: France"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "outputs": [
+ {
+ "data": {
+ "text/plain": " Country Institution_harm Eurovoc_Class UT (Unique WOS ID)\n47 Croatia Univ Zagreb Eastern Europe 70\n19 Croatia Inst Rudjer Boskovic Eastern Europe 59\n45 Croatia Univ Split Eastern Europe 58\n42 Croatia Univ North Eastern Europe 16\n33 Croatia Tech Univ Split Eastern Europe 12",
+ "text/html": "\n\n
\n \n \n | \n Country | \n Institution_harm | \n Eurovoc_Class | \n UT (Unique WOS ID) | \n
\n \n \n \n 47 | \n Croatia | \n Univ Zagreb | \n Eastern Europe | \n 70 | \n
\n \n 19 | \n Croatia | \n Inst Rudjer Boskovic | \n Eastern Europe | \n 59 | \n
\n \n 45 | \n Croatia | \n Univ Split | \n Eastern Europe | \n 58 | \n
\n \n 42 | \n Croatia | \n Univ North | \n Eastern Europe | \n 16 | \n
\n \n 33 | \n Croatia | \n Tech Univ Split | \n Eastern Europe | \n 12 | \n
\n \n
\n
"
+ },
+ "execution_count": 91,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Preview the first rows of data_eu_strict, the top-institution table left by the highlight-countries loop above.\n",
+ "data_eu_strict.head()"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "# Adding emphasized institutes\n",
+ "\n",
+ "# General:\n",
+ "# Polish Academy of Sciences\n",
+ "\n",
+ "# University of Groningen / Politecnico di Milano – prominent co-publishers, but not necessarily with the largest Chinese partners\n",
+ "#\n",
+ "# Aalto University – Xidian University\n",
+ "#\n",
+ "# Technical University of Munich – Tongji University\n",
+ "#\n",
+ "# Aalborg University – University of Electronic Science & Technology of China\n",
+ "\n",
+ "\n",
+ "# Natural Sciences: Charles Univ of Prague\n",
+ "\n",
+ "# Health Sciences: Karolinska Institute\n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "def print_hello_world():\n",
+ "    \"\"\"Print the greeting 'Hello, World!' to standard output.\"\"\"\n",
+ "    # just a general hello world print\n",
+ "    print(\"Hello, World!\")"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "outputs": [],
+ "source": [
+ "class Test:\n",
+ "    \"\"\"Empty placeholder class; no attributes or methods are defined yet.\"\"\""
+ ],
+ "metadata": {
+ "collapsed": false
+ }
}
],
"metadata": {