diff --git a/WOS/wos_analysis/wos_analyses.html b/WOS/wos_analysis/wos_analyses.html new file mode 100644 index 0000000..3c9f369 --- /dev/null +++ b/WOS/wos_analysis/wos_analyses.html @@ -0,0 +1,21072 @@ + + +
+ + +import pandas as pd
+import janitor
+import matplotlib.pyplot as plt
+import seaborn as sns
+from matplotlib.ticker import MaxNLocator
+import math
+import plotly.express as px
+%matplotlib inline
+
sns.set_theme(context='notebook', style='ticks', palette='colorblind', font='sans-serif', font_scale=1, color_codes=True, rc=None)
+sns.palplot(sns.color_palette())
+
outdir="wos_processed_data"
+
+wos = pd.read_excel(f"../{outdir}/wos_processed.xlsx")
+wos_univ = pd.read_excel(f"../{outdir}/wos_institution_locations_harmonized.xlsx")
+
wos_country = pd.read_excel(f"../{outdir}/wos_countries.xlsx")
+wos_country_types = pd.read_excel(f"../{outdir}/wos_country_types.xlsx")
+
wos_country_types
+
+ | Country | +Country_Type | +
---|---|---|
0 | +Belgium | +EU | +
1 | +China | +China | +
2 | +Luxembourg | +EU | +
3 | +Netherlands | +EU | +
4 | +Norway | +Non-EU associate | +
5 | +United Kingdom | +Non-EU associate | +
6 | +France | +EU | +
7 | +Sweden | +EU | +
8 | +Italy | +EU | +
9 | +Denmark | +EU | +
10 | +Germany | +EU | +
11 | +Slovenia | +EU | +
12 | +Estonia | +EU | +
13 | +Finland | +EU | +
14 | +Bulgaria | +EU | +
15 | +Slovakia | +EU | +
16 | +Spain | +EU | +
17 | +Poland | +EU | +
18 | +Czech Republic | +EU | +
19 | +Greece | +EU | +
20 | +Malta | +EU | +
21 | +Austria | +EU | +
22 | +Switzerland | +Non-EU associate | +
23 | +Ireland | +EU | +
24 | +Portugal | +EU | +
25 | +Romania | +EU | +
26 | +Hungary | +EU | +
27 | +Cyprus | +EU | +
28 | +Croatia | +EU | +
29 | +Lithuania | +EU | +
30 | +Latvia | +EU | +
# len(wos),len(wos_univ_locations)
+
# wos_addresses = pd.read_excel(f"/{outdir}/wos_addresses.xlsx")
+
+# wos_affiliations = pd.read_excel(f"/{outdir}/wos_affiliations.xlsx")
+
+# wos_author_locations = pd.read_excel(f"/{outdir}/wos_author_locations.xlsx")
+
+# wos_univ_locations = pd.read_excel(f"/{outdir}/wos_univ_locations.xlsx")
+
record_col = "UT (Unique WOS ID)"
+
# def nth_repl_all(s, sub="", repl="<br>", nth=2):
+# find = s.find(sub)
+# # loop until we find no match
+# i = 1
+# while find != -1:
+# # if i is equal to nth we found nth matches so replace
+# if i == nth:
+# s = s[:find]+repl+s[find + len(sub):]
+# i = 0
+# # find + len(sub) + 1 means we start after the last match
+# find = s.find(sub, find + len(sub) + 1)
+# i += 1
+# return s.replace("<br>&","&<br")
+
def replace_nth(s, sub=" ", repl="<br>", n=2):
    """Insert ``repl`` after every ``n``-th ``sub``-separated chunk of ``s``.

    ``s`` is split on ``sub``, the chunks are grouped ``n`` at a time, each
    group is re-joined with ``sub`` and the groups are joined with ``repl``.
    A lone ``"&"`` that would open a group is moved to the end of the
    preceding group, so the break falls after the ampersand and the next
    group does not start with a stray separator.

    Args:
        s: Text to wrap.
        sub: Separator that delimits chunks (default: a single space).
        repl: Text inserted between groups (default: ``"<br>"``).
        n: Number of chunks per group; must be at least 1.

    Returns:
        The wrapped string. A string with at most ``n`` chunks is returned
        unchanged.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError(f"n must be at least 1, got {n}")
    chunks = s.split(sub)
    rows = [chunks[start:start + n] for start in range(0, len(chunks), n)]
    # Work on the chunk lists rather than on the joined string, so the
    # ampersand handling honours whatever `sub` and `repl` the caller passed.
    for previous, current in zip(rows, rows[1:]):
        if current and current[0] == "&":
            previous.append(current.pop(0))
    # A group emptied by the move above must not produce a dangling break.
    return repl.join(sub.join(row) for row in rows if row)
+
+
# Distinct publication IDs per (domain, field, subfield) combination,
# ordered from the largest count to the smallest.
groups = ["Domain_English", "Field_English", "SubField_English"]
data = (
    wos.groupby(groups, as_index=False)[record_col]
    .nunique()
    .sort_values(by=record_col, ascending=False)
)

# Each combination's share of the summed counts (a fraction, not a percentage).
data["percent"] = data[record_col].div(data[record_col].sum())

# Run every label through replace_nth so long names carry "<br>" breaks.
data[groups] = data[groups].applymap(replace_nth)
data
+
+ | Domain_English | +Field_English | +SubField_English | +UT (Unique WOS ID) | +percent | +
---|---|---|---|---|---|
37 | +Applied Sciences | +Information &<br>Communication Technologies | +Artificial Intelligence &<br> Image<br>Processing | +7915 | +0.171841 | +
44 | +Applied Sciences | +Information &<br>Communication Technologies | +Networking &<br>Telecommunications | +5360 | +0.116370 | +
32 | +Applied Sciences | +Engineering | +Geological &<br>Geomatics Engineering | +2576 | +0.055927 | +
33 | +Applied Sciences | +Engineering | +Industrial Engineering &<br> Automation | +2316 | +0.050282 | +
15 | +Applied Sciences | +Enabling &<br>Strategic Technologies | +Energy | +1965 | +0.042662 | +
... | +... | +... | +... | +... | +... | +
11 | +Applied Sciences | +Economics &<br>Business | +Business &<br>Management | +1 | +0.000022 | +
46 | +Applied Sciences | +Social Sciences | +Anthropology | +1 | +0.000022 | +
54 | +Arts &<br>Humanities | +Philosophy &<br>Theology | +Philosophy | +1 | +0.000022 | +
52 | +Arts &<br>Humanities | +Historical Studies | +History of<br>Social Sciences | +1 | +0.000022 | +
129 | +Health Sciences | +Psychology &<br>Cognitive Sciences | +General Psychology &<br> Cognitive<br>Sciences | +1 | +0.000022 | +
175 rows × 5 columns
# Sunburst of the domain > field > subfield hierarchy, sized by publication
# count and coloured by domain.
fig = px.sunburst(
    data,
    path=groups,
    values=record_col,
    color="Domain_English",
    title="Distribution of topics<br>(METRIX classification)",
    template="plotly",
)
fig.update_traces(hovertemplate="%{id}<br>%{value:d}")
fig.show(config={"displayModeBar": False})
+
# Distinct publication IDs per domain, largest domain first.
group = "Domain_English"
data = (
    wos.groupby(group, as_index=False)[record_col]
    .nunique()
    .sort_values(by=record_col, ascending=False)
)
data
+
+ | Domain_English | +UT (Unique WOS ID) | +
---|---|---|
0 | +Applied Sciences | +29985 | +
5 | +Natural Sciences | +8457 | +
3 | +Health Sciences | +5341 | +
2 | +Economic & Social Sciences | +1360 | +
4 | +Multidisciplinary | +847 | +
1 | +Arts & Humanities | +70 | +
+
# Horizontal bar chart of the per-domain counts, with the value printed on
# each bar.
g = sns.barplot(data=data, x=record_col, y=group)
g.set_xlim(left=0, right=35000)
g.set_ylabel(None)
g.set_xlabel("Number of co-publications")
g.set_title("Distribution of Domains")
for bar_container in g.containers:
    g.bar_label(bar_container, fontsize=10)
+
# Interactive version of the domain bar chart.
fig = px.bar(
    data,
    x=record_col,
    y=group,
    color=group,
    labels={record_col: "Number of co-publications", group: ""},
    title="Distribution of Domains",
    template="plotly",
)
fig.update_layout(showlegend=False, xaxis_tickformat="d", font_family="Montserrat")
fig.update_traces(hovertemplate="%{x:d}")

# Thin black frame around the plotting area; "paper" coordinates run 0..1.
fig.add_shape(
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1.0,
    y1=1.0,
    line={"color": "black", "width": 0.5},
)
fig.update_yaxes(showgrid=True, ticks="outside")
fig.update_xaxes(showgrid=True, ticks="outside")
fig.show(config={"displayModeBar": False})
+
# # define a function to divide each row's 'Count' by the value of the first year
+# def divide_by_first_year(group):
+# group['relative_growth'] = group[record_col] / group.loc[group['Publication Year'] == group['Publication Year'].min(), record_col].values[0]
+# return group
+#
+#
+#
+# data = (wos.groupby(group)[record_col].nunique()
+# .unstack(fill_value=0).stack()
+# .reset_index()
+# .rename(columns={0:record_col})
+# .sort_values(ascending=False, by=group+[record_col]))
+#
+# # group by 'Topic'
+# grouped = data.groupby('Domain_English')
+# # apply the function to each group
+# data = grouped.apply(divide_by_first_year).reset_index(drop=True)
+# data['relative_growth'] = data['relative_growth']*100
+
+
# Distinct publications per (year, domain). The unstack/fillna/stack round trip
# adds an explicit 0 for every year/domain combination that has no records.
group = ["Publication Year", "Domain_English"]
data = (
    wos.groupby(group)[record_col]
    .nunique(dropna=False)
    .unstack()
    .fillna(0)
    .stack()
    .reset_index()
    .rename(columns={0: record_col})
)

# Baseline per domain: the row of its earliest year with a non-zero count.
# Merging it back adds "<col>_relative_growth" columns holding that baseline.
data = data.merge(
    data[data[record_col] > 0]
    .sort_values(by=["Publication Year"], ascending=True)
    .drop_duplicates(subset="Domain_English"),
    on="Domain_English",
    suffixes=[None, "_relative_growth"],
)

# Overwrite the merged baseline count with the percentage change against it.
# NOTE(review): years before a domain's baseline year hold 0 and therefore
# evaluate to -100 here - confirm that is the intended reading.
data[record_col + "_relative_growth"] = (
    (data[record_col] - data[record_col + "_relative_growth"])
    / data[record_col + "_relative_growth"]
    * 100
)
data
+
+ | Publication Year | +Domain_English | +UT (Unique WOS ID) | +Publication Year_relative_growth | +UT (Unique WOS ID)_relative_growth | +
---|---|---|---|---|---|
0 | +2011 | +Applied Sciences | +490.0 | +2011 | +0.000000 | +
1 | +2012 | +Applied Sciences | +593.0 | +2011 | +21.020408 | +
2 | +2013 | +Applied Sciences | +738.0 | +2011 | +50.612245 | +
3 | +2014 | +Applied Sciences | +1031.0 | +2011 | +110.408163 | +
4 | +2015 | +Applied Sciences | +1201.0 | +2011 | +145.102041 | +
... | +... | +... | +... | +... | +... | +
67 | +2018 | +Natural Sciences | +753.0 | +2011 | +316.022099 | +
68 | +2019 | +Natural Sciences | +999.0 | +2011 | +451.933702 | +
69 | +2020 | +Natural Sciences | +1232.0 | +2011 | +580.662983 | +
70 | +2021 | +Natural Sciences | +1403.0 | +2011 | +675.138122 | +
71 | +2022 | +Natural Sciences | +1665.0 | +2011 | +819.889503 | +
72 rows × 5 columns
# One line per domain showing its yearly publication count.
g = sns.lineplot(
    data=data.sort_values(by=group[-1], ascending=True),
    x=group[0],
    y=record_col,
    hue=group[-1],
    marker="o",
)
g.set(xticks=list(range(2012, 2022 + 1, 2)))
g.legend(title=None)
g.set_xlabel(None)
g.set_ylabel(None)
g.set_title("Yearly output of co-publications")
+
Text(0.5, 1.0, 'Yearly output of co-publications')+
# Interactive yearly output per domain.
fig = px.line(
    data.sort_values(by=[group[0], group[-1]], ascending=[True, True]),
    x=group[0],
    y=record_col,
    color=group[-1],
    markers=True,
    labels={record_col: "Number of co-publications", group[-1]: "Domain"},
    title="Yearly output of co-publications",
    template="plotly",
)
fig.update_traces(hovertemplate="%{y:d}")
fig.update_layout(hovermode="x unified")

# Thin black frame around the plotting area; "paper" coordinates run 0..1.
fig.add_shape(
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1.0,
    y1=1.0,
    line={"color": "black", "width": 0.5},
)
fig.update_yaxes(showgrid=True, ticks="outside")
fig.update_xaxes(showgrid=True, ticks="outside")
fig.show(config={"displayModeBar": False})
+
# Interactive relative-growth curves per domain (values are percentages).
fig = px.line(
    data.sort_values(by=[group[0], group[-1]], ascending=[True, True]),
    x=group[0],
    y=record_col + "_relative_growth",
    color=group[-1],
    markers=True,
    labels={
        record_col + "_relative_growth": "Rel. growth<br>in co-publications (%)",
        group[-1]: "Domain",
    },
    title="Relative growth in the output of co-publications",
    template="plotly",
)
fig.update_traces(hovertemplate="%{y:.2f}%")
fig.update_layout(hovermode="x unified", yaxis_tickformat="d", font_family="Montserrat")

# Thin black frame around the plotting area; "paper" coordinates run 0..1.
fig.add_shape(
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1.0,
    y1=1.0,
    line={"color": "black", "width": 0.5},
)
fig.update_yaxes(showgrid=True, ticks="outside")
fig.update_xaxes(showgrid=True, ticks="outside")
fig.show(config={"displayModeBar": False})
+
+
# Wide table: one row per domain, one column per publication year.
pivot_data = data.pivot_table(
    values=record_col,
    index=["Domain_English"],
    columns=["Publication Year"],
    fill_value=0,
)
pivot_data
+
Publication Year | +2011 | +2012 | +2013 | +2014 | +2015 | +2016 | +2017 | +2018 | +2019 | +2020 | +2021 | +2022 | +
---|---|---|---|---|---|---|---|---|---|---|---|---|
Domain_English | ++ | + | + | + | + | + | + | + | + | + | + | + |
Applied Sciences | +490 | +593 | +738 | +1031 | +1201 | +1535 | +1920 | +2808 | +3729 | +4446 | +5295 | +6199 | +
Arts & Humanities | +0 | +0 | +0 | +4 | +1 | +3 | +7 | +4 | +11 | +11 | +16 | +13 | +
Economic & Social Sciences | +20 | +22 | +29 | +28 | +34 | +40 | +84 | +105 | +160 | +211 | +252 | +375 | +
Health Sciences | +116 | +120 | +155 | +184 | +216 | +243 | +321 | +403 | +611 | +755 | +1035 | +1182 | +
Multidisciplinary | +15 | +21 | +43 | +52 | +57 | +64 | +75 | +76 | +83 | +97 | +115 | +149 | +
Natural Sciences | +181 | +223 | +298 | +318 | +380 | +437 | +568 | +753 | +999 | +1232 | +1403 | +1665 | +
f, ax = plt.subplots(figsize=(9, 6))
+g = sns.heatmap(pivot_data, annot=True, fmt="d", linewidths=.5, ax=ax)
+g.set(xlabel="", ylabel="")
+
[Text(0.5, 33.249999999999986, ''), Text(79.74999999999999, 0.5, '')]+
import numpy as np

# Share of each domain within a publication year, in percent:
# normalize='columns' makes every year column sum to 1 before scaling by 100.
# The aggregation is named as the string "sum" rather than passed as np.sum,
# because recent pandas versions deprecate NumPy callables as `aggfunc`.
percent_pivot = pd.crosstab(
    data["Domain_English"],
    data["Publication Year"],
    values=data[record_col],
    aggfunc="sum",
    normalize="columns",
) * 100
percent_pivot
+
Publication Year | +2011 | +2012 | +2013 | +2014 | +2015 | +2016 | +2017 | +2018 | +2019 | +2020 | +2021 | +2022 | +
---|---|---|---|---|---|---|---|---|---|---|---|---|
Domain_English | ++ | + | + | + | + | + | + | + | + | + | + | + |
Applied Sciences | +59.610706 | +60.572012 | +58.432304 | +63.760049 | +63.578613 | +66.106804 | +64.537815 | +67.678959 | +66.672626 | +65.847156 | +65.241498 | +64.687467 | +
Arts & Humanities | +0.000000 | +0.000000 | +0.000000 | +0.247372 | +0.052938 | +0.129199 | +0.235294 | +0.096409 | +0.196674 | +0.162915 | +0.197141 | +0.135657 | +
Economic & Social Sciences | +2.433090 | +2.247191 | +2.296120 | +1.731602 | +1.799894 | +1.722653 | +2.823529 | +2.530730 | +2.860719 | +3.125000 | +3.104978 | +3.913180 | +
Health Sciences | +14.111922 | +12.257406 | +12.272367 | +11.379097 | +11.434621 | +10.465116 | +10.789916 | +9.713184 | +10.924370 | +11.181872 | +12.752587 | +12.334342 | +
Multidisciplinary | +1.824818 | +2.145046 | +3.404592 | +3.215832 | +3.017470 | +2.756245 | +2.521008 | +1.831767 | +1.483998 | +1.436611 | +1.416954 | +1.554837 | +
Natural Sciences | +22.019465 | +22.778345 | +23.594616 | +19.666048 | +20.116464 | +18.819983 | +19.092437 | +18.148952 | +17.861613 | +18.246445 | +17.286841 | +17.374517 | +
# Heatmap of the yearly domain shares; the colour bar is suppressed and every
# cell annotation gets a " %" suffix appended after drawing.
f, ax = plt.subplots(figsize=(15, 6))
g = sns.heatmap(
    percent_pivot,
    annot=True,
    fmt=".2f",
    linewidths=0.5,
    ax=ax,
    cbar=False,
)
for cell_text in ax.texts:
    cell_text.set_text(cell_text.get_text() + " %")
g.set(xlabel="", ylabel="")
+
[Text(0.5, 33.249999999999986, ''), Text(154.75, 0.5, '')]+
percent_pivot.T.plot(kind='bar',
+ stacked=True,
+ figsize=(10, 6))
+
<Axes: xlabel='Publication Year'>+
# Stacked bars of the yearly domain shares, annotated with the raw counts.
percent_pivot.T.plot(kind="bar", stacked=True, figsize=(15, 8))

plt.legend(loc="lower left", ncol=2)
# plt.ylabel("Release Year")
# plt.xlabel("Proportion")

for n, x in enumerate(pivot_data.T.index):
    # For one year: each domain's share, its raw count, and the running total
    # of shares (the top edge of that domain's bar segment).
    shares = percent_pivot.T.loc[x]
    counts = pivot_data.T.loc[x]
    segment_tops = shares.cumsum()
    for proportion, count, y_loc in zip(shares, counts, segment_tops):
        # Put the label at the vertical midpoint of the segment.
        plt.text(
            x=n - 0.11,
            y=(y_loc - proportion) + (proportion / 2),
            s=f'{count}',  # ({np.round(proportion, 1)}%)',
            color="black",
            fontsize=8,
            fontweight="bold",
        )

plt.show()
+
# Distinct publications per (year, domain, field), sorted descending on the
# grouping keys and then on the count.
group = ["Publication Year", "Domain_English", "Field_English"]
data = (
    wos.groupby(group, as_index=False)[record_col]
    .nunique()
    .sort_values(by=group + [record_col], ascending=False)
)
data
+
+ | Publication Year | +Domain_English | +Field_English | +UT (Unique WOS ID) | +
---|---|---|---|---|
233 | +2022 | +Natural Sciences | +Physics & Astronomy | +596 | +
232 | +2022 | +Natural Sciences | +Mathematics & Statistics | +228 | +
231 | +2022 | +Natural Sciences | +Earth & Environmental Sciences | +409 | +
230 | +2022 | +Natural Sciences | +Chemistry | +251 | +
229 | +2022 | +Natural Sciences | +Biology | +181 | +
... | +... | +... | +... | +... | +
4 | +2011 | +Applied Sciences | +Information & Communication Technologies | +256 | +
3 | +2011 | +Applied Sciences | +Engineering | +166 | +
2 | +2011 | +Applied Sciences | +Enabling & Strategic Technologies | +53 | +
1 | +2011 | +Applied Sciences | +Built Environment & Design | +6 | +
0 | +2011 | +Applied Sciences | +Agriculture, Fisheries & Forestry | +9 | +
234 rows × 4 columns
+len(data[group[-2]].unique())
+
6+
+
# One line chart per domain (group[-2]) showing the yearly counts of its
# fields (group[-1]); the year-completed frames are collected in data_complete.

# The year range is the same for every domain, so compute it once.
year_span = range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)

completed_frames = []
for cat in sorted(data[group[-2]].unique()):
    # data segment: rows of this domain, expanded via pyjanitor's `complete`
    # so that missing year/field combinations are added with fill_value=0
    sub_data = data[data[group[-2]] == cat]
    sub_data = sub_data.complete({group[0]: year_span}, group[-1], fill_value=0)
    completed_frames.append(sub_data)
    # plot
    g = sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),
                     y=record_col, x=group[0], hue=group[-1], marker="o")
    g.set(xticks=list(range(2012, 2022 + 1, 2)))
    g.legend(title=None)
    g.set_title(cat)
    g.yaxis.set_major_locator(MaxNLocator(integer=True))
    plt.show()

# Concatenate once at the end: appending onto an initially empty frame inside
# the loop re-copies all previous rows on every pass and relies on
# concatenation with an empty DataFrame, which recent pandas deprecates.
data_complete = (
    pd.concat(completed_frames, ignore_index=True)
    if completed_frames
    else pd.DataFrame()
)
+
# Same per-domain field charts as separate figures, arranged on a single
# two-column figure.
domains = sorted(data[group[-2]].unique())

# The year range is the same for every domain, so compute it once.
year_span = range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)

# Creating subplot axes. The grid is sized from the number of domains: a fixed
# 3x2 grid combined with zip() silently drops every domain beyond the sixth.
# Six domains still give the 3x2 layout at 15x15 inches.
n_cols = 2
n_rows = max(1, math.ceil(len(domains) / n_cols))
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols,
                         figsize=(15, 5 * n_rows), squeeze=False)

completed_frames = []
for cat, ax in zip(domains, axes.flatten()):
    # data segment: rows of this domain, expanded via pyjanitor's `complete`
    # so that missing year/field combinations are added with fill_value=0
    sub_data = data[data[group[-2]] == cat]
    sub_data = sub_data.complete({group[0]: year_span}, group[-1], fill_value=0)
    completed_frames.append(sub_data)
    # plot
    g = sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),
                     y=record_col, x=group[0], hue=group[-1], marker="o", ax=ax)
    g.set(xticks=list(range(2012, 2022 + 1, 2)))
    g.legend(title=None)
    g.set_title(cat)
    g.set_xlabel(None)
    g.set_ylabel(None)
    g.yaxis.set_major_locator(MaxNLocator(integer=True))

# Hide the panel left over when the number of domains is odd.
for unused_ax in axes.flatten()[len(domains):]:
    unused_ax.set_visible(False)

# Concatenate once instead of growing a frame inside the loop.
data_complete = (
    pd.concat(completed_frames, ignore_index=True)
    if completed_frames
    else pd.DataFrame()
)

fig.suptitle("Number of co-publications in domains and respective fields", y=0.92)
plt.show()
+
# Distinct publications per (year, domain, field, subfield), sorted descending
# on the grouping keys and then on the count.
group = ["Publication Year", "Domain_English", "Field_English", "SubField_English"]
data = (
    wos.groupby(group, as_index=False)[record_col]
    .nunique()
    .sort_values(by=group + [record_col], ascending=False)
)
data
+
+ | Publication Year | +Domain_English | +Field_English | +SubField_English | +UT (Unique WOS ID) | +
---|---|---|---|---|---|
1598 | +2022 | +Natural Sciences | +Physics & Astronomy | +Optics | +134 | +
1597 | +2022 | +Natural Sciences | +Physics & Astronomy | +Nuclear & Particle Physics | +65 | +
1596 | +2022 | +Natural Sciences | +Physics & Astronomy | +Mathematical Physics | +10 | +
1595 | +2022 | +Natural Sciences | +Physics & Astronomy | +General Physics | +31 | +
1594 | +2022 | +Natural Sciences | +Physics & Astronomy | +Fluids & Plasmas | +79 | +
... | +... | +... | +... | +... | +... | +
4 | +2011 | +Applied Sciences | +Agriculture, Fisheries & Forestry | +Forestry | +1 | +
3 | +2011 | +Applied Sciences | +Agriculture, Fisheries & Forestry | +Food Science | +1 | +
2 | +2011 | +Applied Sciences | +Agriculture, Fisheries & Forestry | +Fisheries | +2 | +
1 | +2011 | +Applied Sciences | +Agriculture, Fisheries & Forestry | +Dairy & Animal Science | +2 | +
0 | +2011 | +Applied Sciences | +Agriculture, Fisheries & Forestry | +Agronomy & Agriculture | +3 | +
1599 rows × 5 columns
# One line chart per field (group[-2]) showing the yearly counts of its
# subfields (group[-1]).

# The year range is the same for every field, so compute it once.
year_span = range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)

for cat in sorted(data[group[-2]].unique()):
    # Rows of this field, expanded via pyjanitor's `complete` so that missing
    # year/subfield combinations are added with fill_value=0.
    sub_data = data[data[group[-2]] == cat]
    sub_data = sub_data.complete({group[0]: year_span}, group[-1], fill_value=0)
    g = sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),
                     y=record_col, x=group[0],
                     hue=group[-1], marker="o", errorbar=None)
    g.set(xticks=list(range(2012, 2022 + 1, 2)))
    # Legend sits outside the axes; one extra column per 12 entries.
    g.legend(title=None, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,
             ncols=math.ceil(len(g.legend_.texts) / 12))
    # Title previously read "Number or co-publications" (typo).
    g.set_title(f'Number of co-publications in {cat}')
    g.set_ylabel(None)
    plt.show()
+
len(sorted(data[group[-2]].unique()))
+
20+
from matplotlib.ticker import FuncFormatter
+import math
def orderOfMagnitude(number):
    """Return the base-10 order of magnitude of a positive number.

    This is the integer ``k`` with ``10**k <= number < 10**(k + 1)``,
    e.g. 999 -> 2, 1000 -> 3, 0.5 -> -1.

    Args:
        number: A positive real number.

    Returns:
        The order of magnitude as an ``int``.

    Raises:
        ValueError: If ``number`` is zero or negative.
    """
    if number <= 0:
        raise ValueError(
            f"order of magnitude is undefined for non-positive value {number!r}"
        )
    # math.log(number, 10) divides two natural logarithms and can land just
    # below an exact integer (1000 -> 2.9999999999999996), which floor() then
    # turns into the wrong order. math.log10 does not have this problem.
    return math.floor(math.log10(number))


def roundToNearest(number):
    """Round ``number`` up to the next multiple of its own order of magnitude.

    Examples: 4245 -> 5000, 45.3 -> 50, 7 -> 7. Zero is returned as 0.

    Args:
        number: A non-negative real number.

    Returns:
        The rounded-up value.

    Raises:
        ValueError: If ``number`` is negative.
    """
    if number == 0:
        # Zero has no order of magnitude; it is already "round".
        return 0
    order = orderOfMagnitude(number)
    return math.ceil(number / 10**order) * 10**order
+
# Attach the country type to every institution row (default inner join on
# "Country"), then show 100 randomly drawn rows.
wos_univ_locations = pd.merge(wos_univ, wos_country_types, on="Country")
wos_univ_locations.sample(n=100)
+
+ | UT (Unique WOS ID) | +Institution | +Country | +Institution_harm | +merge_iter | +Country_Type | +
---|---|---|---|---|---|---|
41191 | +WOS:000538161600016 | +Anhui Univ | +China | +Anhui Univ | +0 | +China | +
175692 | +WOS:000709411500003 | +Univ Porto | +Portugal | +Univ Porto | +0 | +EU | +
75198 | +WOS:000831217100027 | +Zhejiang Univ | +China | +Zhejiang Univ | +0 | +China | +
48614 | +WOS:000597938400003 | +Shanghai Jiao Tong Univ | +China | +Shanghai Jiao Tong Univ | +0 | +China | +
133670 | +WOS:000411824101159 | +Univ Pisa | +Italy | +Univ Pisa | +0 | +EU | +
... | +... | +... | +... | +... | +... | +... | +
2892 | +WOS:000293708200019 | +Natl Univ Def Technol | +China | +Natl Univ Def Technol | +0 | +China | +
125259 | +WOS:000663324800010 | +INRAE | +France | +INRAE | +0 | +EU | +
55780 | +WOS:000659952900011 | +Huazhong Univ Sci & Technol | +China | +Huazhong Univ Sci & Technol | +0 | +China | +
138600 | +WOS:000744399000001 | +Brignone Clin | +Italy | +Brignone Clin | +0 | +EU | +
31040 | +WOS:000471758500010 | +Chinese Acad Sci | +China | +Chinese Acad Sci | +0 | +China | +
100 rows × 6 columns
# One row per (publication, country), skipping rows whose country type is "Other".
wos_collabs = wos_univ_locations.loc[
    wos_univ_locations["Country_Type"] != "Other",
    [record_col, "Country"],
].drop_duplicates()
+
# Per-country statistics for every country except China:
#   count                      - rows of that country in wos_collabs
#   percent_of_copubs          - count relative to the number of distinct publications
#   percent_contrib_in_copubs  - count relative to all rows of wos_collabs
collab_desc = wos_collabs[wos_collabs["Country"] != "China"]["Country"].value_counts().reset_index()
collab_desc["percent_of_copubs"] = collab_desc["count"] / wos_collabs[record_col].nunique() * 100
collab_desc["percent_contrib_in_copubs"] = collab_desc["count"] / wos_collabs[record_col].size * 100
collab_desc = collab_desc.merge(wos_country_types, on="Country")

# Column name -> axis label / title of the corresponding chart.
c_dict = {"count": "Number of co-publications",
          "percent_of_copubs": "Percent of co-publications",
          "percent_contrib_in_copubs": "Contribution to co-publications"}


# Creating subplot axes
# fig, axes = plt.subplots(ncols=3,figsize=(15, 15))
# for c,ax in zip(c_dict.keys(),axes.flatten()):
for c, label in c_dict.items():
    is_percent = "percent" in c
    data = collab_desc[["Country", c, "Country_Type"]]
    plt.figure(figsize=(9, 12))
    g = sns.barplot(data, x=c, y="Country", hue="Country_Type", dodge=False)
    g.set_xlim(0, roundToNearest(data[c].max()))
    g.set_ylabel(None)
    g.set_xlabel(label)
    g.set_title(label)
    g.legend(title=None, loc="right")
    for bar_container in g.containers:
        g.bar_label(bar_container, fontsize=10, fmt='%.1f%%' if is_percent else '%.0f')
    if is_percent:
        g.xaxis.set_major_locator(MaxNLocator(integer=True))
        # Format the ticks through a formatter instead of set_xticklabels():
        # fixed labels on a non-fixed locator raise matplotlib's
        # "FixedFormatter should only be used together with FixedLocator"
        # warning and can drift out of sync with the tick positions.
        g.xaxis.set_major_formatter(FuncFormatter(lambda val, _pos: str(int(val)) + '%'))
    plt.show()
+
C:\Users\radvanyi\AppData\Local\Temp\ipykernel_30956\556627507.py:29: UserWarning: + +FixedFormatter should only be used together with FixedLocator + ++
C:\Users\radvanyi\AppData\Local\Temp\ipykernel_30956\556627507.py:29: UserWarning: + +FixedFormatter should only be used together with FixedLocator + ++
# One row per (publication, country) for every country type other than
# "Other" and "China".
wos_collabs_EU = wos_univ_locations.loc[
    ~wos_univ_locations["Country_Type"].isin(["Other", "China"]),
    [record_col, "Country"],
].drop_duplicates()

# Self-join on the publication ID: every pair of countries sharing a
# publication becomes one row (Country_x, Country_y).
wos_collabs_EU = pd.merge(wos_collabs_EU, wos_collabs_EU, on=record_col)

# Distinct publications per country pair, normalised over the whole table.
EU_co_occur = pd.crosstab(
    wos_collabs_EU["Country_x"],
    wos_collabs_EU["Country_y"],
    values=wos_collabs_EU[record_col],
    aggfunc="nunique",
    normalize="all",
).fillna(0)

# Boolean mask covering the upper triangle including the diagonal,
# which the heatmap leaves blank.
mask = np.triu(np.ones(EU_co_occur.shape, dtype=bool))

# Figure and axes for the heatmap.
f, ax = plt.subplots(figsize=(11, 9))

# Masked heatmap with square cells.
g = sns.heatmap(EU_co_occur, mask=mask, square=True, linewidths=0.5)

g.set_ylabel(None)
g.set_xlabel(None)
+
Text(0.5, 71.74999999999994, '')+
# One row per (publication, country) for every country type other than
# "Other" and "China", self-joined on the publication ID to obtain country pairs.
wos_collabs_EU = wos_univ_locations.loc[
    ~wos_univ_locations["Country_Type"].isin(["Other", "China"]),
    [record_col, "Country"],
].drop_duplicates()
wos_collabs_EU = pd.merge(wos_collabs_EU, wos_collabs_EU, on=record_col)

# Distinct publications per country pair as integer counts.
EU_co_occur = (
    pd.crosstab(
        wos_collabs_EU["Country_x"],
        wos_collabs_EU["Country_y"],
        values=wos_collabs_EU[record_col],
        aggfunc="nunique",
    )
    .fillna(0)
    .astype(int)
)

# Blank out the upper triangle (diagonal included) by replacing it with None.
mask = np.triu(np.ones(EU_co_occur.shape, dtype=bool))
data = np.where(mask, None, EU_co_occur)
EU_co_occur.columns
+
Index(['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic', + 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', + 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', + 'Malta', 'Netherlands', 'Norway', 'Poland', 'Portugal', 'Romania', + 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'Switzerland', + 'United Kingdom'], + dtype='object', name='Country_y')+
# Interactive matrix view of the lower-triangle co-occurrence counts.
fig = px.imshow(
    data,
    labels={"x": "Country (x)", "y": "Country (y)", "color": "Co-publication"},
    x=list(EU_co_occur.columns),
    y=list(EU_co_occur.index),
    title="Intraeuropean patterns",
)
fig.update_layout(
    title_x=0.5,
    width=1000,
    height=1000,
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    yaxis_autorange="reversed",
    template="plotly_white",
)
fig.update_xaxes(tickangle=-90)
fig.update_yaxes(ticks="outside")
fig.update_xaxes(ticks="outside")
fig.show(config={"displayModeBar": False})
+
# Non-China rows of wos_collabs, enriched with the country type and the
# publication year of each record.
collab_year = wos_collabs[wos_collabs["Country"] != "China"].copy()
collab_year = (
    collab_year.merge(wos_country_types, on="Country")
    .merge(wos[[record_col, "Publication Year"]], on=record_col)
    .drop_duplicates()
)

# Distinct publications per (year, country type).
data = collab_year.groupby(
    ["Publication Year", "Country_Type"], as_index=False
)[record_col].nunique()


g = sns.lineplot(
    data=data,
    x="Publication Year",
    y=record_col,
    hue="Country_Type",
    marker="o",
)
g.set(xticks=list(range(2012, 2022 + 1, 2)))
g.legend(title=None)
g.set_xlabel(None)
g.set_ylabel(None)
g.set_title("Yearly output of co-publications with China")
+
Text(0.5, 1.0, 'Yearly output of co-publications with China')+
import country_converter as coco
cc = coco.CountryConverter()

# Distinct publications per (year, country). The unstack/fillna/stack round
# trip adds an explicit 0 for every year/country combination without records.
data = (
    collab_year.groupby(["Publication Year", "Country"])[record_col]
    .nunique(dropna=False)
    .unstack()
    .fillna(0)
    .stack()
    .reset_index()
    .rename(columns={0: record_col})
)

# Baseline per country: the row of its earliest year with a non-zero count.
# Merging it back adds "<col>_relative_growth" columns holding that baseline.
data = data.merge(
    data[data[record_col] > 0]
    .sort_values(by=["Publication Year"], ascending=True)
    .drop_duplicates(subset="Country"),
    on=["Country"],
    suffixes=[None, "_relative_growth"],
)

# Overwrite the merged baseline count with the percentage change against it.
# NOTE(review): years before a country's baseline year hold 0 and therefore
# evaluate to -100 here - confirm that is the intended reading.
data[record_col + "_relative_growth"] = (
    (data[record_col] - data[record_col + "_relative_growth"])
    / data[record_col + "_relative_growth"]
    * 100
)
data
+
+ | Publication Year | +Country | +UT (Unique WOS ID) | +Publication Year_relative_growth | +UT (Unique WOS ID)_relative_growth | +
---|---|---|---|---|---|
0 | +2011 | +Austria | +22.0 | +2011 | +0.000000 | +
1 | +2012 | +Austria | +24.0 | +2011 | +9.090909 | +
2 | +2013 | +Austria | +26.0 | +2011 | +18.181818 | +
3 | +2014 | +Austria | +39.0 | +2011 | +77.272727 | +
4 | +2015 | +Austria | +50.0 | +2011 | +127.272727 | +
... | +... | +... | +... | +... | +... | +
355 | +2018 | +United Kingdom | +1837.0 | +2011 | +406.060606 | +
356 | +2019 | +United Kingdom | +2430.0 | +2011 | +569.421488 | +
357 | +2020 | +United Kingdom | +3108.0 | +2011 | +756.198347 | +
358 | +2021 | +United Kingdom | +3718.0 | +2011 | +924.242424 | +
359 | +2022 | +United Kingdom | +4245.0 | +2011 | +1069.421488 | +
360 rows × 5 columns
# Animated map of Europe coloured by the yearly publication count.
# Country names are converted to ISO3 codes for plotly's location matching;
# the colour range is pinned to the overall min/max so frames are comparable.
data["ISO3"] = cc.pandas_convert(series=data["Country"], to="ISO3")
fig = px.choropleth(
    data,
    locations="ISO3",
    color=record_col,
    hover_name="Country",
    animation_frame="Publication Year",
    scope="europe",
    template="plotly",
    range_color=[data[record_col].min(), data[record_col].max()],
)
fig.show()
+
# Animated map of Europe coloured by the relative growth column.
# The colour range is pinned to the overall min/max so frames are comparable.
data["ISO3"] = cc.pandas_convert(series=data["Country"], to="ISO3")
fig = px.choropleth(
    data,
    locations="ISO3",
    color=record_col + "_relative_growth",
    hover_name="Country",
    animation_frame="Publication Year",
    scope="europe",
    template="plotly",
    range_color=[
        data[record_col + "_relative_growth"].min(),
        data[record_col + "_relative_growth"].max(),
    ],
)
fig.show()
+
[data[record_col+"_relative_growth"].min(),data[record_col+"_relative_growth"].max()]
+
[-100.0, 3700.0]+
# Interactive yearly output per country.
fig = px.line(
    data.sort_values(by="Publication Year", ascending=True),
    x="Publication Year",
    y=record_col,
    color="Country",
    markers=True,
    labels={record_col: "Number of co-publications"},
    title="Yearly output of co-publications",
    template="plotly",
)
fig.update_traces(hovertemplate="%{y:d}")
fig.update_layout(hovermode="x unified")

# Thin black frame around the plotting area; "paper" coordinates run 0..1.
fig.add_shape(
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1.0,
    y1=1.0,
    line={"color": "black", "width": 0.5},
)
fig.update_yaxes(showgrid=True, ticks="outside")
fig.update_xaxes(showgrid=True, ticks="outside")
fig.show(config={"displayModeBar": False})
+
# Interactive relative-growth curves per country (values are percentages).
fig = px.line(
    data.sort_values(by="Publication Year", ascending=True),
    x="Publication Year",
    y=record_col + "_relative_growth",
    color="Country",
    markers=True,
    labels={record_col + "_relative_growth": "Relative growth of co-publications (%)"},
    title="Relative growth of co-publications<br>(baseline: 2011)",
    template="plotly",
)
fig.update_traces(hovertemplate="%{y:d}%")

# Thin black frame around the plotting area; "paper" coordinates run 0..1.
fig.add_shape(
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1.0,
    y1=1.0,
    line={"color": "black", "width": 0.5},
)
fig.update_yaxes(showgrid=True, ticks="outside")
fig.update_xaxes(showgrid=True, ticks="outside")
fig.show(config={"displayModeBar": False})
+
# Distinct publications per country (rows) and publication year (columns);
# combinations without records become 0.
year_pivot = (
    pd.crosstab(
        collab_year["Country"],
        collab_year["Publication Year"],
        values=collab_year[record_col],
        aggfunc="nunique",
    )
    .fillna(0)
    .astype(int)
)
year_pivot
+
Publication Year | +2011 | +2012 | +2013 | +2014 | +2015 | +2016 | +2017 | +2018 | +2019 | +2020 | +2021 | +2022 | +
---|---|---|---|---|---|---|---|---|---|---|---|---|
Country | ++ | + | + | + | + | + | + | + | + | + | + | + |
Austria | +22 | +24 | +26 | +39 | +50 | +57 | +72 | +89 | +138 | +137 | +185 | +205 | +
Belgium | +34 | +38 | +40 | +65 | +71 | +81 | +90 | +133 | +179 | +213 | +242 | +292 | +
Bulgaria | +4 | +5 | +8 | +9 | +7 | +19 | +21 | +18 | +10 | +25 | +32 | +19 | +
Croatia | +1 | +2 | +6 | +8 | +10 | +7 | +10 | +19 | +27 | +29 | +33 | +35 | +
Cyprus | +2 | +1 | +5 | +5 | +5 | +5 | +8 | +7 | +15 | +28 | +36 | +43 | +
Czech Republic | +13 | +15 | +16 | +21 | +20 | +36 | +37 | +56 | +64 | +81 | +93 | +123 | +
Denmark | +35 | +33 | +40 | +59 | +68 | +74 | +101 | +195 | +234 | +245 | +293 | +343 | +
Estonia | +3 | +3 | +7 | +10 | +12 | +10 | +15 | +15 | +16 | +38 | +45 | +39 | +
Finland | +31 | +35 | +44 | +82 | +100 | +125 | +126 | +198 | +241 | +256 | +289 | +380 | +
France | +117 | +130 | +174 | +231 | +269 | +325 | +348 | +491 | +648 | +691 | +807 | +858 | +
Germany | +123 | +172 | +192 | +273 | +310 | +365 | +456 | +604 | +801 | +907 | +1210 | +1386 | +
Greece | +15 | +18 | +19 | +32 | +35 | +50 | +47 | +81 | +114 | +122 | +139 | +181 | +
Hungary | +11 | +11 | +21 | +16 | +20 | +38 | +34 | +47 | +61 | +61 | +83 | +90 | +
Ireland | +13 | +16 | +22 | +31 | +27 | +45 | +66 | +72 | +84 | +116 | +167 | +187 | +
Italy | +51 | +70 | +84 | +116 | +178 | +187 | +247 | +325 | +441 | +571 | +641 | +811 | +
Latvia | +0 | +0 | +1 | +0 | +1 | +8 | +10 | +15 | +10 | +9 | +13 | +18 | +
Lithuania | +1 | +2 | +10 | +4 | +4 | +13 | +12 | +23 | +38 | +36 | +38 | +38 | +
Luxembourg | +2 | +3 | +3 | +1 | +8 | +9 | +13 | +15 | +18 | +22 | +35 | +51 | +
Malta | +1 | +0 | +0 | +0 | +1 | +1 | +0 | +0 | +6 | +2 | +7 | +10 | +
Netherlands | +72 | +64 | +77 | +103 | +139 | +166 | +220 | +297 | +408 | +470 | +529 | +655 | +
Norway | +30 | +42 | +60 | +76 | +67 | +88 | +104 | +134 | +222 | +253 | +304 | +311 | +
Poland | +17 | +31 | +37 | +57 | +73 | +82 | +98 | +110 | +138 | +181 | +276 | +353 | +
Portugal | +16 | +23 | +35 | +41 | +45 | +58 | +79 | +119 | +136 | +147 | +204 | +212 | +
Romania | +7 | +15 | +13 | +16 | +25 | +26 | +37 | +57 | +64 | +55 | +48 | +62 | +
Slovakia | +9 | +6 | +6 | +10 | +12 | +22 | +18 | +27 | +27 | +34 | +36 | +45 | +
Slovenia | +7 | +7 | +10 | +12 | +17 | +27 | +22 | +47 | +54 | +31 | +48 | +40 | +
Spain | +50 | +49 | +69 | +112 | +138 | +185 | +232 | +273 | +356 | +386 | +473 | +640 | +
Sweden | +34 | +50 | +59 | +83 | +113 | +170 | +233 | +232 | +385 | +359 | +428 | +510 | +
Switzerland | +37 | +50 | +54 | +74 | +74 | +95 | +155 | +195 | +233 | +263 | +349 | +447 | +
United Kingdom | +363 | +417 | +531 | +660 | +781 | +979 | +1350 | +1837 | +2430 | +3108 | +3718 | +4245 | +
# Heatmap of the country-by-year counts, with thick white horizontal lines
# drawn at every row boundary to separate the countries visually.
f, ax = plt.subplots(figsize=(15, 15))
g = sns.heatmap(year_pivot, annot=True, fmt="d", linewidths=0.5, ax=ax)
g.set(xlabel="", ylabel="")
for row_boundary in range(len(year_pivot) + 1):
    ax.axhline(row_boundary, color="white", lw=10)
+
# Country-by-year crosstab of distinct publications, normalised per year
# column and expressed in percent.
year_percent_pivot = (
    pd.crosstab(
        collab_year["Country"],
        collab_year["Publication Year"],
        values=collab_year[record_col],
        aggfunc="nunique",
        normalize="columns",
    ).fillna(0)
    * 100
)
year_percent_pivot
+
Publication Year | +2011 | +2012 | +2013 | +2014 | +2015 | +2016 | +2017 | +2018 | +2019 | +2020 | +2021 | +2022 | +
---|---|---|---|---|---|---|---|---|---|---|---|---|
Country | ++ | + | + | + | + | + | + | + | + | + | + | + |
Austria | +1.962533 | +1.801802 | +1.557819 | +1.736420 | +1.865672 | +1.699970 | +1.689744 | +1.552958 | +1.816267 | +1.543488 | +1.712804 | +1.623248 | +
Belgium | +3.033006 | +2.852853 | +2.396645 | +2.894034 | +2.649254 | +2.415747 | +2.112180 | +2.320712 | +2.355883 | +2.399730 | +2.240533 | +2.312139 | +
Bulgaria | +0.356824 | +0.375375 | +0.479329 | +0.400712 | +0.261194 | +0.566657 | +0.492842 | +0.314081 | +0.131614 | +0.281658 | +0.296269 | +0.150447 | +
Croatia | +0.089206 | +0.150150 | +0.359497 | +0.356189 | +0.373134 | +0.208768 | +0.234687 | +0.331530 | +0.355357 | +0.326724 | +0.305527 | +0.277140 | +
Cyprus | +0.178412 | +0.075075 | +0.299581 | +0.222618 | +0.186567 | +0.149120 | +0.187749 | +0.122143 | +0.197420 | +0.315457 | +0.333302 | +0.340486 | +
Czech Republic | +1.159679 | +1.126126 | +0.958658 | +0.934996 | +0.746269 | +1.073665 | +0.868341 | +0.977142 | +0.842327 | +0.912573 | +0.861031 | +0.973949 | +
Denmark | +3.122212 | +2.477477 | +2.396645 | +2.626892 | +2.537313 | +2.206979 | +2.370336 | +3.402548 | +3.079758 | +2.760252 | +2.712712 | +2.715971 | +
Estonia | +0.267618 | +0.225225 | +0.419413 | +0.445236 | +0.447761 | +0.298240 | +0.352030 | +0.261734 | +0.210582 | +0.428121 | +0.416628 | +0.308813 | +
Finland | +2.765388 | +2.627628 | +2.636309 | +3.650935 | +3.731343 | +3.728005 | +2.957052 | +3.454894 | +3.171887 | +2.884182 | +2.675678 | +3.008948 | +
France | +10.437110 | +9.759760 | +10.425404 | +10.284951 | +10.037313 | +9.692812 | +8.167097 | +8.567440 | +8.528560 | +7.785038 | +7.471530 | +6.793887 | +
Germany | +10.972346 | +12.912913 | +11.503895 | +12.154942 | +11.567164 | +10.885774 | +10.701713 | +10.539173 | +10.542248 | +10.218567 | +11.202666 | +10.974741 | +
Greece | +1.338091 | +1.351351 | +1.138406 | +1.424755 | +1.305970 | +1.491202 | +1.103027 | +1.413366 | +1.500395 | +1.374493 | +1.286918 | +1.433209 | +
Hungary | +0.981267 | +0.825826 | +1.258238 | +0.712378 | +0.746269 | +1.133313 | +0.797935 | +0.820101 | +0.802843 | +0.687247 | +0.768447 | +0.712645 | +
Ireland | +1.159679 | +1.201201 | +1.318155 | +1.380232 | +1.007463 | +1.342082 | +1.548932 | +1.256325 | +1.105554 | +1.306895 | +1.546153 | +1.480719 | +
Italy | +4.549509 | +5.255255 | +5.032954 | +5.164737 | +6.641791 | +5.577095 | +5.796761 | +5.670913 | +5.804159 | +6.433078 | +5.934636 | +6.421728 | +
Latvia | +0.000000 | +0.000000 | +0.059916 | +0.000000 | +0.037313 | +0.238592 | +0.234687 | +0.261734 | +0.131614 | +0.101397 | +0.120359 | +0.142529 | +
Lithuania | +0.089206 | +0.150150 | +0.599161 | +0.178094 | +0.149254 | +0.387712 | +0.281624 | +0.401326 | +0.500132 | +0.405588 | +0.351819 | +0.300895 | +
Luxembourg | +0.178412 | +0.225225 | +0.179748 | +0.044524 | +0.298507 | +0.268416 | +0.305093 | +0.261734 | +0.236904 | +0.247859 | +0.324044 | +0.403832 | +
Malta | +0.089206 | +0.000000 | +0.000000 | +0.000000 | +0.037313 | +0.029824 | +0.000000 | +0.000000 | +0.078968 | +0.022533 | +0.064809 | +0.079183 | +
Netherlands | +6.422837 | +4.804805 | +4.613541 | +4.585931 | +5.186567 | +4.950790 | +5.163107 | +5.182342 | +5.369834 | +5.295178 | +4.897695 | +5.186476 | +
Norway | +2.676182 | +3.153153 | +3.594967 | +3.383793 | +2.500000 | +2.624515 | +2.440742 | +2.338161 | +2.921822 | +2.850383 | +2.814554 | +2.462586 | +
Poland | +1.516503 | +2.327327 | +2.216896 | +2.537845 | +2.723881 | +2.445571 | +2.299930 | +1.919386 | +1.816267 | +2.039207 | +2.555319 | +2.795154 | +
Portugal | +1.427297 | +1.726727 | +2.097064 | +1.825467 | +1.679104 | +1.729794 | +1.854025 | +2.076426 | +1.789945 | +1.656151 | +1.888714 | +1.678676 | +
Romania | +0.624442 | +1.126126 | +0.778910 | +0.712378 | +0.932836 | +0.775425 | +0.868341 | +0.994591 | +0.842327 | +0.619648 | +0.444403 | +0.490934 | +
Slovakia | +0.802855 | +0.450450 | +0.359497 | +0.445236 | +0.447761 | +0.656129 | +0.422436 | +0.471122 | +0.355357 | +0.383055 | +0.333302 | +0.356323 | +
Slovenia | +0.624442 | +0.525526 | +0.599161 | +0.534283 | +0.634328 | +0.805249 | +0.516311 | +0.820101 | +0.710713 | +0.349256 | +0.444403 | +0.316731 | +
Spain | +4.460303 | +3.678679 | +4.134212 | +4.986643 | +5.149254 | +5.517447 | +5.444731 | +4.763567 | +4.685444 | +4.348806 | +4.379224 | +5.067701 | +
Sweden | +3.033006 | +3.753754 | +3.535051 | +3.695459 | +4.216418 | +5.070086 | +5.468200 | +4.048159 | +5.067123 | +4.044615 | +3.962596 | +4.038324 | +
Switzerland | +3.300624 | +3.753754 | +3.235470 | +3.294746 | +2.761194 | +2.833284 | +3.637644 | +3.402548 | +3.066596 | +2.963046 | +3.231182 | +3.539473 | +
United Kingdom | +32.381802 | +31.306306 | +31.815458 | +29.385574 | +29.141791 | +29.197733 | +31.682704 | +32.053743 | +31.982101 | +35.015773 | +34.422739 | +33.613113 | +
# Annotated heatmap of the per-year percentage table; no colour bar is drawn.
f, ax = plt.subplots(figsize=(15, 15))
g = sns.heatmap(
    year_percent_pivot,
    annot=True,
    fmt='.1f',
    linewidths=0.5,
    cbar=False,
    ax=ax,
)
# Append a percent sign to every cell annotation.
for t in ax.texts:
    t.set_text(t.get_text() + " %")
g.set_xlabel("")
g.set_ylabel("")
# Thick white vertical rules on every column boundary, so each year reads as
# its own column.
for i in range(len(year_percent_pivot.columns) + 1):
    ax.axvline(i, color='white', linewidth=10)
+
+
# Institutional collab
+
# Attach the country type to every institution row. This is an inner merge,
# so rows whose Country is missing from wos_country_types are dropped.
wos_univ_locations = wos_univ.merge(wos_country_types, on="Country")

# One row per (record, country, harmonised institution, country type),
# excluding rows whose country type is "Other".
wos_univ_collabs = (
    wos_univ_locations
    .loc[
        wos_univ_locations["Country_Type"] != "Other",
        [record_col, "Country", "Institution_harm", "Country_Type"],
    ]
    .drop_duplicates()
)

# Preview up to 100 random rows. The size is capped because
# DataFrame.sample(100) raises ValueError on a frame with fewer than 100 rows.
wos_univ_collabs.sample(min(100, len(wos_univ_collabs)))
+
+ | UT (Unique WOS ID) | +Country | +Institution_harm | +Country_Type | +
---|---|---|---|---|
104534 | +WOS:000536637200011 | +United Kingdom | +Univ Warwick | +Non-EU associate | +
120323 | +WOS:000373806800006 | +France | +ENSAIT | +EU | +
41841 | +WOS:000542956600003 | +China | +Nanjing Univ Aeronaut & Astronaut | +China | +
100019 | +WOS:000459844300007 | +United Kingdom | +Univ Manchester | +Non-EU associate | +
174151 | +WOS:000843324300007 | +Ireland | +Trinity Coll Dublin | +EU | +
... | +... | +... | +... | +... | +
157638 | +WOS:000863147500001 | +Finland | +Univ Turku | +EU | +
71835 | +WOS:000798227800116 | +China | +Shanghai Jiao Tong Univ | +China | +
128870 | +WOS:000460118200077 | +Sweden | +Royal Inst Technol | +EU | +
37822 | +WOS:000517665600048 | +China | +Chinese Acad Sci | +China | +
26625 | +WOS:000453750400001 | +China | +Hangzhou Dianzi Univ | +China | +
100 rows × 4 columns
# Number of institutions shown in each ranking chart.
TOPN = 25


def _top_institutions(collabs, n=TOPN):
    """Return the `n` institutions with the most distinct records.

    Distinct values of ``record_col`` are counted per
    (Country, Institution_harm, Country_Type) group; the result is sorted by
    that count in descending order and truncated to `n` rows.
    """
    counts = collabs.groupby(
        ["Country", "Institution_harm", "Country_Type"], as_index=False
    )[record_col].nunique()
    return counts.sort_values(by=record_col, ascending=False).head(n).copy()


def _institution_bar(data):
    """Build the horizontal bar chart for one institution ranking table."""
    fig = px.bar(
        data,
        x=record_col,
        y="Institution_harm",
        color="Country_Type",
        labels={
            record_col: 'Number of co-publications',
            "Institution_harm": "Institution",
            "Country_Type": "Country type",
        },
        title="Most visible institutions",
        template='plotly',
    )
    fig.update_layout(
        xaxis_tickformat='d',
        font_family="Montserrat",
        # Largest bar at the top of the chart.
        yaxis={'categoryorder': 'total ascending'},
        width=1000,
        height=1000,
    )
    fig.update_traces(hovertemplate='%{x:d}')
    # Thin black frame around the plotting area (paper coordinates 0..1).
    fig.add_shape(
        type="rect",
        xref="paper",
        yref="paper",
        x0=0,
        y0=0,
        x1=1.0,
        y1=1.0,
        line=dict(color="black", width=0.5),
    )
    fig.update_yaxes(showgrid=True, ticks="outside")
    fig.update_xaxes(showgrid=True, ticks="outside")
    return fig


# Chinese side, every non-Chinese country type, and the EU-only subset.
wos_univ_ch = wos_univ_collabs[wos_univ_collabs["Country_Type"] == "China"]
wos_univ_eu = wos_univ_collabs[wos_univ_collabs["Country_Type"] != "China"]
wos_univ_eu_strict = wos_univ_collabs[wos_univ_collabs["Country_Type"] == "EU"]

data_eu = _top_institutions(wos_univ_eu)
data_eu_strict = _top_institutions(wos_univ_eu_strict)
data_ch = _top_institutions(wos_univ_ch)

for data in [data_eu, data_eu_strict, data_ch]:
    fig = _institution_bar(data)
    fig.show(config=dict(displayModeBar=False))
+
# Sanity check of the institution harmonisation: list every raw "Institution"
# value that maps to the harmonised name "Chinese Acad Sci", with its number
# of distinct records.
# NOTE(review): the rendered output lists the raw value "Chinese Acad Sci"
# twice (1 and 4614 records) - presumably the two strings differ by invisible
# characters such as whitespace; verify in the harmonisation step.
wos_univ_test = (
    wos_univ_locations
    .loc[
        wos_univ_locations["Country_Type"] != "Other",
        [record_col, "Country", "Institution", "Institution_harm", "Country_Type"],
    ]
    .drop_duplicates()
)
www = wos_univ_test.groupby(
    ["Institution", "Institution_harm"], as_index=False
)[record_col].nunique()
www.loc[www["Institution_harm"].eq("Chinese Acad Sci")]
+
+ | Institution | +Institution_harm | +UT (Unique WOS ID) | +
---|---|---|---|
16 | +Chinese Acad Sci | +Chinese Acad Sci | +1 | +
3149 | +Chinese Acad Sci | +Chinese Acad Sci | +4614 | +
3153 | +Chinese Acad Sci AIRCAS | +Chinese Acad Sci | +2 | +
3155 | +Chinese Acad Sci CAREERI CAS | +Chinese Acad Sci | +1 | +
3157 | +Chinese Acad Sci CASIA | +Chinese Acad Sci | +8 | +
3159 | +Chinese Acad Sci GUCAS | +Chinese Acad Sci | +2 | +
3160 | +Chinese Acad Sci IAP | +Chinese Acad Sci | +1 | +
3161 | +Chinese Acad Sci IECAS | +Chinese Acad Sci | +2 | +
3162 | +Chinese Acad Sci IME CAS | +Chinese Acad Sci | +1 | +
3163 | +Chinese Acad Sci IMECAS | +Chinese Acad Sci | +1 | +
3164 | +Chinese Acad Sci ITP CAS | +Chinese Acad Sci | +1 | +
3166 | +Chinese Acad Sci NAOC | +Chinese Acad Sci | +1 | +
3167 | +Chinese Acad Sci NAOC CAS | +Chinese Acad Sci | +2 | +
13501 | +RCEES Chinese Acad Sci | +Chinese Acad Sci | +1 | +
19499 | +ZIAT Chinese Acad Sci | +Chinese Acad Sci | +1 | +
# Split the institution rows into the Chinese side and everything else
# (EU and non-EU associated countries).
wos_univ_ch = wos_univ_collabs[wos_univ_collabs["Country_Type"] == "China"]
wos_univ_eu = wos_univ_collabs[wos_univ_collabs["Country_Type"] != "China"]

# Pair every European institution with every Chinese institution that shares
# a record, then attach the record's domain / field / subfield labels.
# The result has one row per (record, European institution, Chinese
# institution).
wos_univ_dipol = (
    wos_univ_eu
    .merge(wos_univ_ch, on=record_col, suffixes=('_eu', '_ch'))
    .merge(
        wos[[record_col, "Domain_English", "Field_English", "SubField_English"]],
        on=record_col,
    )
)

# Preview up to 100 random rows. The size is capped because
# DataFrame.sample(100) raises ValueError on a frame with fewer than 100 rows.
wos_univ_dipol.sample(min(100, len(wos_univ_dipol)))
+
+ | UT (Unique WOS ID) | +Country_eu | +Institution_harm_eu | +Country_Type_eu | +Country_ch | +Institution_harm_ch | +Country_Type_ch | +Domain_English | +Field_English | +SubField_English | +
---|---|---|---|---|---|---|---|---|---|---|
263074 | +WOS:000597493300001 | +United Kingdom | +Univ Northumbria Newcastle | +Non-EU associate | +China | +Nanchang Univ | +China | +Natural Sciences | +Chemistry | +Analytical Chemistry | +
71907 | +WOS:000494411700001 | +Germany | +Univ Wurzburg | +EU | +China | +South China Agr Univ | +China | +Economic & Social Sciences | +Social Sciences | +Information & Library Sciences | +
303069 | +WOS:000569985300066 | +Italy | +Selex | +EU | +China | +Wuhan Elect Informat Inst | +China | +Applied Sciences | +Engineering | +Computation Theory & Mathematics | +
259937 | +WOS:000557391000036 | +United Kingdom | +Univ Glasgow | +Non-EU associate | +China | +Southwest Jiaotong Univ | +China | +Natural Sciences | +Chemistry | +Analytical Chemistry | +
302133 | +WOS:000477943300012 | +Italy | +Politecn Milan | +EU | +China | +City Univ Hong Kong | +China | +Applied Sciences | +Information & Communication Technologies | +Artificial Intelligence & Image Processing | +
... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +... | +
179087 | +WOS:000460128100005 | +Finland | +Univ Jyvaskyla | +EU | +China | +Capital Med Univ | +China | +Health Sciences | +Clinical Medicine | +Ophthalmology & Optometry | +
333080 | +WOS:000589420400001 | +Ireland | +Univ Coll Dublin | +EU | +China | +Guangxi Normal Univ | +China | +Natural Sciences | +Chemistry | +Organic Chemistry | +
300958 | +WOS:000388876400003 | +Finland | +Nokias Mobile Networks Organizat | +EU | +China | +Nokia Bell Labs | +China | +Applied Sciences | +Information & Communication Technologies | +Networking & Telecommunications | +
95342 | +WOS:000579154000008 | +United Kingdom | +Imperial Coll London | +Non-EU associate | +China | +Wuhan Polytech Univ | +China | +Health Sciences | +Clinical Medicine | +General & Internal Medicine | +
197767 | +WOS:000571399800004 | +Switzerland | +Univ Bern | +Non-EU associate | +China | +Shandong Univ | +China | +Natural Sciences | +Physics & Astronomy | +General Physics | +
100 rows × 10 columns
# Parallel-categories diagram: European country -> research domain -> Chinese
# side country.
# wos_univ_dipol holds one row per institution *pair*, so plotting it directly
# would weight each publication by (#European institutions x #Chinese
# institutions). De-duplicate per record first so every publication counts
# once per (European country, domain, Chinese country) combination, matching
# the distinct-record counting used in the other analyses.
fig = px.parallel_categories(
    wos_univ_dipol[[record_col, "Country_eu", "Domain_English", "Country_ch"]]
    .drop_duplicates()
    [["Country_eu", "Domain_English", "Country_ch"]]
)
fig.show()
+