Commit 3f0abcc0 authored by Orkut Karaçalık's avatar Orkut Karaçalık
Browse files

update

parent 8f50bf93
......@@ -6,6 +6,7 @@ import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
from scipy.stats import entropy
from functools import reduce
def get_unique_fields(df, col='conference_fields'):
......@@ -76,6 +77,14 @@ df_h_index_per_year = group_by_years(df_h_index)
df_h_index_per_year_per_fields = group_by_years_fields(df_h_index, years, fields) # { y: { f: df_h_index_per_year[y][df_h_index_per_year[y].conference_fields.apply(lambda x: f in x)] for f in fields } for y in df_h_index_per_year.keys() }
#get_distribution_plot(df_h_index)
get_distribution_plot(df_h_index_per_year)
#get_distribution_plot(df_h_index_per_year_per_fields)
#a = get_unique_fields(df_h_index_per_year_per_fields[2017]['human-computer-interaction'], 'gender')
\ No newline at end of file
#get_distribution_plot(df_h_index_per_year)
get_distribution_plot(df_h_index_per_year_per_fields)
#a = get_unique_fields(df_h_index_per_year_per_fields[2017]['human-computer-interaction'], 'gender')
# Compare the h-index distributions of female and male speakers via the
# Kullback-Leibler divergence KL(female || male).
female = df_h_index.groupby('gender').get_group('female')['h_index']
male = df_h_index.groupby('gender').get_group('male')['h_index']

# Both histograms must be built on the SAME bin edges. Letting np.histogram
# pick edges from each sample separately makes bin i of one histogram cover a
# different h-index interval than bin i of the other, so the element-wise
# comparison performed by entropy() would be meaningless.
shared_range = (min(female.min(), male.min()), max(female.max(), male.max()))
pmf_female, bins_female = np.histogram(female, bins=10, range=shared_range, density=True)
pmf_male, bins_male = np.histogram(male, bins=10, range=shared_range, density=True)

# Map each bin's left edge to its density (zip drops the final right edge).
# NOTE: density=True yields densities (integrating to 1 over the bin widths),
# not probabilities; entropy() renormalises its inputs to sum to 1.
dist_f = dict(zip(bins_female, pmf_female))
dist_m = dict(zip(bins_male, pmf_male))

# entropy(pk, qk) is the KL divergence of pk from qk. It is infinite when a
# bin is empty for `male` but populated for `female`.
answer = entropy(pmf_female, pmf_male)
This diff is collapsed.
%% Cell type:code id: tags:
``` python
import pandas as pd
import seaborn as sns
sns.set()
```
%% Cell type:code id: tags:
``` python
df= pd.read_json("h-index.json")
```
%% Cell type:code id: tags:
``` python
df.head()
```
%% Output
citations citations_last_5_year \
0 NaN NaN
1 NaN NaN
2 NaN NaN
3 NaN NaN
4 NaN NaN
conference_fields \
0 [machine-learning]
1 [software-programming, network-communication, ...
2 [software-programming, network-communication, ...
3 [hardware-electronics, signal-processing]
4 [human-computer-interaction, machine-learning,...
conference_name date gender \
0 AAAI 2018 : AAAI Conference on Artificial Inte... 2018-06-18 male
1 AAMAS 2018 : International Conference on Auton... 2018-06-18 male
2 AAMAS 2018 : International Conference on Auton... 2018-06-18 male
3 ACC 2018 : American Control Conference 2018-06-18 male
4 ACII 2018 : International Conference on Affect... 2018-06-18 male
google_scholar_profile h_index h_index_lat_5_year name \
0 None NaN NaN Charles Isbell
1 None NaN NaN Thomas A. Henzinger
2 None NaN NaN Josh Tenenbaum
3 None NaN NaN Robert J. Wood
4 None NaN NaN Jeffrey Cohn
organization
0 Georgia Institute of Technology
1 Institute of Science and Technology Austria
2 Massachusetts Institute of Technology
3 Harvard University
4 University of Pittsburgh
%% Cell type:code id: tags:
``` python
# Keep only the rows for which an h-index value is present.
has_h_index = df['h_index'].notnull()
df = df.loc[has_h_index]
```
%% Cell type:code id: tags:
``` python
df.head()
```
%% Output
citations citations_last_5_year \
11 18431.0 5524.0
14 5838.0 4603.0
15 41827.0 20595.0
16 1897.0 1246.0
17 87.0 87.0
conference_fields \
11 [software-programming, network-communication, ...
14 [machine-learning]
15 [machine-learning]
16 [hardware-electronics, signal-processing]
17 [hardware-electronics, signal-processing]
conference_name date gender \
11 AAMAS 2018 : International Conference on Auton... 2018-06-18 male
14 AAAI 2018 : AAAI Conference on Artificial Inte... 2018-06-18 female
15 AAAI 2018 : AAAI Conference on Artificial Inte... 2018-06-18 male
16 ACC 2018 : American Control Conference 2018-06-18 male
17 ACC 2018 : American Control Conference 2018-06-18 male
google_scholar_profile h_index \
11 https://scholar.google.com/citations?user=cXkm... 64.0
14 https://scholar.google.com/citations?user=pouy... 34.0
15 https://scholar.google.com/citations?user=0uTu... 88.0
16 https://scholar.google.com/citations?user=z1ru... 22.0
17 https://scholar.google.com/citations?user=iRff... 4.0
h_index_lat_5_year name \
11 40.0 Craig Boutilier
14 31.0 Percy Liang
15 67.0 Zoubin Ghahramani
16 18.0 Ketan Savla
17 4.0 Noah N. Emery
organization
11 Google
14 Stanford University
15 University of Cambridge / Uber
16 University of Southern California
17 Harvard Medical School & Massachusetts Institu...
%% Cell type:code id: tags:
``` python
df.shape
```
%% Output
(113, 11)
%% Cell type:code id: tags:
``` python
# Extract the h-index column of each gender group, in (female, male) order.
women, men = (
    df.groupby('gender').get_group(label)['h_index']
    for label in ("female", "male")
)
```
%% Output
14 34.0
18 28.0
23 65.0
26 50.0
27 42.0
28 26.0
31 48.0
51 43.0
66 22.0
82 18.0
83 8.0
84 67.0
107 50.0
114 59.0
117 18.0
119 53.0
122 36.0
133 41.0
141 34.0
142 50.0
149 31.0
152 44.0
156 24.0
163 39.0
166 25.0
176 80.0
Name: h_index, dtype: float64
%% Cell type:code id: tags:
``` python
import numpy as np
import matplotlib.pyplot as plt
```
%% Cell type:code id: tags:
``` python
# Draw the 5-bin histogram of the women's h-index exactly once and keep the
# return value of plt.hist in `arr`. Calling plt.hist twice would draw the
# same bars a second time on the same axes.
arr = plt.hist(women, bins=5)
plt.show()
```
%% Output
%% Cell type:code id: tags:
``` python
# Histogram of the women's h-index values using np.histogram's default bin count.
# NOTE: with density=True the returned values are probability *densities*
# (they integrate to 1 over the bin widths), so despite the name `pmf` they
# do not sum to 1 unless every bin has width 1.
pmf, bins = np.histogram(women, density=True)
pmf
```
%% Output
array([0.00534188, 0.01602564, 0.02136752, 0.02136752, 0.02136752,
0.0267094 , 0.00534188, 0.01068376, 0.00534188, 0.00534188])
%% Cell type:code id: tags:
``` python
plt.hist(men, bins=5)
plt.show()
```
%% Output
%% Cell type:code id: tags:
``` python
sns.distplot(women);
plt.show()
```
%% Output
/home/okaracalik/Applications/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_axes.py:6462: UserWarning: The 'normed' kwarg is deprecated, and has been replaced by the 'density' kwarg.
warnings.warn("The 'normed' kwarg is deprecated, and has been "
%% Cell type:code id: tags:
``` python
# Overlay the h-index distributions of women (red) and men (blue) on one axes.
sns.distplot(women, hist=True, rug=True, color="red", label="Women");
sns.distplot(men, hist=True, rug=True, color="blue", label="Men");
# The label= arguments are only shown once a legend is drawn; without this
# call the two colours are not identified on the figure.
plt.legend()
plt.show()
```
%% Output
/home/okaracalik/Applications/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_axes.py:6462: UserWarning: The 'normed' kwarg is deprecated, and has been replaced by the 'density' kwarg.
warnings.warn("The 'normed' kwarg is deprecated, and has been "
/home/okaracalik/Applications/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_axes.py:6462: UserWarning: The 'normed' kwarg is deprecated, and has been replaced by the 'density' kwarg.
warnings.warn("The 'normed' kwarg is deprecated, and has been "
%% Cell type:code id: tags:
``` python
```
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment