Commit b9ed5bcb authored by Orkut Karaçalık
Browse files

before presentation

parent 9c9f6e3c
......@@ -61,12 +61,20 @@ def get_distribution_plot(df, col='h_index', title='all'):
# TODO: make histogram
# Load the scraped records; the path is relative to the current working directory.
df = pd.read_json('../scrapers/o-7.json')
#df_gender_field = create_gender_field_df(df)
#df_gender_field_normalized = df_gender_field.apply(lambda col: col / col.sum() * 100 )
# Transpose so that 'male' and 'female' become columns
# (presumably one row per conference field -- confirm against create_gender_field_df).
df_gender_field = create_gender_field_df(df).transpose()
df_gender_field['total'] = df_gender_field['male'] + df_gender_field['female']
# Keep only rows with more than 40 people in total. Take an explicit copy so the
# column assignments below write to an independent frame instead of a filtered
# selection of df_gender_field (avoids pandas' SettingWithCopyWarning).
df_gender_field_top = df_gender_field[df_gender_field.total > 40].copy()
# Share of each gender within the row, expressed as a percentage.
df_gender_field_top['female_ratio'] = df_gender_field_top['female'] / df_gender_field_top['total'] * 100
df_gender_field_top['male_ratio'] = df_gender_field_top['male'] / df_gender_field_top['total'] * 100
# Ascending sort by female share, then a horizontal bar chart of both ratios.
df_gender_field_top = df_gender_field_top.sort_values(by='female_ratio')
ax = df_gender_field_top.loc[:,['female_ratio', 'male_ratio']].plot.barh(figsize=(8,6), grid=True)
plt.yticks(fontsize=18)
#df_gender_field_normalized = df_gender_field_top.apply(lambda col: col / col.sum() * 100 )
#df_gender_field = df_gender_field.transpose().sort_values(by='female', ascending=False)
#df_gender_field_normalized = df_gender_field_normalized.transpose().sort_values(by='female', ascending=True)
#ax = df_gender_field_normalized.plot.barh(figsize=(12,16), grid=True)
#ax.set(xlabel='Ratio', ylabel='Fields')
#df_ = pd.read_json('../scrapers/top-computer-science-conferences.json')
#df_ = pd.read_json('conferences.json')
# Restrict the rest of the analysis to records that actually have an h_index value.
df_h_index = df[df.h_index.notnull()]
......@@ -76,15 +84,28 @@ df_h_index_per_fields = group_by_fields(df_h_index)
# Group the h_index records by year, and by year and field. The grouping helpers
# are defined elsewhere; the trailing comment on the next-but-one line shows the
# dict-of-dicts shape ({year: {field: DataFrame}}) the second helper replaced.
df_h_index_per_year = group_by_years(df_h_index)
df_h_index_per_year_per_fields = group_by_years_fields(df_h_index, years, fields) # { y: { f: df_h_index_per_year[y][df_h_index_per_year[y].conference_fields.apply(lambda x: f in x)] for f in fields } for y in df_h_index_per_year.keys() }
#get_distribution_plot(df_h_index)
#get_distribution_plot(df_h_index)
#get_distribution_plot(df_h_index_per_year)
get_distribution_plot(df_h_index_per_year_per_fields)
#get_distribution_plot(df_h_index_per_year_per_fields)
#a = get_unique_fields(df_h_index_per_year_per_fields[2017]['human-computer-interaction'], 'gender')
# Split the h_index values by the 'gender' column.
female = df_h_index.groupby('gender').get_group('female')['h_index']
male = df_h_index.groupby('gender').get_group('male')['h_index']
# density=True yields a normalised density per bin. No `bins` argument is given, so
# each call picks its own bin edges from its own sample's range.
pmf_female, bins_female = np.histogram(female, density=True)
pmf_male, bins_male = np.histogram(male, density=True)
# np.histogram returns one more edge than densities; zip stops at the shorter
# sequence, so each density is keyed by the left edge of its bin.
dist_f = dict(zip(bins_female, pmf_female))
dist_m = dict(zip(bins_male, pmf_male))
# NOTE(review): the commented-out lines below duplicate the active histogram code
# above; presumably one of the two copies is stale -- confirm which is intended.
#female = df_h_index.groupby('gender').get_group('female')['h_index']
#male = df_h_index.groupby('gender').get_group('male')['h_index']
#pmf_female, bins_female = np.histogram(female, density=True)
#pmf_male, bins_male = np.histogram(male, density=True)
#dist_f = dict(zip(bins_female, pmf_female))
#dist_m = dict(zip(bins_male, pmf_male))
#
#answer = entropy(pmf_female, pmf_male)
#def find_female_male(df):
#    return { 'female': df[df.gender == 'female'].shape[0], 'male': df[df.gender == 'male'].shape[0] }
#
#ratio_per_year = { y: find_female_male(df_h_index_per_year[y]) for y in df_h_index_per_year.keys() }
#plt.pie(ratio_per_year[2017].values(), explode = (0.1, 0), labels=ratio_per_year[2017].keys(), colors=['gold', 'yellowgreen'], autopct='%1.1f%%', shadow=True)
#plt.axis('equal')
#plt.show()
#plt.pie(ratio_per_year[2018].values(), explode = (0.1, 0), labels=ratio_per_year[2018].keys(), colors=['gold', 'yellowgreen'], autopct='%1.1f%%', shadow=True)
#plt.axis('equal')
#plt.show()
# NOTE(review): `entropy` is presumably scipy.stats.entropy (import not visible here),
# which with two arguments computes the relative entropy of the first against the second.
# The two histograms above were binned independently, so bins are matched by position,
# not by h_index value -- consider passing shared bin edges to both np.histogram calls.
answer = entropy(pmf_female, pmf_male)
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment