K. Corporation

Interactive chart for the relation of area and population density

This code shows the relation between country size and population density. It uses world data from kaggle.com and poorly written Python code, but has the cool feature that the country name appears when you hover over a dot. I wrote this because I woke up on a Saturday at 1 pm and was wondering if there's any noticeable trend in this relation.

I started writing this garbage in Julia, but finding solutions for problems in Julia is terribly painful: some packages are poorly documented and presumably zero users have tried similar things before. So I switched to Python. There is so much stuff where I really don't know what I did and I really don't care. The only thing bugging me is that the hover function seems to have a weird hitbox, but I'm too tired to fix this right now and I will probably never try to improve this.

Check out my Gitea repo to view the code in a readable format. Be aware that the hover feature does not work within Jupyter Notebook / JupyterLab.
Here's the raw text:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the world data set.
# with local csv file:
#data = pd.read_csv('data.csv')
# without download of csv:
data = pd.read_csv('https://k-corporation.org/media/population_data.csv')

# easier to write column names
data = data.rename(columns={'Area (sq. mi.)': 'Area',
                            'Pop. Density (per sq. mi.)': 'Density'})

# correct some data
# Row mask and column go into ONE .loc call: the chained form
# `data.loc[mask]['Region'] = ...` assigns into a temporary copy and leaves
# `data` untouched. The comparison strips whitespace so a padded country
# name still matches (NOTE(review): confirm whether the CSV pads its names).
data.loc[data['Country'].str.strip() == 'Iran', 'Region'] = 'NEAR EAST'

# Spell out abbreviated / inverted country names. These are plain substring
# replacements (regex=False), applied in the order listed.
country_renames = {
    'Bahamas, The': 'The Bahamas',
    'British Virgin Is.': 'British Virgin Islands',
    'Central African Rep.': 'Central African Republic',
    'Congo, Dem. Rep.': 'Democratic Republic of the Congo',
    'Congo, Rep. of the': 'Republic of the Congo',
    'Gambia, The': 'The Gambia',
    'Korea, North': 'North Korea',
    'Korea, South': 'South Korea',
    'Micronesia, Fed. St.': 'Federal State of Micronesia',
    'N. Mariana Islands': 'Northern Mariana Islands ',
    'Turks and Caicos Is': 'Turks and Caicos Islands',
}
for old_name, new_name in country_renames.items():
    data['Country'] = data['Country'].str.replace(old_name, new_name, regex=False)

# more precise values: the density is recomputed from population and area,
# so the text density column of the CSV does not need to be parsed at all
data['Density'] = data['Population'] / data['Area']
# filter data; .copy() yields an independent frame so that later column
# assignments do not write into a slice of the full table
data = data[['Country', 'Region', 'Area', 'Density']].copy()

# Rename Regions
# Trim the padding around the region labels, then apply the renames one after
# another. The order matters: 'ASIA (EX. NEAR EAST)' itself contains
# 'NEAR EAST' and therefore has to be rewritten before that rule runs.
region = data['Region'].str.strip()
for old_label, new_label in (
    ('ASIA (EX. NEAR EAST)', 'SOUTHERN/EASTERN ASIA'),
    ('LATIN AMER. & CARIB', 'LATIN AMERICA'),
    ('BALTICS', 'EASTERN EUROPE'),
    ('NEAR EAST', 'MIDDLE EAST'),
):
    region = region.str.replace(old_label, new_label, regex=False)
data['Region'] = region

# From here on rows are addressed by country name.
data = data.set_index('Country')

# Convert from square miles to km² (1 sq. mi. = 2.58999 km²): areas are
# multiplied by the factor, densities (people per area) are divided by it.
data['Area'] = data['Area'] * 2.58999
data['Density'] = data['Density'] / 2.58999
# Origin of the "relation factor": slightly below the smallest area and the
# smallest density, so every country lies at a positive distance from it.
min_area = data['Area'].min()
min_density = data['Density'].min()
x0 = min_area - min_area * 0.093
y0 = min_density - min_density * 0.065

# Either use 0km² and 0 people per km² as factor origin or use the minimum values
# data['Relation Factor'] = np.sqrt( np.log10(data['Area'])**2 + np.log10(data['Density'])**2 )
# The factor is the Euclidean distance to (x0, y0) measured in log10 space.
log_dx = np.log10(data['Area']) - np.log10(x0)
log_dy = np.log10(data['Density']) - np.log10(y0)
data['Relation Factor'] = np.sqrt(log_dx**2 + log_dy**2)

# Normalize on a maximum of 1
normalize = data['Relation Factor'].max()
data['Relation Factor'] = data['Relation Factor'] / normalize

# One sub-frame per region, selected with a boolean mask on the Region column.
se_asia = data[data['Region'] == 'SOUTHERN/EASTERN ASIA']
w_europe = data[data['Region'] == 'WESTERN EUROPE']
e_europe = data[data['Region'] == 'EASTERN EUROPE']
n_africa = data[data['Region'] == 'NORTHERN AFRICA']
s_africa = data[data['Region'] == 'SUB-SAHARAN AFRICA']
oceania = data[data['Region'] == 'OCEANIA']
sa = data[data['Region'] == 'LATIN AMERICA']
na = data[data['Region'] == 'NORTHERN AMERICA']
me = data[data['Region'] == 'MIDDLE EAST']
udssr = data[data['Region'] == 'C.W. OF IND. STATES']

# Legend label -> sub-frame. The position of an entry in this Series is also
# the position of its colour in `colors`.
continents = pd.Series({
    'Western Europe': w_europe,
    'Eastern Europe': e_europe,
    'Latin America': sa,
    'North America': na,
    'Sub-Saharan Africa': s_africa,
    'North Africa': n_africa,
    'Middle East': me,
    'C.W. of Ind. States': udssr,
    'Oceania': oceania,
    'South-East Asia': se_asia,
})
colors = [
    'dodgerblue', 'aqua', 'darkviolet', 'magenta', 'chartreuse',
    'olivedrab', 'saddlebrown', 'goldenrod', 'palevioletred', 'crimson',
]

# The region is now implied by the Series entry, so the column is removed
# from every sub-frame (leaving Area, Density and Relation Factor).
for region_frame in continents:
    del region_frame['Region']

fig, ax = plt.subplots()

# sc: one scatter collection per region, in the same order as `continents`.
# li: one entry per country; each entry is the list returned by ax.plot()
#     for the line from the origin (x0, y0) to that country's dot.
sc = []
li = []

# enumerate() yields the colour index directly; there is no need to search
# for the position of the current frame by comparing DataFrames.
for col_i, (c, region_frame) in enumerate(continents.items()):
    x = region_frame['Area']
    y = region_frame['Density']
    sc.append(ax.scatter(x, y, c=colors[col_i], label=c))
    # Iterate over the values instead of x[i] / y[i]: the frames are indexed
    # by country name, and integer-position lookups on a label-indexed Series
    # are a deprecated fallback in pandas.
    for area, density in zip(x, y):
        # lw=0 keeps the line invisible until its country is hovered.
        li.append(ax.plot([x0, area], [y0, density],
                          c=colors[col_i], alpha=0.6, lw=0))

# Shared tooltip; it is hidden until the mouse is over a dot.
annot = ax.annotate("", xy=(0, 0), xytext=(20, 20), textcoords="offset points",
                    bbox=dict(boxstyle="round", fc="w"),
                    arrowprops=dict(arrowstyle="-"))
annot.set_visible(False)

def update_fig(s, ind):
    """Point the tooltip at the hovered dot and highlight its region.

    Parameters
    ----------
    s
        The scatter collection from ``sc`` that contains the mouse cursor.
    ind
        The details dict returned by ``s.contains(event)``; ``ind["ind"]``
        holds the indices of the dots under the cursor. Only the first one
        is used.

    Raises
    ------
    ValueError
        If ``s`` is not one of the collections stored in ``sc``.
    """
    point_idx = ind["ind"][0]
    pos = s.get_offsets()[point_idx]
    annot.xy = pos

    # `sc` is filled in the order of `continents`, and each collection is
    # created from the rows of its frame, so the collection's position and the
    # pick index identify the country directly. Searching every frame for an
    # equal area instead can pick the wrong country when two areas coincide
    # and leaves `text` / `b_col` unbound when nothing matches.
    b_col = sc.index(s)
    row = continents.iloc[b_col].iloc[point_idx]
    factor = np.round(row['Relation Factor'], 3)
    annot.set_text(f"{row.name}\nRelation Factor: {factor}")

    # Dim every other region. The hovered one is reset to full opacity in
    # case it was dimmed by a previous call.
    for other in sc:
        other.set_alpha(1 if other is s else 0.1)

    # Show only the origin line that ends at the hovered dot; all other lines
    # are hidden again so no highlight from an earlier dot is left behind.
    for lines in li:
        for line in lines:
            hit = (line.get_xdata()[1] == pos[0]
                   and line.get_ydata()[1] == pos[1])
            line.set_lw(1.5 if hit else 0)

    annot.get_bbox_patch().set_facecolor(colors[b_col])
    annot.get_bbox_patch().set_alpha(0.65)

def _clear_highlight():
    """Restore full opacity on every scatter and hide all origin lines."""
    for other in sc:
        other.set_alpha(1)
    for lines in li:
        for line in lines:
            line.set_lw(0)


def hover(event):
    """Mouse-move callback: show the tooltip for the dot under the cursor.

    The decision to hide is made once, after ALL scatter collections have
    been tested. Hiding inside the loop (one check per collection) lets a
    collection that does not contain the cursor undo the tooltip a previous
    collection has just shown, which makes the tooltip flicker.

    Parameters
    ----------
    event
        The matplotlib mouse event delivered for "motion_notify_event".
    """
    vis = annot.get_visible()
    if event.inaxes != ax:
        return

    # Collections added later are drawn on top of earlier ones, so test them
    # in reverse order to pick the dot the user actually sees.
    for scatter in reversed(sc):
        cont, ind = scatter.contains(event)
        if cont:
            # Drop the highlight of a previously hovered dot first.
            _clear_highlight()
            update_fig(scatter, ind)
            annot.set_visible(True)
            fig.canvas.draw_idle()
            return

    # The cursor is over no dot at all: undo the highlight if one is showing.
    if vis:
        _clear_highlight()
        annot.set_visible(False)
        fig.canvas.draw_idle()

# Call hover() on every mouse move over the figure.
fig.canvas.mpl_connect("motion_notify_event", hover)

# Both axes are logarithmic; the lower limits are the relation-factor origin.
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlim(x0, 20e7)
ax.set_ylim(y0, 10e3)

# The axis labels reuse the column names of the per-region frames.
axis_names = continents['Western Europe'].columns
ax.set_xlabel(axis_names[0] + " (km²) ")
ax.set_ylabel(axis_names[1] + " (people per km²)")

# Faint major grid, even fainter minor grid.
for tick_kind, opacity in (('major', 0.3), ('minor', 0.05)):
    ax.grid(which=tick_kind, alpha=opacity)

ax.legend()
plt.show()