"""Plot a histogram of Singapore resale flat prices with mean guide lines.

Reads the ``resale_price`` column from ``price.csv``, draws a 30-bin
histogram, and overlays two vertical lines: the mean of the full column
and the mean of a random sample of 30 prices. Both means are printed
before the figure is shown.
"""
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# 'price.csv' is the renamed raw data file downloaded from
# https://data.gov.sg/dataset/resale-flat-prices
# original file name :
# resale-flat-prices-based-on-registration-date-from-jan-2017-onwards
data = pd.read_csv('price.csv')
x = data['resale_price']

plt.rcParams.update({'font.size': 16})

fig, axs = plt.subplots(tight_layout=True)
axs.hist(x, bins=30)
plt.title('Singapore public housing 2017-01 to 2020-10')
plt.xlabel('resale price')
plt.ylabel('count')

# The sample is unseeded, so the sample mean differs from run to run.
sample_mean = x.sample(30).mean()
pop_mean = x.mean()
pop_std = x.std()

print(pop_mean)  # preview the values
print(sample_mean)

plt.axvline(pop_mean)  # guiding lines
plt.axvline(sample_mean)
plt.show()  # figure is then modified in GIMP