import modules
import pandas as pd
import numpy as np
create dummy dataframe
# Column data for four example students; each list has one entry per student.
raw_data = {
    'name': ['Willard Morris', 'Al Jennings', 'Omar Mullins', 'Spencer McDaniel'],
    'age': [20, 19, 22, 21],
    'favorite_color': ['blue', 'blue', 'yellow', "green"],
    'grade': [88, 92, 95, 70],
}

# The row labels are the student names, so reuse the 'name' column as the index.
df = pd.DataFrame(raw_data, index=raw_data['name'])
df
| | age | favorite_color | grade | name |
|---|---|---|---|---|
| Willard Morris | 20 | blue | 88 | Willard Morris |
| Al Jennings | 19 | blue | 92 | Al Jennings |
| Omar Mullins | 22 | yellow | 95 | Omar Mullins |
| Spencer McDaniel | 21 | green | 70 | Spencer McDaniel |
return the average/mean from a Pandas column
# Arithmetic mean of the 'grade' column: (88 + 92 + 95 + 70) / 4
df['grade'].mean()
86.25
return the median from a Pandas column
# Median of the 'grade' column; with four values this is the average of the
# two middle ones (88 and 92)
df['grade'].median()
90.0
return descriptive statistics from Pandas dataframe
# Aside from the mean/median, you may be interested in general descriptive
# statistics of your dataframe -- `describe` is a handy method for this.
# In the output shown below, only the numeric columns ('age' and 'grade')
# are summarized.
df.describe()
| | age | grade |
|---|---|---|
| count | 4.000000 | 4.000000 |
| mean | 20.500000 | 86.250000 |
| std | 1.290994 | 11.206397 |
| min | 19.000000 | 70.000000 |
| 25% | 19.750000 | 83.500000 |
| 50% | 20.500000 | 90.000000 |
| 75% | 21.250000 | 92.750000 |
| max | 22.000000 | 95.000000 |