Extract month and year from column in Pandas, create new column
Pandas
import modules
import pandas as pd
import numpy as np
import datetime
create dummy dataframe
raw_data = {'name': ['Willard Morris', 'Al Jennings', 'Omar Mullins', 'Spencer McDaniel'],
'age': [20, 19, 22, 21],
'favorite_color': ['blue', 'red', 'yellow', "green"],
'grade': [88, 92, 95, 70],
'birth_date': ['01-02-1996', '08-05-1997', '04-28-1996', '12-16-1995']}
df = pd.DataFrame(raw_data, index = ['Willard Morris', 'Al Jennings', 'Omar Mullins', 'Spencer McDaniel'])
df
age | birth_date | favorite_color | grade | name | |
---|---|---|---|---|---|
Willard Morris | 20 | 01-02-1996 | blue | 88 | Willard Morris |
Al Jennings | 19 | 08-05-1997 | red | 92 | Al Jennings |
Omar Mullins | 22 | 04-28-1996 | yellow | 95 | Omar Mullins |
Spencer McDaniel | 21 | 12-16-1995 | green | 70 | Spencer McDaniel |
create a new column with year of date field 'birth_date'
#pandas datetimeindex docs: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DatetimeIndex.html
#efficient way to extract year from string format date
df['year'] = pd.DatetimeIndex(df['birth_date']).year
df.head()
age | birth_date | favorite_color | grade | name | year | |
---|---|---|---|---|---|---|
Willard Morris | 20 | 01-02-1996 | blue | 88 | Willard Morris | 1996 |
Al Jennings | 19 | 08-05-1997 | red | 92 | Al Jennings | 1997 |
Omar Mullins | 22 | 04-28-1996 | yellow | 95 | Omar Mullins | 1996 |
Spencer McDaniel | 21 | 12-16-1995 | green | 70 | Spencer McDaniel | 1995 |
create a new column with month of date field 'birth_date'
#pandas datetimeindex docs: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DatetimeIndex.html
df['month'] = pd.DatetimeIndex(df['birth_date']).month
df.head()
age | birth_date | favorite_color | grade | name | year | month | |
---|---|---|---|---|---|---|---|
Willard Morris | 20 | 01-02-1996 | blue | 88 | Willard Morris | 1996 | 1 |
Al Jennings | 19 | 08-05-1997 | red | 92 | Al Jennings | 1997 | 8 |
Omar Mullins | 22 | 04-28-1996 | yellow | 95 | Omar Mullins | 1996 | 4 |
Spencer McDaniel | 21 | 12-16-1995 | green | 70 | Spencer McDaniel | 1995 | 12 |
#if the date format comes in datetime, we can also extract the day/month/year using the to_period function
#where 'D', 'M', 'Y' are inputs
df['month_year'] = pd.to_datetime(df['birth_date']).dt.to_period('M')
df.head()
age | birth_date | favorite_color | grade | name | year | month | month_year | |
---|---|---|---|---|---|---|---|---|
Willard Morris | 20 | 01-02-1996 | blue | 88 | Willard Morris | 1996 | 1 | 1996-01 |
Al Jennings | 19 | 08-05-1997 | red | 92 | Al Jennings | 1997 | 8 | 1997-08 |
Omar Mullins | 22 | 04-28-1996 | yellow | 95 | Omar Mullins | 1996 | 4 | 1996-04 |
Spencer McDaniel | 21 | 12-16-1995 | green | 70 | Spencer McDaniel | 1995 | 12 | 1995-12 |