# Created By: Debasis Das (18-Feb-2021)
import pandas as pd
import numpy as np
df = pd.read_csv("SalesData.csv",low_memory=False)
df
Region | Country | Product | WK_1 | WK_2 | WK_3 | WK_4 | |
---|---|---|---|---|---|---|---|
0 | America | USA | Laptop | 1241 | 1160 | 1929 | 1174 |
1 | America | USA | Phone | 1098 | 1092 | 1089 | 1819 |
2 | America | Canada | Laptop | 1441 | 1099 | 1950 | 1394 |
3 | America | Canada | Phone | 1990 | 1057 | 1656 | 1060 |
4 | Europe | Belgium | Laptop | 1084 | 1116 | 1002 | 1566 |
5 | Europe | Belgium | Phone | 1574 | 1958 | 1793 | 1213 |
6 | Europe | Finland | Laptop | 1325 | 1374 | 1300 | 1579 |
7 | Europe | Finland | Phone | 1347 | 1736 | 1782 | 1921 |
df1 = pd.pivot_table(df,index=["Region","Country","Product"],aggfunc=np.sum)
df1
WK_1 | WK_2 | WK_3 | WK_4 | |||
---|---|---|---|---|---|---|
Region | Country | Product | ||||
America | Canada | Laptop | 1441 | 1099 | 1950 | 1394 |
Phone | 1990 | 1057 | 1656 | 1060 | ||
USA | Laptop | 1241 | 1160 | 1929 | 1174 | |
Phone | 1098 | 1092 | 1089 | 1819 | ||
Europe | Belgium | Laptop | 1084 | 1116 | 1002 | 1566 |
Phone | 1574 | 1958 | 1793 | 1213 | ||
Finland | Laptop | 1325 | 1374 | 1300 | 1579 | |
Phone | 1347 | 1736 | 1782 | 1921 |
df2 = pd.pivot_table(df,index=["Region","Country"],aggfunc=np.sum)
df2
WK_1 | WK_2 | WK_3 | WK_4 | ||
---|---|---|---|---|---|
Region | Country | ||||
America | Canada | 3431 | 2156 | 3606 | 2454 |
USA | 2339 | 2252 | 3018 | 2993 | |
Europe | Belgium | 2658 | 3074 | 2795 | 2779 |
Finland | 2672 | 3110 | 3082 | 3500 |
df3 = pd.pivot_table(df,index=["Region","Country"],columns=["Product"],aggfunc=np.sum)
df3
WK_1 | WK_2 | WK_3 | WK_4 | ||||||
---|---|---|---|---|---|---|---|---|---|
Product | Laptop | Phone | Laptop | Phone | Laptop | Phone | Laptop | Phone | |
Region | Country | ||||||||
America | Canada | 1441 | 1990 | 1099 | 1057 | 1950 | 1656 | 1394 | 1060 |
USA | 1241 | 1098 | 1160 | 1092 | 1929 | 1089 | 1174 | 1819 | |
Europe | Belgium | 1084 | 1574 | 1116 | 1958 | 1002 | 1793 | 1566 | 1213 |
Finland | 1325 | 1347 | 1374 | 1736 | 1300 | 1782 | 1579 | 1921 |
df4 = pd.pivot_table(df,index=["Region","Country"],columns=["Product"],values = ["WK_1","WK_2"],aggfunc=np.sum)
df4
WK_1 | WK_2 | ||||
---|---|---|---|---|---|
Product | Laptop | Phone | Laptop | Phone | |
Region | Country | ||||
America | Canada | 1441 | 1990 | 1099 | 1057 |
USA | 1241 | 1098 | 1160 | 1092 | |
Europe | Belgium | 1084 | 1574 | 1116 | 1958 |
Finland | 1325 | 1347 | 1374 | 1736 |
df5 = pd.pivot_table(df,index=["Region","Country"], aggfunc=np.sum)
df5['Total'] = df5['WK_1'] + df5['WK_2']+ df5['WK_3'] + df5['WK_4']
df5
WK_1 | WK_2 | WK_3 | WK_4 | Total | ||
---|---|---|---|---|---|---|
Region | Country | |||||
America | Canada | 3431 | 2156 | 3606 | 2454 | 11647 |
USA | 2339 | 2252 | 3018 | 2993 | 10602 | |
Europe | Belgium | 2658 | 3074 | 2795 | 2779 | 11306 |
Finland | 2672 | 3110 | 3082 | 3500 | 12364 |
gdf1 = df.groupby(['Region']).sum()
gdf1
WK_1 | WK_2 | WK_3 | WK_4 | |
---|---|---|---|---|
Region | ||||
America | 5770 | 4408 | 6624 | 5447 |
Europe | 5330 | 6184 | 5877 | 6279 |
gdf2 = df.groupby(['Region',"Country","Product"]).sum()
gdf2
WK_1 | WK_2 | WK_3 | WK_4 | |||
---|---|---|---|---|---|---|
Region | Country | Product | ||||
America | Canada | Laptop | 1441 | 1099 | 1950 | 1394 |
Phone | 1990 | 1057 | 1656 | 1060 | ||
USA | Laptop | 1241 | 1160 | 1929 | 1174 | |
Phone | 1098 | 1092 | 1089 | 1819 | ||
Europe | Belgium | Laptop | 1084 | 1116 | 1002 | 1566 |
Phone | 1574 | 1958 | 1793 | 1213 | ||
Finland | Laptop | 1325 | 1374 | 1300 | 1579 | |
Phone | 1347 | 1736 | 1782 | 1921 |
gdf3 = df.groupby(['Region',"Country","Product"],sort=True).sum()
gdf3
WK_1 | WK_2 | WK_3 | WK_4 | |||
---|---|---|---|---|---|---|
Region | Country | Product | ||||
America | Canada | Laptop | 1441 | 1099 | 1950 | 1394 |
Phone | 1990 | 1057 | 1656 | 1060 | ||
USA | Laptop | 1241 | 1160 | 1929 | 1174 | |
Phone | 1098 | 1092 | 1089 | 1819 | ||
Europe | Belgium | Laptop | 1084 | 1116 | 1002 | 1566 |
Phone | 1574 | 1958 | 1793 | 1213 | ||
Finland | Laptop | 1325 | 1374 | 1300 | 1579 | |
Phone | 1347 | 1736 | 1782 | 1921 |
gdf4 = df.groupby(["Country","Product"]).get_group(('Canada','Laptop'))
gdf4
Region | Country | Product | WK_1 | WK_2 | WK_3 | WK_4 | |
---|---|---|---|---|---|---|---|
2 | America | Canada | Laptop | 1441 | 1099 | 1950 | 1394 |
gdf5 = df.groupby(["Region","Product"]).get_group(('America','Phone'))
gdf5
Region | Country | Product | WK_1 | WK_2 | WK_3 | WK_4 | |
---|---|---|---|---|---|---|---|
1 | America | USA | Phone | 1098 | 1092 | 1089 | 1819 |
3 | America | Canada | Phone | 1990 | 1057 | 1656 | 1060 |
# Iterating through a group
gdf6 = df.groupby('Country')
for name,group in gdf6:
print(name)
print(group)
Belgium Region Country Product WK_1 WK_2 WK_3 WK_4 4 Europe Belgium Laptop 1084 1116 1002 1566 5 Europe Belgium Phone 1574 1958 1793 1213 Canada Region Country Product WK_1 WK_2 WK_3 WK_4 2 America Canada Laptop 1441 1099 1950 1394 3 America Canada Phone 1990 1057 1656 1060 Finland Region Country Product WK_1 WK_2 WK_3 WK_4 6 Europe Finland Laptop 1325 1374 1300 1579 7 Europe Finland Phone 1347 1736 1782 1921 USA Region Country Product WK_1 WK_2 WK_3 WK_4 0 America USA Laptop 1241 1160 1929 1174 1 America USA Phone 1098 1092 1089 1819
gdf7 = df.groupby(['Region','Product']).aggregate(np.sum)
gdf7
WK_1 | WK_2 | WK_3 | WK_4 | ||
---|---|---|---|---|---|
Region | Product | ||||
America | Laptop | 2682 | 2259 | 3879 | 2568 |
Phone | 3088 | 2149 | 2745 | 2879 | |
Europe | Laptop | 2409 | 2490 | 2302 | 3145 |
Phone | 2921 | 3694 | 3575 | 3134 |
gdf8 = df.groupby(['Region','Country']).aggregate([np.sum,np.mean])
gdf8
WK_1 | WK_2 | WK_3 | WK_4 | ||||||
---|---|---|---|---|---|---|---|---|---|
sum | mean | sum | mean | sum | mean | sum | mean | ||
Region | Country | ||||||||
America | Canada | 3431 | 1715.5 | 2156 | 1078 | 3606 | 1803.0 | 2454 | 1227.0 |
USA | 2339 | 1169.5 | 2252 | 1126 | 3018 | 1509.0 | 2993 | 1496.5 | |
Europe | Belgium | 2658 | 1329.0 | 3074 | 1537 | 2795 | 1397.5 | 2779 | 1389.5 |
Finland | 2672 | 1336.0 | 3110 | 1555 | 3082 | 1541.0 | 3500 | 1750.0 |
gdf9 = df.groupby(['Region','Country']).aggregate([np.sum,np.mean]).rename(columns = {'sum':'TOTAL','mean':'AVERAGE'})
gdf9
WK_1 | WK_2 | WK_3 | WK_4 | ||||||
---|---|---|---|---|---|---|---|---|---|
TOTAL | AVERAGE | TOTAL | AVERAGE | TOTAL | AVERAGE | TOTAL | AVERAGE | ||
Region | Country | ||||||||
America | Canada | 3431 | 1715.5 | 2156 | 1078 | 3606 | 1803.0 | 2454 | 1227.0 |
USA | 2339 | 1169.5 | 2252 | 1126 | 3018 | 1509.0 | 2993 | 1496.5 | |
Europe | Belgium | 2658 | 1329.0 | 3074 | 1537 | 2795 | 1397.5 | 2779 | 1389.5 |
Finland | 2672 | 1336.0 | 3110 | 1555 | 3082 | 1541.0 | 3500 | 1750.0 |
gdf10 = df.groupby(['Region']).agg({'WK_1': np.sum, 'WK_2':np.mean}).rename(columns = {'WK_2':'WK_2 (Mean)'})
gdf10
WK_1 | WK_2 (Mean) | |
---|---|---|
Region | ||
America | 5770 | 1102 |
Europe | 5330 | 1546 |
df
Region | Country | Product | WK_1 | WK_2 | WK_3 | WK_4 | |
---|---|---|---|---|---|---|---|
0 | America | USA | Laptop | 1241 | 1160 | 1929 | 1174 |
1 | America | USA | Phone | 1098 | 1092 | 1089 | 1819 |
2 | America | Canada | Laptop | 1441 | 1099 | 1950 | 1394 |
3 | America | Canada | Phone | 1990 | 1057 | 1656 | 1060 |
4 | Europe | Belgium | Laptop | 1084 | 1116 | 1002 | 1566 |
5 | Europe | Belgium | Phone | 1574 | 1958 | 1793 | 1213 |
6 | Europe | Finland | Laptop | 1325 | 1374 | 1300 | 1579 |
7 | Europe | Finland | Phone | 1347 | 1736 | 1782 | 1921 |
fdf1 = df[df["Region"] == "America"]
fdf1
Region | Country | Product | WK_1 | WK_2 | WK_3 | WK_4 | |
---|---|---|---|---|---|---|---|
0 | America | USA | Laptop | 1241 | 1160 | 1929 | 1174 |
1 | America | USA | Phone | 1098 | 1092 | 1089 | 1819 |
2 | America | Canada | Laptop | 1441 | 1099 | 1950 | 1394 |
3 | America | Canada | Phone | 1990 | 1057 | 1656 | 1060 |
fdf2 = df[(df.Region == "America") & (df.Country == "USA")]
fdf2
Region | Country | Product | WK_1 | WK_2 | WK_3 | WK_4 | |
---|---|---|---|---|---|---|---|
0 | America | USA | Laptop | 1241 | 1160 | 1929 | 1174 |
1 | America | USA | Phone | 1098 | 1092 | 1089 | 1819 |
fdf3 = df.filter(["Region", "Country", "WK_1"])
fdf3
Region | Country | WK_1 | |
---|---|---|---|
0 | America | USA | 1241 |
1 | America | USA | 1098 |
2 | America | Canada | 1441 |
3 | America | Canada | 1990 |
4 | Europe | Belgium | 1084 |
5 | Europe | Belgium | 1574 |
6 | Europe | Finland | 1325 |
7 | Europe | Finland | 1347 |
df
Region | Country | Product | WK_1 | WK_2 | WK_3 | WK_4 | |
---|---|---|---|---|---|---|---|
0 | America | USA | Laptop | 1241 | 1160 | 1929 | 1174 |
1 | America | USA | Phone | 1098 | 1092 | 1089 | 1819 |
2 | America | Canada | Laptop | 1441 | 1099 | 1950 | 1394 |
3 | America | Canada | Phone | 1990 | 1057 | 1656 | 1060 |
4 | Europe | Belgium | Laptop | 1084 | 1116 | 1002 | 1566 |
5 | Europe | Belgium | Phone | 1574 | 1958 | 1793 | 1213 |
6 | Europe | Finland | Laptop | 1325 | 1374 | 1300 | 1579 |
7 | Europe | Finland | Phone | 1347 | 1736 | 1782 | 1921 |
qdf1 = df[df.WK_2 > df.WK_1]
qdf1
Region | Country | Product | WK_1 | WK_2 | WK_3 | WK_4 | |
---|---|---|---|---|---|---|---|
4 | Europe | Belgium | Laptop | 1084 | 1116 | 1002 | 1566 |
5 | Europe | Belgium | Phone | 1574 | 1958 | 1793 | 1213 |
6 | Europe | Finland | Laptop | 1325 | 1374 | 1300 | 1579 |
7 | Europe | Finland | Phone | 1347 | 1736 | 1782 | 1921 |
qdf2 = df[df.WK_1 > 1500]
qdf2
Region | Country | Product | WK_1 | WK_2 | WK_3 | WK_4 | |
---|---|---|---|---|---|---|---|
3 | America | Canada | Phone | 1990 | 1057 | 1656 | 1060 |
5 | Europe | Belgium | Phone | 1574 | 1958 | 1793 | 1213 |
qdf3 = df[(df.WK_1 > 1500) & (df.WK_2 > 1900)]
qdf3
Region | Country | Product | WK_1 | WK_2 | WK_3 | WK_4 | |
---|---|---|---|---|---|---|---|
5 | Europe | Belgium | Phone | 1574 | 1958 | 1793 | 1213 |
qdf4 = df.query('WK_1 > 1500 & WK_2 > 1900')
qdf4
Region | Country | Product | WK_1 | WK_2 | WK_3 | WK_4 | |
---|---|---|---|---|---|---|---|
5 | Europe | Belgium | Phone | 1574 | 1958 | 1793 | 1213 |