הפקולטה לפיזיקה, הטכניון. חורף 2013
מרצה: רונן אברבנאל
כאן פשוט נציג דוגמה (מתוך הספר Python for Data Analysis).
Python for data analysis cover
הנתונים מתוך: https://github.com/pydata/pydata-book/tree/master/ch02/names ניתן להוריד אותם גם מכאן.
# Notebook setup: interactive plotting namespace, working directory, pandas.
# NOTE(review): `np` is used further down without an explicit import; it
# presumably comes from the %pylab magic -- confirm before replacing it.
%pylab inline
import os
# Move into the data directory so the relative 'yob<year>.txt' paths resolve.
# NOTE(review): absolute, machine-specific path -- adjust to the local checkout.
os.chdir("/home/ronen/temp/pydata-book/ch02/names")
# Shell escape: print the first 10 lines of the 1880 file.
!head -n 10 yob1880.txt
import pandas as pd
# Load the 1880 file, supplying the column names explicitly.
names1880 = pd.read_csv(
    'yob1880.txt',
    names=['name', 'sex', 'births'],
)
names1880
# First five rows.
names1880.head(5)
# Total number of births per sex in 1880.
names1880.groupby('sex')['births'].sum()
# Read one file per year (1880 through 2010), tag every row with the year
# it came from, and stack all of them into a single DataFrame.
columns = ['name', 'sex', 'births']
years = range(1880, 2011)
pieces = []
for year in years:
    path = 'yob%d.txt' % year
    frame = pd.read_csv(path, names=columns)
    frame['year'] = year
    pieces += [frame]

# ignore_index=True: build a fresh row index instead of keeping the
# per-file row numbers.
names = pd.concat(pieces, ignore_index=True)
names
names.head(5)
# Total births per year, one column per sex.
# The `rows=`/`cols=` keywords were removed from pivot_table; `index=` and
# `columns=` are the supported spellings. The aggregation is passed by name
# ('sum'), matching the other pivot_table calls in this notebook.
total_births = names.pivot_table(
    'births', index='year', columns='sex', aggfunc='sum'
)
total_births.tail()
total_births.plot(title='Total births by sex and year')
def add_prop(group):
    """Return a copy of *group* with a ``prop`` column added.

    ``prop`` is each row's ``births`` divided by the sum of ``births`` over
    the whole group.

    The frame passed in is left untouched: this function is handed to
    ``groupby(...).apply``, and pandas documents mutating the object given
    to a user-defined function as unsupported.
    """
    # Cast to float so the division is never integer division.
    births = group.births.astype(float)
    result = group.copy()
    result['prop'] = births / births.sum()
    return result
# Add the per-(year, sex) proportion column to every row.
# group_keys=False keeps the original row index. Without it, recent pandas
# prepends 'year'/'sex' index levels that clash with the same-named columns
# in the later groupby/pivot_table calls.
names = names.groupby(['year', 'sex'], group_keys=False).apply(add_prop)
names.tail()
def get_top1000(group):
    """Return the (up to) 1000 rows of *group* with the most ``births``.

    Rows are ordered by ``births``, largest first. A group with fewer than
    1000 rows is returned in full, sorted.
    """
    # sort_index(by=...) was removed from pandas; sort_values is the
    # supported way to order rows by a column.
    ranked = group.sort_values(by='births', ascending=False)
    return ranked[:1000]
# Keep only the most frequent names within each (year, sex) group.
# group_keys=False stops apply from adding 'year'/'sex' index levels on top
# of the existing columns of the same name, which later column-based
# grouping would reject as ambiguous.
grouped = names.groupby(['year', 'sex'], group_keys=False)
top1000 = grouped.apply(get_top1000)
top1000
# Split the result by sex.
boys = top1000[top1000.sex == 'M']
girls = top1000[top1000.sex == 'F']
# Births per year, one column per name (rebinds `total_births`).
# `rows=`/`cols=` were removed from pivot_table; use `index=`/`columns=`.
total_births = names.pivot_table(
    'births', index='year', columns='name', aggfunc='sum'
)
total_births
# Plot a handful of names, each in its own subplot.
subset = total_births[['John', 'Harry', 'Mary', 'Marilyn']]
subset.plot(subplots=True, figsize=(12, 10), grid=False)
# Sum of `prop` over the top-1000 rows, per year and sex.
# `rows=`/`cols=` were removed from pivot_table; use `index=`/`columns=`.
table = top1000.pivot_table(
    'prop', index='year', columns='sex', aggfunc='sum'
)
table.plot(
    title='Sum of...',
    yticks=np.linspace(0, 1.2, 13),
    xticks=range(1880, 2020, 10),
)
# Distinct names in top1000 that contain 'lesl', ignoring case.
all_names = top1000['name'].unique()
mask = np.array(
    ['lesl' in candidate.lower() for candidate in all_names]
)
lesley_like = all_names[mask]
lesley_like
# Restrict top1000 to the Lesley-like names and total their births per name.
filterd = top1000[top1000.name.isin(lesley_like)]
filterd.groupby('name').births.sum()
# Births per year and sex for those names.
# `rows=`/`cols=` were removed from pivot_table; use `index=`/`columns=`.
table = filterd.pivot_table(
    'births', index='year', columns='sex', aggfunc='sum'
)
# Normalise each year (row) by its row total so the columns hold each sex's
# share of that year's births.
table = table.div(table.sum(axis=1), axis=0)
table.tail()
table.plot()