# Derived columns: track length in seconds, and milliseconds of audio per byte.
itunes_df['Seconds'] = itunes_df['Milliseconds'].div(1000)
itunes_df['len_byte_ratio'] = itunes_df['Milliseconds'].div(itunes_df['Bytes'])
# Map variant genre spellings onto one canonical label. The result is not
# assigned back, so the 'Genre' column itself is left unchanged.
genre_dict = dict(metal='Metal', met='Metal')
itunes_df['Genre'].replace(to_replace=genre_dict)
# Element-wise lowercase via apply with an anonymous function (also not assigned).
itunes_df['Genre'].apply(lambda genre: genre.lower())
# the above is the same as this
def lowercase(x):
    """Return ``x`` converted to lowercase.

    Named equivalent of ``lambda x: x.lower()``, suitable for passing to
    ``Series.apply``.
    """
    return x.lower()
itunes_df['Genre'].apply(func=lowercase)
# The vectorized string accessor does the same job and is almost always
# faster than calling a Python function per element.
itunes_df['Genre'].str.lower()
# TextBlob is a commonly used sentiment-analysis library. Polarity measures
# positive vs. negative sentiment; subjectivity rates how subjective vs.
# objective the text is.
from textblob import TextBlob
test = TextBlob("Textblob is amazingly simple to use. What great fun!")
test.sentiment
test.sentiment.polarity
# For scoring every track name, a list comprehension is a better fit than apply:
itunes_df['Track_sentiment'] = [
    TextBlob(track_name).sentiment.polarity
    for track_name in itunes_df['Track']
]
# but, if we wanted to mix polarity and subjectivity into one column, it would be best to use apply:
def pol_sub_mix(x):
    """Return the product of the polarity and subjectivity scores of ``x``.

    ``x`` is the text to analyse (here, a track name). Both scores are read
    from a single ``sentiment`` result, the same accessor used for the
    ``Track_sentiment`` column, instead of two separate property lookups.
    """
    sentiment = TextBlob(x).sentiment
    return sentiment.polarity * sentiment.subjectivity
itunes_df['Track_pol_sub_mix'] = itunes_df['Track'].apply(func=pol_sub_mix)
# Remove the two sentiment demo columns again, modifying the frame in place.
itunes_df.drop(columns=['Track_pol_sub_mix', 'Track_sentiment'], inplace=True)
# NOTE: at the time of writing, swifter does not work with Python 3.9.
# The module is imported only so that the `.swifter` accessor used below
# is available on the Series.
import swifter
itunes_df['Genre'].swifter.apply(lambda genre: genre.lower())
# Persist the cleaned table; index=False keeps the row index out of the file.
itunes_df.to_csv('cleaned_itunes_data.csv', index=False)
# Genres with the shortest average track length (head() shows the first five).
# 'Seconds' is selected *before* aggregating so that only this column is
# averaged: calling mean() on the whole grouped frame also tries to average
# the string 'Track' column, which raises TypeError in pandas >= 2.0.
itunes_df.groupby('Genre')['Seconds'].mean().sort_values().head()
# (The blog-page footer "No comments: / Post a Comment" was scraped along with the code; it is not part of the script.)