Jake’s Machine Learning Blog - Deep Music Genre Classification

import torch
import pandas as pd

import numpy as np

# for embedding visualization later
import plotly.express as px 
import plotly.io as pio

# for VSCode plotly rendering
# Selects plotly's "notebook" renderer as the default for every figure shown.
# NOTE(review): plotly also ships a dedicated "vscode" renderer; "notebook" was
# presumably chosen so figures embed in the exported page — confirm.
pio.renderers.default = "notebook"

# for appearance
# Applies the "plotly_white" template to all figures created after this point.
pio.templates.default = "plotly_white"

# for train-test split
from sklearn.model_selection import train_test_split

# for suppressing bugged warnings from torchinfo
# NOTE(review): this filter is process-wide and has no module restriction, so
# it hides every UserWarning from any library, not only those from torchinfo.
import warnings 
warnings.filterwarnings("ignore", category = UserWarning)

# Use the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Read the lyrics dataset straight from its raw GitHub URL into a DataFrame.
url = "https://raw.githubusercontent.com/PhilChodrow/PIC16B/master/datasets/tcc_ceds_music.csv"
df = pd.read_csv(url)

# Names of the precomputed numeric feature columns in `df`.
engineered_features = [
    'dating',
    'violence',
    'world/life',
    'night/time',
    'shake the audience',
    'family/gospel',
    'romantic',
    'communication',
    'obscene',
    'music',
    'movement/places',
    'light/visual perceptions',
    'family/spiritual',
    'like/girls',
    'sadness',
    'feelings',
    'danceability',
    'loudness',
    'acousticness',
    'instrumentalness',
    'valence',
    'energy',
]

# Row count per genre, to show how the classes are balanced.
df.groupby("genre").size()

genre
blues      4604
country    5445
hip hop     904
jazz       3845
pop        7042
reggae     2498
rock       4034
dtype: int64

# Integer class id for each genre name, numbered in the order listed (0..6).
genres = {
    name: code
    for code, name in enumerate(
        ("blues", "country", "hip hop", "jazz", "pop", "reggae", "rock")
    )
}

# Replace the genre strings in place with their integer ids.
df["genre"] = df["genre"].apply(genres.get)
df.head()

	Unnamed: 0	artist_name	track_name	release_date	genre	lyrics	len	dating	violence	world/life	...	sadness	feelings	danceability	loudness	acousticness	instrumentalness	valence	energy	topic	age
0	0	mukesh	mohabbat bhi jhoothi	1950	4	hold time feel break feel untrue convince spea...	95	0.000598	0.063746	0.000598	...	0.380299	0.117175	0.357739	0.454119	0.997992	0.901822	0.339448	0.137110	sadness	1.0
1	4	frankie laine	i believe	1950	4	believe drop rain fall grow believe darkest ni...	51	0.035537	0.096777	0.443435	...	0.001284	0.001284	0.331745	0.647540	0.954819	0.000002	0.325021	0.263240	world/life	1.0
2	6	johnnie ray	cry	1950	4	sweetheart send letter goodbye secret feel bet...	24	0.002770	0.002770	0.002770	...	0.002770	0.225422	0.456298	0.585288	0.840361	0.000000	0.351814	0.139112	music	1.0
3	10	pérez prado	patricia	1950	4	kiss lips want stroll charm mambo chacha merin...	54	0.048249	0.001548	0.001548	...	0.225889	0.001548	0.686992	0.744404	0.083935	0.199393	0.775350	0.743736	romantic	1.0
4	12	giorgos papadopoulos	apopse eida oneiro	1950	4	till darling till matter know till dream live ...	48	0.001350	0.001350	0.417772	...	0.068800	0.001350	0.291671	0.646489	0.975904	0.000246	0.597073	0.394375	romantic	1.0

5 rows × 31 columns

from torch.utils.data import Dataset, DataLoader

class TextDataFromDF(Dataset):
    """Map-style dataset yielding ``(lyrics, genre)`` pairs from a DataFrame.

    Args:
        df: DataFrame with a ``"lyrics"`` column holding the text of each
            track and a ``"genre"`` column holding its class label.
    """

    def __init__(self, df):
        self.df = df

    def __getitem__(self, index):
        """Return ``(lyrics, genre)`` for the row at integer position ``index``."""
        # Columns are looked up by name rather than by position. The previous
        # positional lookup returned column 0 ("Unnamed: 0", a leftover row
        # id) as the label instead of the genre.
        text = self.df["lyrics"].iloc[index]
        label = self.df["genre"].iloc[index]
        return text, label

    def __len__(self):
        """Return the number of rows (examples) in the wrapped DataFrame."""
        return len(self.df)

# Shuffle the rows and hold out 20% of them for validation.
df_train, df_val = train_test_split(df, test_size=0.2, shuffle=True)

# Wrap each split in the dataset class.
train_data, val_data = (TextDataFromDF(part) for part in (df_train, df_val))

# Look at a single training example.
train_data[194]

('morning ride think morning ride morning ride nice ride miss morning ride longest ride morning ride morning ride morning ride morning ride slip slide go break slip slide go break matter hide send send morning ride morning ride morning ride morning ride tell ellington work jamaica buerue credit station gemini port portland morning ride morning ride morning ride morning ride',
 64271)

# NOTE(review): the recorded output for this cell is an OSError raised while
# loading torchtext's compiled library (libtorchtext.so): a symbol it expects
# was not found in the installed torch's libtorch_cpu.dylib. Presumably the
# installed torch and torchtext builds are not a matching pair — confirm the
# versions and reinstall a compatible combination before relying on this cell.
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

# Build a tokenizer using torchtext's 'basic_english' option.
tokenizer = get_tokenizer('basic_english')

# Tokenize the text element (index 0) of training example 194 and display it.
tokenized = tokenizer(train_data[194][0])
tokenized

OSError: dlopen(/Users/jakegilbert/anaconda3/envs/ml-0451/lib/python3.9/site-packages/torchtext/lib/libtorchtext.so, 0x0006): Symbol not found: __ZN2at4_ops15to_dtype_layout4callERKNS_6TensorENSt3__18optionalIN3c1010ScalarTypeEEENS6_INS7_6LayoutEEENS6_INS7_6DeviceEEENS6_IbEEbbNS6_INS7_12MemoryFormatEEE
  Referenced from: <B145C7C7-A04C-3975-B142-8B160ADC1CFF> /Users/jakegilbert/anaconda3/envs/ml-0451/lib/python3.9/site-packages/torchtext/lib/libtorchtext.so
  Expected in:     <6B754090-A299-3FA1-B21D-A3C9B7051AD1> /Users/jakegilbert/anaconda3/envs/ml-0451/lib/python3.9/site-packages/torch/lib/libtorch_cpu.dylib