Deep Music Genre Classification

Using deep learning and neural networks to classify music genres
Author

Jake Gilbert

Published

May 2, 2024

import torch
import pandas as pd

import numpy as np

# for embedding visualization later
import plotly.express as px 
import plotly.io as pio

# for VSCode plotly rendering
pio.renderers.default = "notebook"

# for appearance
pio.templates.default = "plotly_white"

# for train-test split
from sklearn.model_selection import train_test_split

# for suppressing bugged warnings from torchinfo
# NOTE: no module filter is given, so this silences every UserWarning raised
# anywhere in the session, not only the ones coming from torchinfo.
import warnings 
warnings.filterwarnings("ignore", category = UserWarning)

# use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the music dataset CSV directly from GitHub into a DataFrame
# (this requires network access every time the cell is run).
url = "https://raw.githubusercontent.com/PhilChodrow/PIC16B/master/datasets/tcc_ceds_music.csv"
df = pd.read_csv(url)
# Column names of the pre-computed numeric features that ship with the dataset.
engineered_features = [
    "dating",
    "violence",
    "world/life",
    "night/time",
    "shake the audience",
    "family/gospel",
    "romantic",
    "communication",
    "obscene",
    "music",
    "movement/places",
    "light/visual perceptions",
    "family/spiritual",
    "like/girls",
    "sadness",
    "feelings",
    "danceability",
    "loudness",
    "acousticness",
    "instrumentalness",
    "valence",
    "energy",
]

# Class-balance check: number of tracks per genre.
df.groupby("genre").size()
genre
blues      4604
country    5445
hip hop     904
jazz       3845
pop        7042
reggae     2498
rock       4034
dtype: int64
# Integer class label for each genre name, numbered 0..6 in alphabetical order.
genres = {
    name: label
    for label, name in enumerate(
        ("blues", "country", "hip hop", "jazz", "pop", "reggae", "rock")
    )
}

# Replace each genre name with its integer label from `genres`; dict.get
# yields None for any value that is not a key of the mapping.
df["genre"] = df["genre"].apply(genres.get)
df.head()
Unnamed: 0 artist_name track_name release_date genre lyrics len dating violence world/life ... sadness feelings danceability loudness acousticness instrumentalness valence energy topic age
0 0 mukesh mohabbat bhi jhoothi 1950 4 hold time feel break feel untrue convince spea... 95 0.000598 0.063746 0.000598 ... 0.380299 0.117175 0.357739 0.454119 0.997992 0.901822 0.339448 0.137110 sadness 1.0
1 4 frankie laine i believe 1950 4 believe drop rain fall grow believe darkest ni... 51 0.035537 0.096777 0.443435 ... 0.001284 0.001284 0.331745 0.647540 0.954819 0.000002 0.325021 0.263240 world/life 1.0
2 6 johnnie ray cry 1950 4 sweetheart send letter goodbye secret feel bet... 24 0.002770 0.002770 0.002770 ... 0.002770 0.225422 0.456298 0.585288 0.840361 0.000000 0.351814 0.139112 music 1.0
3 10 pérez prado patricia 1950 4 kiss lips want stroll charm mambo chacha merin... 54 0.048249 0.001548 0.001548 ... 0.225889 0.001548 0.686992 0.744404 0.083935 0.199393 0.775350 0.743736 romantic 1.0
4 12 giorgos papadopoulos apopse eida oneiro 1950 4 till darling till matter know till dream live ... 48 0.001350 0.001350 0.417772 ... 0.068800 0.001350 0.291671 0.646489 0.975904 0.000246 0.597073 0.394375 romantic 1.0

5 rows × 31 columns

from torch.utils.data import Dataset, DataLoader

class TextDataFromDF(Dataset):
    """Map-style dataset that yields ``(text, label)`` pairs from a DataFrame.

    Columns are selected by name rather than by position. The previous
    positional lookup returned column 0 (the ``Unnamed: 0`` row id) as the
    label instead of the encoded ``genre`` column.

    Args:
        df: DataFrame holding one example per row.
        text_col: Name of the column containing the text (default ``"lyrics"``).
        label_col: Name of the column containing the target label
            (default ``"genre"``).
    """

    def __init__(self, df, text_col="lyrics", label_col="genre"):
        self.df = df
        self.text_col = text_col
        self.label_col = label_col

    def __getitem__(self, index):
        """Return the ``(text, label)`` pair stored at row position ``index``."""
        # .iloc is positional, so this works on shuffled or split frames whose
        # index labels are no longer 0..n-1.
        text = self.df[self.text_col].iloc[index]
        label = self.df[self.label_col].iloc[index]
        return text, label

    def __len__(self):
        """Return the number of rows (examples) in the underlying DataFrame."""
        return len(self.df)
# Hold out 20% of the tracks for validation. The fixed seed makes the shuffle
# deterministic, so the split (and any example looked up by position, such as
# the one displayed here) is the same on every run.
df_train, df_val = train_test_split(
    df,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)
train_data = TextDataFromDF(df_train)
val_data = TextDataFromDF(df_val)
train_data[194]
('morning ride think morning ride morning ride nice ride miss morning ride longest ride morning ride morning ride morning ride morning ride slip slide go break slip slide go break matter hide send send morning ride morning ride morning ride morning ride tell ellington work jamaica buerue credit station gemini port portland morning ride morning ride morning ride morning ride',
 64271)
# NOTE(review): the output recorded for this cell is an OSError raised by
# dlopen on torchtext's libtorchtext.so (a symbol it expects in torch's
# libtorch_cpu.dylib was not found), so the cell did not run in that
# environment. Presumably the installed torch and torchtext builds are
# mismatched -- confirm and pin a compatible pair of versions.
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator

# torchtext's built-in 'basic_english' tokenizer
tokenizer = get_tokenizer('basic_english')

# sanity check: tokenize the text of one training example and display it
tokenized = tokenizer(train_data[194][0])
tokenized
OSError: dlopen(/Users/jakegilbert/anaconda3/envs/ml-0451/lib/python3.9/site-packages/torchtext/lib/libtorchtext.so, 0x0006): Symbol not found: __ZN2at4_ops15to_dtype_layout4callERKNS_6TensorENSt3__18optionalIN3c1010ScalarTypeEEENS6_INS7_6LayoutEEENS6_INS7_6DeviceEEENS6_IbEEbbNS6_INS7_12MemoryFormatEEE
  Referenced from: <B145C7C7-A04C-3975-B142-8B160ADC1CFF> /Users/jakegilbert/anaconda3/envs/ml-0451/lib/python3.9/site-packages/torchtext/lib/libtorchtext.so
  Expected in:     <6B754090-A299-3FA1-B21D-A3C9B7051AD1> /Users/jakegilbert/anaconda3/envs/ml-0451/lib/python3.9/site-packages/torch/lib/libtorch_cpu.dylib