Loading

Task 2: Next Product Recommendation for Underrepresented Languages

MODEL 1 SAMPLE

SMALL SAMPLE TO TRAIN AN LSTM

daniel_cordoba

We used a small sample to train a model to use as a recommendation system for all countries.

 

In [ ]:
# All third-party imports for this notebook in one place.
# (Removed the duplicate `train_test_split` import that appeared twice.)
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.optimizers import Adam
In [ ]:

In [ ]:
import tensorflow as tf

# Report how many GPUs TensorFlow can see before configuring devices.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))
In [ ]:
import os
# NOTE(review): '-1' hides ALL GPUs from CUDA, forcing CPU-only execution.
# This contradicts both the GPU check above and the GPU-selection cell below —
# confirm which device is actually intended. Also, CUDA_VISIBLE_DEVICES is
# normally read when the CUDA runtime initializes; setting it AFTER
# `import tensorflow` may have no effect if a GPU has already been touched.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
In [ ]:
# Asegurémonos de que se utiliza la GPU deseada
# Make sure the desired GPU is the one TensorFlow uses.
available_gpus = tf.config.list_physical_devices('GPU')
if available_gpus:
    try:
        # Restrict TensorFlow to the first GPU; change the index for your setup.
        tf.config.set_visible_devices(available_gpus[0], 'GPU')
        visible_gpus = tf.config.list_logical_devices('GPU')
        print(len(available_gpus), "Physical GPUs,", len(visible_gpus), "Logical GPU")
    except RuntimeError as err:
        # Visible devices must be set before any GPU has been initialized.
        print(err)

Get small sample:

In [ ]:
# Load the full training sessions (semicolon-separated CSV).
ACB_result = pd.read_csv("sessions_train_f_ALL.csv", sep=";")

# Keep a 10% sample WITHIN each country so every market stays represented.
# random_state pins the sample so the notebook is reproducible across
# Restart-&-Run-All (the original sampled without a seed).
ACB_result = ACB_result.groupby('Country', group_keys=False).apply(
    lambda grp: grp.sample(frac=0.1, random_state=42)
)

# Quick sanity peek at the sampled frame.
ACB_result.head(2)
In [ ]:
import ast

# 'prev_items_f_1' holds list-like strings (e.g. "[12, 34]"). Parse each string
# and coerce every element to int in a single pass over the column
# (the original ran two separate .apply passes for parse and cast).
ACB_result['prev_items_f_1'] = ACB_result['prev_items_f_1'].apply(
    lambda s: [int(item) for item in ast.literal_eval(s)]
)
In [ ]:
import numpy as np

# Each training sequence is the session's clicked items followed by the
# country code, so the model can condition on the market.
ACB_result['sequences'] = [
    prev_items + [country]
    for prev_items, country in zip(ACB_result['prev_items_f_1'], ACB_result['Country_NUM'])
]

# Inputs are the sequences; the target is the next purchased item.
sequences = ACB_result['sequences'].tolist()
labels = np.array(ACB_result['next_item_f_1'].tolist())
In [ ]:
# Hold out 20% of the sessions for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(sequences, labels, test_size=0.2, random_state=42)
In [ ]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Pad every training sequence to the length of the longest one (zeros appended).
X_train = pad_sequences(X_train, padding='post')
# Force the test set to the training length. Test sequences longer than that
# are truncated from the FRONT (Keras default truncating='pre'), which keeps
# the country token that was appended at the end of each sequence.
X_test = pad_sequences(X_test, padding='post', maxlen=X_train.shape[1])
In [ ]:
from keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer()

# Convert the entries of the series to string
ACB_result['prev_items_f'] = ACB_result['prev_items_f'].astype(str)
ACB_result['next_item_f'] = ACB_result['next_item_f'].astype(str)


# NOTE(review): the tokenizer is fitted here but texts_to_sequences is never
# called in this notebook — the model consumes the raw integer ids from
# 'prev_items_f_1'. The only downstream use is tokenizer.document_count when
# sizing the embedding; confirm that is intentional, since document_count is
# the number of texts seen, not a vocabulary size.
tokenizer.fit_on_texts(ACB_result.prev_items_f)
tokenizer.fit_on_texts(ACB_result.next_item_f)


# Check the length of ACB.next_item_f
In [ ]:

Recurrent version with 2 features:

In [ ]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
# Single consistent import path (the original mixed `keras` and
# `tensorflow.keras`, and imported Dense twice).
from tensorflow.keras.layers import LSTM, Dense, Embedding, InputLayer, Dropout, Flatten


# Hyperparameters
embedding_dim = 50   # size of each learned item-embedding vector
lstm_units = 100     # hidden units in the recurrent layer

# sparse_categorical_crossentropy expects integer labels in [0, num_classes),
# so the softmax must cover up to the LARGEST item id. The original
# len(np.unique(labels)) under-sizes the output layer whenever the ids are
# not a contiguous 0..k-1 range, which makes training crash or mis-index.
num_classes = int(np.max(labels)) + 1

# The Embedding layer indexes by raw item id, so input_dim must exceed the
# largest id present in the padded sequences. The original used
# tokenizer.document_count + 5, but document_count is the number of texts the
# tokenizer has seen — unrelated to the id range fed to the model.
vocab_size = int(max(np.max(X_train), np.max(X_test))) + 1
In [ ]:
# Definición del modelo
# Network definition: item embedding -> LSTM -> dense classifier.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=X_train.shape[1]),
    LSTM(lstm_units, activation='relu'),
    Dropout(0.2),
    Dense(45, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

# Integer targets, so sparse categorical crossentropy (no one-hot needed).
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train; 20% of the training split is held back for per-epoch validation.
model.fit(X_train, y_train, epochs=20, validation_split=0.2, verbose=1)
In [ ]:
# Final evaluation on the held-out 20% test split.
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print("Loss: ", loss)
print("Accuracy: ", accuracy)
In [ ]:
# Persist the trained model (architecture + weights) in HDF5 format.
model.save('MODEL_ALL_MARKETS.h5')
# Weights-only checkpoint, for loading into a re-built architecture later.
model.save_weights('MODEL_ALL_MARKETS_weights.h5')
In [ ]:

In [ ]:


Comments

You must login before you can post a comment.

Execute