# Importar las librerías.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import statsmodels.formula.api as smf
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Cargar los datos.
url = 'https://raw.githubusercontent.com/estefaniadelarosa/IA-I/refs/heads/main/A1.3%20Soluci%C3%B3n%20de%20problemas%20y%20selecci%C3%B3n%20de%20caracter%C3%ADsticas/Calificaciones.csv'
df = pd.read_csv(url)
print(df.shape)
df.head()

(395, 10)

df.dtypes

Escuela           object
Sexo              object
Edad               int64
HorasDeEstudio     int64
Reprobadas         int64
Internet          object
Faltas             int64
G1                 int64
G2                 int64
G3                 int64
dtype: object

df.head(10)

df.tail(10)

# Verificar si hay datos vacíos.
df.isna().sum().sort_values(ascending = False)

Escuela           0
Sexo              0
Edad              0
HorasDeEstudio    0
Reprobadas        0
Internet          0
Faltas            0
G1                0
G2                0
G3                0
dtype: int64

# NaN.
df.isnull().sum()

Escuela           0
Sexo              0
Edad              0
HorasDeEstudio    0
Reprobadas        0
Internet          0
Faltas            0
G1                0
G2                0
G3                0
dtype: int64

df

df.value_counts('Escuela')

Escuela
GP    349
MS     46
Name: count, dtype: int64

# Convertimos las variables categóricas a categóricas numéricas.
from sklearn.preprocessing import LabelEncoder 

# Aplicamos la transformación.
df['Escuela_Num'] = LabelEncoder().fit_transform(df['Escuela'])
df['Sexo_Num'] = LabelEncoder().fit_transform(df['Sexo'])
df['Internet_Num'] = LabelEncoder().fit_transform(df['Internet'])

df.sample(5)

q1 = df.Edad.quantile(0.25)
q3 = df.Edad.quantile(0.75)
iqr = q3 - q1
li = q1 - (1.5 * iqr)
ls = q3 + (1.5 * iqr)
out = np.where((df.Edad <= li) | (df.Edad >= ls))[0]
print(df.iloc[out, :])

    Escuela Sexo  Edad  HorasDeEstudio  Reprobadas Internet  Faltas  G1  G2  \
247      GP    M    22               1           3      yes      16   6   8   
392      MS    M    21               1           3       no       3  10   8   

     G3  Escuela_Num  Sexo_Num  Internet_Num  
247   8            0         1             1  
392   7            1         1             0

plt.boxplot(df['Edad'].dropna())
plt.title('Boxplot de edades.')
plt.xlabel('Edades.')
plt.show()

q1 = df.HorasDeEstudio.quantile(0.25)
q3 = df.HorasDeEstudio.quantile(0.75)
iqr = q3 - q1
li = q1 - (1.5 * iqr)
ls = q3 + (1.5 * iqr)
out = np.where((df.Edad <= li) | (df.Edad >= ls))[0]
print(df.iloc[out, :])

    Escuela Sexo  Edad  HorasDeEstudio  Reprobadas Internet  Faltas  G1  G2  \
0        GP    F    18               2           0       no       6   5   6   
1        GP    F    17               2           0      yes       4   5   5   
2        GP    F    15               2           3      yes      10   7   8   
3        GP    F    15               3           0      yes       2  15  14   
4        GP    F    16               2           0       no       4   6  10   
..      ...  ...   ...             ...         ...      ...     ...  ..  ..   
390      MS    M    20               2           2       no      11   9   9   
391      MS    M    17               1           0      yes       3  14  16   
392      MS    M    21               1           3       no       3  10   8   
393      MS    M    18               1           0      yes       0  11  12   
394      MS    M    19               1           0      yes       5   8   9   

     G3  Escuela_Num  Sexo_Num  Internet_Num  
0     6            0         0             0  
1     6            0         0             1  
2    10            0         0             1  
3    15            0         0             1  
4    10            0         0             0  
..   ..          ...       ...           ...  
390   9            1         1             0  
391  16            1         1             1  
392   7            1         1             0  
393  10            1         1             1  
394   9            1         1             1  

[395 rows x 13 columns]

plt.boxplot(df['HorasDeEstudio'].dropna())
plt.title('Boxplot de horas de estudio.')
plt.xlabel('Horas de estudio.')
plt.show()

# Aplicar el filtro.
df[df['HorasDeEstudio'] > 3]

print('Max = ', df['G1'].max())
print('Max = ', df['G2'].max())
print('Max = ', df['G3'].max())

Max =  19
Max =  19
Max =  20

q1 = df.Reprobadas.quantile(0.25)
q3 = df.Reprobadas.quantile(0.75)
iqr = q3 - q1
li = q1 - (1.5 * iqr)
ls = q3 + (1.5 * iqr)
out = np.where((df.Edad <= li) | (df.Edad >= ls))[0]
print(df.iloc[out, :])

    Escuela Sexo  Edad  HorasDeEstudio  Reprobadas Internet  Faltas  G1  G2  \
0        GP    F    18               2           0       no       6   5   6   
1        GP    F    17               2           0      yes       4   5   5   
2        GP    F    15               2           3      yes      10   7   8   
3        GP    F    15               3           0      yes       2  15  14   
4        GP    F    16               2           0       no       4   6  10   
..      ...  ...   ...             ...         ...      ...     ...  ..  ..   
390      MS    M    20               2           2       no      11   9   9   
391      MS    M    17               1           0      yes       3  14  16   
392      MS    M    21               1           3       no       3  10   8   
393      MS    M    18               1           0      yes       0  11  12   
394      MS    M    19               1           0      yes       5   8   9   

     G3  Escuela_Num  Sexo_Num  Internet_Num  
0     6            0         0             0  
1     6            0         0             1  
2    10            0         0             1  
3    15            0         0             1  
4    10            0         0             0  
..   ..          ...       ...           ...  
390   9            1         1             0  
391  16            1         1             1  
392   7            1         1             0  
393  10            1         1             1  
394   9            1         1             1  

[395 rows x 13 columns]

plt.boxplot(df['Reprobadas'].dropna())
plt.title('Boxplot de materias reprobadas.')
plt.xlabel('Cantidad de materias reprobadas.')
plt.show()

q1 = df.Faltas.quantile(0.25)
q3 = df.Faltas.quantile(0.75)
iqr = q3 - q1
li = q1 - (1.5 * iqr)
ls = q3 + (1.5 * iqr)
out = np.where((df.Edad <= li) | (df.Edad >= ls))[0]
print(df.iloc[out, :])

    Escuela Sexo  Edad  HorasDeEstudio  Reprobadas Internet  Faltas  G1  G2  \
247      GP    M    22               1           3      yes      16   6   8   
306      GP    M    20               1           0       no       0  17  18   
376      MS    F    20               3           2      yes       4  15  14   
390      MS    M    20               2           2       no      11   9   9   
392      MS    M    21               1           3       no       3  10   8   

     G3  Escuela_Num  Sexo_Num  Internet_Num  
247   8            0         1             1  
306  18            0         1             0  
376  15            1         0             1  
390   9            1         1             0  
392   7            1         1             0

plt.boxplot(df['Faltas'].dropna())
plt.title('Boxplot de faltas.')
plt.xlabel('Cantidad de faltas.')
plt.show()

print('Min = ', df['Faltas'].min())
print('Max = ', df['Faltas'].max())
print('Mean = ', df['Faltas'].mean())

Min =  0
Max =  75
Mean =  5.708860759493671

# Aplicar el filtro.
df[df['Faltas'] > 70]

# Determinar la correlación de variables.
df.corr(numeric_only = True).sort_values(by = 'G3', ascending = False)

plt.figure(figsize = (10, 6))
sns.heatmap(df.corr(numeric_only = True), annot = True)
plt.show()

X = df[['Edad', 'HorasDeEstudio', 'Reprobadas', 'Faltas', 'G1', 'G2', 'Escuela_Num', 'Sexo_Num', 'Internet_Num']]

vif_data = pd.DataFrame()
vif_data["feature"] = X.columns

vif_data["VIF"] = [variance_inflation_factor(X.values, i)
                          for i in range(len(X.columns))]
print(vif_data)

          feature        VIF
0            Edad  25.287109
1  HorasDeEstudio   8.192844
2      Reprobadas   1.487592
3          Faltas   1.608433
4              G1  45.063213
5              G2  34.240901
6     Escuela_Num   1.249744
7        Sexo_Num   2.171089
8    Internet_Num   6.096964

# Definir quién sera mi "x" y "y".
y = df[['G3']] # Doble [].
x = df[['HorasDeEstudio', 'Reprobadas', 'G2', 'Faltas', 'Escuela_Num', 'Sexo_Num', 'Internet_Num']]
print(y.shape)
print(x.shape)

(395, 1)
(395, 7)

# Dividir los tratos en train y test.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((316, 7), (79, 7), (316, 1), (79, 1))

# Formulamos el modelo.
modelRLM = LinearRegression()
# Entrenamos el modelo.
modelRLM.fit(x_train, y_train)

LinearRegression()

x = df[['HorasDeEstudio', 'Reprobadas', 'G2', 'Faltas', 'Escuela_Num', 'Sexo_Num', 'Internet_Num']]
y = df['G3']

# Split train-test.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Escalado.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# DataFrame y agregar const.
x_train_scaled_df = pd.DataFrame(x_train_scaled, columns=x.columns, index=x_train.index)
x_test_scaled_df = pd.DataFrame(x_test_scaled, columns=x.columns, index=x_test.index)

# Agrega const para intercepto.
x_train_final = sm.add_constant(x_train_scaled_df, has_constant='add')
x_test_final = sm.add_constant(x_test_scaled_df, has_constant='add')

model = sm.OLS(y_train, x_train_final).fit()

print(model.summary())

# Predicciones
y_train_pred = model.predict(x_train_final)
y_test_pred = model.predict(x_test_final)

# Métricas de sklearn (estas son las correctas).
r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)

mae_train = mean_absolute_error(y_train, y_train_pred)
mae_test = mean_absolute_error(y_test, y_test_pred)

rmse_train = np.sqrt(mean_squared_error(y_train, y_train_pred))
rmse_test = np.sqrt(mean_squared_error(y_test, y_test_pred))
print('---------------------MÉTRICAS------------------------------------')
print('MAE_TRAIN: ',mae_train,'MAE_TEST: ',mae_test)
print('RMSE_TRAIN: ',rmse_train,'RMSE_TEST: ',rmse_test)

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                     G3   R-squared:                       0.836
Model:                            OLS   Adj. R-squared:                  0.833
Method:                 Least Squares   F-statistic:                     225.0
Date:                Thu, 05 Feb 2026   Prob (F-statistic):          4.86e-117
Time:                        12:01:39   Log-Likelihood:                -643.42
No. Observations:                 316   AIC:                             1303.
Df Residuals:                     308   BIC:                             1333.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
==================================================================================
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const             10.3259      0.106     97.761      0.000      10.118      10.534
HorasDeEstudio     0.0300      0.115      0.262      0.794      -0.196       0.256
Reprobadas        -0.3709      0.113     -3.287      0.001      -0.593      -0.149
G2                 4.0368      0.115     35.201      0.000       3.811       4.262
Faltas             0.3581      0.108      3.317      0.001       0.146       0.571
Escuela_Num       -0.0404      0.107     -0.377      0.707      -0.252       0.171
Sexo_Num           0.1595      0.114      1.396      0.164      -0.065       0.384
Internet_Num      -0.0895      0.108     -0.827      0.409      -0.303       0.124
==============================================================================
Omnibus:                      184.840   Durbin-Watson:                   2.092
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1228.667
Skew:                          -2.425   Prob(JB):                    1.58e-267
Kurtosis:                      11.355   Cond. No.                         1.66
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
---------------------MÉTRICAS------------------------------------
MAE_TRAIN:  1.1266476314729104 MAE_TEST:  1.3169048266963088
RMSE_TRAIN:  1.8537025107720928 RMSE_TEST:  2.150807482261075

# Validar si el modelo pronostica adecuadamente.
y_pred_test = modelRLM.predict(x_test)
print(y_pred_test[0:5])
print(y_test.head())

[[ 6.55282932]
 [12.04640883]
 [ 4.32823332]
 [ 8.71947473]
 [ 8.88172922]]
78     10
371    12
248     5
55     10
390     9
Name: G3, dtype: int64

R2_train = modelRLM.score(x_train, y_train)
print('R2 train = {:.2f}'.format(R2_train))
R2_test = modelRLM.score(x_test, y_test)
print('R2 test = {:.2f}'.format(R2_test))
print('Diferencia = {:.4f}%'.format(np.abs(R2_train-R2_test)*100))

R2 train = 0.83
R2 test = 0.79
Diferencia = 4.8047%

# Obtener y_pred_train - pred = gorrito.
y_pred_train = modelRLM.predict(x_train)

# Importar las librerías para el cálculo de error.
from sklearn.metrics import mean_squared_error # MSE.
from sklearn.metrics import mean_absolute_percentage_error #MAPE.

# Calculamos los errores de train.
mse_train = mean_squared_error(y_train, y_pred_train)
rmse_train = np.sqrt(mse_train) # SQRT DEL MSE ES EL RMSE.
mape_train = mean_absolute_percentage_error(y_train, y_pred_train)*100

print('MSE train = {:.2f}'.format(mse_train))
print('RMSE train = {:.2f}'.format(rmse_train))
print('MAPE train = {:.2f}'.format(mape_train))

# Calculamos los errores de test.
mse_test = mean_squared_error(y_test, y_pred_test)
rmse_test = np.sqrt(mse_train) # SQRT DEL MSE ES EL RMSE.
mape_test = mean_absolute_percentage_error(y_test, y_pred_test)*100

print('\nMSE test = {:.2f}'.format(mse_test))
print('RMSE test = {:.2f}'.format(rmse_test))
print('MAPE test = {:.2f}'.format(mape_test))

MSE train = 3.48
RMSE train = 1.87
MAPE train = 197809661420952352.00

MSE test = 4.39
RMSE test = 1.87
MAPE test = 186480198849301376.00

df

modelRLM.predict([[1, 0, 10, 0, 1, 0, 1]])

/opt/anaconda3/lib/python3.13/site-packages/sklearn/utils/validation.py:2749: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
  warnings.warn(

array([[9.51160084]])

	Edad	HorasDeEstudio	Reprobadas	Faltas	G1	G2	G3	Escuela_Num	Sexo_Num	Internet_Num
G3	-0.161579	0.097820	-0.360415	0.034247	0.801468	0.904868	1.000000	-0.045017	0.103456	0.098483
G2	-0.143474	0.135880	-0.355896	-0.031777	0.852118	1.000000	0.904868	-0.050086	0.091099	0.119439
G1	-0.064081	0.160612	-0.354718	-0.031003	1.000000	0.852118	0.801468	-0.025731	0.091839	0.071619
Sexo_Num	-0.028606	-0.306268	0.044436	-0.066962	0.091839	0.091099	0.103456	-0.012286	1.000000	0.044113
Internet_Num	-0.112094	0.059422	-0.063451	0.101701	0.071619	0.119439	0.098483	-0.133578	0.044113	1.000000
HorasDeEstudio	-0.004140	1.000000	-0.173563	-0.062700	0.160612	0.135880	0.097820	-0.090681	-0.306268	0.059422
Faltas	0.175230	-0.062700	0.063726	1.000000	-0.031003	-0.031777	0.034247	-0.088480	-0.066962	0.101701
Escuela_Num	0.377610	-0.090681	0.059804	-0.088480	-0.025731	-0.050086	-0.045017	1.000000	-0.012286	-0.133578
Edad	1.000000	-0.004140	0.243665	0.175230	-0.064081	-0.143474	-0.161579	0.377610	-0.028606	-0.112094
Reprobadas	0.243665	-0.173563	1.000000	0.063726	-0.354718	-0.355896	-0.360415	0.059804	0.044436	-0.063451

	fit_intercept	True
	copy_X	True
	tol	1e-06
	n_jobs	None
	positive	False

Calificaciones.¶

1.1 Introducción.¶

1.2 Objetivo.¶

2.1 Descripción del conjunto de datos.¶

2.2 Preparación y limpieza del conjunto de datos.¶

3.1 Selección de características.¶

3.1 Variables.¶

4.1 Metodología.¶

5.1 Resultados.¶

6.1 Discusiones.¶

7.1 Conclusiones.¶

7.2 Aprendizajes.¶

7.3 Implicaciones.¶

7.5 Posibles líneas futuras.¶

8.1 Referencias.¶

9.1 Código de Honor de la Universidad de Monterrey.¶

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3
0	GP	F	18	2	0	no	6	5	6	6
1	GP	F	17	2	0	yes	4	5	5	6
2	GP	F	15	2	3	yes	10	7	8	10
3	GP	F	15	3	0	yes	2	15	14	15
4	GP	F	16	2	0	no	4	6	10	10

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3
0	GP	F	18	2	0	no	6	5	6	6
1	GP	F	17	2	0	yes	4	5	5	6
2	GP	F	15	2	3	yes	10	7	8	10
3	GP	F	15	3	0	yes	2	15	14	15
4	GP	F	16	2	0	no	4	6	10	10
5	GP	M	16	2	0	yes	10	15	15	15
6	GP	M	16	2	0	yes	0	12	12	11
7	GP	F	17	2	0	no	6	6	5	6
8	GP	M	15	2	0	yes	0	16	18	19
9	GP	M	15	2	0	yes	0	14	15	15

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3
385	MS	F	18	3	0	no	2	10	9	10
386	MS	F	18	1	0	yes	7	6	5	6
387	MS	F	19	3	1	yes	0	7	5	0
388	MS	F	18	2	0	yes	0	7	9	8
389	MS	F	18	2	1	no	0	6	5	0
390	MS	M	20	2	2	no	11	9	9	9
391	MS	M	17	1	0	yes	3	14	16	16
392	MS	M	21	1	3	no	3	10	8	7
393	MS	M	18	1	0	yes	0	11	12	10
394	MS	M	19	1	0	yes	5	8	9	9

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3
0	GP	F	18	2	0	no	6	5	6	6
1	GP	F	17	2	0	yes	4	5	5	6
2	GP	F	15	2	3	yes	10	7	8	10
3	GP	F	15	3	0	yes	2	15	14	15
4	GP	F	16	2	0	no	4	6	10	10
...	...	...	...	...	...	...	...	...	...	...
390	MS	M	20	2	2	no	11	9	9	9
391	MS	M	17	1	0	yes	3	14	16	16
392	MS	M	21	1	3	no	3	10	8	7
393	MS	M	18	1	0	yes	0	11	12	10
394	MS	M	19	1	0	yes	5	8	9	9

	Escuela	Sexo	Edad	HorasDeEstudio	Internet	Faltas	G1	G2	G3	Sexo_Num	Internet_Num
32	GP	M	15	2	yes	0	17	16	16	1	1
299	GP	M	18	1	yes	5	16	15	16	1	1
100	GP	M	16	1	yes	14	7	7	5	1	1
122	GP	F	16	2	yes	2	13	13	13	0	1
114	GP	M	15	2	yes	8	9	9	9	1	1

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3	Sexo_Num	Internet_Num
47	GP	M	16	4	0	yes	4	19	19	20	1	1
66	GP	M	15	4	0	yes	4	13	13	12	1	1
67	GP	F	16	4	0	yes	4	7	7	6	0	1
69	GP	F	15	4	0	yes	12	16	16	16	0	1
70	GP	M	16	4	0	yes	0	13	15	15	1	1
71	GP	M	15	4	0	yes	0	10	10	10	1	1
76	GP	M	15	4	0	yes	8	11	11	10	1	1
77	GP	F	16	4	0	yes	0	11	11	11	0	1
94	GP	M	15	4	0	yes	6	11	13	14	1	1
95	GP	F	15	4	1	yes	2	7	10	10	0	1
105	GP	F	15	4	0	no	10	10	11	11	0	0
106	GP	F	15	4	0	yes	8	7	8	8	0	1
108	GP	M	15	4	0	yes	6	10	13	13	1	1
121	GP	M	15	4	0	yes	6	16	14	15	1	1
140	GP	M	15	4	0	yes	0	7	9	0	1	1
204	GP	F	16	4	0	yes	6	10	10	11	0	1
210	GP	F	19	4	0	yes	10	8	8	8	0	1
256	GP	F	17	4	0	yes	6	14	12	13	0	1
259	GP	F	17	4	0	yes	0	10	9	0	0	1
271	GP	F	18	4	0	yes	4	15	14	14	0	1
282	GP	F	18	4	0	no	1	12	12	12	0	0
293	GP	F	17	4	0	no	6	18	18	18	0	0
298	GP	F	18	4	0	yes	0	14	13	14	0	1
303	GP	F	17	4	0	yes	0	17	17	18	0	1
330	GP	M	18	4	0	yes	2	9	8	8	1	1
334	GP	F	18	4	0	no	0	10	9	0	0	0
338	GP	F	18	4	0	yes	7	16	15	17	0	1

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3	Escuela_Num	Sexo_Num	Internet_Num
0	GP	F	18	2	0	no	6	5	6	6	0	0	0
1	GP	F	17	2	0	yes	4	5	5	6	0	0	1
2	GP	F	15	2	3	yes	10	7	8	10	0	0	1
3	GP	F	15	3	0	yes	2	15	14	15	0	0	1
4	GP	F	16	2	0	no	4	6	10	10	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...
390	MS	M	20	2	2	no	11	9	9	9	1	1	0
391	MS	M	17	1	0	yes	3	14	16	16	1	1	1
392	MS	M	21	1	3	no	3	10	8	7	1	1	0
393	MS	M	18	1	0	yes	0	11	12	10	1	1	1
394	MS	M	19	1	0	yes	5	8	9	9	1	1	1

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3
0	GP	F	18	2	0	no	6	5	6	6
1	GP	F	17	2	0	yes	4	5	5	6
2	GP	F	15	2	3	yes	10	7	8	10
3	GP	F	15	3	0	yes	2	15	14	15
4	GP	F	16	2	0	no	4	6	10	10

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3
0	GP	F	18	2	0	no	6	5	6	6
1	GP	F	17	2	0	yes	4	5	5	6
2	GP	F	15	2	3	yes	10	7	8	10
3	GP	F	15	3	0	yes	2	15	14	15
4	GP	F	16	2	0	no	4	6	10	10
5	GP	M	16	2	0	yes	10	15	15	15
6	GP	M	16	2	0	yes	0	12	12	11
7	GP	F	17	2	0	no	6	6	5	6
8	GP	M	15	2	0	yes	0	16	18	19
9	GP	M	15	2	0	yes	0	14	15	15

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3
385	MS	F	18	3	0	no	2	10	9	10
386	MS	F	18	1	0	yes	7	6	5	6
387	MS	F	19	3	1	yes	0	7	5	0
388	MS	F	18	2	0	yes	0	7	9	8
389	MS	F	18	2	1	no	0	6	5	0
390	MS	M	20	2	2	no	11	9	9	9
391	MS	M	17	1	0	yes	3	14	16	16
392	MS	M	21	1	3	no	3	10	8	7
393	MS	M	18	1	0	yes	0	11	12	10
394	MS	M	19	1	0	yes	5	8	9	9

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3
0	GP	F	18	2	0	no	6	5	6	6
1	GP	F	17	2	0	yes	4	5	5	6
2	GP	F	15	2	3	yes	10	7	8	10
3	GP	F	15	3	0	yes	2	15	14	15
4	GP	F	16	2	0	no	4	6	10	10
...	...	...	...	...	...	...	...	...	...	...
390	MS	M	20	2	2	no	11	9	9	9
391	MS	M	17	1	0	yes	3	14	16	16
392	MS	M	21	1	3	no	3	10	8	7
393	MS	M	18	1	0	yes	0	11	12	10
394	MS	M	19	1	0	yes	5	8	9	9

	Escuela	Sexo	Edad	HorasDeEstudio	Internet	Faltas	G1	G2	G3	Sexo_Num	Internet_Num
32	GP	M	15	2	yes	0	17	16	16	1	1
299	GP	M	18	1	yes	5	16	15	16	1	1
100	GP	M	16	1	yes	14	7	7	5	1	1
122	GP	F	16	2	yes	2	13	13	13	0	1
114	GP	M	15	2	yes	8	9	9	9	1	1

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3	Sexo_Num	Internet_Num
47	GP	M	16	4	0	yes	4	19	19	20	1	1
66	GP	M	15	4	0	yes	4	13	13	12	1	1
67	GP	F	16	4	0	yes	4	7	7	6	0	1
69	GP	F	15	4	0	yes	12	16	16	16	0	1
70	GP	M	16	4	0	yes	0	13	15	15	1	1
71	GP	M	15	4	0	yes	0	10	10	10	1	1
76	GP	M	15	4	0	yes	8	11	11	10	1	1
77	GP	F	16	4	0	yes	0	11	11	11	0	1
94	GP	M	15	4	0	yes	6	11	13	14	1	1
95	GP	F	15	4	1	yes	2	7	10	10	0	1
105	GP	F	15	4	0	no	10	10	11	11	0	0
106	GP	F	15	4	0	yes	8	7	8	8	0	1
108	GP	M	15	4	0	yes	6	10	13	13	1	1
121	GP	M	15	4	0	yes	6	16	14	15	1	1
140	GP	M	15	4	0	yes	0	7	9	0	1	1
204	GP	F	16	4	0	yes	6	10	10	11	0	1
210	GP	F	19	4	0	yes	10	8	8	8	0	1
256	GP	F	17	4	0	yes	6	14	12	13	0	1
259	GP	F	17	4	0	yes	0	10	9	0	0	1
271	GP	F	18	4	0	yes	4	15	14	14	0	1
282	GP	F	18	4	0	no	1	12	12	12	0	0
293	GP	F	17	4	0	no	6	18	18	18	0	0
298	GP	F	18	4	0	yes	0	14	13	14	0	1
303	GP	F	17	4	0	yes	0	17	17	18	0	1
330	GP	M	18	4	0	yes	2	9	8	8	1	1
334	GP	F	18	4	0	no	0	10	9	0	0	0
338	GP	F	18	4	0	yes	7	16	15	17	0	1

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3	Escuela_Num	Sexo_Num	Internet_Num
0	GP	F	18	2	0	no	6	5	6	6	0	0	0
1	GP	F	17	2	0	yes	4	5	5	6	0	0	1
2	GP	F	15	2	3	yes	10	7	8	10	0	0	1
3	GP	F	15	3	0	yes	2	15	14	15	0	0	1
4	GP	F	16	2	0	no	4	6	10	10	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...
390	MS	M	20	2	2	no	11	9	9	9	1	1	0
391	MS	M	17	1	0	yes	3	14	16	16	1	1	1
392	MS	M	21	1	3	no	3	10	8	7	1	1	0
393	MS	M	18	1	0	yes	0	11	12	10	1	1	1
394	MS	M	19	1	0	yes	5	8	9	9	1	1	1

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3
0	GP	F	18	2	0	no	6	5	6	6
1	GP	F	17	2	0	yes	4	5	5	6
2	GP	F	15	2	3	yes	10	7	8	10
3	GP	F	15	3	0	yes	2	15	14	15
4	GP	F	16	2	0	no	4	6	10	10

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3
0	GP	F	18	2	0	no	6	5	6	6
1	GP	F	17	2	0	yes	4	5	5	6
2	GP	F	15	2	3	yes	10	7	8	10
3	GP	F	15	3	0	yes	2	15	14	15
4	GP	F	16	2	0	no	4	6	10	10
5	GP	M	16	2	0	yes	10	15	15	15
6	GP	M	16	2	0	yes	0	12	12	11
7	GP	F	17	2	0	no	6	6	5	6
8	GP	M	15	2	0	yes	0	16	18	19
9	GP	M	15	2	0	yes	0	14	15	15

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3
385	MS	F	18	3	0	no	2	10	9	10
386	MS	F	18	1	0	yes	7	6	5	6
387	MS	F	19	3	1	yes	0	7	5	0
388	MS	F	18	2	0	yes	0	7	9	8
389	MS	F	18	2	1	no	0	6	5	0
390	MS	M	20	2	2	no	11	9	9	9
391	MS	M	17	1	0	yes	3	14	16	16
392	MS	M	21	1	3	no	3	10	8	7
393	MS	M	18	1	0	yes	0	11	12	10
394	MS	M	19	1	0	yes	5	8	9	9

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3
0	GP	F	18	2	0	no	6	5	6	6
1	GP	F	17	2	0	yes	4	5	5	6
2	GP	F	15	2	3	yes	10	7	8	10
3	GP	F	15	3	0	yes	2	15	14	15
4	GP	F	16	2	0	no	4	6	10	10
...	...	...	...	...	...	...	...	...	...	...
390	MS	M	20	2	2	no	11	9	9	9
391	MS	M	17	1	0	yes	3	14	16	16
392	MS	M	21	1	3	no	3	10	8	7
393	MS	M	18	1	0	yes	0	11	12	10
394	MS	M	19	1	0	yes	5	8	9	9

	Escuela	Sexo	Edad	HorasDeEstudio	Internet	Faltas	G1	G2	G3	Sexo_Num	Internet_Num
32	GP	M	15	2	yes	0	17	16	16	1	1
299	GP	M	18	1	yes	5	16	15	16	1	1
100	GP	M	16	1	yes	14	7	7	5	1	1
122	GP	F	16	2	yes	2	13	13	13	0	1
114	GP	M	15	2	yes	8	9	9	9	1	1

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3	Sexo_Num	Internet_Num
47	GP	M	16	4	0	yes	4	19	19	20	1	1
66	GP	M	15	4	0	yes	4	13	13	12	1	1
67	GP	F	16	4	0	yes	4	7	7	6	0	1
69	GP	F	15	4	0	yes	12	16	16	16	0	1
70	GP	M	16	4	0	yes	0	13	15	15	1	1
71	GP	M	15	4	0	yes	0	10	10	10	1	1
76	GP	M	15	4	0	yes	8	11	11	10	1	1
77	GP	F	16	4	0	yes	0	11	11	11	0	1
94	GP	M	15	4	0	yes	6	11	13	14	1	1
95	GP	F	15	4	1	yes	2	7	10	10	0	1
105	GP	F	15	4	0	no	10	10	11	11	0	0
106	GP	F	15	4	0	yes	8	7	8	8	0	1
108	GP	M	15	4	0	yes	6	10	13	13	1	1
121	GP	M	15	4	0	yes	6	16	14	15	1	1
140	GP	M	15	4	0	yes	0	7	9	0	1	1
204	GP	F	16	4	0	yes	6	10	10	11	0	1
210	GP	F	19	4	0	yes	10	8	8	8	0	1
256	GP	F	17	4	0	yes	6	14	12	13	0	1
259	GP	F	17	4	0	yes	0	10	9	0	0	1
271	GP	F	18	4	0	yes	4	15	14	14	0	1
282	GP	F	18	4	0	no	1	12	12	12	0	0
293	GP	F	17	4	0	no	6	18	18	18	0	0
298	GP	F	18	4	0	yes	0	14	13	14	0	1
303	GP	F	17	4	0	yes	0	17	17	18	0	1
330	GP	M	18	4	0	yes	2	9	8	8	1	1
334	GP	F	18	4	0	no	0	10	9	0	0	0
338	GP	F	18	4	0	yes	7	16	15	17	0	1

	Escuela	Sexo	Edad	HorasDeEstudio	Reprobadas	Internet	Faltas	G1	G2	G3	Escuela_Num	Sexo_Num	Internet_Num
0	GP	F	18	2	0	no	6	5	6	6	0	0	0
1	GP	F	17	2	0	yes	4	5	5	6	0	0	1
2	GP	F	15	2	3	yes	10	7	8	10	0	0	1
3	GP	F	15	3	0	yes	2	15	14	15	0	0	1
4	GP	F	16	2	0	no	4	6	10	10	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...
390	MS	M	20	2	2	no	11	9	9	9	1	1	0
391	MS	M	17	1	0	yes	3	14	16	16	1	1	1
392	MS	M	21	1	3	no	3	10	8	7	1	1	0
393	MS	M	18	1	0	yes	0	11	12	10	1	1	1
394	MS	M	19	1	0	yes	5	8	9	9	1	1	1