#Importar las librerías.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split # Permite implementar la mayoría de modelos.
from sklearn.linear_model import LogisticRegression
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.model_selection import cross_val_score

# Cargar los datos.
url = 'https://raw.githubusercontent.com/estefaniadelarosa/IA-I/refs/heads/main/P1.%20Regresi%C3%B3n/P1.%20Regresi%C3%B3n/2024_2025_salud_mental.csv'
df = pd.read_csv(url)
print(df.shape)
df.head()

(48224, 14)

df1 = df[df['municipio_unidad_medica'] == 'SAN PEDRO GARZA GARCIA']
print(df1.shape)
df1.head()

(1060, 14)

# Aplicar el filtro de personas dentro del rango de edad de 18 a 65 años.
df2 = df1[(df1['edad'] >= 18) & (df1['edad'] <= 65)]
print(df2.shape)
df2.head()

(720, 14)

# Verificar si hay datos vacíos.
df2.isna().sum().sort_values(ascending = False)

fecha                           0
id_consulta                     0
edad                            0
edad_meses                      0
edad_dias                       0
sexo                            0
peso                            0
altura                          0
municipio_unidad_medica         0
institucion_unidad_medica       0
clave_grupo_ enfermedad         0
descripcion_grupo_enfermedad    0
clave_enfermedad                0
descripcion_enfermedad          0
dtype: int64

df2['descripcion_grupo_enfermedad'].value_counts()

descripcion_grupo_enfermedad
TRASTORNOS MENTALES Y DEL COMPORTAMIENTO                                             547
FACTORES QUE INFLUYEN EN EL ESTADO DE SALUD Y CONTACTO CON LOS SERVICIOS DE SALUD    173
Name: count, dtype: int64

# Para el porcentaje.
df2['descripcion_grupo_enfermedad'].value_counts()/(df2['descripcion_grupo_enfermedad'].value_counts().sum())*100

descripcion_grupo_enfermedad
TRASTORNOS MENTALES Y DEL COMPORTAMIENTO                                             75.972222
FACTORES QUE INFLUYEN EN EL ESTADO DE SALUD Y CONTACTO CON LOS SERVICIOS DE SALUD    24.027778
Name: count, dtype: float64

# Gráfica del balance de clases.
df2['descripcion_grupo_enfermedad'].str[:43].value_counts().plot(kind = 'bar')
plt.show()

df2.dtypes

fecha                           object
id_consulta                     object
edad                             int64
edad_meses                       int64
edad_dias                        int64
sexo                            object
peso                            object
altura                          object
municipio_unidad_medica         object
institucion_unidad_medica       object
clave_grupo_ enfermedad         object
descripcion_grupo_enfermedad    object
clave_enfermedad                object
descripcion_enfermedad          object
dtype: object

# Convertimos las variables categóricas a categóricas numéricas.
from sklearn.preprocessing import LabelEncoder 

# Aplicamos la transformación.
df2['sexo_num'] = LabelEncoder().fit_transform(df2['sexo'])
df2['institucion_unidad_medica_num'] = LabelEncoder().fit_transform(df2['institucion_unidad_medica'])
df2['descripcion_grupo_enfermedad_num'] = LabelEncoder().fit_transform(df2['descripcion_grupo_enfermedad'])
df2['clave_enfermedad_num'] = LabelEncoder().fit_transform(df2['clave_enfermedad'])
df2['descripcion_enfermedad_num'] = LabelEncoder().fit_transform(df2['descripcion_enfermedad'])

df2.sample(5)

/var/folders/rw/krrrqrzn68j3jq_d4yl8mzk80000gn/T/ipykernel_47110/763801538.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['sexo_num'] = LabelEncoder().fit_transform(df2['sexo'])
/var/folders/rw/krrrqrzn68j3jq_d4yl8mzk80000gn/T/ipykernel_47110/763801538.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['institucion_unidad_medica_num'] = LabelEncoder().fit_transform(df2['institucion_unidad_medica'])
/var/folders/rw/krrrqrzn68j3jq_d4yl8mzk80000gn/T/ipykernel_47110/763801538.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['descripcion_grupo_enfermedad_num'] = LabelEncoder().fit_transform(df2['descripcion_grupo_enfermedad'])
/var/folders/rw/krrrqrzn68j3jq_d4yl8mzk80000gn/T/ipykernel_47110/763801538.py:8: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['clave_enfermedad_num'] = LabelEncoder().fit_transform(df2['clave_enfermedad'])
/var/folders/rw/krrrqrzn68j3jq_d4yl8mzk80000gn/T/ipykernel_47110/763801538.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['descripcion_enfermedad_num'] = LabelEncoder().fit_transform(df2['descripcion_enfermedad'])

df2 = df2.drop(columns=['edad_meses', 'edad_dias'])

# Convertir peso y altura a enteros.
df2['peso'] = df2['peso'].astype(float)
df2['altura'] = df2['altura'].astype(float)

# Verificar.
print(df2.dtypes[['peso','altura']])

peso      float64
altura    float64
dtype: object

# Convertir altura a metros.
df2['altura_m'] = df2['altura'] / 100
# IMC.
df2['IMC'] = df2['peso'] / (df2['altura_m']**2)

plt.figure(figsize = (10, 6))
sns.heatmap(df2.corr(numeric_only = True), annot = True)
plt.show()

# Definimos el modelo de Feature Selection - ANOVA.
modelFS = smf.ols(formula='descripcion_grupo_enfermedad_num ~ edad + sexo_num + IMC + institucion_unidad_medica_num + clave_enfermedad_num + descripcion_enfermedad_num', data = df2).fit()
modelFS.summary()

# Imprimimos los P value para ver cuál es más importante.
pvalue_percent = (modelFS.pvalues * 100).sort_values(ascending = True)
pvalue_percent

clave_enfermedad_num             6.584618e-56
Intercept                        6.892180e-50
descripcion_enfermedad_num       3.738917e-06
sexo_num                         3.829040e-06
institucion_unidad_medica_num    6.021948e+00
IMC                              3.815116e+01
edad                             5.652051e+01
dtype: float64

X = df2[['edad', 'sexo_num', 'IMC', 'institucion_unidad_medica_num', 'clave_enfermedad_num', 'descripcion_enfermedad_num']]

vif_data = pd.DataFrame()
vif_data["feature"] = X.columns

vif_data["VIF"] = [variance_inflation_factor(X.values, i)
                          for i in range(len(X.columns))]
print(vif_data)

                         feature        VIF
0                           edad   9.406207
1                       sexo_num   2.008371
2                            IMC  20.720012
3  institucion_unidad_medica_num   2.930642
4           clave_enfermedad_num   8.236488
5     descripcion_enfermedad_num   6.445571

X = df2[['edad', 'sexo_num', 'institucion_unidad_medica_num', 'descripcion_enfermedad_num']]

vif_data = pd.DataFrame()
vif_data["feature"] = X.columns

vif_data["VIF"] = [variance_inflation_factor(X.values, i)
                          for i in range(len(X.columns))]
print(vif_data)

                         feature       VIF
0                           edad  4.187639
1                       sexo_num  1.918244
2  institucion_unidad_medica_num  2.000134
3     descripcion_enfermedad_num  4.060178

# Definimos entrada y salida.
X = df2[['edad', 'sexo_num', 'institucion_unidad_medica_num', 'descripcion_enfermedad_num']]
y = df2['descripcion_grupo_enfermedad_num']
print(X.shape)
print(y.shape)

(720, 4)
(720,)

# Dividir los datos en train y test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0, stratify = y)
X_train.shape, X_test.shape, y_train.shape, y_test.shape
# No dividir de manera que se desbalanceen los datos -> cuidado con el random_state).

((576, 4), (144, 4), (576,), (144,))

y_train.value_counts()

# 0: Factores influyendo en el estado de salud. 1: Trastorno mental y del comportamiento.

descripcion_grupo_enfermedad_num
1    438
0    138
Name: count, dtype: int64

y_test.value_counts()

# 0: Factores influyendo en el estado de salud. 1: Trastorno mental y del comportamiento.

descripcion_grupo_enfermedad_num
1    109
0     35
Name: count, dtype: int64

# Definimos el modelo.
model1 = LogisticRegression(random_state = 0)
# Entrenamos el modelo.
model1.fit(X_train, y_train)

LogisticRegression(random_state=0)

# Validar si el modelo pronostica adecuadamente.
y_pred_test = model1.predict(X_test)
print(y_pred_test[0:5])
print(y_test.head())

[1 0 0 1 1]
46382    0
32275    0
7409     1
32248    1
46407    1
Name: descripcion_grupo_enfermedad_num, dtype: int64

accuracy_train = model1.score(X_train, y_train)
print('Accuracy train = {:.2f}'.format(accuracy_train))
accuracy_test = model1.score(X_test, y_test)
print('Accuracy test = {:.2f}'.format(accuracy_test))
print('Diferencia = {:.4f}%'.format(np.abs(accuracy_train-accuracy_test)*100))

Accuracy train = 0.87
Accuracy test = 0.87
Diferencia = 0.0000%

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred_test) # Siempre datos de validación.
cm

array([[29,  6],
       [13, 96]])

scores = cross_val_score(model1, X, y, cv = 5)
print(scores.mean())

0.8722222222222221

OR = np.exp(model1.coef_[0])
coef_df = pd.DataFrame({
  "Variables": X.columns,
  "Coeficientes": model1.coef_[0],
  "Odds Ratio": OR
})

print(coef_df)

                       Variables  Coeficientes  Odds Ratio
0                           edad     -0.008490    0.991546
1                       sexo_num      0.513594    1.671287
2  institucion_unidad_medica_num     -1.923495    0.146096
3     descripcion_enfermedad_num      0.124889    1.133023

# Reporte de clasificación.
from sklearn.metrics import classification_report
from imblearn.metrics import geometric_mean_score

names = ['Factores influyendo en el estado de salud.', 'Trastorno mental y del comportamiento.']
print(classification_report(y_test, y_pred_test, target_names = names))
print('G-mean =', geometric_mean_score(y_test, y_pred_test))

                                            precision    recall  f1-score   support

Factores influyendo en el estado de salud.       0.69      0.83      0.75        35
    Trastorno mental y del comportamiento.       0.94      0.88      0.91       109

                                  accuracy                           0.87       144
                                 macro avg       0.82      0.85      0.83       144
                              weighted avg       0.88      0.87      0.87       144

G-mean = 0.8542546359031318

from IPython.display import Image
Image(filename='/Users/estefaniadelarosa/Downloads/roc-curve-v2.png')

from sklearn.model_selection import cross_val_predict
from sklearn.metrics import roc_curve, roc_auc_score

# Validación cruzada.
y_probs = cross_val_predict(model1, X, y, cv = 5, method = 'predict_proba')[:,1] # Agregando método como scores.

# Curva ROC.
fpr, tpr, thresholds = roc_curve(y, y_probs)
auc = roc_auc_score(y, y_probs)

# Graficar
plt.plot(fpr, tpr)
plt.plot([0,1], [0,1])
plt.xlabel("False Positive Rate.")
plt.ylabel("True Positive Rate.")
plt.title("ROC Curve.")
plt.show()

print("AUC:", auc)

AUC: 0.9028014075725713

model2 = LogisticRegression(class_weight = 'balanced', random_state = 0)
# Entrenamos el modelo.
model2.fit(X_train, y_train)

LogisticRegression(class_weight='balanced', random_state=0)

# Validar si el modelo pronostica adecuadamente.
y_pred_test = model2.predict(X_test)
print(y_pred_test[0:5])
print(y_test.head())

[1 0 0 1 1]
46382    0
32275    0
7409     1
32248    1
46407    1
Name: descripcion_grupo_enfermedad_num, dtype: int64

accuracy_train = model2.score(X_train, y_train)
print('Accuracy train = {:.2f}'.format(accuracy_train))
accuracy_test = model2.score(X_test, y_test)
print('Accuracy test = {:.2f}'.format(accuracy_test))
print('Diferencia = {:.4f}%'.format(np.abs(accuracy_train-accuracy_test)*100))

Accuracy train = 0.89
Accuracy test = 0.86
Diferencia = 2.6042%

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred_test) # Siempre datos de validación.
cm

array([[32,  3],
       [17, 92]])

scores = cross_val_score(model2, X, y, cv = 5)
print(scores.mean())

0.8777777777777777

OR = np.exp(model2.coef_[0])
coef_df = pd.DataFrame({
  "Variables": X.columns,
  "Coeficientes": model2.coef_[0],
  "Odds Ratio": OR
})

print(coef_df)

                       Variables  Coeficientes  Odds Ratio
0                           edad     -0.004255    0.995754
1                       sexo_num      0.659833    1.934469
2  institucion_unidad_medica_num     -1.579543    0.206069
3     descripcion_enfermedad_num      0.165406    1.179871

# Reporte de clasificación.
from sklearn.metrics import classification_report
from imblearn.metrics import geometric_mean_score

names = ['Factores influyendo en el estado de salud.', 'Trastorno mental y del comportamiento.']
print(classification_report(y_test, y_pred_test, target_names = names))
print('G-mean =', geometric_mean_score(y_test, y_pred_test))

                                            precision    recall  f1-score   support

Factores influyendo en el estado de salud.       0.65      0.91      0.76        35
    Trastorno mental y del comportamiento.       0.97      0.84      0.90       109

                                  accuracy                           0.86       144
                                 macro avg       0.81      0.88      0.83       144
                              weighted avg       0.89      0.86      0.87       144

G-mean = 0.8784592731746159

from sklearn.model_selection import cross_val_predict
from sklearn.metrics import roc_curve, roc_auc_score

# Validación cruzada.
y_probs = cross_val_predict(model2, X, y, cv = 5, method = 'predict_proba')[:,1] # Agregando método como scores.

# Curva ROC.
fpr, tpr, thresholds = roc_curve(y, y_probs)
auc = roc_auc_score(y, y_probs)

# Graficar
plt.plot(fpr, tpr)
plt.plot([0,1], [0,1])
plt.xlabel("False Positive Rate.")
plt.ylabel("True Positive Rate.")
plt.title("ROC Curve.")
plt.show()

print("AUC:", auc)

AUC: 0.8935866682165464

y_train.value_counts()

# 0: Factores influyendo en el estado de salud. 1: Trastorno mental y del comportamiento.

descripcion_grupo_enfermedad_num
1    438
0    138
Name: count, dtype: int64

y_test.value_counts()

# 0: Factores influyendo en el estado de salud. 1: Trastorno mental y del comportamiento.

descripcion_grupo_enfermedad_num
1    109
0     35
Name: count, dtype: int64

model3 = LogisticRegression(class_weight = {0:0.55, 1:0.45}, random_state = 0) # Llegar hasta 100.
# Entrenamos el modelo.
model3.fit(X_train, y_train)

LogisticRegression(class_weight={0: 0.55, 1: 0.45}, random_state=0)

# Validar si el modelo pronostica adecuadamente.
y_pred_test = model3.predict(X_test)
print(y_pred_test[0:5])
print(y_test.head())

[1 0 0 1 1]
46382    0
32275    0
7409     1
32248    1
46407    1
Name: descripcion_grupo_enfermedad_num, dtype: int64

accuracy_train = model3.score(X_train, y_train)
print('Accuracy train = {:.2f}'.format(accuracy_train))
accuracy_test = model3.score(X_test, y_test)
print('Accuracy test = {:.2f}'.format(accuracy_test))
print('Diferencia = {:.4f}%'.format(np.abs(accuracy_train-accuracy_test)*100))

Accuracy train = 0.86
Accuracy test = 0.85
Diferencia = 0.8681%

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred_test) # Siempre datos de validación.
cm

array([[29,  6],
       [15, 94]])

scores = cross_val_score(model3, X, y, cv = 5)
print(scores.mean())

0.8569444444444445

OR = np.exp(model3.coef_[0])
coef_df = pd.DataFrame({
  "Variables": X.columns,
  "Coeficientes": model3.coef_[0],
  "Odds Ratio": OR
})

print(coef_df)

                       Variables  Coeficientes  Odds Ratio
0                           edad     -0.008226    0.991808
1                       sexo_num      0.536212    1.709519
2  institucion_unidad_medica_num     -1.729901    0.177302
3     descripcion_enfermedad_num      0.128327    1.136925

# Reporte de clasificación.
from sklearn.metrics import classification_report
from imblearn.metrics import geometric_mean_score

names = ['Factores influyendo en el estado de salud.', 'Trastorno mental y del comportamiento.']
print(classification_report(y_test, y_pred_test, target_names = names))
print('G-mean =', geometric_mean_score(y_test, y_pred_test))

                                            precision    recall  f1-score   support

Factores influyendo en el estado de salud.       0.66      0.83      0.73        35
    Trastorno mental y del comportamiento.       0.94      0.86      0.90       109

                                  accuracy                           0.85       144
                                 macro avg       0.80      0.85      0.82       144
                              weighted avg       0.87      0.85      0.86       144

G-mean = 0.8453093146793175

from sklearn.model_selection import cross_val_predict
from sklearn.metrics import roc_curve, roc_auc_score

# Validación cruzada.
y_probs = cross_val_predict(model3, X, y, cv = 5, method = 'predict_proba')[:,1] # Agregando método como scores.

# Curva ROC.
fpr, tpr, thresholds = roc_curve(y, y_probs)
auc = roc_auc_score(y, y_probs)

# Graficar.
plt.plot(fpr, tpr)
plt.plot([0,1], [0,1])
plt.xlabel("False Positive Rate.")
plt.ylabel("True Positive Rate.")
plt.title("ROC Curve.")
plt.show()

print("AUC:", auc)

AUC: 0.9046929653073515

from imblearn.over_sampling import SMOTE

# Definir la técnica.
smote = SMOTE(random_state = 0)
# Aplicamos.
X_smote, y_smote = smote.fit_resample(X, y)

# Comprobar si funicona.
print('Tamaño de X antes de SMOTE:', X.shape)
print('Tamaño de X después de SMOTE:', X_smote.shape)
print('Balance de clases con SMOTE:', y_smote.value_counts())
print('Nuestras clases están balanceadas.')

Tamaño de X antes de SMOTE: (720, 4)
Tamaño de X después de SMOTE: (1094, 4)
Balance de clases con SMOTE: descripcion_grupo_enfermedad_num
1    547
0    547
Name: count, dtype: int64
Nuestras clases están balanceadas.

# Dividir los tratos en train y test.
X_train, X_test, y_train, y_test = train_test_split(X_smote, y_smote, test_size = 0.2, random_state = 0, stratify = y_smote)
print(y_train.value_counts())
X_train.shape, X_test.shape, y_train.shape, y_test.shape
# No dividir de manera que se desbalanceen los datos -> cuidado con el random_state).

descripcion_grupo_enfermedad_num
1    438
0    437
Name: count, dtype: int64

((875, 4), (219, 4), (875,), (219,))

# Definimos el modelo.
model4 = LogisticRegression(random_state = 0)
# Entrenamos el modelo.
model4.fit(X_train, y_train)

LogisticRegression(random_state=0)

# Validar si el modelo pronostica adecuadamente.
y_pred_test = model4.predict(X_test)
print(y_pred_test[0:5])
print(y_test.head())

[0 1 0 0 0]
944     0
199     1
1034    0
405     0
727     0
Name: descripcion_grupo_enfermedad_num, dtype: int64

accuracy_train = model4.score(X_train, y_train)
print('Accuracy train = {:.2f}'.format(accuracy_train))
accuracy_test = model4.score(X_test, y_test)
print('Accuracy test = {:.2f}'.format(accuracy_test))
print('Diferencia = {:.4f}%'.format(np.abs(accuracy_train-accuracy_test)*100))

Accuracy train = 0.90
Accuracy test = 0.92
Diferencia = 2.6374%

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred_test) # Siempre datos de validación.
cm

array([[102,   8],
       [  9, 100]])

scores = cross_val_score(model4, X, y, cv = 5)
print(scores.mean())

0.8722222222222221

OR = np.exp(model4.coef_[0])
coef_df = pd.DataFrame({
  "Variables": X.columns,
  "Coeficientes": model4.coef_[0],
  "Odds Ratio": OR
})

print(coef_df)

                       Variables  Coeficientes  Odds Ratio
0                           edad     -0.001302    0.998699
1                       sexo_num      1.051918    2.863137
2  institucion_unidad_medica_num     -1.215389    0.296595
3     descripcion_enfermedad_num      0.151412    1.163476

# Reporte de clasificación.
from sklearn.metrics import classification_report
from imblearn.metrics import geometric_mean_score

names = ['Factores influyendo en el estado de salud.', 'Trastorno mental y del comportamiento.']
print(classification_report(y_test, y_pred_test, target_names = names))
print('G-mean =', geometric_mean_score(y_test, y_pred_test))

                                            precision    recall  f1-score   support

Factores influyendo en el estado de salud.       0.92      0.93      0.92       110
    Trastorno mental y del comportamiento.       0.93      0.92      0.92       109

                                  accuracy                           0.92       219
                                 macro avg       0.92      0.92      0.92       219
                              weighted avg       0.92      0.92      0.92       219

G-mean = 0.9223388336741652

from sklearn.model_selection import cross_val_predict
from sklearn.metrics import roc_curve, roc_auc_score
import matplotlib.pyplot as plt

# Validación cruzada.
y_probs = cross_val_predict(model4, X, y, cv = 5, method = 'predict_proba')[:,1] # Agregando método como scores.

# Curva ROC.
fpr, tpr, thresholds = roc_curve(y, y_probs)
auc = roc_auc_score(y, y_probs)

# Graficar.
plt.plot(fpr, tpr)
plt.plot([0,1], [0,1])
plt.xlabel("False Positive Rate.")
plt.ylabel("True Positive Rate.")
plt.title("ROC Curve.")
plt.show()

print("AUC:", auc)

AUC: 0.9028014075725713

X.head()

X_new = pd.DataFrame({
    'edad': [33, 53, 60],
    'sexo_num': [1, 0, 0],
    'institucion_unidad_medica_num': [0, 0, 0],
    'descripcion_enfermedad_num': [44, 38, 6]
})

y_pred_new = model4.predict(X_new)
y_pred_new

array([1, 1, 0])

	fecha	id_consulta	edad	sexo	peso	altura	municipio_unidad_medica	institucion_unidad_medica	clave_grupo_ enfermedad	descripcion_grupo_enfermedad	clave_enfermedad	descripcion_enfermedad
0	02/10/2024	SM_2024_38869	21	Masculino	82	174	LINARES	HOSPITAL GENERAL DE LINARES	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F412	TRASTORNO MIXTO DE ANSIEDAD Y DEPRESION
1	08/10/2024	SM_2024_38870	21	Masculino	82	174	LINARES	HOSPITAL GENERAL DE LINARES	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F412	TRASTORNO MIXTO DE ANSIEDAD Y DEPRESION
2	08/10/2024	SM_2024_38871	5	Masculino	21	111	LINARES	HOSPITAL GENERAL DE LINARES	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F919	TRASTORNO DE LA CONDUCTA NO ESPECIFICADO
3	09/10/2024	SM_2024_38872	69	Masculino	sin valor	sin valor	LINARES	HOSPITAL GENERAL DE LINARES	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F321	EPISODIO DEPRESIVO MODERADO
4	09/10/2024	SM_2024_38873	78	Masculino	sin valor	sin valor	LINARES	HOSPITAL GENERAL DE LINARES	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F321	EPISODIO DEPRESIVO MODERADO

	fecha	id_consulta	edad	sexo	peso	altura	municipio_unidad_medica	institucion_unidad_medica	clave_grupo_ enfermedad	descripcion_grupo_enfermedad	clave_enfermedad	descripcion_enfermedad
2625	02/10/2024	SM_2024_41494	33	Masculino	60	160	SAN PEDRO GARZA GARCIA	CENTRO COMUNITARIO DE SALUD MENTAL Y ADICCIONE...	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F122	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO DEBID...
2626	02/10/2024	SM_2024_41495	53	Femenino	60	160	SAN PEDRO GARZA GARCIA	CENTRO COMUNITARIO DE SALUD MENTAL Y ADICCIONE...	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F412	TRASTORNO MIXTO DE ANSIEDAD Y DEPRESION
2627	02/10/2024	SM_2024_41496	60	Femenino	60	160	SAN PEDRO GARZA GARCIA	CENTRO COMUNITARIO DE SALUD MENTAL Y ADICCIONE...	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F630	JUEGO PATOLOGICO
2628	04/10/2024	SM_2024_41497	46	Femenino	70	165	SAN PEDRO GARZA GARCIA	CENTRO COMUNITARIO DE SALUD MENTAL Y ADICCIONE...	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F412	TRASTORNO MIXTO DE ANSIEDAD Y DEPRESION
2629	04/10/2024	SM_2024_41498	47	Femenino	60	160	SAN PEDRO GARZA GARCIA	CENTRO COMUNITARIO DE SALUD MENTAL Y ADICCIONE...	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F412	TRASTORNO MIXTO DE ANSIEDAD Y DEPRESION

	fecha	id_consulta	edad	sexo	peso	altura	municipio_unidad_medica	institucion_unidad_medica	clave_grupo_ enfermedad	descripcion_grupo_enfermedad	clave_enfermedad	descripcion_enfermedad
2625	02/10/2024	SM_2024_41494	33	Masculino	60	160	SAN PEDRO GARZA GARCIA	CENTRO COMUNITARIO DE SALUD MENTAL Y ADICCIONE...	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F122	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO DEBID...
2626	02/10/2024	SM_2024_41495	53	Femenino	60	160	SAN PEDRO GARZA GARCIA	CENTRO COMUNITARIO DE SALUD MENTAL Y ADICCIONE...	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F412	TRASTORNO MIXTO DE ANSIEDAD Y DEPRESION
2627	02/10/2024	SM_2024_41496	60	Femenino	60	160	SAN PEDRO GARZA GARCIA	CENTRO COMUNITARIO DE SALUD MENTAL Y ADICCIONE...	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F630	JUEGO PATOLOGICO
2628	04/10/2024	SM_2024_41497	46	Femenino	70	165	SAN PEDRO GARZA GARCIA	CENTRO COMUNITARIO DE SALUD MENTAL Y ADICCIONE...	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F412	TRASTORNO MIXTO DE ANSIEDAD Y DEPRESION
2629	04/10/2024	SM_2024_41498	47	Femenino	60	160	SAN PEDRO GARZA GARCIA	CENTRO COMUNITARIO DE SALUD MENTAL Y ADICCIONE...	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F412	TRASTORNO MIXTO DE ANSIEDAD Y DEPRESION

Dep. Variable:	descripcion_grupo_enfermedad_num	R-squared:	0.557
Model:	OLS	Adj. R-squared:	0.553
Method:	Least Squares	F-statistic:	149.4
Date:	Mon, 02 Mar 2026	Prob (F-statistic):	1.74e-122
Time:	13:09:38	Log-Likelihood:	-116.25
No. Observations:	720	AIC:	246.5
Df Residuals:	713	BIC:	278.6
Df Model:	6
Covariance Type:	nonrobust

	coef	std err	t	P>\|t\|	[0.025	0.975]
Intercept	1.2893	0.078	16.470	0.000	1.136	1.443
edad	-0.0005	0.001	-0.575	0.565	-0.002	0.001
sexo_num	-0.1472	0.026	-5.559	0.000	-0.199	-0.095
IMC	-0.0018	0.002	-0.876	0.382	-0.006	0.002
institucion_unidad_medica_num	-0.0496	0.026	-1.882	0.060	-0.101	0.002
clave_enfermedad_num	-0.0189	0.001	-17.611	0.000	-0.021	-0.017
descripcion_enfermedad_num	0.0056	0.001	5.564	0.000	0.004	0.008

Estatus de la Salud Mental en adultos del municipio más acaudalado de América Látina.¶

1.1 Introducción.¶

1.2 Objetivo.¶

2.1 Descripción del conjunto de datos.¶

2.2 Preparación y limpieza del conjunto de datos.¶

3.1 Selección de características.¶

3.1 Variables.¶

4.1 Metodología.¶

5.1 Resultados.¶

6.1 Discusiones.¶

7.1 Conclusiones.¶

7.2 Aprendizajes.¶

7.3 Implicaciones.¶

7.4 Posibles líneas futuras.¶

8.1 Referencias.¶

9.1 Código de Honor de la Universidad de Monterrey.¶

	fecha	id_consulta	edad	sexo	peso	altura	municipio_unidad_medica	institucion_unidad_medica	clave_grupo_ enfermedad	descripcion_grupo_enfermedad	clave_enfermedad	descripcion_enfermedad	sexo_num	institucion_unidad_medica_num	descripcion_grupo_enfermedad_num	clave_enfermedad_num	descripcion_enfermedad_num
46621	26/08/2025	SM_2025_33757	32	Femenino	71.8	167	SAN PEDRO GARZA GARCIA	CENTRO DE SALUD CON SERVICIOS AMPLIADOS SAN PE...	XXI	FACTORES QUE INFLUYEN EN EL ESTADO DE SALUD Y ...	Z631	PROBLEMAS EN LA RELACION CON LOS PADRES Y LOS ...	0	1	0	47	15
18782	19/02/2025	SM_2025_5918	21	Femenino	55.4	162	SAN PEDRO GARZA GARCIA	CENTRO DE SALUD CON SERVICIOS AMPLIADOS SAN PE...	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F412	TRASTORNO MIXTO DE ANSIEDAD Y DEPRESION	0	1	1	32	38
36865	13/06/2025	SM_2025_24001	32	Masculino	75	175	SAN PEDRO GARZA GARCIA	CENTRO COMUNITARIO DE SALUD MENTAL Y ADICCIONE...	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F140	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO DEBID...	1	0	1	8	47
37238	11/06/2025	SM_2025_24374	55	Femenino	82	154	SAN PEDRO GARZA GARCIA	CENTRO DE SALUD CON SERVICIOS AMPLIADOS SAN PE...	XXI	FACTORES QUE INFLUYEN EN EL ESTADO DE SALUD Y ...	Z631	PROBLEMAS EN LA RELACION CON LOS PADRES Y LOS ...	0	1	0	47	15
36906	23/06/2025	SM_2025_24042	28	Masculino	75	170	SAN PEDRO GARZA GARCIA	CENTRO COMUNITARIO DE SALUD MENTAL Y ADICCIONE...	V	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO	F122	TRASTORNOS MENTALES Y DEL COMPORTAMIENTO DEBID...	1	0	1	6	44

Omnibus:	758.401	Durbin-Watson:	1.802
Prob(Omnibus):	0.000	Jarque-Bera (JB):	46.874
Skew:	0.010	Prob(JB):	6.63e-11
Kurtosis:	1.750	Cond. No.	473.

	penalty	'l2'
	dual	False
	tol	0.0001
	C	1.0
	fit_intercept	True
	intercept_scaling	1
	class_weight	None
	random_state	0
	solver	'lbfgs'
	max_iter	100
	multi_class	'deprecated'
	verbose	0
	warm_start	False
	n_jobs	None
	l1_ratio	None