Thursday, April 23, 2026

dataprocessing

 Column Transformer

----------------

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the categorical columns and pass every other column through.
ct = ColumnTransformer(
    transformers=[
        # Dense output (sparse_output=False) is needed for the pandas output
        # container configured below.
        ("ohe", OneHotEncoder(sparse_output=False), ["city", "gender"]),
    ],
    remainder="passthrough",  # Keeps all other columns (like age, salary)
).set_output(transform="pandas")  # transform() returns a DataFrame

# NOTE(review): `df` is assumed to be a DataFrame defined elsewhere that
# contains the "city" and "gender" columns.
df_processed = ct.fit_transform(df)


--------------------

--------------------

# Fit a standard scaler on `df` and keep the scaled result as a DataFrame.
# NOTE(review): `StandardScaler` and `df` are not defined in this snippet and
# are assumed to come from elsewhere; `df` presumably holds only numeric
# columns - confirm before running.
scaler = StandardScaler()
scaler.set_output(transform="pandas")  # configures the scaler in place
df_scaled = scaler.fit_transform(df)

집값 프로세싱
# Clean a copy so the original `bhp` frame is left as loaded.
bhp_null = bhp.copy()

# Per-column missing-value counts before cleaning.
bhp_null.isna().sum()

# TOWN gaps are filled with the most frequent value, CRIM gaps with the mean.
town_mode = bhp_null['TOWN'].mode()[0]
bhp_null['TOWN'] = bhp_null['TOWN'].fillna(town_mode)
crim_mean = bhp_null['CRIM'].mean()
bhp_null['CRIM'] = bhp_null['CRIM'].fillna(crim_mean)

# The LAT column is removed entirely rather than imputed.
bhp_null.drop(columns='LAT', inplace=True)

# Per-column missing-value counts after cleaning.
bhp_null.isna().sum()



집값 시각화

import seaborn as sns

# Histogram of the CMEDV column.
sns.histplot(x='CMEDV', data=bhp_null)

# Correlation heatmap over the numeric columns, drawn on its own larger figure.
# NOTE(review): `plt` is not imported in this snippet; it is assumed to be
# matplotlib.pyplot imported elsewhere - confirm.
plt.figure(figsize=(12, 7))
numeric_part = bhp_null.select_dtypes(include='number')
corr = numeric_part.corr()
sns.heatmap(data=corr, annot=True)  # annot=True writes each coefficient in its cell

결측치 확인

import seaborn as sns

# Scatter plot of 'mean radius' against 'mean area', colored by 'target'.
# NOTE(review): `bcc` is assumed to be a DataFrame defined elsewhere.
sns.scatterplot(
    x='mean radius',
    y='mean area',
    hue='target',
    data=bcc,
)




------------------------------
Linear regression


# NOTE(review): keras, Sequential, Dense, BatchNormalization, ModelCheckpoint
# and the x_/y_ train/test arrays are not defined in this snippet; they are
# assumed to be imported/prepared elsewhere.

# Reset Keras global state before building a new model.
keras.backend.clear_session()

# Two 32-unit swish layers, each followed by batch normalization, ending in a
# single output unit with no activation.
n_features = x_train.shape[-1]
model = Sequential([
    Dense(32, activation='swish', input_shape=(n_features,)),
    BatchNormalization(),
    Dense(32, activation='swish'),
    BatchNormalization(),
    Dense(1),
])

model.compile(loss='mse', optimizer='adam')

# Keep only the weights from the epoch with the lowest validation loss.
mc = ModelCheckpoint(
    'best_model.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

# NOTE(review): (x_test, y_test) is used as the validation set here, so the
# checkpoint is selected against the test split.
history = model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=200,
    verbose=1,
    validation_data=(x_test, y_test),
    callbacks=[mc],
)


-------------------------------
결측치 평균값

# Replace missing values in each listed column with that column's mean.
# NOTE(review): `bcc_null` is assumed to be a DataFrame defined elsewhere.
for col in ('mean radius', 'mean concavity'):
    col_mean = bcc_null[col].mean()
    bcc_null[col] = bcc_null[col].fillna(col_mean)

About Me

Hostway Korea R&D Center