Column Transformer
----------------
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the "city" and "gender" columns and pass every other column
# through unchanged. set_output(transform="pandas") asks scikit-learn to
# return a DataFrame instead of a NumPy array.
# NOTE(review): df is not defined in these notes; it is assumed to be a
# DataFrame containing "city" and "gender" columns.
ct = ColumnTransformer(
    transformers=[
        ("ohe", OneHotEncoder(sparse_output=False), ["city", "gender"]),
    ],
    remainder="passthrough",  # Keeps all other columns (like age, salary)
).set_output(transform="pandas")
df_processed = ct.fit_transform(df)
--------------------
--------------------
from sklearn.preprocessing import StandardScaler

# Fit a StandardScaler on df and transform it in one step; the pandas output
# setting makes the result a DataFrame rather than a NumPy array.
# NOTE(review): presumably df holds only numeric columns at this point — confirm.
scaler = StandardScaler().set_output(transform="pandas")
df_scaled = scaler.fit_transform(df)
# House price preprocessing
bhp_null = bhp.copy()  # work on a copy rather than on bhp itself

# Missing-value count per column before imputation (bare expression: the
# result is only displayed in an interactive session).
bhp_null.isnull().sum()

# Fill TOWN with its most frequent value and CRIM with its mean.
bhp_null['TOWN'] = bhp_null['TOWN'].fillna(bhp_null['TOWN'].mode()[0])
bhp_null['CRIM'] = bhp_null['CRIM'].fillna(bhp_null['CRIM'].mean())

# Drop the LAT column entirely instead of imputing it.
bhp_null = bhp_null.drop(columns='LAT')

# Missing-value count per column after the fixes above.
bhp_null.isnull().sum()
집값 시각화
import seaborn as sns

# Histogram of the CMEDV column.
sns.histplot(data=bhp_null, x='CMEDV')

# Annotated heatmap of the pairwise correlations between the numeric columns.
# NOTE(review): plt is not imported anywhere in the visible notes — presumably
# `import matplotlib.pyplot as plt`; confirm and add it where this is run.
plt.figure(figsize=(12, 7))
corr = bhp_null.select_dtypes('number').corr()
sns.heatmap(corr, annot=True)
결측치 확인
import seaborn as sns

# Scatter plot of 'mean radius' against 'mean area', colored by 'target'.
sns.scatterplot(data=bcc, x='mean radius', y='mean area', hue='target')
------------------------------Linear regression
# NOTE(review): keras, Sequential, Dense, BatchNormalization and
# ModelCheckpoint are not imported anywhere in the visible notes — they must
# be imported where this snippet is run.
keras.backend.clear_session()

# Two hidden Dense(32, swish) layers, each followed by batch normalization,
# and a single output unit with no activation.
model = Sequential([
    Dense(32, activation='swish', input_shape=(x_train.shape[-1],)),
    BatchNormalization(),
    Dense(32, activation='swish'),
    BatchNormalization(),
    Dense(1),
])

model.compile(optimizer='adam', loss='mse')

# Write 'best_model.keras' only when val_loss reaches a new minimum.
mc = ModelCheckpoint(
    'best_model.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

# NOTE(review): the (x_test, y_test) pair is used as validation data, so the
# checkpoint is selected on it — confirm a separate hold-out set is not needed.
history = model.fit(
    x_train,
    y_train,
    epochs=200,
    batch_size=32,
    verbose=1,
    validation_data=(x_test, y_test),
    callbacks=[mc],
)
-------------------------------결측치 평균값
# Replace missing values in each column with that column's mean.
bcc_null['mean radius'] = bcc_null['mean radius'].fillna(
    bcc_null['mean radius'].mean()
)
bcc_null['mean concavity'] = bcc_null['mean concavity'].fillna(
    bcc_null['mean concavity'].mean()
)