# Predicting computer prices with OLS regression (OLS预测电脑价格)

__author__ = 'liulibo'
import patsy
import statsmodels.api as sm
import pandas as pd
import numpy as np

# Training data location; the CSV must contain every column referenced by
# the formula below.
CSV_PATH = r"/temp/computer_data.csv"
df = pd.read_csv(filepath_or_buffer=CSV_PATH)

# Patsy formula: price (价格) explained by brand (品牌), CPU, memory (内存),
# SSD, mechanical hard disk (机械硬盘), graphics card (显卡), display (显示器)
# and weight (重量).
f = '价格 ~ 品牌 + CPU  + 内存 + SSD + 机械硬盘 + 显卡 + 显示器 + 重量'

# Expand the formula into the response frame (y) and the design matrix (X),
# both returned as pandas DataFrames.
design_frames = patsy.dmatrices(f, data=df, return_type='dataframe')
y, X = design_frames

# Fit ordinary least squares on the expanded design matrix.
ols_model = sm.OLS(endog=y, exog=X)
results = ols_model.fit()

# Report the regression summary, then preview the first design-matrix rows.
print(results.summary())
print(X.head())

# Build one design-matrix row describing the machine whose price we want to
# predict.  Taking the labels straight from X.columns keeps the row aligned
# (same regressors, same order) with the matrix the model was fitted on.
to_pred_idx = X.columns
to_pred_zeros = np.zeros(len(to_pred_idx))

# Start from an all-zero row; any regressor not set below stays at 0.
tpdf = pd.DataFrame(to_pred_zeros, index=to_pred_idx, columns=['value'])

# Regressor values for the target configuration, keyed by design-matrix
# column name (dummy columns use patsy's "<variable>[T.<level>]" naming).
to_pred_values = {
    'Intercept': 1,
    '品牌[T.DELL]': 1,
    'CPU[T.I7-8550U]': 1,
    '显卡[T.Nvidia Geforce MX150]': 1,
    '内存': 16,
    'SSD': 512,
    '机械硬盘': 0,
    '显示器': 15.6,
    '重量': 3,
}

# Assigning through .loc with an unknown label silently appends a new row
# ("setting with enlargement"), which would leave the vector with the wrong
# length and make predict() fail with an obscure shape error.  Reject
# unknown labels up front with a clear message instead.
unknown_labels = [label for label in to_pred_values if label not in to_pred_idx]
if unknown_labels:
    raise KeyError(
        "regressors not present in the design matrix: {}".format(unknown_labels)
    )

for label, value in to_pred_values.items():
    tpdf.loc[label, 'value'] = value

# Predict the price for the single configured row.
print(results.predict(tpdf['value']))

# Dataset: computer_data