123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566 |
- import pandas as pd
- from sklearn.linear_model import LinearRegression
- from xgboost import XGBRegressor
def _extrapolate_feature(history_years, history_values, year):
    """Extrapolate one indicator to ``year`` with a linear fit over its history.

    :param history_years: years (ints) of the observed samples
    :param history_values: indicator values aligned with ``history_years``
    :param year: target year passed to the fitted model
    :return: the prediction rounded to 4 decimals
    """
    x_data = [[sample_year] for sample_year in history_years]
    y_data = list(history_values)
    model = LinearRegression()
    model.fit(x_data, y_data)
    # First-pass estimate for the target year, floored at zero.
    pred_data = max(round(model.predict([[year]])[0], 4), 0)
    if year == 2025:
        # The first-pass estimate is stored as the 2024 sample and the model
        # is refit on the three points before predicting again.
        # NOTE(review): the value appended for 2024 is the estimate computed
        # for ``year`` (2025), and the second-pass result is not floored at
        # zero -- confirm both are intended.
        x_data.append([2024])
        y_data.append(pred_data)
        model.fit(x_data, y_data)
        pred_data = round(model.predict([[year]])[0], 4)
    return pred_data


def pipeline(city_name, year, land_type, policy_factors):
    """Main prediction entry point.

    Builds the feature vector for ``land_type`` by extrapolating each feature
    from the city's 2022/2023 records, then feeds it to the XGBoost model
    stored for that land type.

    :param city_name: city name, matched against the "城市" column
    :param year: target year; compared against ints (``2023``, ``2025``)
    :param land_type: land category; must be one of the keys of the model
        path table below
    :param policy_factors: policy factor; when truthy it is converted with
        ``float`` and used directly for the "政策影响" feature
    :return: tuple ``(predicted value, {feature name: feature value})``
    :raises KeyError: if ``land_type`` is not a known land category
    :raises ValueError: if a feature must be extrapolated but the data holds
        no 2022/2023 rows for ``city_name``
    """
    print(f"{city_name},{year},{land_type}")
    land_type2xgb_model_path = {
        "工业用地(万元/㎡)": 'jupyter/industrial_land_xgb_regressor_boston.model',
        "住宅用地(万元/㎡)": "jupyter/residential_land_xgb_regressor_boston.model",
        "商服用地(万元/㎡)": "jupyter/business_land_xgb_regressor_boston.model",
        "其他用地(万元/㎡)": "jupyter/other_land_xgb_regressor_boston.model",
    }
    # Fail fast on an unknown category, before any file or Excel I/O.
    if land_type not in land_type2xgb_model_path:
        raise KeyError(land_type)

    # Only the requested land type's feature list is needed; the file is
    # named after the part of the label before the unit suffix.
    with open(f"jupyter/{land_type.split('(')[0]}", "r", encoding="utf-8") as f:
        feature_names = f.read().split(",")

    # ---------------- 1. Load the yearly indicator table ----------------
    feature_file_path = r"jupyter/浙江省经济和社会指标-汇总.xlsx"
    feature_data = pd.read_excel(feature_file_path, dtype={"年度": str})
    feature_data = feature_data.bfill()
    feature_data = feature_data.set_index(['城市', '年度'])

    # The rows used for extrapolation are the same for every feature, so
    # select them once: this city, years 2022 and 2023, in reversed file order.
    cities = feature_data.index.get_level_values('城市')
    years = feature_data.index.get_level_values('年度')
    history_mask = (cities == city_name) & years.isin(["2022", "2023"])
    history = feature_data[history_mask].iloc[::-1]
    history_years = [int(sample_year)
                     for sample_year in history.index.get_level_values('年度')]

    # ---------------- 2. Predict each feature of the land type ----------------
    feature_list = []
    for sample_feature in feature_names:
        if sample_feature == "政策影响" and policy_factors:
            # A caller-supplied policy factor replaces the extrapolation.
            feature_list.append(round(float(policy_factors), 4))
            continue
        if history.empty:
            raise ValueError(
                f"no 2022/2023 records found for city {city_name!r}")
        feature_list.append(
            _extrapolate_feature(history_years,
                                 history[sample_feature].tolist(),
                                 year))

    # ---------------- 3. Predict the land price for the target year ----------------
    model = XGBRegressor()
    model.load_model(land_type2xgb_model_path[land_type])
    y_pred = model.predict([feature_list])
    if land_type == "商服用地(万元/㎡)" and year > 2023:
        # Commercial land after 2023 gets an additive policy adjustment;
        # a missing policy factor defaults to 0.5.
        effective_factor = policy_factors if policy_factors else 0.5
        y_pred[0] += round(float(effective_factor) * (3 / 5), 4)
    return y_pred[0], dict(zip(feature_names, feature_list))
|