pipeline.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. import pandas as pd
  2. from sklearn.linear_model import LinearRegression
  3. from xgboost import XGBRegressor
  4. def pipeline(city_name, year, land_type, policy_factors):
  5. """
  6. 预测主函数
  7. :param land_type: 土地类别
  8. :param city_name: 城市名
  9. :param year: 年份
  10. :param policy_factors: 政策因子
  11. :return:
  12. """
  13. print(f"{city_name},{year},{land_type}")
  14. land_type_list = ["工业用地(万元/㎡)", "住宅用地(万元/㎡)", "商服用地(万元/㎡)", "其他用地(万元/㎡)"]
  15. land_type2feature_list = {}
  16. for sample_land in land_type_list:
  17. with open(f"jupyter/{sample_land.split('(')[0]}", "r", encoding="utf-8") as f:
  18. land_type2feature_list[sample_land] = f.read().split(",")
  19. # -------------------------------- 1. 读取历年土地基本信息 --------------------
  20. feature_file_path = r"jupyter/浙江省经济和社会指标-汇总.xlsx"
  21. feature_data = pd.read_excel(feature_file_path, dtype={"年度": str})
  22. feature_data.bfill(inplace=True)
  23. feature_data.set_index(['城市', '年度'], inplace=True)
  24. # -------------------------------- 2. 预测土地类别对应的特征 --------------------
  25. feature_list = []
  26. for sample_feature in land_type2feature_list[land_type]:
  27. if sample_feature == "政策影响" and policy_factors:
  28. feature_list.append(round(float(policy_factors), 4))
  29. continue
  30. y_data = [row[sample_feature] for _, row in feature_data.iterrows() if
  31. _[0] == city_name and _[1] in ["2022", "2023"]]
  32. x_data = [[int(_[1])] for _, row in feature_data.iterrows() if
  33. _[0] == city_name and _[1] in ["2022", "2023"]]
  34. y_data = list(reversed(y_data))
  35. x_data = list(reversed(x_data))
  36. model = LinearRegression()
  37. model.fit(x_data, y_data)
  38. pred_feature = model.predict([[year]])
  39. pred_data = max(round(pred_feature[0], 4), 0)
  40. if year == 2025:
  41. x_data.append([2024])
  42. y_data.append(pred_data)
  43. model.fit(x_data, y_data)
  44. pred_feature = model.predict([[year]])
  45. pred_data = round(pred_feature[0], 4)
  46. feature_list.append(pred_data)
  47. # -------------------------------- 3. 预测当年土地类别的地价 --------------------
  48. land_type2xgb_model_path = {"工业用地(万元/㎡)": 'jupyter/industrial_land_xgb_regressor_boston.model',
  49. "住宅用地(万元/㎡)": "jupyter/residential_land_xgb_regressor_boston.model",
  50. "商服用地(万元/㎡)": "jupyter/business_land_xgb_regressor_boston.model",
  51. "其他用地(万元/㎡)": "jupyter/other_land_xgb_regressor_boston.model"}
  52. model = XGBRegressor()
  53. model.load_model(land_type2xgb_model_path[land_type])
  54. y_pred = model.predict([feature_list])
  55. if land_type == "商服用地(万元/㎡)" and year > 2023:
  56. policy_factors = policy_factors if policy_factors else 0.5
  57. y_pred[0] += round(float(policy_factors) * (3 / 5), 4) if policy_factors else 0
  58. return y_pred[0], {land_type2feature_list[land_type][num]: i for num, i in enumerate(feature_list)}