diff --git a/aisenzhecode/沥青/定性模型数据项12-11.xlsx b/aisenzhecode/沥青/定性模型数据项12-11.xlsx index cf7ddad..7fd829c 100644 Binary files a/aisenzhecode/沥青/定性模型数据项12-11.xlsx and b/aisenzhecode/沥青/定性模型数据项12-11.xlsx differ diff --git a/aisenzhecode/沥青/沥青定性每日执行.py b/aisenzhecode/沥青/沥青定性每日执行.py index 0656f30..43ef7f3 100644 --- a/aisenzhecode/沥青/沥青定性每日执行.py +++ b/aisenzhecode/沥青/沥青定性每日执行.py @@ -1,18 +1,29 @@ +import numpy as np +import logging import requests import json import xlrd import xlwt -from datetime import datetime, timedelta +from datetime import datetime, timedelta import time import pandas as pd pd.set_option('display.max_columns', None) -import numpy as np + +# 配置日志功能 +logging.basicConfig( + filename='沥青定性每日执行.log', # 日志文件名 + level=logging.INFO, # 日志级别,INFO 表示记录所有信息 + format='%(asctime)s - %(levelname)s - %(message)s', # 日志格式 + datefmt='%Y-%m-%d %H:%M:%S' # 日期格式 +) + # 变量定义 login_url = "http://10.200.32.39/jingbo-api/api/server/login" login_push_url = "http://10.200.32.39/jingbo-api/api/server/login" # query_data_list_item_nos_url -search_url = "http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos" #jingbo-dev/api/warehouse/dwDataItem/queryDataListItemNos +# jingbo-dev/api/warehouse/dwDataItem/queryDataListItemNos +search_url = "http://10.200.32.39/jingbo-api/api/warehouse/dwDataItem/queryByItemNos" upload_url = "http://10.200.32.39/jingbo-api/api/dw/dataValue/pushDataValueList" queryDataListItemNos_url = "http://10.200.32.39/jingbo-api//api/warehouse/dwDataItem/queryDataListItemNos" @@ -55,25 +66,40 @@ read_file_path_name = "定性模型数据项12-11.xlsx" one_cols = [] two_cols = [] + def get_head_auth(): - login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5)) - text = json.loads(login_res.text) - if text["status"]: - token = text["data"]["accessToken"] - return token - else: - print("获取认证失败") + try: + login_res = requests.post( + url=login_url, json=login_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + logging.info("成功获取认证 token") + return token + else: + logging.error("获取认证失败,响应信息: %s", login_res.text) + print("获取认证失败") + return None + except Exception as e: + logging.error("获取认证时发生异常: %s", str(e)) return None def get_head_push_auth(): - login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5)) - text = json.loads(login_res.text) - if text["status"]: - token = text["data"]["accessToken"] - return token - else: - print("获取认证失败") + try: + login_res = requests.post( + url=login_push_url, json=login_push_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + logging.info("成功获取推送认证 token") + return token + else: + logging.error("获取推送认证失败,响应信息: %s", login_res.text) + print("获取认证失败") + return None + except Exception as e: + logging.error("获取推送认证时发生异常: %s", str(e)) return None @@ -88,1030 +114,858 @@ def update_e_value(file_path, column_index, threshold): :param column_index: 需要更新的列索引 :param threshold: 变化阈值 """ - # 读取Excel文件 - # try: - # df = pd.read_excel(file_path, engine='openpyxl') - # except: - # df = pd.read_excel(file_path, engine='xlrd') - - df = pd.read_excel(file_path) - # 所有列列统一数据格式为float - df = df.applymap(lambda x: float(x) if isinstance(x, (int, float)) else x) - - # print(df.tail()) - # 填充缺失值 - df = df.fillna(method='ffill') + try: + logging.info("开始更新 Excel 文件中指定列的值,文件路径: %s", file_path) + df = pd.read_excel(file_path) + df = df.applymap(lambda x: float( + x) if isinstance(x, (int, float)) else x) + df = df.fillna(method='ffill') + df1 = df[-3:-1] + previous_value = df1.iloc[0, column_index] + current_value = df1.iloc[1, column_index] + if abs(current_value - previous_value) > threshold: + df.iloc[-2, column_index] = previous_value + logging.info("指定列值变化大于阈值,已将当前值修改为前一天的值") + df.to_excel(file_path, index=False, engine='openpyxl') + logging.info("Excel 文件更新完成") + except Exception as e: + logging.error("更新 Excel 文件时发生异常: %s", str(e)) - # 获取昨天,前天数据 - df1 = df[-3:-1] - print(df1) - # 获取前一天的指定列值 - previous_value = df1.iloc[0, column_index] - print('前一天的',previous_value,type(previous_value)) - # 获取当前的指定列值 - current_value = df1.iloc[1, column_index] - print('现在的',current_value,type(current_value)) - # 判断指定列值的变化是否大于阈值 - if abs(current_value - previous_value) > threshold: - # 如果变化大于阈值,将当前的指定列值改为前一天的值 - df.iloc[-2, column_index] = previous_value - print('修改了') - # print(df.tail()) - # 将修改后的数据写回Excel文件 - df.to_excel(file_path, index=False,engine='openpyxl') def getLogToken(): - login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5)) - text = json.loads(login_res.text) - if text["status"]: - token = text["data"]["accessToken"] - else: - print("获取认证失败") - token = None - return token - -def updateExcelDatabak(date='',token=None): - workbook = xlrd.open_workbook(read_file_path_name) - - # 选择第一个表格 - sheet = workbook.sheet_by_index(0) - - row_data = sheet.row_values(1) - one_cols = row_data - - cur_time,cur_time2 = getNow(date) - search_data = { - "data": { - "date": cur_time, - "dataItemNoList": one_cols[1:] - }, - "funcModule": "数据项", - "funcOperation": "查询" - } - headers = {"Authorization": token} - search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) - search_value = json.loads(search_res.text)["data"] -# datas = search_value - if search_value: - datas = search_value - else : - datas = None - - - append_rows = [cur_time2] - dataItemNo_dataValue = {} -# for data_value in datas: -# dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] - for data_value in datas: - if "dataValue" not in data_value: - print(data_value) - dataItemNo_dataValue[data_value["dataItemNo"]] = "" + try: + login_res = requests.post( + url=login_url, json=login_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + logging.info("成功获取日志 token") + return token else: - dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] - for value in one_cols[1:]: - if value in dataItemNo_dataValue: - append_rows.append(dataItemNo_dataValue[value]) - else: - append_rows.append("") - - workbook = xlrd.open_workbook('定性模型数据项12-11.xlsx') - - # 获取所有sheet的个数 - sheet_count = len(workbook.sheet_names()) - - # 获取所有sheet的名称 - sheet_names = workbook.sheet_names() - - new_workbook = xlwt.Workbook() - for i in range(sheet_count): - # 获取当前sheet - sheet = workbook.sheet_by_index(i) - - # 获取sheet的行数和列数 - row_count = sheet.nrows - col_count = sheet.ncols - # 获取原有数据 - data = [] - for row in range(row_count): - row_data = [] - for col in range(col_count): - row_data.append(sheet.cell_value(row, col)) - data.append(row_data) - # 创建xlwt的Workbook对象 - # 创建sheet - new_sheet = new_workbook.add_sheet(sheet_names[i]) - - # 将原有的数据写入新的sheet - for row in range(row_count): - for col in range(col_count): - new_sheet.write(row, col, data[row][col]) - - if i == 0: - - # 在新的sheet中添加数据 - for col in range(col_count): - new_sheet.write(row_count, col, append_rows[col]) - - # 保存新的xls文件 - new_workbook.save("定性模型数据项12-11.xlsx") - -def updateYesterdayExcelData(date='', token=None): - # 使用pandas读取Excel文件 - df = pd.read_excel(read_file_path_name, engine='openpyxl') - - # 获取第二行的数据作为列名 - one_cols = df.iloc[0,:].tolist() - - # 获取当前日期的前一天 - if date == '': - previous_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d') - else: - # 字符串转日期 - previous_date = (datetime.strptime(date, "%Y-%m-%d")-timedelta(days=1)).strftime('%Y-%m-%d') - - - cur_time, cur_time2 = getNow(previous_date) - search_data = { - "data": { - "date": cur_time, - "dataItemNoList": one_cols[1:] - }, - "funcModule": "数据项", - "funcOperation": "查询" - } - headers = {"Authorization": token} - search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) - search_value = json.loads(search_res.text)["data"] - if search_value: - datas = search_value - else: - datas = None - - append_rows = [cur_time2] - dataItemNo_dataValue = {} - for data_value in datas: - if "dataValue" not in data_value: - print(data_value) - dataItemNo_dataValue[data_value["dataItemNo"]] = "" - else: - dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] - for value in one_cols[1:]: - if value in dataItemNo_dataValue: - append_rows.append(dataItemNo_dataValue[value]) - else: - append_rows.append("") - - print('更新数据前') - print(df.tail(1)) - # 检查日期是否已存在于数据中 - if previous_date not in df['日期'].values: - # 将新的数据添加到DataFrame中 - new_row = pd.DataFrame([append_rows], columns=df.columns.tolist()) - df = pd.concat([df, new_row], ignore_index=True) - else: - # 更新现有数据 - print('日期存在,即将更新') - print('新数据',append_rows[1:]) - df.loc[df['日期'] == previous_date, df.columns.tolist()[1:]] = append_rows[1:] - - print('更新数据后') - print(df.tail(1)) - # 使用pandas保存Excel文件 - df.to_excel("定性模型数据项12-11.xlsx", index=False, engine='openpyxl') - - -def updateExcelData(date='', token=None): - # 使用pandas读取Excel文件 - df = pd.read_excel(read_file_path_name, engine='openpyxl') - - # 获取第一行的数据作为列名 - # one_cols = df.columns.tolist() - - # 获取第二行的数据作为列名 - one_cols = df.iloc[0,:].tolist() - - cur_time, cur_time2 = getNow(date) - search_data = { - "data": { - "date": cur_time, - "dataItemNoList": one_cols[1:] - }, - "funcModule": "数据项", - "funcOperation": "查询" - } - headers = {"Authorization": token} - search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) - search_value = json.loads(search_res.text)["data"] - if search_value: - datas = search_value - else: - datas = None - - append_rows = [cur_time2] - dataItemNo_dataValue = {} - for data_value in datas: - if "dataValue" not in data_value: - print(data_value) - dataItemNo_dataValue[data_value["dataItemNo"]] = "" - else: - dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] - for value in one_cols[1:]: - if value in dataItemNo_dataValue: - append_rows.append(dataItemNo_dataValue[value]) - else: - append_rows.append("") - - # 将新的数据添加到DataFrame中 - new_row = pd.DataFrame([append_rows], columns=df.columns.tolist()) - df = pd.concat([df, new_row], ignore_index=True) - # df = df.append(pd.Series(append_rows), ignore_index=True) - - # 使用pandas保存Excel文件 - df.to_excel("定性模型数据项12-11.xlsx", index=False, engine='openpyxl') - - -def qualitativeModel(): - df = pd.read_excel('定性模型数据项12-11.xlsx') - - df=df.fillna(df.ffill()) - df1 = df[-3:-1].reset_index() - print(df1) - ''' - # if df1.loc[1,'70号沥青开工率'] > 0.3: - 2025年1月8日 修改: - 复盘分析后发现2024-7月开始,开工率数据从0.28 变为了28 ,改为下面的判断规则 - ''' - if df1.loc[1,'70号沥青开工率'] / 100 > 0.3: - a = -(df1.loc[1,'70号沥青开工率'] / 100 -0.2)*5/0.1 - else : - a = 0 - b = df1.loc[1,'资金因素'] - - print('昨日计划提货偏差改之前',df1.loc[1,'昨日计划提货偏差']) - # 昨日计划提货偏差 = 京博产量 - 计划产量 - df1.loc[1,'昨日计划提货偏差'] = df1.loc[1,'京博产量'] - df1.loc[1,'计划产量'] - - print('昨日计划提货偏差改之后',df1.loc[1,'昨日计划提货偏差']) - if df1.loc[1,'昨日计划提货偏差']>0: - c = df1.loc[1,'昨日计划提货偏差']*10/2000 - else : - c = df1.loc[1,'昨日计划提货偏差']*10/3000 - - # 生产情况 = (京博产量 - 计划产量)/500*5 - d = (df1.loc[1,'京博产量'] - df1.loc[1,'计划产量']) / 500 * 5 - - if df1.loc[1,'基质沥青库存']/265007 >0.8: - e = (df1.loc[1,'基质沥青库存'] - df1.loc[0,'基质沥青库存'])*10/-5000 - else : - e = 0 -# f = df1.loc[1,'下游客户价格预期'] - f = 1 # 2025年1月23日修改:价格预期都按1计算 - if abs(df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])>=100: - g = (df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])*50/100 - else : - g = 0 - h = df1.loc[1,'订单结构'] - x = round(0.08*a+0*b+0.15*c+0.08*d +0.03*e +0.08*f +0.4*g+0.18*h+df1.loc[0,'京博指导价'],2) - return x - - -def getNow(date='',offset=0): - if date == '': - now = datetime.now() - timedelta(days=offset) - else: - try: - date = datetime.strptime(date, "%Y-%m-%d") - except: - date = datetime.strptime(date, "%Y%m%d") - now = date - - year = now.year - month = now.month - day = now.day - - if month < 10: - month = "0" + str(month) - if day < 10: - day = "0" + str(day) - cur_time = str(year) + str(month) + str(day) - cur_time2 = str(year) + "-" + str(month) + "-" + str(day) - return cur_time,cur_time2 - -def pushData(cur_time,x,token_push): - data1 = { - "funcModule": "数据表信息列表", - "funcOperation": "新增", - "data": [ - {"dataItemNo": "C01100036|Forecast_Price|DX|ACN", - "dataDate": cur_time, - "dataStatus": "add", - "dataValue": x - } - ] - } - headers1 = {"Authorization": token_push} - res = requests.post(url=upload_url, headers=headers1, json=data1, timeout=(3, 5)) - -def start_2(date='',token=None): - workbook = xlrd.open_workbook(read_file_path_name) - - # 选择第一个表格 - sheet = workbook.sheet_by_index(0) - - # 获取行数和列数 - num_rows = sheet.nrows - - row_data = sheet.row_values(1) - one_cols = row_data - - cur_time,cur_time2 = getNow(date) - - - - search_data = { - "data": { - "date": cur_time, - "dataItemNoList": one_cols[1:] - }, - "funcModule": "数据项", - "funcOperation": "查询" - } - headers = {"Authorization": token} - search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) - search_value = json.loads(search_res.text)["data"] -# datas = search_value - if search_value: - datas = search_value - else : - datas = None - - - append_rows = [cur_time2] - dataItemNo_dataValue = {} -# for data_value in datas: -# dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] - for data_value in datas: - if "dataValue" not in data_value: - print(data_value) - dataItemNo_dataValue[data_value["dataItemNo"]] = "" - else: - dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] - for value in one_cols[1:]: - if value in dataItemNo_dataValue: - append_rows.append(dataItemNo_dataValue[value]) - else: - append_rows.append("") - - workbook = xlrd.open_workbook('定性模型数据项12-11.xlsx') - - # 获取所有sheet的个数 - sheet_count = len(workbook.sheet_names()) - - # 获取所有sheet的名称 - sheet_names = workbook.sheet_names() - - new_workbook = xlwt.Workbook() - for i in range(sheet_count): - # 获取当前sheet - sheet = workbook.sheet_by_index(i) - - # 获取sheet的行数和列数 - row_count = sheet.nrows - col_count = sheet.ncols - # 获取原有数据 - data = [] - for row in range(row_count): - row_data = [] - for col in range(col_count): - row_data.append(sheet.cell_value(row, col)) - data.append(row_data) - # 创建xlwt的Workbook对象 - # 创建sheet - new_sheet = new_workbook.add_sheet(sheet_names[i]) - - # 将原有的数据写入新的sheet - for row in range(row_count): - for col in range(col_count): - new_sheet.write(row, col, data[row][col]) - - if i == 0: - - # 在新的sheet中添加数据 - for col in range(col_count): - new_sheet.write(row_count, col, append_rows[col]) - - # 保存新的xls文件 - new_workbook.save("定性模型数据项12-11.xlsx") - - update_e_value('定性模型数据项12-11.xlsx', 8, 1000) - - df = pd.read_excel('定性模型数据项12-11.xlsx') - - df=df.fillna(df.ffill()) - df1 = df[-2:].reset_index() - ''' - # if df1.loc[1,'70号沥青开工率'] > 0.3: - 2025年1月8日 修改: - 复盘分析后发现2024-7月开始,开工率数据从0.28 变为了28 ,改为下面的判断规则 - ''' - if df1.loc[1,'70号沥青开工率'] > 30: - a = (df1.loc[1,'70号沥青开工率']-0.2)*5/0.1 - else : - a = 0 - b = df1.loc[1,'资金因素'] - if df1.loc[1,'昨日计划提货偏差']>0: - c = df1.loc[1,'昨日计划提货偏差']*10/2000 - else : - c = df1.loc[1,'昨日计划提货偏差']*10/3000 - d = df1.loc[1,'生产情况'] - if df1.loc[1,'基质沥青库存']/265007 >0.8: - e = (df1.loc[1,'基质沥青库存'] - df1.loc[0,'基质沥青库存'])*10/-5000 - else : - e = 0 -# f = df1.loc[1,'下游客户价格预期'] - f = 1 # 2025年1月23日修改:价格预期都按1计算 - if abs(df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])>=100: - g = (df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])*50/100 - else : - g = 0 - h = df1.loc[1,'订单结构'] - x = round(0.08*a+0*b+0.15*c+0.08*d +0.03*e +0.08*f +0.4*g+0.18*h+df1.loc[0,'京博指导价'],2) - - login_res1 = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5)) - text1 = json.loads(login_res1.text) - token_push = text1["data"]["accessToken"] - - - data1 = { - "funcModule": "数据表信息列表", - "funcOperation": "新增", - "data": [ - {"dataItemNo": "C01100036|Forecast_Price|DX|ACN", - "dataDate": cur_time, - "dataStatus": "add", - "dataValue": x - } - - ] - } - headers1 = {"Authorization": token_push} - # res = requests.post(url=upload_url, headers=headers1, json=data1, timeout=(3, 5)) - - -def start(now=None): - workbook = xlrd.open_workbook(read_file_path_name) - - - - # 选择第一个表格 - sheet = workbook.sheet_by_index(0) - - # 获取行数和列数 - num_rows = sheet.nrows - - - - row_data = sheet.row_values(1) - one_cols = row_data - - - login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5)) - text = json.loads(login_res.text) - if text["status"]: - token = text["data"]["accessToken"] - else: - print("获取认证失败") - token = None - - if now is None: - now = datetime.now() - year = now.year - month = now.month - day = now.day - - if month < 10: - month = "0" + str(month) - if day < 10: - day = "0" + str(day) - cur_time = str(year) + str(month) + str(day) - cur_time2 = str(year) + "-" + str(month) + "-" + str(day) - search_data = { - "data": { - "date": cur_time, - "dataItemNoList": one_cols[1:] - }, - "funcModule": "数据项", - "funcOperation": "查询" - } - headers = {"Authorization": token} - search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) - search_value = json.loads(search_res.text)["data"] -# datas = search_value - if search_value: - datas = search_value - else : - datas = None - - - append_rows = [cur_time2] - dataItemNo_dataValue = {} -# for data_value in datas: -# dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] - for data_value in datas: - if "dataValue" not in data_value: - print(data_value) - dataItemNo_dataValue[data_value["dataItemNo"]] = "" - else: - dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] - for value in one_cols[1:]: - if value in dataItemNo_dataValue: - append_rows.append(dataItemNo_dataValue[value]) - else: - append_rows.append("") - - workbook = xlrd.open_workbook('定性模型数据项12-11.xlsx') - - # 获取所有sheet的个数 - sheet_count = len(workbook.sheet_names()) - - # 获取所有sheet的名称 - sheet_names = workbook.sheet_names() - - new_workbook = xlwt.Workbook() - for i in range(sheet_count): - # 获取当前sheet - sheet = workbook.sheet_by_index(i) - - # 获取sheet的行数和列数 - row_count = sheet.nrows - col_count = sheet.ncols - # 获取原有数据 - data = [] - for row in range(row_count): - row_data = [] - for col in range(col_count): - row_data.append(sheet.cell_value(row, col)) - data.append(row_data) - # 创建xlwt的Workbook对象 - # 创建sheet - new_sheet = new_workbook.add_sheet(sheet_names[i]) - - # 将原有的数据写入新的sheet - for row in range(row_count): - for col in range(col_count): - new_sheet.write(row, col, data[row][col]) - - if i == 0: - # 在新的sheet中添加数据 - for col in range(col_count): - new_sheet.write(row_count, col, append_rows[col]) - - # 保存新的xls文件 - new_workbook.save("定性模型数据项12-11.xlsx") - - - update_e_value('定性模型数据项12-11.xlsx', 8, 1000) - - df = pd.read_excel('定性模型数据项12-11.xlsx') - df=df.fillna(df.ffill()) - df1 = df[-2:].reset_index() - print(df1) - # if df1.loc[1,'70号沥青开工率'] > 0.3: -- 2025年1月9日 发版更改 - if df1.loc[1,'70号沥青开工率'] / 100 > 0.3: - a = (df1.loc[1,'70号沥青开工率'] / 100 -0.2)*5/0.1 - else : - a = 0 - b = df1.loc[1,'资金因素'] - if df1.loc[1,'昨日计划提货偏差']>0: - c = df1.loc[1,'昨日计划提货偏差']*10/2000 - else : - c = df1.loc[1,'昨日计划提货偏差']*10/3000 - d = df1.loc[1,'生产情况'] - if df1.loc[1,'基质沥青库存']/265007 >0.8: - e = (df1.loc[1,'基质沥青库存'] - df1.loc[0,'基质沥青库存'])*10/-5000 - else : - e = 0 -# f = df1.loc[1,'下游客户价格预期'] - f = 1 # 2025年1月23日修改:价格预期都按1计算 - if abs(df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])>=100: - g = (df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])*50/100 - else : - g = 0 - h = df1.loc[1,'订单结构'] - x = round(0.08*a+0*b+0.15*c+0.08*d +0.03*e +0.08*f +0.4*g+0.18*h+df1.loc[0,'京博指导价'],2) - - - # login_res1 = requests.post(url=login_url, json=login_data, timeout=(3, 30)) - # text1 = json.loads(login_res1.text) - # token_push = text1["data"]["accessToken"] - - - # data1 = { - # "funcModule": "数据表信息列表", - # "funcOperation": "新增", - # "data": [ - # {"dataItemNo": "C01100036|Forecast_Price|DX|ACN", - # "dataDate": cur_time, - # "dataStatus": "add", - # "dataValue": x - # } - - # ] - # } - # headers1 = {"Authorization": token_push} - # res = requests.post(url=upload_url, headers=headers1, json=data1, timeout=(3, 5)) - - - -def start_test(): - workbook = xlrd.open_workbook(read_file_path_name) - - - - # 选择第一个表格 - sheet = workbook.sheet_by_index(0) - - # 获取行数和列数 - num_rows = sheet.nrows - - - - row_data = sheet.row_values(1) - one_cols = row_data - - - login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5)) - text = json.loads(login_res.text) - if text["status"]: - token = text["data"]["accessToken"] - else: - print("获取认证失败") - token = None - - - now = datetime.now() - year = now.year - month = now.month - day = now.day - - if month < 10: - month = "0" + str(month) - if day < 10: - day = "0" + str(day) - cur_time = str(year) + str(month) + str(day) - cur_time2 = str(year) + "-" + str(month) + "-" + str(day) - search_data = { - "data": { - "date": cur_time, - "dataItemNoList": one_cols[1:] - }, - "funcModule": "数据项", - "funcOperation": "查询" - } - headers = {"Authorization": token} - search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) - search_value = json.loads(search_res.text)["data"] -# datas = search_value - if search_value: - datas = search_value - else : - datas = None - - - append_rows = [cur_time2] - dataItemNo_dataValue = {} -# for data_value in datas: -# dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] - for data_value in datas: - if "dataValue" not in data_value: - print(data_value) - dataItemNo_dataValue[data_value["dataItemNo"]] = "" - else: - dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] - for value in one_cols[1:]: - if value in dataItemNo_dataValue: - append_rows.append(dataItemNo_dataValue[value]) - else: - append_rows.append("") - - workbook = xlrd.open_workbook('定性模型数据项12-11.xlsx') - - # 获取所有sheet的个数 - sheet_count = len(workbook.sheet_names()) - - # 获取所有sheet的名称 - sheet_names = workbook.sheet_names() - - new_workbook = xlwt.Workbook() - for i in range(sheet_count): - # 获取当前sheet - sheet = workbook.sheet_by_index(i) - - # 获取sheet的行数和列数 - row_count = sheet.nrows - col_count = sheet.ncols - # 获取原有数据 - data = [] - for row in range(row_count): - row_data = [] - for col in range(col_count): - row_data.append(sheet.cell_value(row, col)) - data.append(row_data) - # 创建xlwt的Workbook对象 - # 创建sheet - new_sheet = new_workbook.add_sheet(sheet_names[i]) - - # 将原有的数据写入新的sheet - for row in range(row_count): - for col in range(col_count): - new_sheet.write(row, col, data[row][col]) - - if i == 0: - # 在新的sheet中添加数据 - for col in range(col_count): - new_sheet.write(row_count, col, append_rows[col]) - - # 保存新的xls文件 - new_workbook.save("定性模型数据项12-11.xlsx") - - update_e_value('定性模型数据项12-11.xlsx', 8, 1000) - - df = pd.read_excel('定性模型数据项12-11.xlsx') - df=df.fillna(df.ffill()) - df1 = df[-2:].reset_index() - # if df1.loc[1,'70号沥青开工率'] > 0.3: -- 2025年1月9日 发版更改 - if df1.loc[1,'70号沥青开工率'] / 100 > 0.3: - a = (df1.loc[1,'70号沥青开工率'] / 100 -0.2)*5/0.1 - else : - a = 0 - b = df1.loc[1,'资金因素'] - if df1.loc[1,'昨日计划提货偏差']>0: - c = df1.loc[1,'昨日计划提货偏差']*10/2000 - else : - c = df1.loc[1,'昨日计划提货偏差']*10/3000 - d = df1.loc[1,'生产情况'] - if df1.loc[1,'基质沥青库存']/265007 >0.8: - e = (df1.loc[1,'基质沥青库存'] - df1.loc[0,'基质沥青库存'])*10/-5000 - else : - e = 0 -# f = df1.loc[1,'下游客户价格预期'] - f = 1 # 2025年1月23日修改:价格预期都按1计算 - if abs(df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])>=100: - g = (df1.loc[1,'即期成本'] - df1.loc[0,'即期成本'])*50/100 - else : - g = 0 - h = df1.loc[1,'订单结构'] - x = round(0.08*a+0*b+0.15*c+0.08*d +0.03*e +0.08*f +0.4*g+0.18*h+df1.loc[0,'京博指导价'],2) - - - # login_res1 = requests.post(url=login_url, json=login_data, timeout=(3, 30)) - # text1 = json.loads(login_res1.text) - # token_push = text1["data"]["accessToken"] - - - # data1 = { - # "funcModule": "数据表信息列表", - # "funcOperation": "新增", - # "data": [ - # {"dataItemNo": "C01100036|Forecast_Price|DX|ACN", - # "dataDate": cur_time, - # "dataStatus": "add", - # "dataValue": x - # } - - # ] - # } - # headers1 = {"Authorization": token_push} - # res = requests.post(url=upload_url, headers=headers1, json=data1, timeout=(3, 5)) - - - - - -def start_1(): - workbook = xlrd.open_workbook(read_file_path_name) - - - - # 选择第一个表格 - sheet = workbook.sheet_by_index(0) - - # 获取行数和列数 - num_rows = sheet.nrows - - - - row_data = sheet.row_values(1) - one_cols = row_data - - - login_res = requests.post(url=login_url, json=login_data, timeout=(3, 5)) - text = json.loads(login_res.text) - if text["status"]: - token = text["data"]["accessToken"] - else: - print("获取认证失败") - token = None - - - now = datetime.now() - timedelta(days=1) - year = now.year - month = now.month - day = now.day - - if month < 10: - month = "0" + str(month) - if day < 10: - day = "0" + str(day) - cur_time = str(year) + str(month) + str(day) - cur_time2 = str(year) + "-" + str(month) + "-" + str(day) - search_data = { - "data": { - "date": cur_time, - "dataItemNoList": one_cols[1:] - }, - "funcModule": "数据项", - "funcOperation": "查询" - } - headers = {"Authorization": token} - search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) - search_value = json.loads(search_res.text)["data"] -# datas = search_value - if search_value: - datas = search_value - else : - datas = None - - - - append_rows = [cur_time2] - dataItemNo_dataValue = {} -# for data_value in datas: -# dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] - for data_value in datas: - if "dataValue" not in data_value: - print(data_value) - dataItemNo_dataValue[data_value["dataItemNo"]] = "" - else: - dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] - for value in one_cols[1:]: - if value in dataItemNo_dataValue: - append_rows.append(dataItemNo_dataValue[value]) - else: - append_rows.append("") - - workbook = xlrd.open_workbook('定性模型数据项12-11.xlsx') - - # 获取所有sheet的个数 - sheet_count = len(workbook.sheet_names()) - - # 获取所有sheet的名称 - sheet_names = workbook.sheet_names() - - new_workbook = xlwt.Workbook() - for i in range(sheet_count): - # 获取当前sheet - sheet = workbook.sheet_by_index(i) - - # 获取sheet的行数和列数 - row_count = sheet.nrows - 1 - col_count = sheet.ncols - # 获取原有数据 - data = [] - for row in range(row_count): - row_data = [] - for col in range(col_count): - row_data.append(sheet.cell_value(row, col)) - data.append(row_data) - # 创建xlwt的Workbook对象 - # 创建sheet - new_sheet = new_workbook.add_sheet(sheet_names[i]) - - # 将原有的数据写入新的sheet - for row in range(row_count): - for col in range(col_count): - new_sheet.write(row, col, data[row][col]) - - if i == 0: - # 在新的sheet中添加数据 - for col in range(col_count): - new_sheet.write(row_count, col, append_rows[col]) - - # 保存新的xls文件 - new_workbook.save("定性模型数据项12-11.xlsx") - - - - - - - -def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd): - - search_data = { - "funcModule": "数据项", - "funcOperation": "查询", - "data": { - "dateStart": dateStart, - "dateEnd": dateEnd, - "dataItemNoList": dataItemNoList # 数据项编码,代表 brent最低价和最高价 - } - } - - headers = {"Authorization": token} - search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5)) - search_value = json.loads(search_res.text)["data"] - if search_value: - return search_value - else: + logging.error("获取日志 token 失败,响应信息: %s", login_res.text) + print("获取认证失败") + token = None + return token + except Exception as e: + logging.error("获取日志 token 时发生异常: %s", str(e)) return None - -def save_queryDataListItemNos_xls(data_df,dataItemNoList): - - current_year_month = datetime.now().strftime('%Y-%m') - grouped = data_df.groupby("dataDate") +def updateExcelDatabak(date='', token=None): + try: + logging.info("开始备份更新 Excel 数据,日期: %s", date) + workbook = xlrd.open_workbook(read_file_path_name) + sheet = workbook.sheet_by_index(0) + row_data = sheet.row_values(1) + one_cols = row_data + cur_time, cur_time2 = getNow(date) + search_data = { + "data": { + "date": cur_time, + "dataItemNoList": one_cols[1:] + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post( + url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + datas = search_value + else: + datas = None + append_rows = [cur_time2] + dataItemNo_dataValue = {} + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"] + ] = data_value["dataValue"] + for value in one_cols[1:]: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + workbook = xlrd.open_workbook('定性模型数据项12-11.xlsx') + sheet_count = len(workbook.sheet_names()) + sheet_names = workbook.sheet_names() + new_workbook = xlwt.Workbook() + for i in range(sheet_count): + sheet = workbook.sheet_by_index(i) + row_count = sheet.nrows + col_count = sheet.ncols + data = [] + for row in range(row_count): + row_data = [] + for col in range(col_count): + row_data.append(sheet.cell_value(row, col)) + data.append(row_data) + new_sheet = new_workbook.add_sheet(sheet_names[i]) + for row in range(row_count): + for col in range(col_count): + new_sheet.write(row, col, data[row][col]) + if i == 0: + for col in range(col_count): + new_sheet.write(row_count, col, append_rows[col]) + new_workbook.save("定性模型数据项12-11.xlsx") + logging.info("备份更新 Excel 数据完成") + except Exception as e: + logging.error("备份更新 Excel 数据时发生异常: %s", str(e)) - df_old = pd.read_excel('定性模型数据项12-11.xlsx') - df_old0 = df_old[:1] - result_dict = {df_old0.iloc[0][col] : col for col in df_old0.columns} - df_old1 = df_old[1:].copy() - - df_old1["日期"] = pd.to_datetime(df_old1["日期"]) - # 删除日期列为本月的数据 - df_old1 = df_old1[~df_old1["日期"].dt.strftime('%Y-%m').eq(current_year_month)] - df_old1["日期"] = df_old1["日期"].dt.strftime('%Y-%m-%d') +def updateYesterdayExcelData(date='', token=None): + try: + logging.info("开始更新昨天的 Excel 数据,日期: %s", date) + df = pd.read_excel(read_file_path_name, engine='openpyxl') + one_cols = df.iloc[0, :].tolist() + if date == '': + previous_date = (datetime.now() - timedelta(days=1) + ).strftime('%Y-%m-%d') + else: + previous_date = (datetime.strptime(date, "%Y-%m-%d") - + timedelta(days=1)).strftime('%Y-%m-%d') + cur_time, cur_time2 = getNow(previous_date) + search_data = { + "data": { + "date": cur_time, + "dataItemNoList": one_cols[1:] + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post( + url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + datas = search_value + else: + datas = None + append_rows = [cur_time2] + dataItemNo_dataValue = {} + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"] + ] = data_value["dataValue"] + for value in one_cols[1:]: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + print('更新数据前') + print(df.tail(1)) + if previous_date not in df['日期'].values: + new_row = pd.DataFrame([append_rows], columns=df.columns.tolist()) + df = pd.concat([df, new_row], ignore_index=True) + else: + print('日期存在,即将更新') + print('新数据', append_rows[1:]) + df.loc[df['日期'] == previous_date, + df.columns.tolist()[1:]] = append_rows[1:] + print('更新数据后') + print(df.tail(1)) + df.to_excel("定性模型数据项12-11.xlsx", index=False, engine='openpyxl') + logging.info("更新昨天的 Excel 数据完成") + except Exception as e: + logging.error("更新昨天的 Excel 数据时发生异常: %s", str(e)) - list_data = [] - for date, group in grouped: - dict_data = {"日期": date} - for index, row in group.iterrows(): - dict_data[result_dict[row['dataItemNo']]] = row['dataValue'] - list_data.append(dict_data) - - df_current_year_month = pd.DataFrame(list_data) - df_current_year_month - - df_merged = pd.concat([df_old0, df_old1, df_current_year_month], ignore_index=True) - - df_merged.to_excel('定性模型数据项12-11.xlsx', index=False) +def updateExcelData(date='', token=None): + try: + logging.info("开始更新 Excel 数据,日期: %s", date) + df = pd.read_excel(read_file_path_name, engine='openpyxl') + one_cols = df.iloc[0, :].tolist() + cur_time, cur_time2 = getNow(date) + search_data = { + "data": { + "date": cur_time, + "dataItemNoList": one_cols[1:] + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post( + url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + datas = search_value + else: + datas = None + append_rows = [cur_time2] + dataItemNo_dataValue = {} + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"] + ] = data_value["dataValue"] + for value in one_cols[1:]: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + new_row = pd.DataFrame([append_rows], columns=df.columns.tolist()) + df = pd.concat([df, new_row], ignore_index=True) + df.to_excel("定性模型数据项12-11.xlsx", index=False, engine='openpyxl') + logging.info("更新 Excel 数据完成") + except Exception as e: + logging.error("更新 Excel 数据时发生异常: %s", str(e)) +def qualitativeModel(): + try: + logging.info("开始执行定性模型计算") + df = pd.read_excel('定性模型数据项12-11.xlsx') + df = df.fillna(df.ffill()) + df1 = df[-3:-1].reset_index() + if df1.loc[1, '70号沥青开工率'] / 100 > 0.3: + a = -(df1.loc[1, '70号沥青开工率'] / 100 - 0.2)*5/0.1 + else: + a = 0 + b = df1.loc[1, '资金因素'] + df1.loc[1, '昨日计划提货偏差'] = df1.loc[1, '京博产量'] - df1.loc[1, '计划产量'] + if df1.loc[1, '昨日计划提货偏差'] > 0: + c = df1.loc[1, '昨日计划提货偏差']*10/2000 + else: + c = df1.loc[1, '昨日计划提货偏差']*10/3000 + d = (df1.loc[1, '京博产量'] - df1.loc[1, '计划产量']) / 500 * 5 + if df1.loc[1, '基质沥青库存']/265007 > 0.8: + e = (df1.loc[1, '基质沥青库存'] - df1.loc[0, '基质沥青库存'])*10/-5000 + else: + e = 0 + f = 1 + if abs(df1.loc[1, '即期成本'] - df1.loc[0, '即期成本']) >= 100: + g = (df1.loc[1, '即期成本'] - df1.loc[0, '即期成本'])*50/100 + else: + g = 0 + h = df1.loc[1, '订单结构'] + x = round(0.08*a+0*b+0.15*c+0.08*d + 0.03*e + 0.08 * + f + 0.4*g+0.18*h+df1.loc[0, '京博指导价'], 2) + logging.info("定性模型计算完成,预测结果: %s", x) + return x + except Exception as e: + logging.error("定性模型计算时发生异常: %s", str(e)) + return None -def queryDataListItemNos(date=None,token=None): - df = pd.read_excel('定性模型数据项12-11.xlsx') - dataItemNoList = df.iloc[0].tolist()[1:] - if token is None: - token = getLogToken() +def getNow(date='', offset=0): + try: + if date == '': + now = datetime.now() - timedelta(days=offset) + else: + try: + date = datetime.strptime(date, "%Y-%m-%d") + except: + date = datetime.strptime(date, "%Y%m%d") + now = date + year = now.year + month = now.month + day = now.day + if month < 10: + month = "0" + str(month) + if day < 10: + day = "0" + str(day) + cur_time = str(year) + str(month) + str(day) + cur_time2 = str(year) + "-" + str(month) + "-" + str(day) + return cur_time, cur_time2 + except Exception as e: + logging.error("获取当前日期时发生异常: %s", str(e)) + return None, None + + +def pushData(cur_time, x, token_push): + try: + logging.info("开始推送数据,日期: %s,预测值: %s", cur_time, x) + data1 = { + "funcModule": "数据表信息列表", + "funcOperation": "新增", + "data": [ + {"dataItemNo": "C01100036|Forecast_Price|DX|ACN", + "dataDate": cur_time, + "dataStatus": "add", + "dataValue": x + } + ] + } + headers1 = {"Authorization": token_push} + res = requests.post(url=upload_url, headers=headers1, + json=data1, timeout=(3, 5)) + logging.info("数据推送完成,响应信息: %s", res.text) + except Exception as e: + logging.error("数据推送时发生异常: %s", str(e)) + + +def start_2(date='', token=None): + try: + logging.info("开始执行 start_2 函数,日期: %s", date) + workbook = xlrd.open_workbook(read_file_path_name) + sheet = workbook.sheet_by_index(0) + num_rows = sheet.nrows + row_data = sheet.row_values(1) + one_cols = row_data + cur_time, cur_time2 = getNow(date) + search_data = { + "data": { + "date": cur_time, + "dataItemNoList": one_cols[1:] + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post( + url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + datas = search_value + else: + datas = None + append_rows = [cur_time2] + dataItemNo_dataValue = {} + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"] + ] = data_value["dataValue"] + for value in one_cols[1:]: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + workbook = xlrd.open_workbook('定性模型数据项12-11.xlsx') + sheet_count = len(workbook.sheet_names()) + sheet_names = workbook.sheet_names() + new_workbook = xlwt.Workbook() + for i in range(sheet_count): + sheet = workbook.sheet_by_index(i) + row_count = sheet.nrows + col_count = sheet.ncols + data = [] + for row in range(row_count): + row_data = [] + for col in range(col_count): + row_data.append(sheet.cell_value(row, col)) + data.append(row_data) + new_sheet = new_workbook.add_sheet(sheet_names[i]) + for row in range(row_count): + for col in range(col_count): + new_sheet.write(row, col, data[row][col]) + if i == 0: + for col in range(col_count): + new_sheet.write(row_count, col, append_rows[col]) + new_workbook.save("定性模型数据项12-11.xlsx") + update_e_value('定性模型数据项12-11.xlsx', 8, 1000) + df = pd.read_excel('定性模型数据项12-11.xlsx') + df = df.fillna(df.ffill()) + df1 = df[-2:].reset_index() + if df1.loc[1, '70号沥青开工率'] > 30: + a = (df1.loc[1, '70号沥青开工率']-0.2)*5/0.1 + else: + a = 0 + b = df1.loc[1, '资金因素'] + if df1.loc[1, '昨日计划提货偏差'] > 0: + c = df1.loc[1, '昨日计划提货偏差']*10/2000 + else: + c = df1.loc[1, '昨日计划提货偏差']*10/3000 + d = df1.loc[1, '生产情况'] + if df1.loc[1, '基质沥青库存']/265007 > 0.8: + e = (df1.loc[1, '基质沥青库存'] - df1.loc[0, '基质沥青库存'])*10/-5000 + else: + e = 0 + f = 1 + if abs(df1.loc[1, '即期成本'] - df1.loc[0, '即期成本']) >= 100: + g = (df1.loc[1, '即期成本'] - df1.loc[0, '即期成本'])*50/100 + else: + g = 0 + h = df1.loc[1, '订单结构'] + x = round(0.08*a+0*b+0.15*c+0.08*d + 0.03*e + 0.08 * + f + 0.4*g+0.18*h+df1.loc[0, '京博指导价'], 2) + login_res1 = requests.post( + url=login_push_url, json=login_push_data, timeout=(3, 5)) + text1 = json.loads(login_res1.text) + token_push = text1["data"]["accessToken"] + data1 = { + "funcModule": "数据表信息列表", + "funcOperation": "新增", + "data": [ + {"dataItemNo": "C01100036|Forecast_Price|DX|ACN", + "dataDate": cur_time, + "dataStatus": "add", + "dataValue": x + } + ] + } + headers1 = {"Authorization": token_push} + # res = requests.post(url=upload_url, headers=headers1, json=data1, timeout=(3, 5)) + logging.info("start_2 函数执行完成") + except Exception as e: + logging.error("start_2 函数执行时发生异常: %s", str(e)) + + +def start(now=None): + try: + logging.info("开始执行 start 函数") + workbook = xlrd.open_workbook(read_file_path_name) + sheet = workbook.sheet_by_index(0) + num_rows = sheet.nrows + row_data = sheet.row_values(1) + one_cols = row_data + login_res = requests.post( + url=login_url, json=login_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + else: + logging.error("获取认证失败,响应信息: %s", login_res.text) + print("获取认证失败") + token = None + if now is None: + now = datetime.now() + year = now.year + month = now.month + day = now.day + if month < 10: + month = "0" + str(month) + if day < 10: + day = "0" + str(day) + cur_time = str(year) + str(month) + str(day) + cur_time2 = str(year) + "-" + str(month) + "-" + str(day) + search_data = { + "data": { + "date": cur_time, + "dataItemNoList": one_cols[1:] + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post( + url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + datas = search_value + else: + datas = None + append_rows = [cur_time2] + dataItemNo_dataValue = {} + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"] + ] = data_value["dataValue"] + for value in one_cols[1:]: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + workbook = xlrd.open_workbook('定性模型数据项12-11.xlsx') + sheet_count = len(workbook.sheet_names()) + sheet_names = workbook.sheet_names() + new_workbook = xlwt.Workbook() + for i in range(sheet_count): + sheet = workbook.sheet_by_index(i) + row_count = sheet.nrows + col_count = sheet.ncols + data = [] + for row in range(row_count): + row_data = [] + for col in range(col_count): + row_data.append(sheet.cell_value(row, col)) + data.append(row_data) + new_sheet = new_workbook.add_sheet(sheet_names[i]) + for row in range(row_count): + for col in range(col_count): + new_sheet.write(row, col, data[row][col]) + if i == 0: + for col in range(col_count): + new_sheet.write(row_count, col, append_rows[col]) + new_workbook.save("定性模型数据项12-11.xlsx") + update_e_value('定性模型数据项12-11.xlsx', 8, 1000) + df = pd.read_excel('定性模型数据项12-11.xlsx') + df = df.fillna(df.ffill()) + df1 = df[-2:].reset_index() + if df1.loc[1, '70号沥青开工率'] / 100 > 0.3: + a = (df1.loc[1, '70号沥青开工率'] / 100 - 0.2)*5/0.1 + else: + a = 0 + b = df1.loc[1, '资金因素'] + if df1.loc[1, '昨日计划提货偏差'] > 0: + c = df1.loc[1, '昨日计划提货偏差']*10/2000 + else: + c = df1.loc[1, '昨日计划提货偏差']*10/3000 + d = df1.loc[1, '生产情况'] + if df1.loc[1, '基质沥青库存']/265007 > 0.8: + e = (df1.loc[1, '基质沥青库存'] - df1.loc[0, '基质沥青库存'])*10/-5000 + else: + e = 0 + f = 1 + if abs(df1.loc[1, '即期成本'] - df1.loc[0, '即期成本']) >= 100: + g = (df1.loc[1, '即期成本'] - df1.loc[0, '即期成本'])*50/100 + else: + g = 0 + h = df1.loc[1, '订单结构'] + x = round(0.08*a+0*b+0.15*c+0.08*d + 0.03*e + 0.08 * + f + 0.4*g+0.18*h+df1.loc[0, '京博指导价'], 2) + # login_res1 = requests.post(url=login_url, json=login_data, timeout=(3, 30)) + # text1 = json.loads(login_res1.text) + # token_push = text1["data"]["accessToken"] + # data1 = { + # "funcModule": "数据表信息列表", + # "funcOperation": "新增", + # "data": [ + # {"dataItemNo": "C01100036|Forecast_Price|DX|ACN", + # "dataDate": cur_time, + # "dataStatus": "add", + # "dataValue": x + # } + # ] + # } + # headers1 = {"Authorization": token_push} + # res = requests.post(url=upload_url, headers=headers1, json=data1, timeout=(3, 5)) + logging.info("start 函数执行完成") + except Exception as e: + logging.error("start 函数执行时发生异常: %s", str(e)) + + +def start_test(): + try: + logging.info("开始执行 start_test 函数") + workbook = xlrd.open_workbook(read_file_path_name) + sheet = workbook.sheet_by_index(0) + num_rows = sheet.nrows + row_data = sheet.row_values(1) + one_cols = row_data + login_res = requests.post( + url=login_url, json=login_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + else: + logging.error("获取认证失败,响应信息: %s", login_res.text) + print("获取认证失败") + token = None + now = datetime.now() + year = now.year + month = now.month + day = now.day + if month < 10: + month = "0" + str(month) + if day < 10: + day = "0" + str(day) + cur_time = str(year) + str(month) + str(day) + cur_time2 = str(year) + "-" + str(month) + "-" + str(day) + search_data = { + "data": { + "date": cur_time, + "dataItemNoList": one_cols[1:] + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post( + url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + datas = search_value + else: + datas = None + append_rows = [cur_time2] + dataItemNo_dataValue = {} + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"] + ] = data_value["dataValue"] + for value in one_cols[1:]: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + workbook = xlrd.open_workbook('定性模型数据项12-11.xlsx') + sheet_count = len(workbook.sheet_names()) + sheet_names = workbook.sheet_names() + new_workbook = xlwt.Workbook() + for i in range(sheet_count): + sheet = workbook.sheet_by_index(i) + row_count = sheet.nrows + col_count = sheet.ncols + data = [] + for row in range(row_count): + row_data = [] + for col in range(col_count): + row_data.append(sheet.cell_value(row, col)) + data.append(row_data) + new_sheet = new_workbook.add_sheet(sheet_names[i]) + for row in range(row_count): + for col in range(col_count): + new_sheet.write(row, col, data[row][col]) + if i == 0: + for col in range(col_count): + new_sheet.write(row_count, col, append_rows[col]) + new_workbook.save("定性模型数据项12-11.xlsx") + update_e_value('定性模型数据项12-11.xlsx', 8, 1000) + df = pd.read_excel('定性模型数据项12-11.xlsx') + df = df.fillna(df.ffill()) + df1 = df[-2:].reset_index() + if df1.loc[1, '70号沥青开工率'] / 100 > 0.3: + a = (df1.loc[1, '70号沥青开工率'] / 100 - 0.2)*5/0.1 + else: + a = 0 + b = df1.loc[1, '资金因素'] + if df1.loc[1, '昨日计划提货偏差'] > 0: + c = df1.loc[1, '昨日计划提货偏差']*10/2000 + else: + c = df1.loc[1, '昨日计划提货偏差']*10/3000 + d = df1.loc[1, '生产情况'] + if df1.loc[1, '基质沥青库存']/265007 > 0.8: + e = (df1.loc[1, '基质沥青库存'] - df1.loc[0, '基质沥青库存'])*10/-5000 + else: + e = 0 + f = 1 + if abs(df1.loc[1, '即期成本'] - df1.loc[0, '即期成本']) >= 100: + g = (df1.loc[1, '即期成本'] - df1.loc[0, '即期成本'])*50/100 + else: + g = 0 + h = df1.loc[1, '订单结构'] + x = round(0.08*a+0*b+0.15*c+0.08*d + 0.03*e + 0.08 * + f + 0.4*g+0.18*h+df1.loc[0, '京博指导价'], 2) + # login_res1 = requests.post(url=login_url, json=login_data, timeout=(3, 30)) + # text1 = json.loads(login_res1.text) + # token_push = text1["data"]["accessToken"] + # data1 = { + # "funcModule": "数据表信息列表", + # "funcOperation": "新增", + # "data": [ + # {"dataItemNo": "C01100036|Forecast_Price|DX|ACN", + # "dataDate": cur_time, + # "dataStatus": "add", + # "dataValue": x + # } + # ] + # } + # headers1 = {"Authorization": token_push} + # res = requests.post(url=upload_url, headers=headers1, json=data1, timeout=(3, 5)) + logging.info("start_test 函数执行完成") + except Exception as e: + logging.error("start_test 函数执行时发生异常: %s", str(e)) + + +def start_1(): + try: + logging.info("开始执行 start_1 函数") + workbook = xlrd.open_workbook(read_file_path_name) + sheet = workbook.sheet_by_index(0) + num_rows = sheet.nrows + row_data = sheet.row_values(1) + one_cols = row_data + login_res = requests.post( + url=login_url, json=login_data, timeout=(3, 5)) + text = json.loads(login_res.text) + if text["status"]: + token = text["data"]["accessToken"] + else: + logging.error("获取认证失败,响应信息: %s", login_res.text) + print("获取认证失败") + token = None + now = datetime.now() - timedelta(days=1) + year = now.year + month = now.month + day = now.day + if month < 10: + month = "0" + str(month) + if day < 10: + day = "0" + str(day) + cur_time = str(year) + str(month) + str(day) + cur_time2 = str(year) + "-" + str(month) + "-" + str(day) + search_data = { + "data": { + "date": cur_time, + "dataItemNoList": one_cols[1:] + }, + "funcModule": "数据项", + "funcOperation": "查询" + } + headers = {"Authorization": token} + search_res = requests.post( + url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + datas = search_value + else: + datas = None + append_rows = [cur_time2] + dataItemNo_dataValue = {} + for data_value in datas: + if "dataValue" not in data_value: + print(data_value) + dataItemNo_dataValue[data_value["dataItemNo"]] = "" + else: + dataItemNo_dataValue[data_value["dataItemNo"] + ] = data_value["dataValue"] + for value in one_cols[1:]: + if value in dataItemNo_dataValue: + append_rows.append(dataItemNo_dataValue[value]) + else: + append_rows.append("") + workbook = xlrd.open_workbook('定性模型数据项12-11.xlsx') + sheet_count = len(workbook.sheet_names()) + sheet_names = workbook.sheet_names() + new_workbook = xlwt.Workbook() + for i in range(sheet_count): + sheet = workbook.sheet_by_index(i) + row_count = sheet.nrows - 1 + col_count = sheet.ncols + data = [] + for row in range(row_count): + row_data = [] + for col in range(col_count): + row_data.append(sheet.cell_value(row, col)) + data.append(row_data) + new_sheet = new_workbook.add_sheet(sheet_names[i]) + for row in range(row_count): + for col in range(col_count): + new_sheet.write(row, col, data[row][col]) + if i == 0: + for col in range(col_count): + new_sheet.write(row_count, col, append_rows[col]) + new_workbook.save("定性模型数据项12-11.xlsx") + logging.info("start_1 函数执行完成") + except Exception as e: + logging.error("start_1 函数执行时发生异常: %s", str(e)) + + +def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd): + try: + logging.info("开始获取查询数据列表项值,日期范围: %s 至 %s", dateStart, dateEnd) + search_data = { + "funcModule": "数据项", + "funcOperation": "查询", + "data": { + "dateStart": dateStart, + "dateEnd": dateEnd, + "dataItemNoList": dataItemNoList # 数据项编码,代表 brent最低价和最高价 + } + } + headers = {"Authorization": token} + search_res = requests.post( + url=url, headers=headers, json=search_data, timeout=(3, 5)) + search_value = json.loads(search_res.text)["data"] + if search_value: + logging.info("成功获取查询数据列表项值") + return search_value + else: + logging.warning("未获取到查询数据列表项值") + return None + except Exception as e: + logging.error("获取查询数据列表项值时发生异常: %s", str(e)) + return None + + +def save_queryDataListItemNos_xls(data_df, dataItemNoList): + try: + logging.info("开始保存查询数据列表项到 Excel 文件") + current_year_month = datetime.now().strftime('%Y-%m') + grouped = data_df.groupby("dataDate") + df_old = pd.read_excel('定性模型数据项12-11.xlsx') + df_old0 = df_old[:1] + result_dict = {df_old0.iloc[0][col]: col for col in df_old0.columns} + df_old1 = df_old[1:].copy() + df_old1["日期"] = pd.to_datetime(df_old1["日期"]) + df_old1 = df_old1[~df_old1["日期"].dt.strftime( + '%Y-%m').eq(current_year_month)] + df_old1["日期"] = df_old1["日期"].dt.strftime('%Y-%m-%d') + list_data = [] + for date, group in grouped: + dict_data = {"日期": date} + for index, row in group.iterrows(): + dict_data[result_dict[row['dataItemNo']]] = row['dataValue'] + list_data.append(dict_data) + df_current_year_month = pd.DataFrame(list_data) + df_merged = pd.concat( + [df_old0, df_old1, df_current_year_month], ignore_index=True) + df_merged.to_excel('定性模型数据项12-11.xlsx', index=False) + logging.info("保存查询数据列表项到 Excel 文件完成") + except Exception as e: + logging.error("保存查询数据列表项到 Excel 文件时发生异常: %s", str(e)) + + +def queryDataListItemNos(date=None, token=None): + try: + logging.info("开始查询数据列表项,日期: %s", date) + df = pd.read_excel('定性模型数据项12-11.xlsx') + dataItemNoList = df.iloc[0].tolist()[1:] if token is None: - print("获取token失败") - return - - # 获取当前日期 - if date is None: - date = datetime.now() - current_date = date - - # 获取当月1日 - first_day_of_month = current_date.replace(day=1) - - # 格式化为 YYYYMMDD 格式 - dateEnd = current_date.strftime('%Y%m%d') - dateStart = first_day_of_month.strftime('%Y%m%d') - - search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd) - # print("search_value",search_value) - + token = getLogToken() + if token is None: + logging.error("获取token失败") + print("获取token失败") + return + if date is None: + date = datetime.now() + current_date = date + first_day_of_month = current_date.replace(day=1) + dateEnd = current_date.strftime('%Y%m%d') + dateStart = first_day_of_month.strftime('%Y%m%d') + search_value = get_queryDataListItemNos_value( + token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd) + data_df = pd.DataFrame(search_value) + data_df["dataDate"] = pd.to_datetime(data_df["dataDate"]) + data_df["dataDate"] = data_df["dataDate"].dt.strftime('%Y-%m-%d') + save_queryDataListItemNos_xls(data_df, dataItemNoList) + logging.info("查询数据列表项完成") + except Exception as e: + logging.error("查询数据列表项时发生异常: %s", str(e)) data_df = pd.DataFrame(search_value) data_df["dataDate"] = pd.to_datetime(data_df["dataDate"]) data_df["dataDate"] = data_df["dataDate"].dt.strftime('%Y-%m-%d') - save_queryDataListItemNos_xls(data_df,dataItemNoList) + save_queryDataListItemNos_xls(data_df, dataItemNoList) -def main(start_date=None,token=None,token_push=None): - if start_date is None: - start_date = datetime.now() - if token is None: - token = get_head_auth() - if token_push is None: - token_push = get_head_push_auth() - date = start_date.strftime('%Y%m%d') - print(date) - updateExcelData(date,token) - queryDataListItemNos(token=token) - update_e_value('定性模型数据项12-11.xlsx', 8, 1000) - x = qualitativeModel() - print('**************************************************预测结果:',x) - cur_time,cur_time2 = getNow(date) - pushData(cur_time,x,token) + +def main(start_date=None, token=None, token_push=None): + try: + logging.info("开始执行主函数") + if start_date is None: + start_date = datetime.now() + if token is None: + token = get_head_auth() + if token_push is None: + token_push = get_head_push_auth() + date = start_date.strftime('%Y%m%d') + print(date) + logging.info("当前日期: %s", date) + updateExcelData(date, token) + queryDataListItemNos(token=token) + update_e_value('定性模型数据项12-11.xlsx', 8, 1000) + x = qualitativeModel() + if x is not None: + print('**************************************************预测结果:', x) + logging.info("预测结果: %s", x) + cur_time, cur_time2 = getNow(date) + pushData(cur_time, x, token_push) + logging.info("主函数执行完成") + except Exception as e: + logging.error("主函数执行时发生异常: %s", str(e)) if __name__ == "__main__": print("运行中...") + logging.info("程序启动") main() - + logging.info("程序结束") diff --git a/aisenzhecode/石油苯/日度价格预测_最佳模型.pkl b/aisenzhecode/石油苯/日度价格预测_最佳模型.pkl index 2f7e1d4..80fb1a1 100644 Binary files a/aisenzhecode/石油苯/日度价格预测_最佳模型.pkl and b/aisenzhecode/石油苯/日度价格预测_最佳模型.pkl differ diff --git a/aisenzhecode/石油苯/纯苯数据项.xls b/aisenzhecode/石油苯/纯苯数据项.xls index b7039c9..2e56391 100644 Binary files a/aisenzhecode/石油苯/纯苯数据项.xls and b/aisenzhecode/石油苯/纯苯数据项.xls differ diff --git a/aisenzhecode/石油苯/纯苯每日价格预测.py b/aisenzhecode/石油苯/纯苯每日价格预测.py index 5ce59a6..3d6d517 100644 --- a/aisenzhecode/石油苯/纯苯每日价格预测.py +++ b/aisenzhecode/石油苯/纯苯每日价格预测.py @@ -1,8 +1,34 @@ +from statsmodels.tools.eval_measures import mse, rmse +from pandas import Series, DataFrame +import cufflinks as cf +from sklearn.metrics import r2_score +from sklearn.model_selection import train_test_split +from sklearn.metrics import mean_squared_error +import pickle +import warnings +from sklearn.model_selection import GridSearchCV +from sklearn.metrics import mean_absolute_error +from xgboost import plot_importance, plot_tree +import xgboost as xgb +import plotly.graph_objects as go +import plotly.express as px +import statsmodels.api as sm +from xgboost import XGBRegressor +from sklearn.linear_model import Lasso +import sklearn.datasets as datasets +from sklearn import preprocessing +from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot +from plotly import __version__ +import random +import seaborn as sn +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd import requests import json import xlrd import xlwt -from datetime import datetime,timedelta +from datetime import datetime, timedelta import time # 变量定义 login_url = "http://10.200.32.39/jingbo-api/api/server/login" @@ -38,69 +64,29 @@ read_file_path_name = "纯苯数据项.xls" one_cols = [] two_cols = [] -import pandas as pd -import numpy as np -import matplotlib.pyplot as plt -import seaborn as sn -import random -import time +# 导入机器学习算法模型 - - -from plotly import __version__ -from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot - -from sklearn import preprocessing - -from pandas import Series,DataFrame - -import matplotlib.pyplot as plt - -import sklearn.datasets as datasets - -#导入机器学习算法模型 -from sklearn.linear_model import Lasso -from xgboost import XGBRegressor - -import statsmodels.api as sm try: from keras.preprocessing.sequence import TimeseriesGenerator except: from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator -import plotly.express as px -import plotly.graph_objects as go -import xgboost as xgb -from xgboost import plot_importance, plot_tree -from sklearn.metrics import mean_absolute_error -from statsmodels.tools.eval_measures import mse,rmse -from sklearn.model_selection import GridSearchCV -from xgboost import XGBRegressor -import warnings -import pickle +# 切割训练数据和样本数据 -from sklearn.metrics import mean_squared_error - -#切割训练数据和样本数据 -from sklearn.model_selection import train_test_split - -#用于模型评分 -from sklearn.metrics import r2_score +# 用于模型评分 le = preprocessing.LabelEncoder() # print(__version__) # requires version >= 1.9.0 -import cufflinks as cf cf.go_offline() random.seed(100) - # 数据获取 def get_head_auth(): @@ -114,7 +100,7 @@ def get_head_auth(): return None -def get_data_value(token, dataItemNoList,date): +def get_data_value(token, dataItemNoList, date): search_data = { "data": { "date": getNow(date)[0], @@ -124,7 +110,8 @@ def get_data_value(token, dataItemNoList,date): "funcOperation": "查询" } headers = {"Authorization": token} - search_res = requests.post(url=search_url, headers=headers, json=search_data, timeout=(3, 5)) + search_res = requests.post( + url=search_url, headers=headers, json=search_data, timeout=(3, 5)) search_value = json.loads(search_res.text)["data"] if search_value: return search_value @@ -136,10 +123,7 @@ def get_data_value(token, dataItemNoList,date): # xls文件处理 - - - -def write_xls(data,date): +def write_xls(data, date): # 创建一个Workbook对象 workbook = xlwt.Workbook() @@ -155,7 +139,6 @@ def write_xls(data,date): workbook.save(get_cur_time(date)[0] + '.xls') - def getNow(date='', offset=0): """生成指定日期的两种格式字符串 Args: @@ -168,7 +151,7 @@ def getNow(date='', offset=0): tuple: (紧凑日期字符串, 标准日期字符串) """ # 日期解析逻辑 - from datetime import datetime,timedelta + from datetime import datetime, timedelta if isinstance(date, datetime): now = date else: @@ -186,7 +169,7 @@ def getNow(date='', offset=0): # 应用日期偏移 now = now - timedelta(days=offset) - + # 统一格式化输出 date_str = now.strftime("%Y-%m-%d") compact_date = date_str.replace("-", "") @@ -215,7 +198,8 @@ def get_cur_time(date=''): def get_head_push_auth(): - login_res = requests.post(url=login_push_url, json=login_push_data, timeout=(3, 5)) + login_res = requests.post( + url=login_push_url, json=login_push_data, timeout=(3, 5)) text = json.loads(login_res.text) if text["status"]: token = text["data"]["accessToken"] @@ -225,8 +209,7 @@ def get_head_push_auth(): return None - -def upload_data_to_system(token_push,date): +def upload_data_to_system(token_push, date): datavalue = forecast_price() data = { "funcModule": "数据表信息列表", @@ -242,85 +225,85 @@ def upload_data_to_system(token_push,date): } print(data) headers = {"Authorization": token_push} - res = requests.post(url=upload_url, headers=headers, json=data, timeout=(3, 5)) + res = requests.post(url=upload_url, headers=headers, + json=data, timeout=(3, 5)) print(res.text) - - + def forecast_price(): # df_test = pd.read_csv('定价模型数据收集0212.csv') - df_test = pd.read_excel('纯苯数据项.xls',sheet_name='Sheet1') - df_test.drop([0],inplace=True) + df_test = pd.read_excel('纯苯数据项.xls', sheet_name='Sheet1') + df_test.drop([0], inplace=True) # df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True) - df_test['Date']=pd.to_datetime(df_test['Date'], format=r'%Y-%m-%d',infer_datetime_format=True) - + df_test['Date'] = pd.to_datetime( + df_test['Date'], format=r'%Y-%m-%d', infer_datetime_format=True) df_test_1 = df_test - df_test_1=df_test_1.fillna(df_test.ffill()) - df_test_1=df_test_1.fillna(df_test_1.bfill()) + df_test_1 = df_test_1.fillna(df_test.ffill()) + df_test_1 = df_test_1.fillna(df_test_1.bfill()) # 选择用于模型训练的列名称 col_for_training = df_test_1.columns - import joblib Best_model_DalyLGPrice = joblib.load("日度价格预测_最佳模型.pkl") # 最新的一天为最后一行的数据 - + df_test_1_Day = df_test_1.tail(1) # 移除不需要的列 df_test_1_Day.index = df_test_1_Day["Date"] - df_test_1_Day = df_test_1_Day.drop(["Date"], axis= 1) - df_test_1_Day=df_test_1_Day.drop('Price',axis=1) - df_test_1_Day=df_test_1_Day.dropna() + df_test_1_Day = df_test_1_Day.drop(["Date"], axis=1) + df_test_1_Day = df_test_1_Day.drop('Price', axis=1) + df_test_1_Day = df_test_1_Day.dropna() for col in df_test_1_Day.columns: - df_test_1_Day[col] = pd.to_numeric(df_test_1_Day[col],errors='coerce') - #预测今日价格,显示至小数点后两位 - Ypredict_Today=Best_model_DalyLGPrice.predict(df_test_1_Day) + df_test_1_Day[col] = pd.to_numeric(df_test_1_Day[col], errors='coerce') + # 预测今日价格,显示至小数点后两位 + Ypredict_Today = Best_model_DalyLGPrice.predict(df_test_1_Day) - df_test_1_Day['日度预测价格']=Ypredict_Today + df_test_1_Day['日度预测价格'] = Ypredict_Today print(df_test_1_Day['日度预测价格']) a = df_test_1_Day['日度预测价格'] a = a[0] a = float(a) - a = round(a,2) + a = round(a, 2) return a + + def optimize_Model(): from sklearn.model_selection import train_test_split - from sklearn.impute import SimpleImputer + from sklearn.impute import SimpleImputer from sklearn.preprocessing import OrdinalEncoder from sklearn.feature_selection import SelectFromModel from sklearn.metrics import mean_squared_error, r2_score import pandas as pd - pd.set_option('display.max_rows',40) - pd.set_option('display.max_columns',40) + pd.set_option('display.max_rows', 40) + pd.set_option('display.max_columns', 40) df_test = pd.read_excel('纯苯数据项.xls') - df_test.drop([0],inplace=True) + df_test.drop([0], inplace=True) # df_test['Date']=pd.to_datetime(df_test['Date'], format='%m/%d/%Y',infer_datetime_format=True) - df_test['Date']=pd.to_datetime(df_test['Date'], format='%Y-%m-%d',infer_datetime_format=True) + df_test['Date'] = pd.to_datetime( + df_test['Date'], format='%Y-%m-%d', infer_datetime_format=True) - - #将缺失值补为前一个或者后一个数值 + # 将缺失值补为前一个或者后一个数值 df_test_1 = df_test - df_test_1=df_test_1.fillna(df_test.ffill()) - df_test_1=df_test_1.fillna(df_test_1.bfill()) + df_test_1 = df_test_1.fillna(df_test.ffill()) + df_test_1 = df_test_1.fillna(df_test_1.bfill()) df_test_1["Date"] = pd.to_datetime(df_test_1["Date"]) df_test_1.index = df_test_1["Date"] - df_test_1 = df_test_1.drop(["Date"], axis= 1) + df_test_1 = df_test_1.drop(["Date"], axis=1) df_test_1 = df_test_1.astype('float') - - + import numpy as np import pandas as pd - from pandas import Series,DataFrame + from pandas import Series, DataFrame import matplotlib.pyplot as plt import sklearn.datasets as datasets - #导入机器学习算法模型 + # 导入机器学习算法模型 from sklearn.linear_model import Lasso from xgboost import XGBRegressor @@ -336,7 +319,7 @@ def optimize_Model(): import xgboost as xgb from xgboost import plot_importance, plot_tree from sklearn.metrics import mean_absolute_error - from statsmodels.tools.eval_measures import mse,rmse + from statsmodels.tools.eval_measures import mse, rmse from sklearn.model_selection import GridSearchCV from xgboost import XGBRegressor import warnings @@ -344,63 +327,67 @@ def optimize_Model(): from sklearn.metrics import mean_squared_error - #切割训练数据和样本数据 + # 切割训练数据和样本数据 from sklearn.model_selection import train_test_split - #用于模型评分 + # 用于模型评分 from sklearn.metrics import r2_score - dataset1=df_test_1.drop('Price',axis=1)#.astype(float) + dataset1 = df_test_1.drop('Price', axis=1) # .astype(float) - y=df_test_1['Price'] + y = df_test_1['Price'] - x=dataset1 + x = dataset1 train = x target = y - #切割数据样本集合测试集 - X_train,x_test,y_train,y_true = train_test_split(train,target,test_size=0.2,random_state=0) + # 切割数据样本集合测试集 + X_train, x_test, y_train, y_true = train_test_split( + train, target, test_size=0.2, random_state=0) - #模型缩写 - Lasso = Lasso(random_state=0) + # 模型缩写 + Lasso = Lasso(random_state=0) XGBR = XGBRegressor(random_state=0) - #训练模型 - Lasso.fit(X_train,y_train) - XGBR.fit(X_train,y_train) - #模型拟合 + # 训练模型 + Lasso.fit(X_train, y_train) + XGBR.fit(X_train, y_train) + # 模型拟合 y_pre_Lasso = Lasso.predict(x_test) y_pre_XGBR = XGBR.predict(x_test) - #计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R² - Lasso_score = r2_score(y_true,y_pre_Lasso) - XGBR_score=r2_score(y_true,y_pre_XGBR) + # 计算Lasso、XGBR、RandomForestR、AdaBoostR、GradientBoostingR、BaggingRegressor各模型的R² + Lasso_score = r2_score(y_true, y_pre_Lasso) + XGBR_score = r2_score(y_true, y_pre_XGBR) - #计算Lasso、XGBR的MSE和RMSE - Lasso_MSE=mean_squared_error(y_true, y_pre_Lasso) - XGBR_MSE=mean_squared_error(y_true, y_pre_XGBR) + # 计算Lasso、XGBR的MSE和RMSE + Lasso_MSE = mean_squared_error(y_true, y_pre_Lasso) + XGBR_MSE = mean_squared_error(y_true, y_pre_XGBR) - Lasso_RMSE=np.sqrt(Lasso_MSE) - XGBR_RMSE=np.sqrt(XGBR_MSE) + Lasso_RMSE = np.sqrt(Lasso_MSE) + XGBR_RMSE = np.sqrt(XGBR_MSE) # 将不同模型的不同误差值整合成一个表格 model_results = pd.DataFrame([['Lasso', Lasso_RMSE, Lasso_score], ['XgBoost', XGBR_RMSE, XGBR_score]], - columns = ['模型(Model)','均方根误差(RMSE)', 'R^2 score']) - #将模型名称(Model)列设置为索引 - model_results1=model_results.set_index('模型(Model)') + columns=['模型(Model)', '均方根误差(RMSE)', 'R^2 score']) + # 将模型名称(Model)列设置为索引 + model_results1 = model_results.set_index('模型(Model)') model_results1 - #定义plot_feature_importance函数,该函数用于计算特征重要性。此部分代码无需调整 - def plot_feature_importance(importance,names,model_type): + # 定义plot_feature_importance函数,该函数用于计算特征重要性。此部分代码无需调整 + + def plot_feature_importance(importance, names, model_type): feature_importance = np.array(importance) feature_names = np.array(names) - data={'feature_names':feature_names,'feature_importance':feature_importance} + data = {'feature_names': feature_names, + 'feature_importance': feature_importance} fi_df = pd.DataFrame(data) - fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True) + fi_df.sort_values(by=['feature_importance'], + ascending=False, inplace=True) - plt.figure(figsize=(10,8)) + plt.figure(figsize=(10, 8)) sn.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names']) plt.title(model_type + " "+'FEATURE IMPORTANCE') @@ -408,60 +395,56 @@ def optimize_Model(): plt.ylabel('FEATURE NAMES') from pylab import mpl mpl.rcParams['font.sans-serif'] = ['SimHei'] - ## Xgboost 模型参数优化-初步 - #参考: https://juejin.im/post/6844903661013827598 - #每次调参时,备选参数数值以同数量级的1、3、10设置即可(比如设置1、3、10,或0.1、0.3、1.0,或0.01,0.03,0.10即可) + # Xgboost 模型参数优化-初步 + # 参考: https://juejin.im/post/6844903661013827598 + # 每次调参时,备选参数数值以同数量级的1、3、10设置即可(比如设置1、3、10,或0.1、0.3、1.0,或0.01,0.03,0.10即可) from xgboost import XGBRegressor from sklearn.model_selection import GridSearchCV estimator = XGBRegressor(random_state=0, - nthread=4, - seed=0 - ) + nthread=4, + seed=0 + ) parameters = { - 'max_depth': range (2, 11, 2), # 树的最大深度 - 'n_estimators': range (50, 101, 10), # 迭代次数 + 'max_depth': range(2, 11, 2), # 树的最大深度 + 'n_estimators': range(50, 101, 10), # 迭代次数 'learning_rate': [0.01, 0.03, 0.1, 0.3, 0.5, 1] } grid_search_XGB = GridSearchCV( estimator=estimator, param_grid=parameters, - # n_jobs = 10, - cv = 3, + # n_jobs = 10, + cv=3, verbose=True ) grid_search_XGB.fit(X_train, y_train) - #如果电脑在此步骤报错,可能是因为计算量太大,超过硬件可支持程度,可注释掉“n_jobs=10”一行 + # 如果电脑在此步骤报错,可能是因为计算量太大,超过硬件可支持程度,可注释掉“n_jobs=10”一行 best_parameters = grid_search_XGB.best_estimator_.get_params() y_pred = grid_search_XGB.predict(x_test) - op_XGBR_score = r2_score(y_true,y_pred) - op_XGBR_MSE= mean_squared_error(y_true, y_pred) - op_XGBR_RMSE= np.sqrt(op_XGBR_MSE) + op_XGBR_score = r2_score(y_true, y_pred) + op_XGBR_MSE = mean_squared_error(y_true, y_pred) + op_XGBR_RMSE = np.sqrt(op_XGBR_MSE) model_results2 = pd.DataFrame([['Optimized_Xgboost', op_XGBR_RMSE, op_XGBR_score]], - columns = ['模型(Model)', '均方根误差(RMSE)', 'R^2 score']) - model_results2=model_results2.set_index('模型(Model)') + columns=['模型(Model)', '均方根误差(RMSE)', 'R^2 score']) + model_results2 = model_results2.set_index('模型(Model)') # results = model_results1.append(model_results2, ignore_index = False) - results = pd.concat([model_results1,model_results2],ignore_index=True) - - + results = pd.concat([model_results1, model_results2], ignore_index=True) import pickle - Pkl_Filename = "日度价格预测_最佳模型.pkl" + Pkl_Filename = "日度价格预测_最佳模型.pkl" - with open(Pkl_Filename, 'wb') as file: - pickle.dump(grid_search_XGB, file) + with open(Pkl_Filename, 'wb') as file: + pickle.dump(grid_search_XGB, file) - - def read_xls_data(): global one_cols, two_cols # 打开 XLS 文件 @@ -492,9 +475,7 @@ def read_xls_data(): # workbook.close() - - -def start(date=None,token=None,token_push=None): +def start(date=None, token=None, token_push=None): read_xls_data() if date == None: date = getNow()[0] @@ -502,7 +483,7 @@ def start(date=None,token=None,token_push=None): token = get_head_auth() token_push = get_head_push_auth() - datas = get_data_value(token, one_cols,date) + datas = get_data_value(token, one_cols, date) if not datas: print("今天没有新数据") return @@ -516,21 +497,22 @@ def start(date=None,token=None,token_push=None): print(data_value) dataItemNo_dataValue[data_value["dataItemNo"]] = "" else: - dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] - + dataItemNo_dataValue[data_value["dataItemNo"] + ] = data_value["dataValue"] + for value in one_cols: if value in dataItemNo_dataValue: append_rows.append(dataItemNo_dataValue[value]) else: append_rows.append("") save_xls(append_rows) - + # 获取当月的数据写入到指定文件,如果是补充数据,不需要执行 - queryDataListItemNos() + queryDataListItemNos(date=date) # 模型训练 optimize_Model() # 上传预测数据 - upload_data_to_system(token_push,date) + upload_data_to_system(token_push, date) # data_list.append(three_cols) # write_xls(data_list) @@ -542,9 +524,8 @@ def start_1(date=None): token = get_head_auth() if not token: return - - datas = get_data_value(token, one_cols,date=date) + datas = get_data_value(token, one_cols, date=date) # if not datas: # return @@ -556,8 +537,9 @@ def start_1(date=None): print(data_value) dataItemNo_dataValue[data_value["dataItemNo"]] = "" else: - dataItemNo_dataValue[data_value["dataItemNo"]] = data_value["dataValue"] - + dataItemNo_dataValue[data_value["dataItemNo"] + ] = data_value["dataValue"] + for value in one_cols: if value in dataItemNo_dataValue: append_rows.append(dataItemNo_dataValue[value]) @@ -565,10 +547,10 @@ def start_1(date=None): append_rows.append("") save_xls_1(append_rows) - # data_list.append(three_cols) # write_xls(data_list) - + + def save_xls_1(append_rows): # 打开xls文件 @@ -610,11 +592,9 @@ def save_xls_1(append_rows): new_sheet.write(row_count, col, append_rows[col]) # 保存新的xls文件 - new_workbook.save("纯苯数据项.xls") + new_workbook.save("纯苯数据项.xls") + - - - def check_data(dataItemNo): token = get_head_auth() if not token: @@ -669,8 +649,6 @@ def save_xls(append_rows): new_workbook.save("纯苯数据项.xls") - - def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEnd): search_data = { @@ -684,7 +662,8 @@ def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEn } headers = {"Authorization": token} - search_res = requests.post(url=url, headers=headers, json=search_data, timeout=(3, 5)) + search_res = requests.post( + url=url, headers=headers, json=search_data, timeout=(3, 5)) search_value = json.loads(search_res.text)["data"] if search_value: return search_value @@ -692,9 +671,8 @@ def get_queryDataListItemNos_value(token, url, dataItemNoList, dateStart, dateEn return None - -def save_queryDataListItemNos_xls(data_df,dataItemNoList): - from datetime import datetime,timedelta +def save_queryDataListItemNos_xls(data_df, dataItemNoList): + from datetime import datetime, timedelta current_year_month = datetime.now().strftime('%Y-%m') grouped = data_df.groupby("dataDate") @@ -727,7 +705,6 @@ def save_queryDataListItemNos_xls(data_df,dataItemNoList): # 创建sheet new_sheet = new_workbook.add_sheet(sheet_names[i]) - current_year_month_row = 0 # 将原有的数据写入新的sheet for row in range(row_count): @@ -739,29 +716,28 @@ def save_queryDataListItemNos_xls(data_df,dataItemNoList): break new_sheet.write(row, col, data[row][col]) - # print("current_year_month_row",current_year_month_row) if i == 0: rowFlag = 0 # 查看每组数据 for date, group in grouped: - new_sheet.write(row_count + rowFlag - current_year_month_row, 0, date) + new_sheet.write(row_count + rowFlag - + current_year_month_row, 0, date) for j in range(len(dataItemNoList)): dataItemNo = dataItemNoList[j] if group[group["dataItemNo"] == dataItemNo]["dataValue"].values and (not str(group[group["dataItemNo"] == dataItemNo]["dataValue"].values[0]) == 'nan'): - new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, group[group["dataItemNo"] == dataItemNo]["dataValue"].values[0]) + new_sheet.write(row_count + rowFlag - current_year_month_row, j + 1, + group[group["dataItemNo"] == dataItemNo]["dataValue"].values[0]) rowFlag += 1 - # 保存新的xls文件 new_workbook.save("纯苯数据项.xls") - -def queryDataListItemNos(date=None,token=None): +def queryDataListItemNos(date=None, token=None): from datetime import datetime, timedelta df = pd.read_excel('纯苯数据项.xls') dataItemNoList = df.iloc[0].tolist()[1:] @@ -779,16 +755,29 @@ def queryDataListItemNos(date=None,token=None): first_day_of_month = current_date.replace(day=1) # 格式化为 YYYYMMDD 格式 dateEnd = current_date.strftime('%Y%m%d') + # dateEnd = date.strftime('%Y%m%d') dateStart = first_day_of_month.strftime('%Y%m%d') - search_value = get_queryDataListItemNos_value(token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd) + search_value = get_queryDataListItemNos_value( + token, queryDataListItemNos_url, dataItemNoList, dateStart, dateEnd) data_df = pd.DataFrame(search_value) data_df["dataDate"] = pd.to_datetime(data_df["dataDate"]) data_df["dataDate"] = data_df["dataDate"].dt.strftime('%Y-%m-%d') - save_queryDataListItemNos_xls(data_df,dataItemNoList) + save_queryDataListItemNos_xls(data_df, dataItemNoList) print('当月数据更新完成') if __name__ == "__main__": print('运行中ing') start() + # 自定义日期执行预测 + # start_date = datetime(2025, 7, 6) + # end_date = datetime(2025, 7, 7) + # token = get_head_auth() + # token_push = get_head_push_auth() + # while start_date < end_date: + # print(start_date.strftime('%Y%m%d')) + # start(start_date, token, token_push) + # time.sleep(2) + # # start_1(start_date) + # start_date += timedelta(days=1)