原油特征处理,删除手工数据
This commit is contained in:
parent
757bc5317a
commit
061d38a621
Binary file not shown.
Binary file not shown.
@ -20,8 +20,8 @@ plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
|
||||
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
|
||||
|
||||
from datetime import timedelta
|
||||
# from config_jingbo import *
|
||||
from config_juxiting import *
|
||||
from config_jingbo import *
|
||||
# from config_juxiting import *
|
||||
from sklearn import metrics
|
||||
from reportlab.pdfbase import pdfmetrics # 注册字体
|
||||
from reportlab.pdfbase.ttfonts import TTFont # 字体类
|
||||
@ -473,9 +473,8 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
|
||||
values = values + [column, last_update_time,0]
|
||||
# 计算特征数据值的时间差
|
||||
try:
|
||||
time_diff = (df1[column].dropna().index.to_series().diff().mode()[0]).total_seconds() / 3600 / 24
|
||||
|
||||
# 计算预警日期
|
||||
time_diff = (df1[column].dropna().index.to_series().diff().mode()[0]).total_seconds() / 3600 / 24
|
||||
from datetime import timedelta
|
||||
early_warning_date = datetime.datetime.strptime(last_update_time, '%Y-%m-%d') + timedelta(days=time_diff)*2
|
||||
early_warning_date = early_warning_date.strftime('%Y-%m-%d')
|
||||
@ -492,9 +491,9 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
|
||||
|
||||
|
||||
logger.info(f'删除两月不更新特征前数据量:{df.shape}')
|
||||
# 去掉最后数据对应的日期在两月以前的列,删除近2月的数据是常数的列
|
||||
# 去掉最后一条数据对应的日期在六个月(180天)以前的列,并删除近六个月(180天)内数据为常数的列
|
||||
current_date = datetime.datetime.now()
|
||||
two_months_ago = current_date - timedelta(days=40)
|
||||
two_months_ago = current_date - timedelta(days=180)
|
||||
|
||||
def check_column(col_name):
|
||||
'''
|
||||
@ -507,8 +506,11 @@ def datachuli(df_zhibiaoshuju,df_zhibiaoliebiao,datecol='date',end_time='',y='y'
|
||||
df_check_column = df[['ds',col_name]]
|
||||
df_check_column = df_check_column.dropna()
|
||||
if len(df_check_column) == 0:
|
||||
print(f'空值列:{col_name}')
|
||||
return True
|
||||
if df_check_column[(df_check_column['ds']>= two_months_ago)].groupby(col_name).ngroups < 2:
|
||||
# 判断是不是常数列
|
||||
if df_check_column[(df_check_column['ds']>= two_months_ago)].groupby(col_name).ngroups < 2:
|
||||
print(f'180没有更新:{col_name}')
|
||||
return True
|
||||
corresponding_date = df_check_column.iloc[-1]['ds']
|
||||
return corresponding_date < two_months_ago
|
||||
@ -1181,7 +1183,7 @@ class EtaReader():
|
||||
'''
|
||||
|
||||
# 构建新的DataFrame df df1
|
||||
df = pd.DataFrame(columns=['指标分类', '指标名称', '指标id', '频度'])
|
||||
df = pd.DataFrame(columns=['指标分类', '指标名称', '指标id', '频度','指标来源','来源id'])
|
||||
df1 = pd.DataFrame(columns=['DataTime'])
|
||||
|
||||
|
||||
@ -1225,16 +1227,27 @@ class EtaReader():
|
||||
EdbCode = i.get('EdbCode')
|
||||
EdbName = i.get('EdbName') # 指标名称,要保存到df2的指标名称列,df的指标名称列
|
||||
Frequency = i.get('Frequency') # 频度,要保存到df的频度列
|
||||
SourceName = i.get('SourceName') # 来源名称,要保存到df的指标来源列
|
||||
Source = i.get('Source') # 来源ID,要保存到df的来源id列
|
||||
# 频度不是 日 或者 周的 跳过
|
||||
if Frequency not in ['日度','周度','日','周']:
|
||||
continue
|
||||
|
||||
|
||||
# 只保留手工数据中,名称带有 海运出口 海运进口
|
||||
if Source == 9 and not ('海运出口' in EdbName or '海运进口' in EdbName):
|
||||
continue
|
||||
|
||||
# 不要wind数据
|
||||
if Source == 2:
|
||||
continue
|
||||
|
||||
|
||||
# 判断名称是否需要保存
|
||||
isSave = self.filter_yuanyou_data(ClassifyName,EdbName)
|
||||
if isSave:
|
||||
# 保存到df
|
||||
# 保存频度 指标名称 分类 指标id 到 df
|
||||
df2 = pd.DataFrame({'指标分类': ClassifyName, '指标名称': EdbName, '指标id': EdbCode, '频度': Frequency},index=[0])
|
||||
df2 = pd.DataFrame({'指标分类': ClassifyName, '指标名称': EdbName, '指标id': EdbCode, '频度': Frequency,'指标来源':SourceName,'来源id':Source},index=[0])
|
||||
|
||||
# df = pd.merge(df, df2, how='outer')
|
||||
df = pd.concat([df, df2])
|
||||
@ -1255,7 +1268,7 @@ class EtaReader():
|
||||
itemname = item
|
||||
|
||||
df1 = self.edbcodegetdata(df1,item,itemname)
|
||||
df = pd.concat([df, pd.DataFrame({'指标分类': '其他', '指标名称': itemname, '指标id': item, '频度': '其他'},index=[0])])
|
||||
df = pd.concat([df, pd.DataFrame({'指标分类': '其他', '指标名称': itemname, '指标id': item, '频度': '其他','指标来源':'其他','来源id':'其他'},index=[0])])
|
||||
|
||||
# 按时间排序
|
||||
df1.sort_values('DataTime',inplace=True,ascending=False)
|
||||
|
23086
logs/pricepredict.log.1
23086
logs/pricepredict.log.1
File diff suppressed because it is too large
Load Diff
12047
logs/pricepredict.log.2
12047
logs/pricepredict.log.2
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
11449
logs/pricepredict.log.4
11449
logs/pricepredict.log.4
File diff suppressed because one or more lines are too long
11888
logs/pricepredict.log.5
11888
logs/pricepredict.log.5
File diff suppressed because one or more lines are too long
@ -1920,7 +1920,7 @@ def pp_export_pdf_v1(num_indicators=475,num_models=21, num_dayindicator=202,inpu
|
||||
|
||||
### 添加标题
|
||||
content.append(Graphs.draw_title(f'{y}{time}预测报告'))
|
||||
|
||||
|
||||
### 预测结果
|
||||
content.append(Graphs.draw_little_title('一、预测结果:'))
|
||||
# 添加图片
|
||||
|
Loading…
Reference in New Issue
Block a user