Time Series & Feature Engineering Questions

rpleanerrpleaner MemberPosts:5Contributor I
edited December 2018 inHelp

Hi, guys

I am trying to solve a sales forecast problem: there is a monthly sales table (attributes: time, sales) and a consumer record table (attributes: order time, A (id), B, C, D). Assuming that sales are related to the consumers' attributes, how should I create proper features as input to build a time series model to make predictions?

There are many ways to create inputs by counting instances along different dimensions, as follows:

var1 =count A when B=b1,

var2 =count A when B=b2

var3 = count A when B=b1,C=c1,D=d1

var4 = count A when B=b1,C=c1,D=d1

How do I select proper inputs for time series prediction from these variables? Is this the right way to create features?

Does anybody have any ideas? I would appreciate any tips! Would you mind looking at this? :p @Thomas_Ott

Tagged:

Answers

  • Thomas_OttThomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, MemberPosts:1,761Unicorn

    @rpleaner do you have a sample process to share?

  • rpleanerrpleaner MemberPosts:5Contributor I

    I tried to build similar input tables using the Generate Data operators in RapidMiner, as follows.

    Supposing that customers' attributes are related to monthly sales, how do I create proper input for time series forecasting of next month's sales?








    <operator activated="true" class="process" compatibility="5.3.015" expanded="true" name="Process">

    <operator activated="true" class="generate_nominal_data" compatibility="5.3.015" expanded="true" height="60" name="Generate Nominal Data" width="90" x="45" y="210"/>
    <operator activated="true" class="generate_id" compatibility="5.3.015" expanded="true" height="76" name="Generate ID" width="90" x="179" y="210"/>
    <operator activated="true" class="rename" compatibility="5.3.015" expanded="true" height="76" name="Rename" width="90" x="313" y="210">




    <operator activated="true" class="set_role" compatibility="5.3.015" expanded="true" height="76" name="Set Role" width="90" x="447" y="210">



    <operator activated="true" class="generate_sales_data" compatibility="5.3.015" expanded="true" height="60" name="Generate Sales Data" width="90" x="45" y="30"/>
    <operator activated="true" class="date_to_nominal" compatibility="5.3.015" expanded="true" height="76" name="Date to Nominal" width="90" x="179" y="30">



    <operator activated="true" class="generate_attributes" compatibility="5.3.015" expanded="true" height="76" name="Generate Attributes" width="90" x="313" y="30">




    <operator activated="true" class="join" compatibility="5.3.015" expanded="true" height="76" name="Join" width="90" x="581" y="120">






    <operator activated="true" class="aggregate" compatibility="5.3.015" expanded="true" height="76" name="custom" width="90" x="715" y="120">





    <operator activated="true" class="aggregate" compatibility="5.3.015" expanded="true" height="76" name="monthly sales" width="90" x="514" y="30">





    <connect from_op="Generate Nominal Data" from_port="output" to_op="Generate ID" to_port="example set input"/>
    <connect from_op="Generate ID" from_port="example set output" to_op="Rename" to_port="example set input"/>
    <connect from_op="Rename" from_port="example set output" to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output" to_op="Join" to_port="right"/>
    <connect from_op="Generate Sales Data" from_port="output" to_op="Date to Nominal" to_port="example set input"/>
    <connect from_op="Date to Nominal" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
    <connect from_op="Generate Attributes" from_port="example set output" to_op="monthly sales" to_port="example set input"/>
    <connect from_op="Generate Attributes" from_port="original" to_op="Join" to_port="left"/>
    <connect from_op="Join" from_port="join" to_op="custom" to_port="example set input"/>
    <connect from_op="custom" from_port="example set output" to_port="result 2"/>
    <connect from_op="monthly sales" from_port="example set output" to_port="result 1"/>







    @Thomas_Ott

  • Thomas_OttThomas_Ott RapidMiner Certified Analyst, RapidMiner Certified Expert, MemberPosts:1,761Unicorn

    @rpleaner you can try something like this:







    <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">

    <operator activated="true" class="generate_nominal_data" compatibility="8.1.001" expanded="true" height="68" name="Generate Nominal Data" width="90" x="45" y="210"/>
    <operator activated="true" class="generate_id" compatibility="8.1.001" expanded="true" height="82" name="Generate ID" width="90" x="179" y="210"/>
    <operator activated="true" class="rename" compatibility="8.1.001" expanded="true" height="82" name="Rename" width="90" x="313" y="210">




    <operator activated="true" class="set_role" compatibility="8.1.001" expanded="true" height="82" name="Set Role" width="90" x="447" y="210">



    <operator activated="true" class="generate_sales_data" compatibility="8.1.001" expanded="true" height="68" name="Generate Sales Data" width="90" x="45" y="30"/>
    <operator activated="true" class="date_to_nominal" compatibility="8.1.001" expanded="true" height="82" name="Date to Nominal" width="90" x="179" y="30">



    <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="30">




    <operator activated="true" class="join" compatibility="8.1.001" expanded="true" height="82" name="Join" width="90" x="581" y="120">





    <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing" width="90" x="715" y="136">



    <parameter key="horizon" value="5"/>

    <operator activated="true" class="series:windowing" compatibility="7.4.000" expanded="true" height="82" name="Windowing (2)" width="90" x="849" y="238">








    <operator activated="true" class="aggregate" compatibility="6.0.006" expanded="true" height="82" name="monthly sales" width="90" x="581" y="34">





    <operator activated="true" class="series:sliding_window_validation" compatibility="7.4.000" expanded="true" height="145" name="Validation" width="90" x="983" y="85">



    <parameter key="horizon" value="5"/>

    <operator activated="true" class="h2o:generalized_linear_model" compatibility="7.6.001" expanded="true" height="124" name="Generalized Linear Model" width="90" x="112" y="34">



    <connect from_port="training" to_op="Generalized Linear Model" to_port="training set"/>
    <connect from_op="Generalized Linear Model" from_port="model" to_port="model"/>





    <operator activated="true" class="apply_model" compatibility="8.1.001" expanded="true" height="82" name="Apply Model" width="90" x="112" y="34">


    <operator activated="true" class="performance_regression" compatibility="8.1.001" expanded="true" height="82" name="Performance" width="90" x="313" y="34"/>
    <connect from_port="model" to_op="Apply Model" to_port="model"/>
    <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
    <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
    <connect from_op="Performance" from_port="performance" to_port="averagable 1"/>








    <operator activated="true" class="apply_model" compatibility="8.1.001" expanded="true" height="82" name="Apply Model (2)" width="90" x="1117" y="289">


    <connect from_op="Generate Nominal Data" from_port="output" to_op="Generate ID" to_port="example set input"/>
    <connect from_op="Generate ID" from_port="example set output" to_op="Rename" to_port="example set input"/>
    <connect from_op="Rename" from_port="example set output" to_op="Set Role" to_port="example set input"/>
    <connect from_op="Set Role" from_port="example set output" to_op="Join" to_port="right"/>
    <connect from_op="Generate Sales Data" from_port="output" to_op="Date to Nominal" to_port="example set input"/>
    <connect from_op="Date to Nominal" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
    <connect from_op="Generate Attributes" from_port="example set output" to_op="monthly sales" to_port="example set input"/>
    <connect from_op="Generate Attributes" from_port="original" to_op="Join" to_port="left"/>
    <connect from_op="Join" from_port="join" to_op="Windowing" to_port="example set input"/>
    <connect from_op="Windowing" from_port="example set output" to_op="Validation" to_port="training"/>
    <connect from_op="Windowing" from_port="original" to_op="Windowing (2)" to_port="example set input"/>
    <connect from_op="Windowing (2)" from_port="example set output" to_op="Apply Model (2)" to_port="unlabelled data"/>
    <connect from_op="monthly sales" from_port="example set output" to_port="result 1"/>
    <connect from_op="Validation" from_port="model" to_op="Apply Model (2)" to_port="model"/>
    <connect from_op="Validation" from_port="averagable 1" to_port="result 2"/>
    <connect from_op="Apply Model (2)" from_port="labelled data" to_port="result 3"/>








Sign In or Register to comment.