Attribute already present - Extract Cluster Prototypes
missbucherle
Member Posts: 1 Newbie
in Help
Hi there !
I am still very new to RapidMiner without any background in analytics at all. So this is probably a stupid question - still I would very much appreciate your help!
For my thesis I want to text-mine and cluster a series of documents. (In the end I want to create a bubble diagram with different clusters of the variables discussed in the source files.)
I created the attached process (according to an approach in a similar thesis) but I am struggling with the operator Extract Cluster Prototypes.
The following error is shown:
Any help about how to solve that error will be greatly appreciated.
Thank you in advance !!
The process looks like this:
I am still very new to RapidMiner without any background in analytics at all. So this is probably a stupid question - still I would very much appreciate your help!
For my thesis I want to text-mine and cluster a series of documents. (In the end I want to create a bubble diagram with different clusters of the variables discussed in the source files.)
I created the attached process (according to an approach in a similar thesis) but I am struggling with the operator Extract Cluster Prototypes.
The following error is shown:
Any help about how to solve that error will be greatly appreciated.
Thank you in advance !!
The process looks like this:
<operator activated="true" class="process" compatibility="9.5.001" expanded="true" name="Process">
<parameter key="logverbosity" value="init"/>
<parameter key="random_seed" value="2001"/>
<parameter key="send_mail" value="never"/>
<parameter key="notification_email" value=""/>
<parameter key="process_duration_for_mail" value="30"/>
<parameter key="encoding" value="SYSTEM"/>
<operator activated="true" class="text:process_document_from_file" compatibility="8.2.000" expanded="true" height="82" name="Process Documents from Files" width="90" x="45" y="34">
<parameter key="test" value="C:\Users\anchb\Desktop\MA\Literatur\Neuer Ordner"/>
<parameter key="file_pattern" value="*"/>
<parameter key="extract_text_only" value="true"/>
<parameter key="use_file_extension_as_type" value="true"/>
<parameter key="content_type" value="txt"/>
<parameter key="encoding" value="SYSTEM"/>
<parameter key="create_word_vector" value="true"/>
<parameter key="vector_creation" value="Term Frequency"/>
<parameter key="add_meta_information" value="true"/>
<parameter key="keep_text" value="false"/>
<parameter key="prune_method" value="percentual"/>
<parameter key="prune_below_percent" value="20.0"/>
<parameter key="prune_above_percent" value="100.0"/>
<parameter key="prune_below_rank" value="0.05"/>
<parameter key="prune_above_rank" value="0.95"/>
<parameter key="datamanagement" value="double_sparse_array"/>
<parameter key="data_management" value="auto"/>
<operator activated="true" class="text:tokenize" compatibility="8.2.000" expanded="true" height="68" name="Tokenize" width="90" x="45" y="34">
<parameter key="mode" value="non letters"/>
<parameter key="characters" value=".:"/>
<parameter key="language" value="English"/>
<parameter key="max_token_length" value="3"/>
<operator activated="true" class="text:tokenize" compatibility="8.2.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="45" y="136">
<parameter key="mode" value="linguistic sentences"/>
<parameter key="characters" value=".:"/>
<parameter key="language" value="English"/>
<parameter key="max_token_length" value="3"/>
<operator activated="true" class="text:transform_cases" compatibility="8.2.000" expanded="true" height="68" name="Transform Cases" width="90" x="45" y="238">
<parameter key="transform_to" value="lower case"/>
<operator activated="true" class="text:filter_stopwords_english" compatibility="8.2.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="179" y="238"/>
<operator activated="true" class="text:stem_porter" compatibility="8.2.000" expanded="true" height="68" name="Stem (Porter)" width="90" x="313" y="238"/>
<operator activated="true" class="text:filter_by_length" compatibility="8.2.000" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="447" y="238">
<parameter key="min_chars" value="4"/>
<parameter key="max_chars" value="25"/>
<operator activated="true" class="text:generate_n_grams_terms" compatibility="8.2.000" expanded="true" height="68" name="Generate n-Grams (Terms)" width="90" x="447" y="136">
<parameter key="max_length" value="2"/>
<operator activated="true" class="text:wordlist_to_data" compatibility="8.2.000" expanded="true" height="82" name="WordList to Data" width="90" x="179" y="340"/>
<operator activated="true" class="write_excel" compatibility="9.5.001" expanded="true" height="103" name="Write Excel (5)" width="90" x="313" y="340">
<parameter key="excel_file" value="C:\Users\anchb\Desktop\MA\RapidMiner\Test.xlsx"/>
<parameter key="file_format" value="xlsx"/>
<parameter key="sheet_name" value="RapidMiner Data"/>
<parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
<parameter key="number_format" value="#.0"/>
<parameter key="encoding" value="SYSTEM"/>
<operator activated="true" class="multiply" compatibility="9.5.001" expanded="true" height="103" name="Multiply" width="90" x="179" y="34"/>
<operator activated="true" class="concurrency:k_means" compatibility="9.5.001" expanded="true" height="82" name="Clustering" width="90" x="313" y="136">
<parameter key="add_cluster_attribute" value="true"/>
<parameter key="add_as_label" value="false"/>
<parameter key="remove_unlabeled" value="false"/>
<parameter key="k" value="5"/>
<parameter key="max_runs" value="10"/>
<parameter key="determine_good_start_values" value="false"/>
<parameter key="measure_types" value="NumericalMeasures"/>
<parameter key="mixed_measure" value="MixedEuclideanDistance"/>
<parameter key="nominal_measure" value="NominalDistance"/>
<parameter key="numerical_measure" value="CosineSimilarity"/>
<parameter key="divergence" value="SquaredEuclideanDistance"/>
<parameter key="kernel_type" value="radial"/>
<parameter key="kernel_gamma" value="1.0"/>
<parameter key="kernel_sigma1" value="1.0"/>
<parameter key="kernel_sigma2" value="0.0"/>
<parameter key="kernel_sigma3" value="2.0"/>
<parameter key="kernel_degree" value="3.0"/>
<parameter key="kernel_shift" value="1.0"/>
<parameter key="kernel_a" value="1.0"/>
<parameter key="kernel_b" value="0.0"/>
<parameter key="max_optimization_steps" value="100"/>
<parameter key="use_local_random_seed" value="false"/>
<parameter key="local_random_seed" value="1992"/>
<operator activated="true" class="multiply" compatibility="9.5.001" expanded="true" height="103" name="Multiply (2)" width="90" x="447" y="136"/>
<operator activated="true" class="extract_prototypes" compatibility="9.5.001" expanded="true" height="82" name="Extract Cluster Prototypes" width="90" x="581" y="136"/>
<operator activated="true" class="write_excel" compatibility="9.5.001" expanded="true" height="103" name="Write Excel (2)" width="90" x="715" y="136">
<parameter key="excel_file" value="C:\Users\anchb\Desktop\MA\RapidMiner\Microsoft Excel-Arbeitsblatt (neu) (4).xlsx"/>
<parameter key="file_format" value="xlsx"/>
<parameter key="sheet_name" value="RapidMiner Data"/>
<parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
<parameter key="number_format" value="#.0"/>
<parameter key="encoding" value="SYSTEM"/>
<operator activated="true" class="cluster_distance_performance" compatibility="9.5.001" expanded="true" height="103" name="Performance" width="90" x="581" y="238">
<parameter key="main_criterion" value="Avg. within centroid distance"/>
<parameter key="main_criterion_only" value="false"/>
<parameter key="normalize" value="false"/>
<parameter key="maximize" value="false"/>
<operator activated="true" class="write_excel" compatibility="9.5.001" expanded="true" height="103" name="Write Excel (4)" width="90" x="581" y="340">
<parameter key="excel_file" value="C:\Users\anchb\Desktop\MA\RapidMiner\Microsoft Excel-Arbeitsblatt (neu) (2).xlsx"/>
<parameter key="file_format" value="xlsx"/>
<parameter key="sheet_name" value="RapidMiner Data"/>
<parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
<parameter key="number_format" value="#.0"/>
<parameter key="encoding" value="SYSTEM"/>
<operator activated="true" class="performance_to_data" compatibility="9.5.001" expanded="true" height="82" name="Performance to Data" width="90" x="715" y="238"/>
<operator activated="true" class="write_excel" compatibility="9.5.001" expanded="true" height="103" name="Write Excel (3)" width="90" x="715" y="340">
<parameter key="excel_file" value="C:\Users\anchb\Desktop\MA\RapidMiner\Microsoft Excel-Arbeitsblatt (neu) (3).xlsx"/>
<parameter key="file_format" value="xlsx"/>
<parameter key="sheet_name" value="RapidMiner Data"/>
<parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
<parameter key="number_format" value="#.0"/>
<parameter key="encoding" value="SYSTEM"/>
<operator activated="true" class="data_to_similarity" compatibility="9.5.001" expanded="true" height="82" name="Data to Similarity" width="90" x="313" y="34">
<parameter key="measure_types" value="MixedMeasures"/>
<parameter key="mixed_measure" value="MixedEuclideanDistance"/>
<parameter key="nominal_measure" value="NominalDistance"/>
<parameter key="numerical_measure" value="EuclideanDistance"/>
<parameter key="divergence" value="GeneralizedIDivergence"/>
<parameter key="kernel_type" value="radial"/>
<parameter key="kernel_gamma" value="1.0"/>
<parameter key="kernel_sigma1" value="1.0"/>
<parameter key="kernel_sigma2" value="0.0"/>
<parameter key="kernel_sigma3" value="2.0"/>
<parameter key="kernel_degree" value="3.0"/>
<parameter key="kernel_shift" value="1.0"/>
<parameter key="kernel_a" value="1.0"/>
<parameter key="kernel_b" value="0.0"/>
<operator activated="true" class="similarity_to_data" compatibility="9.5.001" expanded="true" height="82" name="Similarity to Data" width="90" x="447" y="34">
<parameter key="table_type" value="long_table"/>
<operator activated="true" class="write_excel" compatibility="9.5.001" expanded="true" height="103" name="Write Excel" width="90" x="581" y="34">
<parameter key="excel_file" value="C:\Users\anchb\Desktop\MA\RapidMiner\Microsoft Excel-Arbeitsblatt (neu).xlsx"/>
<parameter key="file_format" value="xlsx"/>
<parameter key="sheet_name" value="RapidMiner Data"/>
<parameter key="date_format" value="yyyy-MM-dd HH:mm:ss"/>
<parameter key="number_format" value="#.0"/>
<parameter key="encoding" value="SYSTEM"/>
1
Answers