"Problem with classifier using SVM, vector/ exampleset"
Hello, I'm new to RapidMiner and I have problems with my application.
I want to build a simple two-class text classifier with an SVM, and then develop it further.
I may not know how to build an example set from a word vector...
PS: I bought the "How to extend RapidMiner" paper, but I only need to use existing methods.
I have read "Integrating RapidMiner into your application".
I have read other posts on the forum (though I may have missed something).
If the "ExamplesetWriter" and "Process files from documents" components can transform a word vector into an example set, why can't I do the same in Java?
I hope for some quick answers on what I should do.
Thanks
i get the msg on console:
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import edu.udo.cs.wvtool.config.WVTConfiguration;
import edu.udo.cs.wvtool.config.WVTConfigurationFact;
import edu.udo.cs.wvtool.config.WVTConfigurationRule;
import edu.udo.cs.wvtool.generic.output.WVTOutputFilter;
import edu.udo.cs.wvtool.generic.output.WordVectorWriter;
import edu.udo.cs.wvtool.generic.stemmer.DummyStemmer;
import edu.udo.cs.wvtool.generic.stemmer.LovinsStemmerWrapper;
import edu.udo.cs.wvtool.generic.stemmer.PorterStemmerWrapper;
import edu.udo.cs.wvtool.generic.stemmer.WVTStemmer;
import edu.udo.cs.wvtool.generic.tokenizer.NGramTokenizer;
import edu.udo.cs.wvtool.generic.tokenizer.WVTTokenizer;
import edu.udo.cs.wvtool.generic.vectorcreation.TFIDF;
import edu.udo.cs.wvtool.main.WVTDocumentInfo;
import edu.udo.cs.wvtool.main.WVTFileInputList;
import edu.udo.cs.wvtool.main.WVTWordVector;
import edu.udo.cs.wvtool.main.WVTool;
import edu.udo.cs.wvtool.wordlist.WVTWordList;
import com.rapidminer.*;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.ModelApplier;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorChain;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.Learner;
import com.rapidminer.operator.learner.functions.kernel.LibSVMLearner;
import com.rapidminer.tools.OperatorService;
import com.rapidminer.tools.Ontology;
/**
* An example program on how to use the word vector tool.
*
*@authorMichael Wurst
*@version$Id$
*
*/
public class Test {
/*
public void SVMLearner(){
Learner learner = (Learner)OperatorService.createOperator(LibSVMLearner.class);
learner.
//((Operator)learner).setParameter(LibSVMLearner.PARAMETER_SVM_TYPE, new Integer(LibSVMLearner.SVM_TYPE_C_SVC).toString());
//((Operator)learner).setParameter(LibSVMLearner.PARAMETER_KERNEL_TYPE, "0");//linear
//((Operator)learner).setParameter(LibSVMLearner.PARAMETER_EPSILON, "0.001");
//((Operator)learner).setParameter(LibSVMLearner.PARAMETER_C, "0.0");
//((Operator)learner).setParameter(LibSVMLearner.PARAMETER_P, "0.1");
//((Operator)learner).setParameter(LibSVMLearner.PARAMETER_CONFIDENCE_FOR_MULTICLASS, "true");
model = learner.learn("c:/hjh.txt");
}*/
public static void main(String[] args) throws Exception {
//set properties to point to plugin directory
String pluginDirString = new File("D:\\Data\\Software\\Instalations\\RapidMiner5\\lib\\plugins").getAbsolutePath();
System.setProperty(RapidMiner.PROPERTY_RAPIDMINER_INIT_PLUGINS_LOCATION, pluginDirString);
//EXAMPLE HOW TO CALL THE PROGRAM FROM JAVA
//Initialize the WVTool
WVTool wvt = new WVTool(false);
//Initialize the configuration
WVTConfiguration config = new WVTConfiguration();
final WVTStemmer dummyStemmer = new DummyStemmer();
final WVTStemmer porterStemmer = new PorterStemmerWrapper();
//mine->
//final WVTTokenizer pop = new NGramTokenizer(2,fdsa);
//<-mine , oare am pus bine acelasi wvtokenizer
config.setConfigurationRule(WVTConfiguration.STEP_STEMMER, new WVTConfigurationRule() {
public Object getMatchingComponent(WVTDocumentInfo d) {
if (d.getContentLanguage().equals("english"))
return porterStemmer;
else
return dummyStemmer;
}
});
WVTStemmer stemmer = new LovinsStemmerWrapper();
config.setConfigurationRule(WVTConfiguration.STEP_STEMMER, new WVTConfigurationFact(stemmer));
//Initialize the input list with two classes
WVTFileInputList list = new WVTFileInputList(2);
//Add entries
list.addEntry(new WVTDocumentInfo("D:/CrawOut/txt_sentoken/pos", "txt", "", "english", 0));
list.addEntry(new WVTDocumentInfo("D:/CrawOut/txt_sentoken/neg", "txt", "", "english", 1));
//Generate the word list
WVTWordList wordList = wvt.createWordList(list, config);
//Prune the word list
wordList.pruneByFrequency(2, 5);
//Alternativ I: read an already created word list from a file
//WVTWordList wordList2 =
//new WVTWordList(new FileReader("/home/wurst/tmp/wordlisttest.txt"));
//Alternative II: Use predifined dimensions
//List dimensions = new Vector();
//dimensions.add("atheist");
//dimensions.add("christian");
//wordList =
//wvt.createWordList(list, config, dimensions, false);
//Store the word list in a file
wordList.storePlain(new FileWriter("d:/CrawOut/wordlist.txt"));
//WordList sd;
//Create the word vectors
//Set up an output filter (write sparse vectors to a file)
FileWriter outFile = new FileWriter("d:/CrawOut/wv.txt");
WordVectorWriter wvw = new WordVectorWriter(outFile, true);
config.setConfigurationRule(WVTConfiguration.STEP_OUTPUT, new WVTConfigurationFact(wvw));
config.setConfigurationRule(WVTConfiguration.STEP_VECTOR_CREATION, new WVTConfigurationFact(new TFIDF()));
WVTWordVector s;
//Create the vectors
wvt.createVectors(list, config, wordList);
Learner learner = (Learner)OperatorService.createOperator(LibSVMLearner.class);
((Operator)learner).setParameter(LibSVMLearner.PARAMETER_SVM_TYPE, new Integer(LibSVMLearner.SVM_TYPE_C_SVC).toString());
((Operator)learner).setParameter(LibSVMLearner.PARAMETER_KERNEL_TYPE, "0");//linear
((Operator)learner).setParameter(LibSVMLearner.PARAMETER_EPSILON, "0.001");
((Operator)learner).setParameter(LibSVMLearner.PARAMETER_C, "0.0");
((Operator)learner).setParameter(LibSVMLearner.PARAMETER_P, "0.1");
((Operator)learner).setParameter(LibSVMLearner.PARAMETER_CONFIDENCE_FOR_MULTICLASS, "true");
ExampleSet exampleSets;
exampleSets = (ExampleSet) wvw ;
Model model = learner.learn(exampleSets);
//Close the output file
wvw.close();
outFile.close();
//Just for demonstration: Create a vector from a String
WVTWordVector q = wvt.createVector("cmu harvard net", wordList);
}
}
/*import edu.udo.*;
import edu.udo.cs.wvtool.main.WVTDocumentInfo;
import edu.udo.cs.wvtool.main.WVTFileInputList;
public class Test {
public static void main(String[] args){
WVTFileInputList list = new WVTFileInputList(2);
list.addEntry(
new WVTDocumentInfo("D:/CrawOut/txt_sentoken/pos",
"txt","","english",0));
list.addEntry(
new WVTDocumentInfo("D:/CrawOut/txt_sentoken/neg",
"txt","","english",1));
list.
}
}
*/
Exception in thread "main" com.rapidminer.operator.OperatorCreationException: No operator description object given for 'com.rapidminer.operator.learner.functions.kernel.LibSVMLearner'
at com.rapidminer.tools.OperatorService.createOperator(OperatorService.java:564)
at Test.main(Test.java:137)
Tagged:
0
Answers
In my opinion, I have some problems with the cast from word vector to example set, but I don't know what else to do.
and i have this output:
First, make sure to call this code fragment before using RapidMiner [the code fragment is missing from the post]; otherwise you will not be able to use RapidMiner. Regarding your second post: you are catching an Exception without printing any output, so nobody can know what went wrong.
However, you cannot cast from a File to an ExampleSet; that cannot work.
Use this (you will need to adapt the method to actually get the InputStream, either by changing the location of the file or by creating an InputStream from your d:\CrawOut\Exampleset.ioo file yourself): Regards,
Marco