/**
 * Two-level scraper: level 0 queries a search engine and enqueues result
 * links; every other level extracts article text from the queued link.
 *
 * The methods rely on `this.conf`, `this.logger`, `this.parser` and
 * `this.query`, none of which are created in this class — they are expected
 * to be attached by the host environment before `parse` is called.
 */
class Parser {
    /**
     * Declares the default configuration and the list of user-editable options.
     */
    constructor() {
        this.defaultConf = {
            version: '1.3.13',
            results: {
                flat: [
                    ['key', 'Keyword'],
                    ['link', 'Link'],
                    ['text', 'Text']
                ]
            },
            results_format: "$key - $link\\n$text\\n\\n",
            search_engine: 'SE::Google',
            SE_Google_preset: 'default',
            SE_Yandex_preset: 'default',
            HTML_ArticleExtractor_preset: 'default'
        };
        this.editableConf = [
            ['SE_Google_preset', ['combobox', 'SE::Google preset']],
            ['SE_Yandex_preset', ['combobox', 'SE::Yandex preset']],
            ['HTML_ArticleExtractor_preset', ['combobox', 'HTML::ArticleExtractor preset']],
            ['search_engine', ['combobox', 'Search engine', ['SE::Google', 'Google'], ['SE::Yandex', 'Yandex']]]
        ];
    }

    /**
     * Entry point: dispatches on the nesting level of the current query.
     *
     * @param {Object} set - Current task; `lvl`, `query` and `first` are read.
     * @param {Object} results - Result object that is filled in and returned.
     * @returns {Object} The same `results` object.
     */
    *parse(set, results) {
        // Loose comparison is kept on purpose: the type of `set.lvl` is not
        // visible here, so a value such as '0' must keep selecting the
        // search-engine stage.
        if (set.lvl == 0) {
            return yield* this.parseSE(set, results);
        }
        return yield* this.parseText(set, results);
    }

    /**
     * Runs the configured search engine for `set.query` and enqueues links
     * taken from the response's `serp` list via `this.query.add`.
     *
     * @param {Object} set - Current task; `query` is sent to the search engine.
     * @param {Object} results - Receives `success` (1/0) and, on success, `SKIP = 1`.
     * @returns {Object} The same `results` object.
     */
    *parseSE(set, results) {
        const engine = this.conf.search_engine;
        this.logger.put(`Use ${engine}`);

        // 'SE::Google' -> 'SE_Google_preset': name of the conf key holding the preset.
        const presetKey = engine.replace(/::/g, '_') + '_preset';
        const SEresp = yield this.parser.request(engine, this.conf[presetKey], set.query);

        if (!SEresp.info.success) {
            this.logger.put(`Error scraping ${engine}`);
            results.success = 0;
            return results;
        }

        // A successful response without a result list is treated as empty
        // instead of crashing on `.length`.
        const serp = SEresp.serp || [];
        // NOTE(review): presumably `serp` is a flat list with 3 fields per
        // result for Google and 6 for the other engine, the link being the
        // first field of each group — confirm against the engines' result layout.
        const step = engine === 'SE::Google' ? 3 : 6;

        let linkCount = 0;
        for (let i = 0; i < serp.length; i += step) {
            this.query.add(serp[i]);
            linkCount += 1;
        }

        this.logger.put(`Added ${linkCount} links`);
        results.success = 1;
        results.SKIP = 1;
        return results;
    }

    /**
     * Requests article extraction for the link in `set.query` and stores the
     * keyword, link and extracted content in `results`.
     *
     * @param {Object} set - Current task; `query` is the link, `first` the keyword.
     * @param {Object} results - Receives `key`, `link`, `success` and, on success, `text`.
     * @returns {Object} The same `results` object.
     */
    *parseText(set, results) {
        this.logger.put(`Scraping text from ${set.query}`);
        const response = yield this.parser.request(
            'HTML::ArticleExtractor',
            this.conf.HTML_ArticleExtractor_preset,
            set.query
        );

        results.key = set.first;
        results.link = set.query;

        if (!response.info.success) {
            this.logger.put(`${set.query} - error scraping text`);
            results.success = 0;
            return results;
        }

        results.text = response.content;

        // The counters are diagnostics only: a response lacking `texts` or
        // `content` must not turn a successful scrape into a TypeError.
        const blockCount = response.texts ? response.texts.length : 0;
        const charCount = results.text == null ? 0 : results.text.length;
        this.logger.put(`${set.query} - ${blockCount} blocks, ${charCount} chars`);

        results.success = 1;
        return results;
    }
}