/**
 * Two-level scraper: level 0 sends the query to a search engine and enqueues
 * the resulting links; any other level extracts text from a queued link.
 *
 * NOTE(review): `this.conf`, `this.logger`, `this.parser` and `this.query` are
 * not defined in this class — presumably injected by the host framework
 * before `parse` is called; confirm against the framework documentation.
 */
class Parser {
    constructor() {
        // Default settings; the preset keys are looked up dynamically in parseSE
        // as `<engine name with '::' replaced by '_'>_preset`.
        this.defaultConf = {
            version: '1.2.113',
            results: {
                flat: [
                    ['key', 'Keyword'],
                    ['link', 'Link'],
                    ['title', 'Title'],
                    ['text', 'Text'],
                ]
            },
            // `\\n` is kept as a literal backslash-n in the string (presumably
            // expanded by the host's result templating — TODO confirm).
            results_format: "<keyword>$key</keyword><link>$link</link>\\n<title>$title</title>\\n<content>$text</content>\\n\\n",
            search_engine: 'SE::Google::Modern',
            SE_Google_Modern_preset: 'default',
            SE_Yandex_preset: 'default',
            HTML_TextExtractor_preset: 'default',
        };
        // Settings exposed for editing: [confKey, [widget, label, ...options]].
        this.editableConf = [
            ['SE_Google_Modern_preset', ['combobox', 'SE::Google::Modern preset']],
            ['SE_Yandex_preset', ['combobox', 'SE::Yandex preset']],
            ['HTML_TextExtractor_preset', ['combobox', 'HTML::TextExtractor preset']],
            ['search_engine', ['combobox', 'Search engine', ['SE::Google::Modern', 'GoogleModern'], ['SE::Yandex', 'Yandex']]],
        ];
    }

    /**
     * Entry point: dispatches on the query level.
     *
     * @param {object} set - Query descriptor; reads `lvl`, `query`, `first`.
     * @param {object} results - Result object that is filled in and returned.
     * @returns {object} The same `results` object.
     */
    *parse(set, results) {
        // Loose equality kept on purpose: the type of `set.lvl` supplied by the
        // host is not visible here, so '0' and 0 are both treated as level 0.
        const worker = set.lvl == 0
            ? this.parseSE(set, results)
            : this.parseText(set, results);
        return yield* worker;
    }

    /**
     * Level 0: queries the configured search engine and enqueues found links.
     *
     * @param {object} set - Query descriptor; `set.query` is the search phrase.
     * @param {object} results - Receives `success` (and `SKIP` on success).
     * @returns {object} The same `results` object.
     */
    *parseSE(set, results) {
        const engine = this.conf.search_engine;
        this.logger.put(`Start scraping query: ${set.query}`);
        this.logger.put(`Use ${engine}`);

        // Replace EVERY '::' so that 'SE::Google::Modern' maps to the
        // 'SE_Google_Modern_preset' key (a non-global regex only replaced the
        // first separator and produced a key that does not exist).
        const presetKey = `${engine.replace(/::/g, '_')}_preset`;
        const SEresp = yield this.parser.request(engine, this.conf[presetKey], set.query);

        if (!SEresp.info.success) {
            this.logger.put(`Error scraping ${engine}`);
            results.success = 0;
            return results;
        }

        // Assumes `serp` is a flat array in which each result occupies `step`
        // consecutive entries with the link first — TODO confirm against the
        // search-engine scrapers' output format.
        const step = engine === 'SE::Google::Modern' ? 3 : 6;
        let linkCount = 0;
        for (let i = 0; i < SEresp.serp.length; i += step) {
            this.query.add(SEresp.serp[i]);
            linkCount += 1;
        }
        this.logger.put(`Added ${linkCount} links`);
        results.success = 1;
        results.SKIP = 1;
        return results;
    }

    /**
     * Level > 0: extracts text blocks and the page title from a link.
     *
     * @param {object} set - Query descriptor; `set.query` is the URL and
     *   `set.first` is stored as the originating keyword.
     * @param {object} results - Receives `key`, `link`, `success` and, on
     *   success, `text` and `title`.
     * @returns {object} The same `results` object.
     */
    *parseText(set, results) {
        this.logger.put(`Scraping text from ${set.query}`);
        const response = yield this.parser.request(
            'HTML::TextExtractor',
            this.conf.HTML_TextExtractor_preset,
            set.query
        );
        results.key = set.first;
        results.link = set.query;

        if (!response.info.success) {
            this.logger.put(`${set.query} - error scraping text`);
            results.success = 0;
            return results;
        }

        results.text = response.texts.join("\n");
        // Pages without a (single-line) <title> yield no match; fall back to
        // an empty title instead of throwing on `null[1]`.
        const titleMatch = response.data.match(/<title>\s*(.+?)\s*<\/title>/);
        results.title = titleMatch ? titleMatch[1] : '';
        this.logger.put(`${set.query} - ${response.texts.length} blocks, ${results.text.length} chars filename:${results.filename}`);
        results.success = 1;
        return results;
    }
}