Skip to content

Commit 2e32685

Browse files
2024-09-18-roberta_fine_tuned_text_classification_slovene_data_augmentation_pipeline_en (#14403)
* Add model 2024-09-17-distilbert_base_uncased_finetuned_squadv2_en * Add model 2024-09-13-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_abelll_pipeline_en * Add model 2024-09-19-xlm_roberta_base_finetuned_panx_french_aaa01101312_pipeline_en * Add model 2024-09-16-burmese_awesome_qa_model_dianechiang_en * Add model 2024-09-17-distilbert_uncased_assamese_hungarian_f1_score_pipeline_en * Add model 2024-09-19-whisper_jrb_small_tamil_en * Add model 2024-09-17-whisper_small_ndonga_en * Add model 2024-09-18-xlm_roberta_base_finetuned_panx_english_jbreunig_en * Add model 2024-09-18-xlm_roberta_base_finetuned_panx_english_jamie613_en * Add model 2024-09-19-xlm_roberta_base_xnli_english_trimmed_english_60000_en * Add model 2024-09-19-xlm_roberta_base_single_finetuned_on_cedr_augmented_en * Add model 2024-09-19-xlm_roberta_base_single_finetuned_on_cedr_augmented_pipeline_en * Add model 2024-09-19-xlm_roberta_base_final_vietnam_aug_backtranslation_2_en * Add model 2024-09-18-xlm_roberta_base_xnli_french_trimmed_french_15000_pipeline_en * Add model 2024-09-19-xlm_roberta_base_lr0_001_seed42_esp_kinyarwanda_eng_train_pipeline_en * Add model 2024-09-17-whisper_small_english_0327_pipeline_en * Add model 2024-09-19-facebook_commet_classification_base_en * Add model 2024-09-19-facebook_commet_classification_base_pipeline_en * Add model 2024-09-19-xlm_roberta_base_trimmed_arabic_10000_xnli_arabic_pipeline_en * Add model 2024-09-19-korean_clickbait_news_classifier_xlm_roberta_base_pipeline_en * Add model 2024-09-19-rulebert_v0_3_k4_it * Add model 2024-09-17-dipromats_subtask_1_base_train_pipeline_en * Add model 2024-09-19-xlm_roberta_base_trimmed_arabic_10000_xnli_arabic_en * Add model 2024-09-17-burmese_translation_helsinki2_en * Add model 2024-09-19-scenario_non_kd_po_copy_cdf_english_d2_data_english_cardiff_eng_only_gamma_en * Add model 2024-09-19-scenario_non_kd_po_copy_cdf_english_d2_data_english_cardiff_eng_only_gamma_pipeline_en * Add model 
2024-09-17-finetuning_en * Add model 2024-09-10-burmese_awesome_model_waniafatima_en * Add model 2024-09-07-sent_hing_bert_hi * Add model 2024-09-19-whisper_tiny_1000_diverse_audios_pipeline_en * Add model 2024-09-18-xlm_roberta_base_nepal_bhasa_vietnam_aug_replace_w2v_1_pipeline_en * Add model 2024-09-18-svm_model_pipeline_en * Add model 2024-09-12-roberta_base_squad2_finetuned_covid2_en * Add model 2024-09-15-babyberta_aochildes_french_aochildes_2_5m_with_masking_finetuned_squad_en * Add model 2024-09-18-distilbert_base_uncased_odm_zphr_0st42sd_ut72ut1large43pfxnf_simsp_pipeline_en * Add model 2024-09-17-xlm_roberta_base_finetuned_panx_all_rupe_pipeline_en * Add model 2024-09-16-marian_finetuned_kde4_english_tonga_tonga_islands_french_willherbert27_en * Add model 2024-09-09-babyberta_wikipedia_french_aochildes_french_without_masking_seed6_finetuned_squad_en * Add model 2024-09-18-hate_hate_balance_random2_seed0_bernice_pipeline_en * Add model 2024-09-07-dummy_model_jfforero_pipeline_en * Add model 2024-09-16-finetuned_marianmtmodel_razxr_en * Add model 2024-09-18-roberta_base_ours_rundi_5_pipeline_en * Add model 2024-09-15-fernet_news_cs * Add model 2024-09-19-roberta_combined_generated_v1_1_en * Add model 2024-09-15-whisper_small_vietnamese_hi * Add model 2024-09-17-distilbert_base_uncased_finetuned_clinc_adriana213_en * Add model 2024-09-17-roberta_base_epoch_80_en * Add model 2024-09-18-bert_finetuned_ner_adigo_pipeline_en * Add model 2024-09-19-xlm_roberta_base_finetuned_panx_english_hhffxx_en * Add model 2024-09-10-takalane_sot_roberta_pipeline_en * Add model 2024-09-11-ternary_persian_sentiment_analysis_en * Add model 2024-09-16-kinyaroberta_large_kinte_finetuned_kinyarwanda_sent1_en * Add model 2024-09-18-trial_model_alexsaadfalcon_pipeline_en * Add model 2024-09-19-roberta_large_bne_socialdisner_es * Add model 2024-09-19-afriberta_small_finetuned_hausa_2e_3_pipeline_en * Add model 
2024-09-17-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_jakeyunwookim_pipeline_en * Add model 2024-09-08-dummy_model____3_pipeline_en * Add model 2024-09-17-bidirection_translate_model_error_v0_4_pipeline_en * Add model 2024-09-16-bsc_bio_ehr_spanish_vih_10k_pipeline_en * Add model 2024-09-12-quran_whisper_tiny_v1_pipeline_ar * Add model 2024-09-16-distilroberta_topic_classification_5_pipeline_en * Add model 2024-09-17-opus_maltese_ft_2_en * Add model 2024-09-06-setfit_model_test_2_pipeline_en * Add model 2024-09-13-finetuning_insult_model_deberta_pipeline_en * Add model 2024-09-17-distilbert_base_uncased_finetuned_emotion_small_sets_pipeline_en * Add model 2024-09-11-cdgp_chilean_sign_language_roberta_dgen_pipeline_en * Add model 2024-09-17-distilbert_base_uncased_finetuned_squad_faliha_pipeline_en * Add model 2024-09-18-distilbert_base_uncased_distilled_optim_clinc_en * Add model 2024-09-19-burmese_awesome_model_yjoonjang_en * Add model 2024-09-14-marian_finetuned_kde4_korean_tonga_tonga_islands_english_pipeline_en * Add model 2024-09-19-multilingual_intent_classifier_final_distilbert_pipeline_xx * Add model 2024-09-18-distilbert_base_uncased_finetuned_emotion_tagch_pipeline_en * Add model 2024-09-18-opensesame_pipeline_en * Add model 2024-09-17-distilbert_base_uncased_finetuned_transcripts_calls_avitalby_pipeline_en * Add model 2024-09-19-psychbert_finetuned_multiclass_en * Add model 2024-09-16-opus_maltese_indonesian_english_open_subtitles_pipeline_en * Add model 2024-09-17-distilled_bert_finetuned_squad_en * Add model 2024-09-17-bert_gemma2b_sanity_vllm_0_en * Add model 2024-09-15-minilmv2_l6_h768_from_roberta_large_mrqa_en * Add model 2024-09-18-bert_base_uncased_finetuned_squad_summerzhang_en * Add model 2024-09-14-roberta_base_finetune_subjqa_pipeline_en * Add model 2024-09-18-distilbert_base_uncased_finetuned_emotion_lxlinghu_pipeline_en * Add model 2024-09-17-whisper_tiny_english_fine_tuned_en * Add model 
2024-09-17-maltese_coref_english_german_gender_exp_en * Add model 2024-09-10-phil_sim_sentence_transformers_all_mpnet_base_v2_2024_03_11_21_44_34_pipeline_en * Add model 2024-09-08-test_model_000_en * Add model 2024-09-13-burmese_awesome_qa_model_koustavhazra_en * Add model 2024-09-17-distilbert_base_uncased_fake_news_checker_en * Add model 2024-09-17-tiny_english_combined_v4_4_0_32_1e_05_helpful_sweep_30_pipeline_en * Add model 2024-09-07-named_entity_recognition_en * Add model 2024-09-12-opus_maltese_english_romanian_finetuned_english_tonga_tonga_islands_romanian_ktadzjibov_en * Add model 2024-09-16-roberta_base_mnli_2_labels_pipeline_en * Add model 2024-09-10-sentence_transformers_all_mpnet_base_v2_10epoch_100perp_cosine_en * Add model 2024-09-18-distilroberta_base_rb156k_ep40_pipeline_en * Add model 2024-09-17-distil_bert_fintuned_issue_cfpb_complaints_pipeline_en * Add model 2024-09-19-team7_pipeline_en * Add model 2024-09-15-whisper_tiny_kor_430k_hf_ep100_en * Add model 2024-09-11-distilbert_base_multilingual_cased_qa_squad_v1_norwegian_bokml_xx * Add model 2024-09-03-pii_detection_roberta_v2_en * Add model 2024-09-08-electra_classifier_korean_senti_1_pipeline_ko * Add model 2024-09-13-roberta_large_squad2_fine_tuned_7e_pipeline_en * Add model 2024-09-17-distilbert_base_uncased_finetuned_squad_tsipi_en * Add model 2024-09-15-prompt_injection_bert_pipeline_en * Add model 2024-09-18-distilbert_base_uncased_finetuned_emotion_runelune_en * Add model 2024-09-10-test_story_en * Add model 2024-09-18-roberta_base_bne_finetuned_nepal_bhasa_oriya_used_title_gonchisi_en * Add model 2024-09-16-imdb_distilbert_funetuned_pipeline_en * Add model 2024-09-16-xlm_roberta_base_finetuned_panx_german_rakeshpardeshi25_en * Add model 2024-09-18-ooc_patch_v1_en * Add model 2024-09-18-finetuning_sentiment_analysis_asif1997_pipeline_en * Add model 2024-09-15-eli5_mlm_model_iliyaml_pipeline_en * Add model 
2024-09-09-marian_finetuned_kde4_english_tonga_tonga_islands_french_accelerate_den_sota_pipeline_en * Add model 2024-09-15-2020_q2_50p_filtered_combined90_pipeline_en * Add model 2024-09-17-burmese_awesome_qa_model_adalee1001_pipeline_en * Add model 2024-09-16-whisper_small_persian_farsi_benchmarkcentral_fa * Add model 2024-09-15-sent_bert_base_japanese_ssuw_ja * Add model 2024-09-18-results_ealeon16_en * Add model 2024-09-14-xlm_roberta_base_finetuned_panx_italian_hbtemari_en * Add model 2024-09-15-formalberta2_pipeline_en * Add model 2024-09-15-distilbert_finetuned_kai1014_en * Add model 2024-09-16-opus_maltese_romanian_french_finetuned_romanian_tonga_tonga_islands_rup_pipeline_en * Add model 2024-09-08-burmese_awesome_qa_model2_jvasdigital_pipeline_en * Add model 2024-09-19-distilbert_base_cased_distilled_squad_finetuned_squad_test3_pipeline_en * Add model 2024-09-17-extension_model_pipeline_en * Add model 2024-09-19-burmese_awesome_qa_model_colabdash_pipeline_en * Add model 2024-09-18-xlm_roberta_base_nepal_bhasa_vietnam_aug_insert_synonym_en * Add model 2024-09-19-xlm_roberta_base_finetuned_panx_english_yurit04_pipeline_en * Add model 2024-09-11-distilbert_sarcascm_classifier_pipeline_en * Add model 2024-09-13-stock_twitter_sentiment_bert_en * Add model 2024-09-18-roberta_tiny_2l_10m_en * Add model 2024-09-18-autotrain_bertindo_44746112628_en * Add model 2024-09-19-roberta_large_bne_socialdisner_pipeline_es * Add model 2024-09-17-chinese_roberta_wwm_ext_3_0_8_en * Add model 2024-09-19-burmese_awesome_model_adithya5243_pipeline_en * Add model 2024-09-18-xlmr_english_chinese_all_shuffled_764_test1000_pipeline_en * Add model 2024-09-19-sent_fintwitbert_en * Add model 2024-09-15-sent_german_financial_statements_bert_pipeline_de * Add model 2024-09-15-indonesian_hoax_classification_indobertweet_base_uncased_pipeline_en * Add model 2024-09-18-englishessay_scoring_lm_pipeline_en * Add model 2024-09-17-clasificador_onestop_english_en * Add model 
2024-09-19-bat32_lr10_5_epo10_warm20_en * Add model 2024-09-19-sent_bert_base_historic_dutch_cased_en * Add model 2024-09-19-sent_bert_base_historic_dutch_cased_pipeline_en * Add model 2024-09-19-phishing_email_detection_sender_pipeline_en * Add model 2024-09-16-translate_model_error_v0_4_gshields_pipeline_en * Add model 2024-09-17-turkishdistilbert_pipeline_en * Add model 2024-09-08-bislama_all_bs192_hardneg_finetuned_webnlg2020_data_coverage_pipeline_en * Add model 2024-09-19-bsc_bio_ehr_spanish_carmen_anon_pipeline_es * Add model 2024-09-19-bsc_bio_ehr_spanish_carmen_anon_es * Add model 2024-09-17-whisper_tiny_chinese_developerbws_pipeline_en * Add model 2024-09-19-roberta_large_switchboard_earnings21_non_normalized_en * Add model 2024-09-18-distilled_bert_finetuned_squadv2_pipeline_en * Add model 2024-09-19-multilingual_intent_classifier_final_distilbert_xx * Add model 2024-09-14-marathi_marh_val_g3_pipeline_mr * Add model 2024-09-17-mod_thai_cross_encoder_minilm_pipeline_en * Add model 2024-09-19-roberta_large_bne_ctebmsp_pipeline_es * Add model 2024-09-17-distilbert_base_uncased_finetuned_squad_akshaykumarcp_en * Add model 2024-09-13-deberta_v3_base_finetuned_mrpc_pipeline_en * Add model 2024-09-10-spea_0_en * Add model 2024-09-14-final_ft__roberta_base_biomedical_clinical_spanish__70k_ultrasounds_pipeline_en * Add model 2024-09-17-your_output_directory_tiennn_pipeline_en * Add model 2024-09-11-roberta_finetuned_subjqa_movies_2_quocc_pipeline_en * Add model 2024-09-19-cybert_our_data_pipeline_en * Add model 2024-09-19-nuner_v1_fewnerd_coarse_super_en * Add model 2024-09-19-roberta_large_finetuned_abbr_unfiltered_plod_pipeline_en * Add model 2024-09-19-bsc_bio_ehr_spanish_symptemist_fasttext_9_ner_en * Add model 2024-09-09-xlm_roberta_base_finetuned_panx_german_french_sungkwangjoong_pipeline_en * Add model 2024-09-10-twitter_roberta_base_finetuned_twitter_user_desc_pipeline_en * Add model 2024-09-17-qqp_distilled_bartlarge_cross_roberta_pipeline_en * Add model 
2024-09-19-roberta_conll_epoch_6_pipeline_en * Add model 2024-09-19-icf_domains_pipeline_nl * Add model 2024-09-16-results_metrics_distilbert_en * Add model 2024-09-18-custommodelv1c_isom5240_en * Add model 2024-09-18-autotrain_bertindo_44746112628_pipeline_en * Add model 2024-09-19-roberta_finetuned_ner_longforms_en * Add model 2024-09-17-cuenb_en * Add model 2024-09-18-hard_roberta_pipeline_en * Add model 2024-09-19-roberta_combined_generated_epoch_6_en * Add model 2024-09-19-finetune_output_pipeline_en * Add model 2024-09-17-distilbert_base_uncased_finetuned_squad_liorba_en * Add model 2024-09-17-cosmicroberta_en * Add model 2024-09-19-roberta_tagalog_base_ft_udpos213_nigerian_pidgin_pipeline_tl * Add model 2024-09-19-roberta_base_ner_demo_dizu1113_mn * Add model 2024-09-19-unibert_roberta_1_en * Add model 2024-09-19-experiment1_system1_roberta_base_finetuned_ner_pipeline_en * Add model 2024-09-16-opus_maltese_ft_5_en * Add model 2024-09-18-distilbert_base_uncased_distilled_clinc_tatsuya_n_en * Add model 2024-09-18-model_test_pipeline_en * Add model 2024-09-16-sinbert_large_pipeline_si * Add model 2024-09-18-mongolian_roberta_large_pipeline_en * Add model 2024-09-13-xlm_roberta_base_finetuned_panx_all_guroruseru_pipeline_en * Add model 2024-09-13-burmese_awesome_qa_model_cheapcoder_en * Add model 2024-09-19-bert_base_uncased_sijia_w_en * Add model 2024-09-19-bert_base_arabertv02_finetuned_sandouq_ar * Add model 2024-09-19-splade_v3_lexical_nirantk_en * Add model 2024-09-08-distilbert_base_uncased_org_address_question_answering_pipeline_en * Add model 2024-09-11-moralfoundationsclassifier_en * Add model 2024-09-13-lr1e4_bs8_distilbert_qa_pytorch_full_pipeline_en * Add model 2024-09-15-whisper_tiny_norwegian_faroese_100h_5k_steps_v2_en * Add model 2024-09-17-class_poems_spanish_pipeline_en * Add model 2024-09-11-babyberta_ochildes_2_5m_aochildes_french_without_masking_finetuned_squad_en * Add model 2024-09-17-burmese_awesome_qa_model_nada_ghazouani_en * Add model 
2024-09-18-roberta_finetuned_hate_speech_jigsaw_toxic_comments_pipeline_en * Add model 2024-09-19-twroberta_baseb_5epoch_pipeline_en * Add model 2024-09-19-roberta_base_task2_fact_updates_pipeline_en * Add model 2024-09-19-roberta_large_news_relevance_v1_en * Add model 2024-09-18-40_langdetect_v01_pipeline_en * Add model 2024-09-19-roberta_large_news_relevance_v1_pipeline_en * Add model 2024-09-19-roberta_base_finetuned_lower_fabric_pipeline_en * Add model 2024-09-18-regression_xlm_roberta_divemt_nld_en * Add model 2024-09-19-nerd_nerd_random1_seed1_twitter_roberta_large_2022_154m_pipeline_en * Add model 2024-09-19-emoji_emoji_random3_seed2_roberta_base_pipeline_en * Add model 2024-09-17-squad_mbert_english_german_spanish_vietnamese_chinese_model_pipeline_en * Add model 2024-09-19-jerteh355sentneg0_en * Add model 2024-09-19-roberta_large_sentiment_sst5_mapped_grouped_0_en * Add model 2024-09-19-jerteh355sentneg0_pipeline_en * Add model 2024-09-19-twroberta_baseb_3epoch_pipeline_en * Add model 2024-09-19-maria_ideologiamul_none_label_en * Add model 2024-09-19-maria_ideologiamul_none_label_pipeline_en * Add model 2024-09-19-tag_clf_pipeline_en * Add model 2024-09-19-citation_polarity_roberta_base_pipeline_en * Add model 2024-09-18-bpe_selfies_pubchem_shard00_50k_pipeline_en * Add model 2024-09-18-distilbert_lr_cosine_scheduler_en * Add model 2024-09-19-roberta_large_e2_noweight_en * Add model 2024-09-07-distillbert_finetuned_finer_4_v3_en * Add model 2024-09-17-finetuning_sentiment_model_3000_samples_hunnyopenxcell_pipeline_en * Add model 2024-09-18-sent_luxembert_en * Add model 2024-09-17-xlm_roberta_base_finetuned_panx_german_takapy_en * Add model 2024-09-19-roberta_sayula_popoluca_tagging_hosnahoseini_en * Add model 2024-09-19-roberta_large_switchboard_earnings21_non_normalized_pipeline_en * Add model 2024-09-18-finetuning_sentiment_model_3000_samples_sumittyagi25_pipeline_en * Add model 2024-09-19-bert_l10_h256_uncased_pipeline_en * Add model 
2024-09-19-artificial_languages_des_bert_large_cased_en * Add model 2024-09-18-xlm_roberta_base_nepal_bhasa_vietnam_aug_replace_w2v_1_en * Add model 2024-09-18-burmese_awesome_qa_model_ahmed13245_pipeline_en * Add model 2024-09-12-finetuning_sentiment_model_roberta_zijuncheng_en * Add model 2024-09-09-dummy_model_maunei_pipeline_en * Add model 2024-09-19-artificial_languages_des_bert_large_cased_pipeline_en * Add model 2024-09-19-roberta_base_ner_akramhec_en * Add model 2024-09-18-distilbert_base_uncased_finetuned_cola_zhihengjasontou_pipeline_en * Add model 2024-09-17-burmese_awesome_model_yeshiovo_en * Add model 2024-09-18-distilroberta_financial_sentiment_model_2500_samples_fine_tune_pipeline_en * Add model 2024-09-09-xlmroberta_ner_manqingliu_base_finetuned_panx_pipeline_de * Add model 2024-09-14-xlm_roberta_base_finetuned_panx_all_royam0820_pipeline_en * Add model 2024-09-18-byt_malurl_dr_b_pipeline_en * Add model 2024-09-18-trial_model_alexsaadfalcon_en * Add model 2024-09-09-opus_maltese_english_chinese_finetuned_0_tonga_tonga_islands_1_pipeline_en * Add model 2024-09-19-cat_ner_spanish_2_en * Add model 2024-09-19-test_finetuned__roberta_base_biomedical_clinical_spanish__59k_ultrasounds_ner_en * Add model 2024-09-16-whisper_small_divehi_jensg_pipeline_dv --------- Co-authored-by: ahmedlone127 <[email protected]>
1 parent ba5ac12 commit 2e32685

File tree

1,416 files changed

+114934
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,416 files changed

+114934
-0
lines changed
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
---
2+
layout: model
3+
title: English distilbert_base_uncased_finetuned_lgbt_classification_pipeline pipeline DistilBertForSequenceClassification from savinda99
4+
author: John Snow Labs
5+
name: distilbert_base_uncased_finetuned_lgbt_classification_pipeline
6+
date: 2024-09-02
7+
tags: [en, open_source, pipeline, onnx]
8+
task: Text Classification
9+
language: en
10+
edition: Spark NLP 5.5.0
11+
spark_version: 3.0
12+
supported: true
13+
annotator: PipelineModel
14+
article_header:
15+
type: cover
16+
use_language_switcher: "Python-Scala-Java"
17+
---
18+
19+
## Description
20+
21+
Pretrained DistilBertForSequenceClassification, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_lgbt_classification_pipeline` is an English model originally trained by savinda99.
22+
23+
{:.btn-box}
24+
<button class="button button-orange" disabled>Live Demo</button>
25+
<button class="button button-orange" disabled>Open in Colab</button>
26+
[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_lgbt_classification_pipeline_en_5.5.0_3.0_1725291881658.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
27+
[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_lgbt_classification_pipeline_en_5.5.0_3.0_1725291881658.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
28+
29+
## How to use
30+
31+
32+
33+
<div class="tabs-box" markdown="1">
34+
{% include programmingLanguageSelectScalaPythonNLU.html %}
35+
```python
36+
37+
pipeline = PretrainedPipeline("distilbert_base_uncased_finetuned_lgbt_classification_pipeline", lang = "en")
38+
annotations = pipeline.transform(df)
39+
40+
```
41+
```scala
42+
43+
val pipeline = new PretrainedPipeline("distilbert_base_uncased_finetuned_lgbt_classification_pipeline", lang = "en")
44+
val annotations = pipeline.transform(df)
45+
46+
```
47+
</div>
48+
49+
{:.model-param}
50+
## Model Information
51+
52+
{:.table-model}
53+
|---|---|
54+
|Model Name:|distilbert_base_uncased_finetuned_lgbt_classification_pipeline|
55+
|Type:|pipeline|
56+
|Compatibility:|Spark NLP 5.5.0+|
57+
|License:|Open Source|
58+
|Edition:|Official|
59+
|Language:|en|
60+
|Size:|249.5 MB|
61+
62+
## References
63+
64+
https://huggingface.co/savinda99/distilbert-base-uncased-finetuned-lgbt-classification
65+
66+
## Included Models
67+
68+
- DocumentAssembler
69+
- TokenizerModel
70+
- DistilBertForSequenceClassification
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
---
2+
layout: model
3+
title: English al_roberta_base_pipeline pipeline RoBertaEmbeddings from macedonizer
4+
author: John Snow Labs
5+
name: al_roberta_base_pipeline
6+
date: 2024-09-03
7+
tags: [en, open_source, pipeline, onnx]
8+
task: Embeddings
9+
language: en
10+
edition: Spark NLP 5.5.0
11+
spark_version: 3.0
12+
supported: true
13+
annotator: PipelineModel
14+
article_header:
15+
type: cover
16+
use_language_switcher: "Python-Scala-Java"
17+
---
18+
19+
## Description
20+
21+
Pretrained RoBertaEmbeddings, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `al_roberta_base_pipeline` is an English model originally trained by macedonizer.
22+
23+
{:.btn-box}
24+
<button class="button button-orange" disabled>Live Demo</button>
25+
<button class="button button-orange" disabled>Open in Colab</button>
26+
[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/al_roberta_base_pipeline_en_5.5.0_3.0_1725375613038.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
27+
[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/al_roberta_base_pipeline_en_5.5.0_3.0_1725375613038.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
28+
29+
## How to use
30+
31+
32+
33+
<div class="tabs-box" markdown="1">
34+
{% include programmingLanguageSelectScalaPythonNLU.html %}
35+
```python
36+
37+
pipeline = PretrainedPipeline("al_roberta_base_pipeline", lang = "en")
38+
annotations = pipeline.transform(df)
39+
40+
```
41+
```scala
42+
43+
val pipeline = new PretrainedPipeline("al_roberta_base_pipeline", lang = "en")
44+
val annotations = pipeline.transform(df)
45+
46+
```
47+
</div>
48+
49+
{:.model-param}
50+
## Model Information
51+
52+
{:.table-model}
53+
|---|---|
54+
|Model Name:|al_roberta_base_pipeline|
55+
|Type:|pipeline|
56+
|Compatibility:|Spark NLP 5.5.0+|
57+
|License:|Open Source|
58+
|Edition:|Official|
59+
|Language:|en|
60+
|Size:|311.7 MB|
61+
62+
## References
63+
64+
https://huggingface.co/macedonizer/al-roberta-base
65+
66+
## Included Models
67+
68+
- DocumentAssembler
69+
- TokenizerModel
70+
- RoBertaEmbeddings
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
---
2+
layout: model
3+
title: English pii_detection_roberta_v2 RoBertaForTokenClassification from zmilczarek
4+
author: John Snow Labs
5+
name: pii_detection_roberta_v2
6+
date: 2024-09-03
7+
tags: [en, open_source, onnx, token_classification, roberta, ner]
8+
task: Named Entity Recognition
9+
language: en
10+
edition: Spark NLP 5.5.0
11+
spark_version: 3.0
12+
supported: true
13+
engine: onnx
14+
annotator: RoBertaForTokenClassification
15+
article_header:
16+
type: cover
17+
use_language_switcher: "Python-Scala-Java"
18+
---
19+
20+
## Description
21+
22+
Pretrained RoBertaForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pii_detection_roberta_v2` is an English model originally trained by zmilczarek.
23+
24+
{:.btn-box}
25+
<button class="button button-orange" disabled>Live Demo</button>
26+
<button class="button button-orange" disabled>Open in Colab</button>
27+
[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pii_detection_roberta_v2_en_5.5.0_3.0_1725383976347.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
28+
[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pii_detection_roberta_v2_en_5.5.0_3.0_1725383976347.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
29+
30+
## How to use
31+
32+
33+
34+
<div class="tabs-box" markdown="1">
35+
{% include programmingLanguageSelectScalaPythonNLU.html %}
36+
```python
37+
38+
documentAssembler = DocumentAssembler() \
39+
.setInputCol('text') \
40+
.setOutputCol('document')
41+
42+
tokenizer = Tokenizer() \
43+
.setInputCols(['document']) \
44+
.setOutputCol('token')
45+
46+
tokenClassifier = RoBertaForTokenClassification.pretrained("pii_detection_roberta_v2","en") \
47+
.setInputCols(["document","token"]) \
48+
.setOutputCol("ner")
49+
50+
pipeline = Pipeline().setStages([documentAssembler, tokenizer, tokenClassifier])
51+
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
52+
pipelineModel = pipeline.fit(data)
53+
pipelineDF = pipelineModel.transform(data)
54+
55+
```
56+
```scala
57+
58+
val documentAssembler = new DocumentAssembler()
59+
.setInputCol("text")
60+
.setOutputCol("document")
61+
62+
val tokenizer = new Tokenizer()
63+
.setInputCols("document")
64+
.setOutputCol("token")
65+
66+
val tokenClassifier = RoBertaForTokenClassification.pretrained("pii_detection_roberta_v2", "en")
67+
.setInputCols(Array("document","token"))
68+
.setOutputCol("ner")
69+
70+
val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, tokenClassifier))
71+
val data = Seq("I love spark-nlp").toDS.toDF("text")
72+
val pipelineModel = pipeline.fit(data)
73+
val pipelineDF = pipelineModel.transform(data)
74+
75+
```
76+
</div>
77+
78+
{:.model-param}
79+
## Model Information
80+
81+
{:.table-model}
82+
|---|---|
83+
|Model Name:|pii_detection_roberta_v2|
84+
|Compatibility:|Spark NLP 5.5.0+|
85+
|License:|Open Source|
86+
|Edition:|Official|
87+
|Input Labels:|[document, token]|
88+
|Output Labels:|[ner]|
89+
|Language:|en|
90+
|Size:|450.6 MB|
91+
92+
## References
93+
94+
https://huggingface.co/zmilczarek/pii-detection-roberta-v2
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
---
2+
layout: model
3+
title: English sent_xlm_align XlmRoBertaSentenceEmbeddings from CZWin32768
4+
author: John Snow Labs
5+
name: sent_xlm_align
6+
date: 2024-09-03
7+
tags: [en, open_source, onnx, sentence_embeddings, xlm_roberta]
8+
task: Embeddings
9+
language: en
10+
edition: Spark NLP 5.5.0
11+
spark_version: 3.0
12+
supported: true
13+
engine: onnx
14+
annotator: XlmRoBertaSentenceEmbeddings
15+
article_header:
16+
type: cover
17+
use_language_switcher: "Python-Scala-Java"
18+
---
19+
20+
## Description
21+
22+
Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sent_xlm_align` is an English model originally trained by CZWin32768.
23+
24+
{:.btn-box}
25+
<button class="button button-orange" disabled>Live Demo</button>
26+
<button class="button button-orange" disabled>Open in Colab</button>
27+
[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sent_xlm_align_en_5.5.0_3.0_1725398318931.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
28+
[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sent_xlm_align_en_5.5.0_3.0_1725398318931.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
29+
30+
## How to use
31+
32+
33+
34+
<div class="tabs-box" markdown="1">
35+
{% include programmingLanguageSelectScalaPythonNLU.html %}
36+
```python
37+
38+
documentAssembler = DocumentAssembler() \
39+
.setInputCol("text") \
40+
.setOutputCol("document")
41+
42+
sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") \
43+
.setInputCols(["document"]) \
44+
.setOutputCol("sentence")
45+
46+
embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_align","en") \
47+
.setInputCols(["sentence"]) \
48+
.setOutputCol("embeddings")
49+
50+
pipeline = Pipeline().setStages([documentAssembler, sentenceDL, embeddings])
51+
data = spark.createDataFrame([["I love spark-nlp"]]).toDF("text")
52+
pipelineModel = pipeline.fit(data)
53+
pipelineDF = pipelineModel.transform(data)
54+
55+
```
56+
```scala
57+
58+
val documentAssembler = new DocumentAssembler()
59+
.setInputCol("text")
60+
.setOutputCol("document")
61+
62+
val sentenceDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")
63+
.setInputCols(Array("document"))
64+
.setOutputCol("sentence")
65+
66+
val embeddings = XlmRoBertaSentenceEmbeddings.pretrained("sent_xlm_align","en")
67+
.setInputCols(Array("sentence"))
68+
.setOutputCol("embeddings")
69+
70+
val pipeline = new Pipeline().setStages(Array(documentAssembler, sentenceDL, embeddings))
71+
val data = Seq("I love spark-nlp").toDF("text")
72+
val pipelineModel = pipeline.fit(data)
73+
val pipelineDF = pipelineModel.transform(data)
74+
75+
```
76+
</div>
77+
78+
{:.model-param}
79+
## Model Information
80+
81+
{:.table-model}
82+
|---|---|
83+
|Model Name:|sent_xlm_align|
84+
|Compatibility:|Spark NLP 5.5.0+|
85+
|License:|Open Source|
86+
|Edition:|Official|
87+
|Input Labels:|[sentence]|
88+
|Output Labels:|[embeddings]|
89+
|Language:|en|
90+
|Size:|659.6 MB|
91+
92+
## References
93+
94+
https://huggingface.co/CZWin32768/xlm-align

0 commit comments

Comments
 (0)