Summary
Hugging Face is a leading AI company and a prominent player in the field of natural language processing (NLP). It is best known for its open-source contributions, especially the transformers library, which provides access to a wide range of pre-trained models for a variety of NLP tasks. These models include state-of-the-art architectures such as BERT, GPT, T5, and many others, which have been pre-trained on large datasets and can be fine-tuned for specific tasks with minimal effort.
The transformers library is designed to make it easier for researchers and developers to implement powerful NLP solutions by providing easy-to-use tools for both fine-tuning and inference. Fine-tuning refers to the process of adapting a pre-trained model to a specific application, such as sentiment analysis, text classification, or summarization. Hugging Face's library abstracts much of the complexity of this process, allowing users to focus on their specific task rather than on the intricacies of training deep learning models.
Additionally, Hugging Face has created an ecosystem around their library, providing access to datasets, model hubs, and other tools that help accelerate the development of AI applications. This includes a model hub where users can find and share pre-trained models for a variety of tasks. Hugging Face has also expanded into areas such as speech recognition and computer vision, making their tools applicable to more than just text-based NLP tasks.
This notebook provides an introduction to Hugging Face. The Python functions and data files needed to run it are available via this link.
To locate a specific model on Hugging Face, we can use the model filter options:
task: Filters models based on the specified task.
sort: Determines the sorting criteria for the models.
direction: Specifies the sorting order; -1 for descending order.
limit: Restricts the number of models returned.
#!pip install huggingface_hub==0.23.5
from huggingface_hub import HfApi
api = HfApi()
D:\Learning\MyWebsite\FinalGithub\AlreadyPublihsed\blogs\HuggingFace\env_hugg_face\lib\site-packages\tqdm\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm
## shows list of available models
#api = HfApi()
#list(api.list_models())
from huggingface_hub import ModelFilter
models = api.list_models(
    filter=ModelFilter(task="summarization"),
    sort="downloads",
    direction=-1,
    limit=2
)
modelList = list(models)
modelList
D:\Learning\MyWebsite\FinalGithub\AlreadyPublihsed\blogs\HuggingFace\env_hugg_face\lib\site-packages\huggingface_hub\utils\endpoint_helpers.py:247: FutureWarning: 'ModelFilter' is deprecated and will be removed in huggingface_hub>=0.24. Please pass the filter parameters as keyword arguments directly to the `list_models` method. warnings.warn(
[ModelInfo(id='google-t5/t5-small', author='google-t5', sha='df1b051c49625cf57a3d0d8d3863ed4d13564fe4', created_at=datetime.datetime(2022, 3, 2, 23, 29, 4, tzinfo=datetime.timezone.utc), last_modified=datetime.datetime(2023, 6, 30, 2, 31, 26, tzinfo=datetime.timezone.utc), private=False, gated=False, disabled=None, downloads=7478752, likes=365, library_name='transformers', tags=['transformers', 'pytorch', 'tf', 'jax', 'rust', 'onnx', 'safetensors', 't5', 'text2text-generation', 'summarization', 'translation', 'en', 'fr', 'ro', 'de', 'multilingual', 'dataset:c4', 'arxiv:1805.12471', 'arxiv:1708.00055', 'arxiv:1704.05426', 'arxiv:1606.05250', 'arxiv:1808.09121', 'arxiv:1810.12885', 'arxiv:1905.10044', 'arxiv:1910.09700', 'license:apache-2.0', 'autotrain_compatible', 'text-generation-inference', 'endpoints_compatible', 'region:us'], pipeline_tag='translation', mask_token=None, card_data=None, widget_data=None, model_index=None, config=None, transformers_info=None, siblings=[RepoSibling(rfilename='.gitattributes', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='README.md', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='config.json', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='flax_model.msgpack', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='generation_config.json', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='model.safetensors', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='onnx/decoder_model.onnx', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='onnx/decoder_model_merged.onnx', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='onnx/decoder_model_merged_quantized.onnx', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='onnx/decoder_model_quantized.onnx', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='onnx/decoder_with_past_model.onnx', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='onnx/decoder_with_past_model_quantized.onnx', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='onnx/encoder_model.onnx', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='onnx/encoder_model_quantized.onnx', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='pytorch_model.bin', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='rust_model.ot', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='spiece.model', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='tf_model.h5', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='tokenizer.json', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='tokenizer_config.json', size=None, blob_id=None, lfs=None)], spaces=None, safetensors=None), ModelInfo(id='facebook/bart-large-cnn', author='facebook', sha='37f520fa929c961707657b28798b30c003dd100b', created_at=datetime.datetime(2022, 3, 2, 23, 29, 5, tzinfo=datetime.timezone.utc), last_modified=datetime.datetime(2024, 2, 13, 18, 2, 5, tzinfo=datetime.timezone.utc), private=False, gated=False, disabled=None, downloads=5060118, likes=1205, library_name='transformers', tags=['transformers', 'pytorch', 'tf', 'jax', 'rust', 'safetensors', 'bart', 'text2text-generation', 'summarization', 'en', 'dataset:cnn_dailymail', 'arxiv:1910.13461', 'license:mit', 'model-index', 'autotrain_compatible', 'endpoints_compatible', 'region:us'], pipeline_tag='summarization', mask_token=None, card_data=None, widget_data=None, model_index=None, config=None, transformers_info=None, siblings=[RepoSibling(rfilename='.gitattributes', size=None, blob_id=None, 
lfs=None), RepoSibling(rfilename='README.md', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='config.json', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='flax_model.msgpack', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='generation_config.json', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='generation_config_for_summarization.json', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='merges.txt', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='model.safetensors', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='pytorch_model.bin', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='rust_model.ot', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='tf_model.h5', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='tokenizer.json', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='vocab.json', size=None, blob_id=None, lfs=None)], spaces=None, safetensors=None)]
The AutoModel class from the transformers library is a convenient wrapper that automatically selects and imports the appropriate model architecture based on a given model ID.
# Import AutoModel
from transformers import AutoModel
#pip install torch torchvision torchaudio
Specify the model ID for text summarization:
modelId = 'google-t5/t5-small'
model = AutoModel.from_pretrained(modelId)
# Save the model to a local directory
model.save_pretrained(save_directory=f"models/{modelId}")
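The saved weights can later be reloaded from that local directory. A minimal sketch, reusing the models/{modelId} path from above:
# Reload the model from the local directory it was saved to
local_model = AutoModel.from_pretrained(f"models/{modelId}")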
Transformer Components
Transformers are advanced neural network models designed to learn context and sequence understanding effectively. The three primary components of a transformer model are:
By executing the command pip install datasets, you gain access to the Hugging Face Datasets library, which provides various functionalities for working with datasets. For detailed documentation, refer to the Hugging Face Datasets guide.
#!pip install datasets
#!pip install fsspec==2023.9.2
#!pip install -U datasets
from datasets import load_dataset_builder
data_builder = load_dataset_builder("imdb")
data_builder.info.description
''
data_builder.info.features
{'text': Value(dtype='string', id=None), 'label': ClassLabel(names=['neg', 'pos'], id=None)}
Downloading a Dataset
from datasets import load_dataset
data = load_dataset("imdb_urdu_reviews")
Split parameter
data = load_dataset("imdb", split="train")
Configuration parameter
#data = load_dataset("wikipedia", "20220301.en")
from datasets import load_dataset
ds = load_dataset("fka/awesome-chatgpt-prompts")
ds_train = load_dataset("fka/awesome-chatgpt-prompts", split="train")
ds_train
Dataset({ features: ['act', 'prompt'], num_rows: 170 })
print(ds_train.shape)
(170, 2)
import pprint
pprint.pprint(ds_train[1])
{'act': 'SEO Prompt', 'prompt': 'Using WebPilot, create an outline for an article that will be ' "2,000 words on the keyword 'Best SEO prompts' based on the top 10 " 'results from Google. Include every relevant heading possible. Keep ' 'the keyword density of the headings high. For each section of the ' 'outline, include the word count. Include FAQs section in the ' 'outline too, based on people also ask section from Google for the ' 'keyword. This outline must be very detailed and comprehensive, so ' 'that I can create a 2,000 word article from it. Generate a long ' 'list of LSI and NLP keywords related to my keyword. Also include ' 'any other words related to the keyword. Give me a list of 3 ' 'relevant external links to include and the recommended anchor ' 'text. Make sure they’re not competing articles. Split the outline ' 'into part 1 and part 2.'}
Benefits of Datasets
The datasets library is designed for enhanced usability in machine learning workflows.
There are two approaches to loading Hugging Face models:
The Auto classes in the transformers library provide general-purpose interfaces for various components (models, tokenizers, and configurations), making them flexible and convenient for machine learning workflows. These classes offer direct control, making them ideal for customizing and optimizing machine learning tasks.
from transformers import AutoModelForSequenceClassification
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight'] You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
from transformers import BertForSequenceClassification
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight'] You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
from transformers import BertModel
# Load pretrained BERT-base model with 12 encoder layers and 110M parameters
model_BERT_base = BertModel.from_pretrained('bert-base-uncased')
AutoTokenizer
Tokenizers are used to preprocess text-based input data for models. It is recommended to use the tokenizer associated with the model to ensure that the input is processed in the same manner as during the model's training. This is particularly important when using Auto classes. The pipeline functions handle much of this process automatically.
To retrieve the appropriate tokenizer for a model, use AutoTokenizer. As with other Auto classes, you can load it by passing the model name to from_pretrained. Here's how you can save it as tokenizer:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
This ensures that the tokenizer is compatible with the model you're working with.
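As a quick illustration (the sample sentence here is an arbitrary assumption), the tokenizer can be called directly on text to produce the input IDs the model expects:
# Tokenize a sample sentence and inspect the resulting tokens
encoded = tokenizer("Hugging Face makes NLP easier!")
print(encoded["input_ids"])
print(tokenizer.convert_ids_to_tokens(encoded["input_ids"]))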
The pipeline module provides a high-level abstraction compared to the more direct approach using Auto classes. It is imported from the transformers library and encapsulates all the task-specific steps required for inference, that is, making predictions on data. The pipeline is ideal for quickly applying pre-trained models to common machine learning tasks, especially when you're just getting started.
In other words, a pipeline is a wrapper that encapsulates the task-specific workflow for each machine learning task supported by the module, including:
SummarizationPipeline
TextClassificationPipeline
AudioClassificationPipeline
ImageSegmentationPipeline
QuestionAnsweringPipeline
These task-specific pipelines utilize Auto classes behind the scenes. They automatically download the appropriate models and apply the relevant processing, such as using tokenizers, based on the model name provided to the pipeline function.
from transformers import pipeline
classifier = pipeline("sentiment-analysis")
result = classifier("I love Hugging Face!")
result
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english). Using a pipeline without specifying a model name and revision in production is not recommended.
[{'label': 'POSITIVE', 'score': 0.9998641014099121}]
Key Difference
from transformers import (
TextClassificationPipeline,
SummarizationPipeline,
ImageSegmentationPipeline,
AudioClassificationPipeline
)
models = api.list_models(
    filter=ModelFilter(task="question-answering"),
    sort="downloads",
    direction=-1,
    limit=2
)
modelList = list(models)
modelList[0]
D:\Learning\MyWebsite\FinalGithub\AlreadyPublihsed\blogs\HuggingFace\env_hugg_face\lib\site-packages\huggingface_hub\utils\endpoint_helpers.py:247: FutureWarning: 'ModelFilter' is deprecated and will be removed in huggingface_hub>=0.24. Please pass the filter parameters as keyword arguments directly to the `list_models` method. warnings.warn(
ModelInfo(id='deepset/roberta-base-squad2', author='deepset', sha='adc3b06f79f797d1c575d5479d6f5efe54a9e3b4', created_at=datetime.datetime(2022, 3, 2, 23, 29, 5, tzinfo=datetime.timezone.utc), last_modified=datetime.datetime(2024, 9, 24, 15, 48, 47, tzinfo=datetime.timezone.utc), private=False, gated=False, disabled=None, downloads=1405258, likes=809, library_name='transformers', tags=['transformers', 'pytorch', 'tf', 'jax', 'rust', 'safetensors', 'roberta', 'question-answering', 'en', 'dataset:squad_v2', 'base_model:FacebookAI/roberta-base', 'base_model:finetune:FacebookAI/roberta-base', 'license:cc-by-4.0', 'model-index', 'endpoints_compatible', 'region:us'], pipeline_tag='question-answering', mask_token=None, card_data=None, widget_data=None, model_index=None, config=None, transformers_info=None, siblings=[RepoSibling(rfilename='.gitattributes', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='README.md', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='config.json', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='flax_model.msgpack', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='merges.txt', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='model.safetensors', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='pytorch_model.bin', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='rust_model.ot', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='special_tokens_map.json', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='tf_model.h5', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='tokenizer_config.json', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='vocab.json', size=None, blob_id=None, lfs=None)], spaces=None, safetensors=None)
models = api.list_models(
    filter=ModelFilter(task="question-answering"),
    sort="likes",
    direction=-1,
    limit=2
)
modelList = list(models)
modelList[0]
ModelInfo(id='deepset/roberta-base-squad2', author='deepset', sha='adc3b06f79f797d1c575d5479d6f5efe54a9e3b4', created_at=datetime.datetime(2022, 3, 2, 23, 29, 5, tzinfo=datetime.timezone.utc), last_modified=datetime.datetime(2024, 9, 24, 15, 48, 47, tzinfo=datetime.timezone.utc), private=False, gated=False, disabled=None, downloads=1405258, likes=809, library_name='transformers', tags=['transformers', 'pytorch', 'tf', 'jax', 'rust', 'safetensors', 'roberta', 'question-answering', 'en', 'dataset:squad_v2', 'base_model:FacebookAI/roberta-base', 'base_model:finetune:FacebookAI/roberta-base', 'license:cc-by-4.0', 'model-index', 'endpoints_compatible', 'region:us'], pipeline_tag='question-answering', mask_token=None, card_data=None, widget_data=None, model_index=None, config=None, transformers_info=None, siblings=[RepoSibling(rfilename='.gitattributes', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='README.md', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='config.json', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='flax_model.msgpack', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='merges.txt', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='model.safetensors', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='pytorch_model.bin', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='rust_model.ot', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='special_tokens_map.json', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='tf_model.h5', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='tokenizer_config.json', size=None, blob_id=None, lfs=None), RepoSibling(rfilename='vocab.json', size=None, blob_id=None, lfs=None)], spaces=None, safetensors=None)
We can specify the task, the model, or both when creating a pipeline, as shown below:
from transformers import BertForMaskedLM, pipeline
nlp = pipeline(task="fill-mask",
               model='bert-base-cased')
print(type(nlp.model))
preds = nlp(f"If you don’t know how to swim, you will {nlp.tokenizer.mask_token} in this lake.")
for p in preds:
    print(f"Token:{p['token_str']}. Score: {100*p['score']:,.2f}%")
BertForMaskedLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions. - If you're using `trust_remote_code=True`, you can get rid of this warning by loading the model with an auto class. See https://huggingface.co/docs/transformers/en/model_doc/auto#auto-classes - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception). - If you are not the owner of the model architecture class, please contact the model code owner to update it. Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight'] - This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). - This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
<class 'transformers.models.bert.modeling_bert.BertForMaskedLM'> Token:drown. Score: 72.56% Token:die. Score: 23.95% Token:be. Score: 0.63% Token:drowned. Score: 0.45% Token:fall. Score: 0.39%
If only one of these is defined, the pipeline module will use the default for the other, based on what is specified for the task or the model in the Hub.
nlp = pipeline(task="fill-mask")
No model was supplied, defaulted to distilbert/distilroberta-base and revision fb53ab8 (https://huggingface.co/distilbert/distilroberta-base). Using a pipeline without specifying a model name and revision in production is not recommended. Some weights of the model checkpoint at distilbert/distilroberta-base were not used when initializing RobertaForMaskedLM: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight'] - This IS expected if you are initializing RobertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). - This IS NOT expected if you are initializing RobertaForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
pipeline with Auto classes
The pipeline function can also utilize Auto Classes for the model, tokenizer, configuration, and more, providing additional flexibility if needed. When only the model is passed to the pipeline, it will automatically infer the task and the required tokenizer to complete the pipeline.
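For instance, a minimal sketch passing only a model identifier (reusing the sentiment checkpoint seen earlier); the pipeline infers the task and tokenizer from the model's configuration on the Hub:
# Passing only a model id; the task is inferred from the model's config
sentiment = pipeline(model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")
sentiment("Hugging Face pipelines are convenient!")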
from transformers import DistilBertForSequenceClassification
model_BERT_base = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')
#my_pipeline = pipeline(model=model_BERT_base)
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight'] You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
from transformers import DistilBertTokenizerFast
sequence_clf_model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased',)
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
pipe = pipeline("text-classification", model=sequence_clf_model, tokenizer=tokenizer)
pipe('Please add Here We Go by Dispatch to my road trip playlist')
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight'] You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[{'label': 'LABEL_0', 'score': 0.529563844203949}]
Clean Tokens with Tokenizer
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
print(tokenizer.backend_tokenizer.normalizer.normalize_str('Hey yo, how aré yoü?'))
hey yo, how are you?
Lastly, there are model-specific classes, like GPT2Tokenizer, designed for individual model tokenizers. These need to be imported from the transformers library, similar to AutoTokenizer. The tokenizer can then be used with the .tokenize() method, where the input text is passed to the function. The GPT2Tokenizer splits the input on whitespace and adds a special "Ġ" character to denote a preceding whitespace.
from transformers import GPT2Tokenizer
input = "Hey yo, how aré yoü?"
gpt_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt_tokens = gpt_tokenizer.tokenize(text=input)
gpt_tokens
['Hey', 'Ġyo', ',', 'Ġhow', 'Ġar', 'é', 'Ġyo', 'ü', '?']
In the output above, the "Ġ" character marks tokens that were preceded by a whitespace in the original input.
Images consist of small units called pixels, each representing a point in the image and containing information about its color or grayscale intensity. The number of pixels depends on the image's resolution. Just as words form the foundation of text analysis, the information within pixels serves as the basis for image analysis, processing, and machine learning tasks.
Pre-processing is crucial when using images for machine learning tasks. Two common pre-processing operations are cropping and resizing:
Cropping
Involves removing unwanted portions of the original image, focusing only on the relevant areas.
Resizing
Adjusts the dimensions of the image, either enlarging or shrinking it to specific height and width. However, this process should be done carefully, as it can impact the resolution.
To perform specific processing steps on an image, use the image_transforms utilities module in the transformers library.
from transformers import image_transforms
An image can be loaded into Python using Image.open() from the Pillow library. We won't cover this package in detail here, as images will be pre-loaded in the exercises.
from PIL import Image
original_image = Image.open("./data/image_1.jpg")
original_image
Image transformations usually take place on a NumPy array representation of the image. To convert an image to a NumPy array, pass the original image to the np.array() function and save it as an image array.
import numpy as np
image_array = np.array(original_image)
image_array
array([[[ 0, 82, 140], [ 0, 82, 140], [ 1, 83, 141], ..., [ 84, 157, 202], [ 92, 154, 205], [ 73, 125, 183]], [[ 0, 82, 140], [ 0, 82, 140], [ 1, 83, 141], ..., [ 58, 145, 188], [ 69, 145, 194], [ 61, 127, 179]], [[ 0, 82, 140], [ 0, 82, 140], [ 1, 83, 141], ..., [ 35, 138, 179], [ 49, 140, 185], [ 58, 136, 184]], ..., [[208, 206, 59], [205, 199, 63], [200, 191, 70], ..., [ 42, 55, 0], [ 32, 44, 0], [ 24, 34, 0]], [[164, 182, 0], [163, 178, 0], [159, 169, 0], ..., [ 58, 74, 9], [ 43, 57, 6], [ 35, 47, 11]], [[155, 176, 11], [160, 178, 18], [157, 172, 17], ..., [ 46, 62, 17], [ 30, 43, 15], [ 21, 31, 20]]], dtype=uint8)
Cropping an image
To crop an image, use the .center_crop() method from image_transforms. This method requires two parameters: the image in NumPy array format and the target size for the crop, which is a rectangle defined by height and width to extract from the center of the image, for instance a 1000 by 1000 square. If we wanted to classify the image based on eye color, we could use the center crop method to return a new image focusing only on the center of the original.
image_cropped = image_transforms.center_crop(
    image=image_array,
    size=(500, 500)
)
image_cropped
array([[[0, 0, 0], [0, 0, 0], [0, 0, 0], ..., [0, 0, 0], [0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0], [0, 0, 0], ..., [0, 0, 0], [0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0], [0, 0, 0], ..., [0, 0, 0], [0, 0, 0], [0, 0, 0]], ..., [[0, 0, 0], [0, 0, 0], [0, 0, 0], ..., [0, 0, 0], [0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0], [0, 0, 0], ..., [0, 0, 0], [0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0], [0, 0, 0], ..., [0, 0, 0], [0, 0, 0], [0, 0, 0]]], dtype=uint8)
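Resizing works in a similar way. A minimal sketch, assuming the image_transforms.resize utility and an arbitrary target size of 224 by 224:
from transformers import image_transforms
# Resize the NumPy image array to 224x224 (height, width); note this changes resolution
image_resized = image_transforms.resize(
    image=image_array,
    size=(224, 224)
)
image_resized.shape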
from transformers import pipeline
classifier = pipeline(task="image-classification",
                      model="google/vit-base-patch16-224")
Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.
Let's take the previous image as an example. The instantiated classifier pipeline can accept an image in three forms: an HTTP link (as a string), the local path to the image (as a string), or an image loaded using the Pillow (PIL) Python library. When using PIL, first import the Image class, then use the Image.open() method, passing in the local path to the image file (e.g., a JPEG). The advantage of PIL images is that the library provides standard functions for working with images, such as loading and saving. If there are multiple images, you can pass a list of links, local paths, or PIL images.
from PIL import Image
original_image = Image.open("./data/image_2.jpg")
original_image
classifier(original_image)
[{'label': 'reflex camera', 'score': 0.5019135475158691}, {'label': 'binoculars, field glasses, opera glasses', 'score': 0.43400973081588745}, {'label': 'lens cap, lens cover', 'score': 0.03254600986838341}, {'label': 'tripod', 'score': 0.008797748945653439}, {'label': "loupe, jeweler's loupe", 'score': 0.0027222465723752975}]
# top_k option limits number of labels to return
results = classifier(original_image, top_k=2)
print(results[0]['label'])
reflex camera
results
[{'label': 'reflex camera', 'score': 0.5019135475158691}, {'label': 'binoculars, field glasses, opera glasses', 'score': 0.43400973081588745}]
Document Question and Answering (Document Q&A) is a machine learning task that involves answering questions based on the content of a given document or text passage. For example, this could involve a memo or any other text-based document. This task requires both a document and a question as input. The document can be image-based or text-based, such as a research paper, contract, user manual, or something similar. The question is a text string that asks something specific about the document's content, like "What are the action steps?" The answer is then generated by analyzing the document's content, and it can be either a direct quote or a paraphrased response.
For example, if we have a memo and want to quickly understand its content, we could use a document question answering pipeline. When performing inference with the pipeline, the necessary processing steps are handled behind the scenes. To instantiate a pipeline for Document Question and Answering, pass "document-question-answering" as the task parameter. We'll save this pipeline as doc_que_anw.
For a document QA pipeline, the input is slightly different: both the document and the question need to be provided. Using the doc_que_anw pipeline object, pass in the document in the form of an image, saved here as doc_image, and the question to ask, such as "What is this memo about?", saved as question_text.
from transformers import pipeline
#pip install sentencepiece
#pip install protobuf
doc_que_anw = pipeline(
    task="document-question-answering",
    model="naver-clova-ix/donut-base-finetuned-docvqa")
Config of the encoder: <class 'transformers.models.donut.modeling_donut_swin.DonutSwinModel'> is overwritten by shared encoder config: DonutSwinConfig { "attention_probs_dropout_prob": 0.0, "depths": [ 2, 2, 14, 2 ], "drop_path_rate": 0.1, "embed_dim": 128, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 1024, "image_size": [ 2560, 1920 ], "initializer_range": 0.02, "layer_norm_eps": 1e-05, "mlp_ratio": 4.0, "model_type": "donut-swin", "num_channels": 3, "num_heads": [ 4, 8, 16, 32 ], "num_layers": 4, "patch_size": 4, "path_norm": true, "qkv_bias": true, "transformers_version": "4.46.3", "use_absolute_embeddings": false, "window_size": 10 } Config of the decoder: <class 'transformers.models.mbart.modeling_mbart.MBartForCausalLM'> is overwritten by shared decoder config: MBartConfig { "activation_dropout": 0.0, "activation_function": "gelu", "add_cross_attention": true, "add_final_layer_norm": true, "attention_dropout": 0.0, "bos_token_id": 0, "classifier_dropout": 0.0, "d_model": 1024, "decoder_attention_heads": 16, "decoder_ffn_dim": 4096, "decoder_layerdrop": 0.0, "decoder_layers": 4, "dropout": 0.1, "encoder_attention_heads": 16, "encoder_ffn_dim": 4096, "encoder_layerdrop": 0.0, "encoder_layers": 12, "eos_token_id": 2, "forced_eos_token_id": 2, "init_std": 0.02, "is_decoder": true, "is_encoder_decoder": false, "max_position_embeddings": 128, "model_type": "mbart", "num_hidden_layers": 12, "pad_token_id": 1, "scale_embedding": true, "transformers_version": "4.46.3", "use_cache": true, "vocab_size": 57532 }
The result is a dictionary containing: score, the probability of the answer; answer, the answer to the question; start, the index within the document of the first word of the answer; end, the index of the last word; and words, the indices within the document of each word in the answer. For example, a score of 0.789 with the answer "distribution" would indicate a 79 percent probability that the memo is about distribution, according to the model. Setting the parameter max_answer_len controls the maximum number of words an answer can have, which helps keep model responses concise.
pic_ = Image.open("./data/image_3.jpg")
pic_
doc_image = "./data/image_3.jpg"
question_text = "what is this image?"
result = doc_que_anw(doc_image, question_text, max_answer_len=25)
result
D:\Learning\MyWebsite\FinalGithub\AlreadyPublihsed\blogs\HuggingFace\env_hugg_face\lib\site-packages\transformers\generation\utils.py:1375: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation. warnings.warn( MBartModel is using MBartSdpaAttention, but `torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True` or `layer_head_mask` not None. Falling back to the manual attention implementation, but specifying the manual implementation will be required from Transformers version v5.0.0 onwards. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.
[{'answer': 'mil'}]
The answer is wrong. The Donut model is fine-tuned for document images, so it struggles with a natural photo like this; for such images, Visual Question Answering is a better fit.
Visual Question Answering (VQA) is similar to Document Question and Answering, but instead of using images of documents, it involves using actual images or videos of objects. For example, consider an image of elephants. Like Document Q&A, the pipeline task requires both an image and a related question, such as "What type of animal is in this picture?". This process is also similar to image classification, but in VQA, instead of simply relying on class labeling, we are asking the model a specific question about the content of the image.
visual_que_anw = pipeline(
    task="visual-question-answering",
    model="dandelin/vilt-b32-finetuned-vqa"
)
Now let's see whether visual_que_anw can correctly identify the parrot image:
result = visual_que_anw(
    image="./data/image_3.jpg",
    question="what is this image?")
result
[{'score': 0.9652839303016663, 'answer': 'parrot'}, {'score': 0.43784281611442566, 'answer': 'bird'}, {'score': 0.16257372498512268, 'answer': 'parrots'}, {'score': 0.0582440085709095, 'answer': 'birds'}, {'score': 0.025064218789339066, 'answer': 'zoo'}]
Another example:
image_1 = Image.open("./data/image_4.jpg")
image_1
result = visual_que_anw(
    image="./data/image_4.jpg",
    question="what is status of person, happy, frown, sad?")
result
[{'score': 0.61009681224823, 'answer': 'happy'}, {'score': 0.3348560035228729, 'answer': 'happiness'}, {'score': 0.0971321165561676, 'answer': 'smiling'}, {'score': 0.0769975483417511, 'answer': 'smile'}, {'score': 0.038463376462459564, 'answer': 'laughing'}]
Information Retrieval
Information retrieval is widely used across various industries, such as customer support (to find solutions to customer issues), legal compliance (to locate relevant regulatory information), and database searches. These tasks also offer significant benefits for individuals who are visually impaired.
Preprocessing for multi-modal tasks
Q&A tasks are multi-modal, meaning they involve multiple types of data, such as images and text. To ensure accurate performance, each data type should be processed with the appropriate methods, such as tokenizing the text inputs and resizing the images.
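As an illustration, here is a minimal sketch of multi-modal preprocessing using the ViltProcessor that pairs with the VQA model used above; the image path and question are assumptions for the example:
from PIL import Image
from transformers import ViltProcessor

processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
image = Image.open("./data/image_4.jpg")
question = "Is the person smiling?"
# The processor tokenizes the question and resizes/normalizes the image in one call
encoding = processor(images=image, text=question, return_tensors="pt")
print(encoding.keys())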
It's time to use Hugging Face with audio data, specifically audio classification and automatic speech recognition.
What is audio data?
Audio data refers to sound waves, which are continuous signals characterized by their length and amplitude. To represent audio digitally, these signals are converted into discrete values, creating a digital representation of the sound wave. This process involves sampling, a critical step that transforms continuous sound waves into discrete data, enabling machine learning algorithms to effectively process and analyze audio.
A higher sampling rate improves the resolution or quality of the digital representation by capturing more frequent samples. Speech models are commonly trained at a standard rate of 16kHz, with the specific sampling rate typically detailed in the model card.
To maintain consistency, all audio observations in our dataset must have the same sampling rate. Resampling ensures uniformity across audio files, which can enhance both consistency and computational efficiency.
Resampling Using Hugging Face
To resample audio files with Hugging Face, start by importing the Audio module from the datasets library, which facilitates extracting audio data from audio files. Suppose you are working with a dataset of songs. Use the cast_column method to modify a specific column in the dataset, in this case to change the sampling rate.
The method requires two parameters: the name of the column to cast, here "audio", and an instance of the Audio class, where you can define the desired sampling_rate parameter (e.g., 16,000 Hz). In most datasets, you can find the sampling rate of an audio file by accessing the relevant metadata: use the index of the file (e.g., 0), followed by the "audio" column, and then "sampling_rate".
#pip install librosa
#pip install soundfile
from datasets import Audio, Dataset
dataset = Dataset.from_dict({"audio": ['./data/sample_file.wav']})
songs = dataset.cast_column("audio", Audio(sampling_rate=16_000))
songs
Dataset({ features: ['audio'], num_rows: 1 })
import pprint
pprint.pprint(songs[0])
{'audio': {'array': array([-1.35140567e-07, 2.25905501e-06, -2.25680424e-06, ..., 6.85696193e-07, -4.86526346e-07, -4.92819936e-08]), 'path': './data/sample_file.wav', 'sampling_rate': 16000}}
Filtering Audio Files by Length
Filtering audio files based on their length is a common preprocessing step that improves dataset consistency and computational efficiency.
To filter a dataset by audio file length, you first need a method to calculate the duration. The librosa library provides the get_duration() function, which computes the duration of an audio file when given its path. For example, the file paths can be accessed from the "path" column in a dataset of songs.
Steps:
1. Calculate duration: use a for loop to iterate through the dataset, calling get_duration() for each file path, and collect the results in a list such as durations.
2. Add duration as a column: use the add_column() method, specifying the column name (e.g., "duration") and passing the list.
3. Filter the dataset: use the .filter() method to filter rows based on the new "duration" column, keeping rows where the duration (d) is less than 10 seconds. Rows meeting this condition are retained, while others are excluded.
This process streamlines the dataset for efficient model training and inference.
import librosa
duration = []
for row in songs["audio"]:
    duration.append(librosa.get_duration(path=row['path']))
songs = songs.add_column("duration", duration)
#songs = songs.filter(
# lambda d: d < 5.0, input_columns=["duration"]
#)
songs
Dataset({ features: ['audio', 'duration'], num_rows: 1 })
What is audio classification?
Now that we’ve covered processing, let’s explore audio classification. This task involves assigning one or more labels to audio clips based on their content. Common applications include tasks like language identification, such as determining the language spoken in an audio clip.
Using Hugging Face pipelines
To set up a pipeline for audio classification, specify "audio-classification" as the task parameter and provide an appropriate model. The models available in the Hugging Face model hub under audio classification are often fine-tuned for specific sub-tasks, such as classifying audio by music genre.
from transformers import pipeline
audio_classifier = pipeline(task="audio-classification",
                            model="superb/wav2vec2-base-superb-ks")
D:\Learning\MyWebsite\FinalGithub\AlreadyPublihsed\blogs\HuggingFace\env_hugg_face\lib\site-packages\transformers\configuration_utils.py:306: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`. warnings.warn(
genreClassifier = pipeline(task="audio-classification", trust_remote_code=True,
                           model="mtg-upf/discogs-maest-30s-pw-73e-ts")
Audio arrays can be accessed using the audio key in each row of the dataset, followed by the array key. To predict the label for an audio file, pass the audio array to the classifier. The output will be a list of dictionaries, each containing a score and a label. The score represents the probability of the label, which, in this case, could indicate a specific type of music.
songs['audio'][0]['array']
array([-1.35140567e-07, 2.25905501e-06, -2.25680424e-06, ..., 6.85696193e-07, -4.86526346e-07, -4.92819936e-08])
audio = songs['audio'][0]['array']
prediction = genreClassifier(audio)
print(prediction)
[{'score': 0.1338157057762146, 'label': 'Electronic---Ambient'}, {'score': 0.041763655841350555, 'label': 'Rock---Math Rock'}, {'score': 0.03641388565301895, 'label': 'Rock---Post Rock'}, {'score': 0.03486892580986023, 'label': 'Rock---Acoustic'}, {'score': 0.026921866461634636, 'label': 'Electronic---Downtempo'}]
Automatic Speech Recognition (ASR) involves transcribing audio recordings of spoken language into text.
Use Cases of ASR:
Digital Assistants: transcribing spoken commands for voice-controlled assistants.
Customer Service: transcribing customer calls for routing, analysis, and record keeping.
Accessibility: generating captions and transcripts for people who are deaf or hard of hearing.
ASR is a versatile technology with a broad range of applications across industries, enhancing efficiency, accessibility, and user experience.
Popular ASR Models on Hugging Face
Wav2Vec by Meta ("facebook/wav2vec2-base-960h")
Whisper by OpenAI ("openai/whisper-base")
The simplest way to create a pipeline for ASR is by setting the task to "automatic-speech-recognition", which defaults to Meta's Wav2Vec model. We'll use this pipeline to transcribe the audio file sample_file.wav. The pipeline can accept input in one of three formats: a string pointing to a local file or public URL, a numpy array representing the audio in its digital form, or a dictionary containing the sampling rate and raw audio data.
transcriber = pipeline(task="automatic-speech-recognition",
                       model="facebook/wav2vec2-base-960h")
Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['wav2vec2.masked_spec_embed'] You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
#pip install ffmpeg-python
# Path to audio file
transcriber("sample_file.wav")
# Numpy array (e.g., the raw audio array loaded from a dataset)
transcriber(numpy_audio_array)
# Dictionary with the sampling rate and the raw audio array
transcriber({"sampling_rate": 16_000, "raw": numpy_audio_array})
Resampling is needed here to ensure the input matches the sampling rate the model expects. The audio information within a dataset is typically stored in a dictionary named "audio", under the key "array". We pass the audio array into the transcriber to predict the sentence; in this example, the predicted text is "what game do you want to play". We can then compute the word error rate (WER) between the prediction and a reference transcript to evaluate the transcription quality.
# requires: pip install evaluate jiwer
from evaluate import load

sampling_rate = 16_000
dataset = dataset.cast_column("audio", Audio(sampling_rate=sampling_rate))
audio_input = dataset[0]['audio']['array']
prediction = transcriber(audio_input)['text']

# Compute the WER between the prediction and a reference transcript
# (reference is assumed to hold the ground-truth text for this clip)
wer = load("wer")
wer_score = wer.compute(predictions=[prediction],
                        references=[reference])
print(wer_score)
Fine-tuning optimizes a pre-trained model's performance for a specific task or dataset by making targeted adjustments. A pre-trained model is built using a large dataset from a particular domain and is designed to perform well on general tasks, like text classification. Think of it as tuning a car to perform better on specific terrains or to brake more effectively in wet conditions.
Why Fine-Tune a Pre-Trained Model?
Task Adaptation:
Fine-tuning allows the model to specialize in a specific task or domain, improving its effectiveness for particular use cases.
Reduced Training Time:
Leveraging the knowledge of a pre-trained model significantly reduces the time and computational resources required compared to training a model from scratch on a large dataset.
Steps for Fine-Tuning:
Let's walk through the steps of fine-tuning the "bert-base-cased" model, transitioning it from general English text prediction to specializing in classifying news article topics.
Choosing the Model
The first step in fine-tuning is selecting the model to use. We download the latest version of the chosen model using Hugging Face's Auto classes. For text classification tasks, the AutoModelForSequenceClassification class is ideal. Use the .from_pretrained() method with the model name to load it, and save the instance as model.
from transformers import AutoModelForSequenceClassification
model_name = "bert-base-cased"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight'] You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The next step is preparing the dataset. Tokenizing the input data is crucial to ensure optimal results. This can be accomplished using the AutoTokenizer class from the transformers library. Steps for tokenization:
1. Load the tokenizer associated with the model using the .from_pretrained() method.
2. Apply the tokenizer to the dataset with the .map() method, which applies a specified function to each row of the dataset.
This process converts the text data into the format required by the model for fine-tuning.
# Prepare the dataset
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
import pandas as pd
# load training data set
df_qa = pd.read_csv('./data/train-squad.csv')
from datasets import Dataset
qa_dataset = Dataset.from_pandas(df_qa)
qa_dataset
Dataset({ features: ['Unnamed: 0', 'context', 'question', 'id', 'answer_start', 'text'], num_rows: 86821 })
def preprocess(data):
    return tokenizer(data['question'], data['text'], truncation=True, padding=True, max_length=512)

qa_dataset = qa_dataset.map(preprocess, batched=True)
This explanation covers the process of fine-tuning a model using Hugging Face's Trainer and how to leverage it for your machine learning task. Here's a detailed breakdown:
Trainer Module
Hugging Face provides the Trainer module to simplify and automate the training loop. Fine-tuning a pre-trained model (like BERT or GPT) involves adapting it to a specific task (e.g., news article classification), and the Trainer handles much of the heavy lifting, such as managing the training loop, gradients, and evaluation, rather than requiring you to write that loop by hand.
Training Arguments (TrainingArguments)
Training arguments are defined with the TrainingArguments module. These arguments configure how the model will be trained, including output_dir, which specifies where the results of the fine-tuning will be saved (e.g., trained model weights and logs). In our case, this directory is ./results.
Training (train): the model learns from the training dataset (here called train), where it is exposed to labeled examples that help it understand the relationships between inputs (e.g., news articles) and outputs (e.g., labels like "science").
Evaluation (test): after each training step, the model is evaluated on a test dataset (here called test). This helps determine how well the model generalizes to unseen data and ensures it is not overfitting.
The Trainer therefore expects several inputs: the training dataset (train), which helps the model learn patterns, and the evaluation dataset (test), which checks how well the model generalizes to unseen data.
When you call trainer.train(), the training loop begins and the model is updated based on the training dataset. Once training is complete, you'll have a fine-tuned model, which can be saved with the .save_model() method by specifying a local path (e.g., ./results) so it can later be loaded for inference.
Example Walkthrough
Create a Trainer object and pass in your fine-tuning model, training arguments, training dataset (train), and evaluation dataset (test).
Call trainer.train() to start the training loop. The model learns from the train dataset and is evaluated on the test dataset.
Save the fine-tuned model with trainer.save_model().
Summary
The Trainer class automates the process of fine-tuning models by handling the training loop, evaluation, and saving of results to the specified output directory (output_dir). This simplifies working with pre-trained models, making it easier to apply them to specific tasks without having to manually handle the underlying training mechanics.
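Putting this together, here is a minimal sketch of the workflow described above; the tokenized train and test splits, the hyperparameter values, and the variable names are assumptions for illustration:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",            # where checkpoints and logs are saved
    num_train_epochs=3,                # assumed number of passes over the data
    per_device_train_batch_size=16,    # assumed batch size
)

trainer = Trainer(
    model=model,                       # the AutoModelForSequenceClassification loaded earlier
    args=training_args,
    train_dataset=train,               # assumed tokenized training split
    eval_dataset=test,                 # assumed tokenized evaluation split
)

trainer.train()                        # run the training loop
trainer.save_model("./results")        # save the fine-tuned model for later inference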