Azure Blob存储损坏的文件

eanckbw9  于 2023-10-22  发布在  其他
关注(0)|答案(1)|浏览(99)

我正在尝试将文件存储到 Azure Blob 存储中。我已经仔细设置了内容类型,也注意了文件扩展名,但上传后的文件仍然无法预览,下载下来也打不开,看起来像是损坏的文件。

from azure.storage.blob import BlobServiceClient,ContentSettings
import requests
import uuid
import re
import datetime
# Placeholder value: replace with the storage account's real connection string.
connection_string = "CONNECTION STRING"



# Account-level client built from the connection string above.
blob_service_client = BlobServiceClient.from_connection_string(connection_string)

# Client for the "test-data" container; main() hands it to copy_image_to_blob.
container_client = blob_service_client.get_container_client("test-data")

def getFileType(file_id):
    """Return the MIME type of a Google Drive file from the v3 metadata endpoint.

    Args:
        file_id: Google Drive file ID.

    Returns:
        The ``mimeType`` string from the metadata response, or ``None`` when
        the request fails, the status is not 200, the body is not valid JSON,
        or the field is missing.
    """
    api_key = 'GOOGLE_DRIVE_API_KEY'
    url = f'https://www.googleapis.com/drive/v3/files/{file_id}'
    try:
        # params= URL-encodes the key; without a timeout requests can block
        # indefinitely on a stalled connection.
        response = requests.get(url, params={'key': api_key}, timeout=30)
        if response.status_code != 200:
            return None
        return response.json().get('mimeType')
    except (requests.RequestException, ValueError) as e:
        # ValueError covers an invalid JSON body on older requests versions.
        print(f"An error occurred: {e}")
        return None

def copy_image_to_blob(image_url, container_client):
    """Copy a publicly shared Google Drive file into Azure Blob Storage.

    A ``.../file/d/<id>/...`` share link returns Drive's viewer page rather
    than the raw file, so uploading that response body yields a blob that
    cannot be opened. The file ID is therefore extracted from the link and the
    bytes are fetched from Drive's direct-download URL instead.

    Args:
        image_url: Google Drive share link containing ``/file/d/<file_id>``.
        container_client: Azure container client that receives the blob.

    Returns:
        The uploaded blob's URL; ``None`` when the download does not answer
        with HTTP 200; or the string ``"Invalid urls"`` when the link carries
        no file ID or any step raises.
    """
    try:
        id_match = re.search(r'/file/d/([^/]+)', image_url)
        if id_match is None:
            return "Invalid urls"
        file_id = id_match.group(1)

        # Direct-download endpoint: serves the file bytes, not the viewer page.
        # NOTE(review): for very large files Drive may still answer with an
        # HTML confirmation page - confirm against the files you expect.
        response = requests.get(
            'https://drive.google.com/uc',
            params={'export': 'download', 'id': file_id},
            timeout=60,
        )
        if response.status_code != 200:
            return None

        # A single metadata lookup drives both the extension and Content-Type
        # (previously the same HTTP request was issued twice).
        mime_type = getFileType(file_id)
        extension = ".jpg" if mime_type == "image/jpeg" else ".pdf"

        blob_name = datetime.datetime.now().strftime("%d%m%Y%H%M%S%f")
        blob_client = container_client.get_blob_client(blob_name + extension)
        blob_client.upload_blob(
            response.content,
            content_settings=ContentSettings(content_type=mime_type),
        )
        return blob_client.url
    except Exception as e:
        # Best-effort boundary: callers expect a string per URL, never a raise.
        print(f"An error occurred: {e}")
        return "Invalid urls"

def main(urls):
    """Upload every link in *urls* and return the per-link results in order.

    Each element of the returned list is whatever ``copy_image_to_blob``
    returned for the link at the same position.
    """
    return [copy_image_to_blob(link, container_client) for link in urls]



if __name__ == "__main__":
    # Four placeholder entries; substitute real public Google Drive share links.
    urls = ['google_drive_public_url'] * 4
    print(main(urls=urls))

我使用正则表达式从文件 URL 中提取文件 ID。我能够拿到 Blob 存储中文件的 URL,但这些文件似乎已损坏(下载后无法查看)。请给出一些修改建议,让它能够正常工作。另外,我不想先把图片下载到本地、再存入 Blob 存储。是的,我提供的 Google Drive 链接是公开的,并且具备所需的全部权限。

rm5edbpk

rm5edbpk1#

我在自己的环境中复现了这个问题,并得到了预期的结果:
如果您使用 API Key 访问文件,得到的只是文件的元数据(如下所示),而不是实际的文件内容:

{
  "kind": "drive#file",
  "id": "1fwKFyH85T93UjeD",
  "name": "test.jpg",
  "mimeType": "image/jpeg"
}

The code that worked for me is:

import requests
import datetime
from azure.storage.blob import BlobServiceClient, ContentSettings
import azure.functions as func

def copy_and_send(f_id, container_client):
    """Copy a Google Drive file into Azure Blob Storage and return the blob URL.

    The MIME type is read from the Drive v3 metadata endpoint; its subtype
    becomes the blob's file extension and the full type is stored as the
    blob's Content-Type.

    Args:
        f_id: Google Drive file ID.
        container_client: Azure container client that receives the blob.

    Returns:
        The uploaded blob's URL, or ``None`` when the metadata has no usable
        ``mimeType`` or the download does not answer with HTTP 200.
    """
    # NOTE(review): the API key is embedded in source; consider moving it to
    # configuration before deploying.
    url1 = f'https://www.googleapis.com/drive/v3/files/{f_id}?key=AIzaSyBec0Z6AbJIM'
    result = requests.get(url1, timeout=30)
    mime_type1 = result.json().get('mimeType')
    if not mime_type1:
        # Error responses carry no mimeType; previously this raised AttributeError.
        return None

    con_type = mime_type1.split("/")[-1]
    if not con_type:
        return None

    blob_name = datetime.datetime.now().strftime("%d%m%Y%H%M%S%f")
    blob_client = container_client.get_blob_client(blob_name + "." + con_type)

    rith_url = f'https://drive.google.com/uc?id={f_id}'
    res = requests.get(rith_url, timeout=60)
    if res.status_code != 200:
        # Do not store an error page as if it were the file.
        return None

    # Content-Type must be the full MIME type ("image/jpeg"), not just the
    # subtype ("jpeg"), or clients cannot preview the blob.
    consettings = ContentSettings(content_type=mime_type1)
    blob_client.upload_blob(res.content, content_settings=consettings)
    return blob_client.url
            

def main(req: func.HttpRequest) -> func.HttpResponse:
    """Azure Functions handler: copy one fixed Drive file to the "rithwik" container.

    ``req`` is accepted but not read; the file ID and the storage connection
    string are hard-coded below. Responds with status 200 and a message that
    embeds whatever ``copy_and_send`` returned.
    """
    drive_file_id = '1fwKFyH85jeD'
    storage_connection = "DefaultEndpointsProtocol=https;AccountName=rithwik;AccountKey=pPBW9jWu77Sqp5F+JOWkA==;EndpointSuffix=core.windows.net"

    service_client = BlobServiceClient.from_connection_string(storage_connection)
    target_container = service_client.get_container_client("rithwik")

    uploaded_url = copy_and_send(drive_file_id, target_container)
    message = f"Hello Rithwik Blob URL is : {uploaded_url} and the image is send to Storage Account"
    return func.HttpResponse(message, status_code=200)

Output:

相关问题