找到你要的答案

Q:How to fix the aspect of MySql Query in Python

Q:如何解决在Python MySQL查询方面

I am trying to write the following script to call around 0.3 million files from a remote server. It is generally working fine, but could work only upto 65 to 70 files. After this, it is just printing the file names and not processing anything. If anyone may kindly suggest what I am doing wrong?

import pymysql
import pymysql.cursors
import os
import win32com.client
from gensim.models import Word2Vec
import nltk
from nltk.corpus import stopwords
import pyPdf
from pyth.plugins.rtf15.reader import Rtf15Reader
from pyth.plugins.plaintext.writer import PlaintextWriter
import nltk
import zipfile, re
import time

#READING DOC FILE FROM REMOTE LOCATION
def readfilesq9(n):
    connection = pymysql.connect(host='xxx.xxx.x.xxx',
                             user='abcd',
                             passwd='pwd1',
                             db='rep_db',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)
    list1=[]
    with connection.cursor() as cursor:
        # Read a single record
        sql = "SELECT candidateid,cnd.FirstName, cnd.LastName,Concat('\\xxx.xxx.x.xxx\File\Cand_Res/',orgguid,'/',DATE_FORMAT(cnd.createddate,'%Y%m'),'/',candidateguid,'/',Resume) as ResumePath  from candidate cnd join mstorganization org on cnd.orgid = org.OrgId where Resume <> '' and Resume is not null order by cnd.modifieddate limit 100000"
        cursor.execute(sql)
        result = cursor.fetchall()
        #print result
        #list1=[]
        for i in result:
            try:
                #print i
                item_1=i.items()
                item_2=item_1[2][1]
                print item_2
                item_3=item_2.index("/")
                file1=item_2[item_2:]
                string1='\\\\xxx.xxx.x.xxx\\Resumes\\Cand_Res'
                file1e=file1.encode('ascii', 'ignore')
                urls=file1e.replace("/","\\")
                file_full=string1+urls
                time.sleep(1)
                #osp="C:\\Python27"
                os1=os.path.abspath(os.curdir)
                osp2=os.path.join(os1,file_full)
                print "Path1:",osp2
                file_name1=osp2
                print "Path:",file_name1
                #IDENTIFICATION OF FILE KIND
                #DOC CONVERSION
                if ".doc" in file_name1:
                    #EXTRACTING ONLY .DOC FILES
                    if ".docx" not in file_name1:
                        #print "It is A Doc file$$:",file_name
                        try:
                            doc = win32com.client.GetObject(file_name1)
                            text = doc.Range().Text
                            text1=text.encode('ascii','ignore')
                            text_word=text1.split()
                            #print "The Text Word is:",text_word
                            #print "Text for Document File Is:",text1
                            list1.append(text_word)
                            #print "List for Doc File Is:",list3
                            #print "It is a Doc file"
                        except:
                            print "DOC ISSUE"
                    #EXTRACTING ONLY .DOCX FILES
                    elif ".docx" in file_name1:
                        #print "It is DOCX FILE:",file_name
                        docx1=zipfile.ZipFile(file_name1)
                        content = docx1.read('word/document.xml').decode('utf-8')
                        cleaned = re.sub('<(.|\n)*?>','',content).encode('ascii','ignore')
                        cleaned_word=cleaned.split()
                        #print "The Cleaned Document Is:",cleaned
                        list1.append(cleaned_word)
                        #print "List for DocX file Is:",list4
                    else:
                        print "NONE1"
                else:
                    print "It is not a Doc file" 


            except:
                print "OOPS1"

I am using Python2.7.6 on Enthought Canopy. It is not my default Python. My default Python is in location,"C:\Python27". I am using MySql and Windows 7 Professional. Apology for any indentation error.

I am trying to write the following script to call around 0.3 million files from a remote server. It is generally working fine, but could work only upto 65 to 70 files. After this, it is just printing the file names and not processing anything. If anyone may kindly suggest what I am doing wrong?

import pymysql
import pymysql.cursors
import os
import win32com.client
from gensim.models import Word2Vec
import nltk
from nltk.corpus import stopwords
import pyPdf
from pyth.plugins.rtf15.reader import Rtf15Reader
from pyth.plugins.plaintext.writer import PlaintextWriter
import nltk
import zipfile, re
import time

#READING DOC FILE FROM REMOTE LOCATION
def readfilesq9(n):
    connection = pymysql.connect(host='xxx.xxx.x.xxx',
                             user='abcd',
                             passwd='pwd1',
                             db='rep_db',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)
    list1=[]
    with connection.cursor() as cursor:
        # Read a single record
        sql = "SELECT candidateid,cnd.FirstName, cnd.LastName,Concat('\\xxx.xxx.x.xxx\File\Cand_Res/',orgguid,'/',DATE_FORMAT(cnd.createddate,'%Y%m'),'/',candidateguid,'/',Resume) as ResumePath  from candidate cnd join mstorganization org on cnd.orgid = org.OrgId where Resume <> '' and Resume is not null order by cnd.modifieddate limit 100000"
        cursor.execute(sql)
        result = cursor.fetchall()
        #print result
        #list1=[]
        for i in result:
            try:
                #print i
                item_1=i.items()
                item_2=item_1[2][1]
                print item_2
                item_3=item_2.index("/")
                file1=item_2[item_2:]
                string1='\\\\xxx.xxx.x.xxx\\Resumes\\Cand_Res'
                file1e=file1.encode('ascii', 'ignore')
                urls=file1e.replace("/","\\")
                file_full=string1+urls
                time.sleep(1)
                #osp="C:\\Python27"
                os1=os.path.abspath(os.curdir)
                osp2=os.path.join(os1,file_full)
                print "Path1:",osp2
                file_name1=osp2
                print "Path:",file_name1
                #IDENTIFICATION OF FILE KIND
                #DOC CONVERSION
                if ".doc" in file_name1:
                    #EXTRACTING ONLY .DOC FILES
                    if ".docx" not in file_name1:
                        #print "It is A Doc file$$:",file_name
                        try:
                            doc = win32com.client.GetObject(file_name1)
                            text = doc.Range().Text
                            text1=text.encode('ascii','ignore')
                            text_word=text1.split()
                            #print "The Text Word is:",text_word
                            #print "Text for Document File Is:",text1
                            list1.append(text_word)
                            #print "List for Doc File Is:",list3
                            #print "It is a Doc file"
                        except:
                            print "DOC ISSUE"
                    #EXTRACTING ONLY .DOCX FILES
                    elif ".docx" in file_name1:
                        #print "It is DOCX FILE:",file_name
                        docx1=zipfile.ZipFile(file_name1)
                        content = docx1.read('word/document.xml').decode('utf-8')
                        cleaned = re.sub('<(.|\n)*?>','',content).encode('ascii','ignore')
                        cleaned_word=cleaned.split()
                        #print "The Cleaned Document Is:",cleaned
                        list1.append(cleaned_word)
                        #print "List for DocX file Is:",list4
                    else:
                        print "NONE1"
                else:
                    print "It is not a Doc file" 


            except:
                print "OOPS1"

I am using Python2.7.6 on Enthought Canopy. It is not my default Python. My default Python is in location,"C:\Python27". I am using MySql and Windows 7 Professional. Apology for any indentation error.

answer1: 回答1:

Try using cursor.fetchone() in a while statement instead of cursor.fetchall(), so you can fetch one row at a time and break when there are no more results. Without looking at the documentation for pymysql, I assume cursor.fetchone() will return None when there are no more results. If that does not work run your query directly in MySQL and verify the output.

Try using cursor.fetchone() in a while statement instead of cursor.fetchall(), so you can fetch one row at a time and break when there are no more results. Without looking at the documentation for pymysql, I assume cursor.fetchone() will return None when there are no more results. If that does not work run your query directly in MySQL and verify the output.

python  mysql  windows  python-2.7